xref: /netbsd-src/external/bsd/zstd/dist/lib/compress/zstd_opt.c (revision 3117ece4fc4a4ca4489ba793710b60b0d26bab6c)
1*3117ece4Schristos /*
2*3117ece4Schristos  * Copyright (c) Meta Platforms, Inc. and affiliates.
3*3117ece4Schristos  * All rights reserved.
4*3117ece4Schristos  *
5*3117ece4Schristos  * This source code is licensed under both the BSD-style license (found in the
6*3117ece4Schristos  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7*3117ece4Schristos  * in the COPYING file in the root directory of this source tree).
8*3117ece4Schristos  * You may select, at your option, one of the above-listed licenses.
9*3117ece4Schristos  */
10*3117ece4Schristos 
11*3117ece4Schristos #include "zstd_compress_internal.h"
12*3117ece4Schristos #include "hist.h"
13*3117ece4Schristos #include "zstd_opt.h"
14*3117ece4Schristos 
15*3117ece4Schristos #if !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR) \
16*3117ece4Schristos  || !defined(ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR) \
17*3117ece4Schristos  || !defined(ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR)
18*3117ece4Schristos 
19*3117ece4Schristos #define ZSTD_LITFREQ_ADD    2   /* scaling factor for litFreq, so that frequencies adapt faster to new stats */
20*3117ece4Schristos #define ZSTD_MAX_PRICE     (1<<30)
21*3117ece4Schristos 
22*3117ece4Schristos #define ZSTD_PREDEF_THRESHOLD 8   /* if srcSize <= ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */
23*3117ece4Schristos 
24*3117ece4Schristos 
25*3117ece4Schristos /*-*************************************
26*3117ece4Schristos *  Price functions for optimal parser
27*3117ece4Schristos ***************************************/
28*3117ece4Schristos 
29*3117ece4Schristos #if 0    /* approximation at bit level (for tests) */
30*3117ece4Schristos #  define BITCOST_ACCURACY 0
31*3117ece4Schristos #  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
32*3117ece4Schristos #  define WEIGHT(stat, opt) ((void)(opt), ZSTD_bitWeight(stat))
33*3117ece4Schristos #elif 0  /* fractional bit accuracy (for tests) */
34*3117ece4Schristos #  define BITCOST_ACCURACY 8
35*3117ece4Schristos #  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
36*3117ece4Schristos #  define WEIGHT(stat,opt) ((void)(opt), ZSTD_fracWeight(stat))
37*3117ece4Schristos #else    /* opt==approx, ultra==accurate */
38*3117ece4Schristos #  define BITCOST_ACCURACY 8
39*3117ece4Schristos #  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
40*3117ece4Schristos #  define WEIGHT(stat,opt) ((opt) ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat))
41*3117ece4Schristos #endif
42*3117ece4Schristos 
43*3117ece4Schristos /* ZSTD_bitWeight() :
44*3117ece4Schristos  * provide estimated "cost" of a stat in full bits only */
45*3117ece4Schristos MEM_STATIC U32 ZSTD_bitWeight(U32 stat)
46*3117ece4Schristos {
47*3117ece4Schristos     return (ZSTD_highbit32(stat+1) * BITCOST_MULTIPLIER);
48*3117ece4Schristos }
49*3117ece4Schristos 
50*3117ece4Schristos /* ZSTD_fracWeight() :
51*3117ece4Schristos  * provide fractional-bit "cost" of a stat,
52*3117ece4Schristos  * using linear interpolation approximation */
53*3117ece4Schristos MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat)
54*3117ece4Schristos {
55*3117ece4Schristos     U32 const stat = rawStat + 1;
56*3117ece4Schristos     U32 const hb = ZSTD_highbit32(stat);
57*3117ece4Schristos     U32 const BWeight = hb * BITCOST_MULTIPLIER;
58*3117ece4Schristos     /* FWeight ("Fractional Weight") is effectively
59*3117ece4Schristos      * a value between 1 and 2,
60*3117ece4Schristos      * expressed in fixed-point arithmetic */
61*3117ece4Schristos     U32 const FWeight = (stat << BITCOST_ACCURACY) >> hb;
62*3117ece4Schristos     U32 const weight = BWeight + FWeight;
63*3117ece4Schristos     assert(hb + BITCOST_ACCURACY < 31);
64*3117ece4Schristos     return weight;
65*3117ece4Schristos }
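
/* Illustrative note : with the default BITCOST_ACCURACY of 8,
 * prices are expressed in 1/256th of a bit.
 * Example : rawStat=5 => stat=6, hb=2,
 *           BWeight = 2*256 = 512, FWeight = (6<<8)>>2 = 384,
 *           weight  = 896 = 3.5 * BITCOST_MULTIPLIER.
 * weight/BITCOST_MULTIPLIER always lies in [hb+1, hb+2),
 * a piecewise-linear approximation of log2(stat) + 1 ;
 * since prices are formed as differences of WEIGHT() values,
 * the constant +1 offset largely cancels out. */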
66*3117ece4Schristos 
67*3117ece4Schristos #if (DEBUGLEVEL>=2)
68*3117ece4Schristos /* debugging function,
69*3117ece4Schristos  * @return price in bytes as fractional value
70*3117ece4Schristos  * for debug messages only */
71*3117ece4Schristos MEM_STATIC double ZSTD_fCost(int price)
72*3117ece4Schristos {
73*3117ece4Schristos     return (double)price / (BITCOST_MULTIPLIER*8);
74*3117ece4Schristos }
75*3117ece4Schristos #endif
76*3117ece4Schristos 
77*3117ece4Schristos static int ZSTD_compressedLiterals(optState_t const* const optPtr)
78*3117ece4Schristos {
79*3117ece4Schristos     return optPtr->literalCompressionMode != ZSTD_ps_disable;
80*3117ece4Schristos }
81*3117ece4Schristos 
82*3117ece4Schristos static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
83*3117ece4Schristos {
84*3117ece4Schristos     if (ZSTD_compressedLiterals(optPtr))
85*3117ece4Schristos         optPtr->litSumBasePrice = WEIGHT(optPtr->litSum, optLevel);
86*3117ece4Schristos     optPtr->litLengthSumBasePrice = WEIGHT(optPtr->litLengthSum, optLevel);
87*3117ece4Schristos     optPtr->matchLengthSumBasePrice = WEIGHT(optPtr->matchLengthSum, optLevel);
88*3117ece4Schristos     optPtr->offCodeSumBasePrice = WEIGHT(optPtr->offCodeSum, optLevel);
89*3117ece4Schristos }
90*3117ece4Schristos 
91*3117ece4Schristos 
92*3117ece4Schristos static U32 sum_u32(const unsigned table[], size_t nbElts)
93*3117ece4Schristos {
94*3117ece4Schristos     size_t n;
95*3117ece4Schristos     U32 total = 0;
96*3117ece4Schristos     for (n=0; n<nbElts; n++) {
97*3117ece4Schristos         total += table[n];
98*3117ece4Schristos     }
99*3117ece4Schristos     return total;
100*3117ece4Schristos }
101*3117ece4Schristos 
102*3117ece4Schristos typedef enum { base_0possible=0, base_1guaranteed=1 } base_directive_e;
103*3117ece4Schristos 
104*3117ece4Schristos static U32
105*3117ece4Schristos ZSTD_downscaleStats(unsigned* table, U32 lastEltIndex, U32 shift, base_directive_e base1)
106*3117ece4Schristos {
107*3117ece4Schristos     U32 s, sum=0;
108*3117ece4Schristos     DEBUGLOG(5, "ZSTD_downscaleStats (nbElts=%u, shift=%u)",
109*3117ece4Schristos             (unsigned)lastEltIndex+1, (unsigned)shift );
110*3117ece4Schristos     assert(shift < 30);
111*3117ece4Schristos     for (s=0; s<lastEltIndex+1; s++) {
112*3117ece4Schristos         unsigned const base = base1 ? 1 : (table[s]>0);
113*3117ece4Schristos         unsigned const newStat = base + (table[s] >> shift);
114*3117ece4Schristos         sum += newStat;
115*3117ece4Schristos         table[s] = newStat;
116*3117ece4Schristos     }
117*3117ece4Schristos     return sum;
118*3117ece4Schristos }
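
/* Illustrative note : with shift=2 and base_1guaranteed,
 * a count of 9 becomes 1 + (9>>2) = 3, and a count of 0 becomes 1,
 * so every symbol keeps a non-zero frequency (hence a finite price).
 * With base_0possible, a count of 0 stays 0. */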
119*3117ece4Schristos 
120*3117ece4Schristos /* ZSTD_scaleStats() :
121*3117ece4Schristos  * reduce all elt frequencies in table if sum too large
122*3117ece4Schristos  * return the resulting sum of elements */
123*3117ece4Schristos static U32 ZSTD_scaleStats(unsigned* table, U32 lastEltIndex, U32 logTarget)
124*3117ece4Schristos {
125*3117ece4Schristos     U32 const prevsum = sum_u32(table, lastEltIndex+1);
126*3117ece4Schristos     U32 const factor = prevsum >> logTarget;
127*3117ece4Schristos     DEBUGLOG(5, "ZSTD_scaleStats (nbElts=%u, target=%u)", (unsigned)lastEltIndex+1, (unsigned)logTarget);
128*3117ece4Schristos     assert(logTarget < 30);
129*3117ece4Schristos     if (factor <= 1) return prevsum;
130*3117ece4Schristos     return ZSTD_downscaleStats(table, lastEltIndex, ZSTD_highbit32(factor), base_1guaranteed);
131*3117ece4Schristos }
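
/* Illustrative note : with logTarget=11 and prevsum=9000,
 * factor = 9000>>11 = 4, so counts are shifted down by
 * ZSTD_highbit32(4) = 2 bits (divided by ~4),
 * bringing the new sum close to 2^11.
 * When prevsum < 2^(logTarget+1), factor <= 1 and the table is left as-is. */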
132*3117ece4Schristos 
133*3117ece4Schristos /* ZSTD_rescaleFreqs() :
134*3117ece4Schristos  * if first block (detected by optPtr->litLengthSum == 0) : init statistics
135*3117ece4Schristos  *    take hints from dictionary if there is one
136*3117ece4Schristos  *    and init from zero if there is none,
137*3117ece4Schristos  *    using src for literals stats, and baseline stats for sequence symbols
138*3117ece4Schristos  * otherwise downscale existing stats, to be used as seed for next block.
139*3117ece4Schristos  */
140*3117ece4Schristos static void
141*3117ece4Schristos ZSTD_rescaleFreqs(optState_t* const optPtr,
142*3117ece4Schristos             const BYTE* const src, size_t const srcSize,
143*3117ece4Schristos                   int const optLevel)
144*3117ece4Schristos {
145*3117ece4Schristos     int const compressedLiterals = ZSTD_compressedLiterals(optPtr);
146*3117ece4Schristos     DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize);
147*3117ece4Schristos     optPtr->priceType = zop_dynamic;
148*3117ece4Schristos 
149*3117ece4Schristos     if (optPtr->litLengthSum == 0) {  /* no literals stats collected -> first block assumed -> init */
150*3117ece4Schristos 
151*3117ece4Schristos         /* heuristic: use pre-defined stats for too small inputs */
152*3117ece4Schristos         if (srcSize <= ZSTD_PREDEF_THRESHOLD) {
153*3117ece4Schristos             DEBUGLOG(5, "srcSize <= %i : use predefined stats", ZSTD_PREDEF_THRESHOLD);
154*3117ece4Schristos             optPtr->priceType = zop_predef;
155*3117ece4Schristos         }
156*3117ece4Schristos 
157*3117ece4Schristos         assert(optPtr->symbolCosts != NULL);
158*3117ece4Schristos         if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) {
159*3117ece4Schristos 
160*3117ece4Schristos             /* huffman stats covering the full value set : table presumed generated by dictionary */
161*3117ece4Schristos             optPtr->priceType = zop_dynamic;
162*3117ece4Schristos 
163*3117ece4Schristos             if (compressedLiterals) {
164*3117ece4Schristos                 /* generate literals statistics from huffman table */
165*3117ece4Schristos                 unsigned lit;
166*3117ece4Schristos                 assert(optPtr->litFreq != NULL);
167*3117ece4Schristos                 optPtr->litSum = 0;
168*3117ece4Schristos                 for (lit=0; lit<=MaxLit; lit++) {
169*3117ece4Schristos                     U32 const scaleLog = 11;   /* scale to 2K */
170*3117ece4Schristos                     U32 const bitCost = HUF_getNbBitsFromCTable(optPtr->symbolCosts->huf.CTable, lit);
171*3117ece4Schristos                     assert(bitCost <= scaleLog);
172*3117ece4Schristos                     optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
173*3117ece4Schristos                     optPtr->litSum += optPtr->litFreq[lit];
174*3117ece4Schristos             }   }
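            /* note : a symbol costing `bitCost` bits in the dictionary table
             * receives a pseudo-frequency of 2^(scaleLog-bitCost)
             * (e.g. scaleLog=11, bitCost=3 => freq=256).
             * Relative to a total near 2^scaleLog, WEIGHT() then reproduces
             * a price of roughly `bitCost` bits for that symbol.
             * The same conversion is applied to the FSE tables below. */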
175*3117ece4Schristos 
176*3117ece4Schristos             {   unsigned ll;
177*3117ece4Schristos                 FSE_CState_t llstate;
178*3117ece4Schristos                 FSE_initCState(&llstate, optPtr->symbolCosts->fse.litlengthCTable);
179*3117ece4Schristos                 optPtr->litLengthSum = 0;
180*3117ece4Schristos                 for (ll=0; ll<=MaxLL; ll++) {
181*3117ece4Schristos                     U32 const scaleLog = 10;   /* scale to 1K */
182*3117ece4Schristos                     U32 const bitCost = FSE_getMaxNbBits(llstate.symbolTT, ll);
183*3117ece4Schristos                     assert(bitCost < scaleLog);
184*3117ece4Schristos                     optPtr->litLengthFreq[ll] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
185*3117ece4Schristos                     optPtr->litLengthSum += optPtr->litLengthFreq[ll];
186*3117ece4Schristos             }   }
187*3117ece4Schristos 
188*3117ece4Schristos             {   unsigned ml;
189*3117ece4Schristos                 FSE_CState_t mlstate;
190*3117ece4Schristos                 FSE_initCState(&mlstate, optPtr->symbolCosts->fse.matchlengthCTable);
191*3117ece4Schristos                 optPtr->matchLengthSum = 0;
192*3117ece4Schristos                 for (ml=0; ml<=MaxML; ml++) {
193*3117ece4Schristos                     U32 const scaleLog = 10;
194*3117ece4Schristos                     U32 const bitCost = FSE_getMaxNbBits(mlstate.symbolTT, ml);
195*3117ece4Schristos                     assert(bitCost < scaleLog);
196*3117ece4Schristos                     optPtr->matchLengthFreq[ml] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
197*3117ece4Schristos                     optPtr->matchLengthSum += optPtr->matchLengthFreq[ml];
198*3117ece4Schristos             }   }
199*3117ece4Schristos 
200*3117ece4Schristos             {   unsigned of;
201*3117ece4Schristos                 FSE_CState_t ofstate;
202*3117ece4Schristos                 FSE_initCState(&ofstate, optPtr->symbolCosts->fse.offcodeCTable);
203*3117ece4Schristos                 optPtr->offCodeSum = 0;
204*3117ece4Schristos                 for (of=0; of<=MaxOff; of++) {
205*3117ece4Schristos                     U32 const scaleLog = 10;
206*3117ece4Schristos                     U32 const bitCost = FSE_getMaxNbBits(ofstate.symbolTT, of);
207*3117ece4Schristos                     assert(bitCost < scaleLog);
208*3117ece4Schristos                     optPtr->offCodeFreq[of] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
209*3117ece4Schristos                     optPtr->offCodeSum += optPtr->offCodeFreq[of];
210*3117ece4Schristos             }   }
211*3117ece4Schristos 
212*3117ece4Schristos         } else {  /* first block, no dictionary */
213*3117ece4Schristos 
214*3117ece4Schristos             assert(optPtr->litFreq != NULL);
215*3117ece4Schristos             if (compressedLiterals) {
216*3117ece4Schristos                 /* base initial cost of literals on direct frequency within src */
217*3117ece4Schristos                 unsigned lit = MaxLit;
218*3117ece4Schristos                 HIST_count_simple(optPtr->litFreq, &lit, src, srcSize);   /* use raw first block to init statistics */
219*3117ece4Schristos                 optPtr->litSum = ZSTD_downscaleStats(optPtr->litFreq, MaxLit, 8, base_0possible);
220*3117ece4Schristos             }
221*3117ece4Schristos 
222*3117ece4Schristos             {   unsigned const baseLLfreqs[MaxLL+1] = {
223*3117ece4Schristos                     4, 2, 1, 1, 1, 1, 1, 1,
224*3117ece4Schristos                     1, 1, 1, 1, 1, 1, 1, 1,
225*3117ece4Schristos                     1, 1, 1, 1, 1, 1, 1, 1,
226*3117ece4Schristos                     1, 1, 1, 1, 1, 1, 1, 1,
227*3117ece4Schristos                     1, 1, 1, 1
228*3117ece4Schristos                 };
229*3117ece4Schristos                 ZSTD_memcpy(optPtr->litLengthFreq, baseLLfreqs, sizeof(baseLLfreqs));
230*3117ece4Schristos                 optPtr->litLengthSum = sum_u32(baseLLfreqs, MaxLL+1);
231*3117ece4Schristos             }
232*3117ece4Schristos 
233*3117ece4Schristos             {   unsigned ml;
234*3117ece4Schristos                 for (ml=0; ml<=MaxML; ml++)
235*3117ece4Schristos                     optPtr->matchLengthFreq[ml] = 1;
236*3117ece4Schristos             }
237*3117ece4Schristos             optPtr->matchLengthSum = MaxML+1;
238*3117ece4Schristos 
239*3117ece4Schristos             {   unsigned const baseOFCfreqs[MaxOff+1] = {
240*3117ece4Schristos                     6, 2, 1, 1, 2, 3, 4, 4,
241*3117ece4Schristos                     4, 3, 2, 1, 1, 1, 1, 1,
242*3117ece4Schristos                     1, 1, 1, 1, 1, 1, 1, 1,
243*3117ece4Schristos                     1, 1, 1, 1, 1, 1, 1, 1
244*3117ece4Schristos                 };
245*3117ece4Schristos                 ZSTD_memcpy(optPtr->offCodeFreq, baseOFCfreqs, sizeof(baseOFCfreqs));
246*3117ece4Schristos                 optPtr->offCodeSum = sum_u32(baseOFCfreqs, MaxOff+1);
247*3117ece4Schristos             }
248*3117ece4Schristos 
249*3117ece4Schristos         }
250*3117ece4Schristos 
251*3117ece4Schristos     } else {   /* new block : scale down accumulated statistics */
252*3117ece4Schristos 
253*3117ece4Schristos         if (compressedLiterals)
254*3117ece4Schristos             optPtr->litSum = ZSTD_scaleStats(optPtr->litFreq, MaxLit, 12);
255*3117ece4Schristos         optPtr->litLengthSum = ZSTD_scaleStats(optPtr->litLengthFreq, MaxLL, 11);
256*3117ece4Schristos         optPtr->matchLengthSum = ZSTD_scaleStats(optPtr->matchLengthFreq, MaxML, 11);
257*3117ece4Schristos         optPtr->offCodeSum = ZSTD_scaleStats(optPtr->offCodeFreq, MaxOff, 11);
258*3117ece4Schristos     }
259*3117ece4Schristos 
260*3117ece4Schristos     ZSTD_setBasePrices(optPtr, optLevel);
261*3117ece4Schristos }
262*3117ece4Schristos 
263*3117ece4Schristos /* ZSTD_rawLiteralsCost() :
264*3117ece4Schristos  * price of literals (only) in the specified segment (whose length can be 0).
265*3117ece4Schristos  * does not include price of literalLength symbol */
266*3117ece4Schristos static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
267*3117ece4Schristos                                 const optState_t* const optPtr,
268*3117ece4Schristos                                 int optLevel)
269*3117ece4Schristos {
270*3117ece4Schristos     DEBUGLOG(8, "ZSTD_rawLiteralsCost (%u literals)", litLength);
271*3117ece4Schristos     if (litLength == 0) return 0;
272*3117ece4Schristos 
273*3117ece4Schristos     if (!ZSTD_compressedLiterals(optPtr))
274*3117ece4Schristos         return (litLength << 3) * BITCOST_MULTIPLIER;  /* Uncompressed - 8 bits per literal. */
275*3117ece4Schristos 
276*3117ece4Schristos     if (optPtr->priceType == zop_predef)
277*3117ece4Schristos         return (litLength*6) * BITCOST_MULTIPLIER;  /* 6 bits per literal - no statistics used */
278*3117ece4Schristos 
279*3117ece4Schristos     /* dynamic statistics */
280*3117ece4Schristos     {   U32 price = optPtr->litSumBasePrice * litLength;
281*3117ece4Schristos         U32 const litPriceMax = optPtr->litSumBasePrice - BITCOST_MULTIPLIER;
282*3117ece4Schristos         U32 u;
283*3117ece4Schristos         assert(optPtr->litSumBasePrice >= BITCOST_MULTIPLIER);
284*3117ece4Schristos         for (u=0; u < litLength; u++) {
285*3117ece4Schristos             U32 litPrice = WEIGHT(optPtr->litFreq[literals[u]], optLevel);
286*3117ece4Schristos             if (UNLIKELY(litPrice > litPriceMax)) litPrice = litPriceMax;
287*3117ece4Schristos             price -= litPrice;
288*3117ece4Schristos         }
289*3117ece4Schristos         return price;
290*3117ece4Schristos     }
291*3117ece4Schristos }
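
/* note : in the dynamic branch above,
 * price = litLength * WEIGHT(litSum) - sum_u WEIGHT(litFreq[literals[u]]),
 * i.e. roughly sum_u log2(litSum / litFreq[literals[u]]) bits
 * (in BITCOST_MULTIPLIER units), an entropy-style estimate ;
 * capping each litPrice at litPriceMax charges every literal at least 1 bit. */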
292*3117ece4Schristos 
293*3117ece4Schristos /* ZSTD_litLengthPrice() :
294*3117ece4Schristos  * cost of literalLength symbol */
295*3117ece4Schristos static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr, int optLevel)
296*3117ece4Schristos {
297*3117ece4Schristos     assert(litLength <= ZSTD_BLOCKSIZE_MAX);
298*3117ece4Schristos     if (optPtr->priceType == zop_predef)
299*3117ece4Schristos         return WEIGHT(litLength, optLevel);
300*3117ece4Schristos 
301*3117ece4Schristos     /* ZSTD_LLcode() can't compute litLength price for sizes >= ZSTD_BLOCKSIZE_MAX
302*3117ece4Schristos      * because it isn't representable in the zstd format.
303*3117ece4Schristos      * So instead just pretend it would cost 1 bit more than ZSTD_BLOCKSIZE_MAX - 1.
304*3117ece4Schristos      * In such a case, the block would be all literals.
305*3117ece4Schristos      */
306*3117ece4Schristos     if (litLength == ZSTD_BLOCKSIZE_MAX)
307*3117ece4Schristos         return BITCOST_MULTIPLIER + ZSTD_litLengthPrice(ZSTD_BLOCKSIZE_MAX - 1, optPtr, optLevel);
308*3117ece4Schristos 
309*3117ece4Schristos     /* dynamic statistics */
310*3117ece4Schristos     {   U32 const llCode = ZSTD_LLcode(litLength);
311*3117ece4Schristos         return (LL_bits[llCode] * BITCOST_MULTIPLIER)
312*3117ece4Schristos              + optPtr->litLengthSumBasePrice
313*3117ece4Schristos              - WEIGHT(optPtr->litLengthFreq[llCode], optLevel);
314*3117ece4Schristos     }
315*3117ece4Schristos }
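
/* Illustrative note : for litLength=2, llCode=2 and LL_bits[2]=0,
 * so the price reduces to litLengthSumBasePrice - WEIGHT(litLengthFreq[2]),
 * i.e. roughly log2(litLengthSum / litLengthFreq[2]) bits. */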
316*3117ece4Schristos 
317*3117ece4Schristos /* ZSTD_getMatchPrice() :
318*3117ece4Schristos  * Provides the cost of the match part (offset + matchLength) of a sequence.
319*3117ece4Schristos  * Must be combined with the literals cost (ZSTD_rawLiteralsCost() + ZSTD_litLengthPrice()) to get the full cost of a sequence.
320*3117ece4Schristos  * @offBase : sumtype, representing an offset or a repcode, and using numeric representation of ZSTD_storeSeq()
321*3117ece4Schristos  * @optLevel: when <2, favors small offset for decompression speed (improved cache efficiency)
322*3117ece4Schristos  */
323*3117ece4Schristos FORCE_INLINE_TEMPLATE U32
324*3117ece4Schristos ZSTD_getMatchPrice(U32 const offBase,
325*3117ece4Schristos                    U32 const matchLength,
326*3117ece4Schristos              const optState_t* const optPtr,
327*3117ece4Schristos                    int const optLevel)
328*3117ece4Schristos {
329*3117ece4Schristos     U32 price;
330*3117ece4Schristos     U32 const offCode = ZSTD_highbit32(offBase);
331*3117ece4Schristos     U32 const mlBase = matchLength - MINMATCH;
332*3117ece4Schristos     assert(matchLength >= MINMATCH);
333*3117ece4Schristos 
334*3117ece4Schristos     if (optPtr->priceType == zop_predef)  /* fixed scheme, does not use statistics */
335*3117ece4Schristos         return WEIGHT(mlBase, optLevel)
336*3117ece4Schristos              + ((16 + offCode) * BITCOST_MULTIPLIER); /* emulated offset cost */
337*3117ece4Schristos 
338*3117ece4Schristos     /* dynamic statistics */
339*3117ece4Schristos     price = (offCode * BITCOST_MULTIPLIER) + (optPtr->offCodeSumBasePrice - WEIGHT(optPtr->offCodeFreq[offCode], optLevel));
340*3117ece4Schristos     if ((optLevel<2) /*static*/ && offCode >= 20)
341*3117ece4Schristos         price += (offCode-19)*2 * BITCOST_MULTIPLIER; /* handicap for long distance offsets, favor decompression speed */
342*3117ece4Schristos 
343*3117ece4Schristos     /* match Length */
344*3117ece4Schristos     {   U32 const mlCode = ZSTD_MLcode(mlBase);
345*3117ece4Schristos         price += (ML_bits[mlCode] * BITCOST_MULTIPLIER) + (optPtr->matchLengthSumBasePrice - WEIGHT(optPtr->matchLengthFreq[mlCode], optLevel));
346*3117ece4Schristos     }
347*3117ece4Schristos 
348*3117ece4Schristos     price += BITCOST_MULTIPLIER / 5;   /* heuristic : make matches a bit more costly to favor fewer sequences -> faster decompression speed */
349*3117ece4Schristos 
350*3117ece4Schristos     DEBUGLOG(8, "ZSTD_getMatchPrice(ml:%u) = %u", matchLength, price);
351*3117ece4Schristos     return price;
352*3117ece4Schristos }
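
/* note : offBase folds repcodes and real offsets into a single value
 * (the numeric representation of ZSTD_storeSeq()), so ZSTD_highbit32(offBase)
 * is the offset code of the zstd format, which carries `offCode` extra bits.
 * The dynamic branch charges those extra bits at face value
 * (offCode * BITCOST_MULTIPLIER) and prices the code itself from the
 * observed offCodeFreq statistics ; the predefined branch approximates
 * the whole offset cost as (16 + offCode) bits. */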
353*3117ece4Schristos 
354*3117ece4Schristos /* ZSTD_updateStats() :
355*3117ece4Schristos  * assumption : literals + litLength <= iend */
356*3117ece4Schristos static void ZSTD_updateStats(optState_t* const optPtr,
357*3117ece4Schristos                              U32 litLength, const BYTE* literals,
358*3117ece4Schristos                              U32 offBase, U32 matchLength)
359*3117ece4Schristos {
360*3117ece4Schristos     /* literals */
361*3117ece4Schristos     if (ZSTD_compressedLiterals(optPtr)) {
362*3117ece4Schristos         U32 u;
363*3117ece4Schristos         for (u=0; u < litLength; u++)
364*3117ece4Schristos             optPtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD;
365*3117ece4Schristos         optPtr->litSum += litLength*ZSTD_LITFREQ_ADD;
366*3117ece4Schristos     }
367*3117ece4Schristos 
368*3117ece4Schristos     /* literal Length */
369*3117ece4Schristos     {   U32 const llCode = ZSTD_LLcode(litLength);
370*3117ece4Schristos         optPtr->litLengthFreq[llCode]++;
371*3117ece4Schristos         optPtr->litLengthSum++;
372*3117ece4Schristos     }
373*3117ece4Schristos 
374*3117ece4Schristos     /* offset code : follows storeSeq() numeric representation */
375*3117ece4Schristos     {   U32 const offCode = ZSTD_highbit32(offBase);
376*3117ece4Schristos         assert(offCode <= MaxOff);
377*3117ece4Schristos         optPtr->offCodeFreq[offCode]++;
378*3117ece4Schristos         optPtr->offCodeSum++;
379*3117ece4Schristos     }
380*3117ece4Schristos 
381*3117ece4Schristos     /* match Length */
382*3117ece4Schristos     {   U32 const mlBase = matchLength - MINMATCH;
383*3117ece4Schristos         U32 const mlCode = ZSTD_MLcode(mlBase);
384*3117ece4Schristos         optPtr->matchLengthFreq[mlCode]++;
385*3117ece4Schristos         optPtr->matchLengthSum++;
386*3117ece4Schristos     }
387*3117ece4Schristos }
388*3117ece4Schristos 
389*3117ece4Schristos 
390*3117ece4Schristos /* ZSTD_readMINMATCH() :
391*3117ece4Schristos  * result is only valid for equality comparisons
392*3117ece4Schristos  * assumption : at least 4 readable bytes exist at memPtr (i.e. memPtr sits at least 4 bytes before the end of the buffer) */
393*3117ece4Schristos MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
394*3117ece4Schristos {
395*3117ece4Schristos     switch (length)
396*3117ece4Schristos     {
397*3117ece4Schristos     default :
398*3117ece4Schristos     case 4 : return MEM_read32(memPtr);
399*3117ece4Schristos     case 3 : if (MEM_isLittleEndian())
400*3117ece4Schristos                 return MEM_read32(memPtr)<<8;
401*3117ece4Schristos              else
402*3117ece4Schristos                 return MEM_read32(memPtr)>>8;
403*3117ece4Schristos     }
404*3117ece4Schristos }
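
/* note : for length==3, the shift discards the 4th byte of the 32-bit read
 * (top byte on little-endian, bottom byte on big-endian), so the returned
 * value depends only on the first 3 bytes.
 * The result is only meaningful for equality tests, never as a numeric value. */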
405*3117ece4Schristos 
406*3117ece4Schristos 
407*3117ece4Schristos /* Update hashTable3 up to ip (excluded)
408*3117ece4Schristos    Assumption : always within prefix (i.e. not within extDict) */
409*3117ece4Schristos static
410*3117ece4Schristos ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
411*3117ece4Schristos U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_matchState_t* ms,
412*3117ece4Schristos                                        U32* nextToUpdate3,
413*3117ece4Schristos                                        const BYTE* const ip)
414*3117ece4Schristos {
415*3117ece4Schristos     U32* const hashTable3 = ms->hashTable3;
416*3117ece4Schristos     U32 const hashLog3 = ms->hashLog3;
417*3117ece4Schristos     const BYTE* const base = ms->window.base;
418*3117ece4Schristos     U32 idx = *nextToUpdate3;
419*3117ece4Schristos     U32 const target = (U32)(ip - base);
420*3117ece4Schristos     size_t const hash3 = ZSTD_hash3Ptr(ip, hashLog3);
421*3117ece4Schristos     assert(hashLog3 > 0);
422*3117ece4Schristos 
423*3117ece4Schristos     while(idx < target) {
424*3117ece4Schristos         hashTable3[ZSTD_hash3Ptr(base+idx, hashLog3)] = idx;
425*3117ece4Schristos         idx++;
426*3117ece4Schristos     }
427*3117ece4Schristos 
428*3117ece4Schristos     *nextToUpdate3 = target;
429*3117ece4Schristos     return hashTable3[hash3];
430*3117ece4Schristos }
431*3117ece4Schristos 
432*3117ece4Schristos 
433*3117ece4Schristos /*-*************************************
434*3117ece4Schristos *  Binary Tree search
435*3117ece4Schristos ***************************************/
436*3117ece4Schristos /** ZSTD_insertBt1() : add one or multiple positions to tree.
437*3117ece4Schristos  * @param ip assumed <= iend-8 .
438*3117ece4Schristos  * @param target The target of ZSTD_updateTree_internal() - we are filling to this position
439*3117ece4Schristos  * @return : nb of positions added */
440*3117ece4Schristos static
441*3117ece4Schristos ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
442*3117ece4Schristos U32 ZSTD_insertBt1(
443*3117ece4Schristos                 const ZSTD_matchState_t* ms,
444*3117ece4Schristos                 const BYTE* const ip, const BYTE* const iend,
445*3117ece4Schristos                 U32 const target,
446*3117ece4Schristos                 U32 const mls, const int extDict)
447*3117ece4Schristos {
448*3117ece4Schristos     const ZSTD_compressionParameters* const cParams = &ms->cParams;
449*3117ece4Schristos     U32*   const hashTable = ms->hashTable;
450*3117ece4Schristos     U32    const hashLog = cParams->hashLog;
451*3117ece4Schristos     size_t const h  = ZSTD_hashPtr(ip, hashLog, mls);
452*3117ece4Schristos     U32*   const bt = ms->chainTable;
453*3117ece4Schristos     U32    const btLog  = cParams->chainLog - 1;
454*3117ece4Schristos     U32    const btMask = (1 << btLog) - 1;
455*3117ece4Schristos     U32 matchIndex = hashTable[h];
456*3117ece4Schristos     size_t commonLengthSmaller=0, commonLengthLarger=0;
457*3117ece4Schristos     const BYTE* const base = ms->window.base;
458*3117ece4Schristos     const BYTE* const dictBase = ms->window.dictBase;
459*3117ece4Schristos     const U32 dictLimit = ms->window.dictLimit;
460*3117ece4Schristos     const BYTE* const dictEnd = dictBase + dictLimit;
461*3117ece4Schristos     const BYTE* const prefixStart = base + dictLimit;
462*3117ece4Schristos     const BYTE* match;
463*3117ece4Schristos     const U32 curr = (U32)(ip-base);
464*3117ece4Schristos     const U32 btLow = btMask >= curr ? 0 : curr - btMask;
465*3117ece4Schristos     U32* smallerPtr = bt + 2*(curr&btMask);
466*3117ece4Schristos     U32* largerPtr  = smallerPtr + 1;
467*3117ece4Schristos     U32 dummy32;   /* to be nullified at the end */
468*3117ece4Schristos     /* windowLow is based on target because
469*3117ece4Schristos      * we only need positions that will be in the window at the end of the tree update.
470*3117ece4Schristos      */
471*3117ece4Schristos     U32 const windowLow = ZSTD_getLowestMatchIndex(ms, target, cParams->windowLog);
472*3117ece4Schristos     U32 matchEndIdx = curr+8+1;
473*3117ece4Schristos     size_t bestLength = 8;
474*3117ece4Schristos     U32 nbCompares = 1U << cParams->searchLog;
475*3117ece4Schristos #ifdef ZSTD_C_PREDICT
476*3117ece4Schristos     U32 predictedSmall = *(bt + 2*((curr-1)&btMask) + 0);
477*3117ece4Schristos     U32 predictedLarge = *(bt + 2*((curr-1)&btMask) + 1);
478*3117ece4Schristos     predictedSmall += (predictedSmall>0);
479*3117ece4Schristos     predictedLarge += (predictedLarge>0);
480*3117ece4Schristos #endif /* ZSTD_C_PREDICT */
481*3117ece4Schristos 
482*3117ece4Schristos     DEBUGLOG(8, "ZSTD_insertBt1 (%u)", curr);
483*3117ece4Schristos 
484*3117ece4Schristos     assert(curr <= target);
485*3117ece4Schristos     assert(ip <= iend-8);   /* required for h calculation */
486*3117ece4Schristos     hashTable[h] = curr;   /* Update Hash Table */
487*3117ece4Schristos 
488*3117ece4Schristos     assert(windowLow > 0);
489*3117ece4Schristos     for (; nbCompares && (matchIndex >= windowLow); --nbCompares) {
490*3117ece4Schristos         U32* const nextPtr = bt + 2*(matchIndex & btMask);
491*3117ece4Schristos         size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
492*3117ece4Schristos         assert(matchIndex < curr);
493*3117ece4Schristos 
494*3117ece4Schristos #ifdef ZSTD_C_PREDICT   /* note : can create issues when hlog small <= 11 */
495*3117ece4Schristos         const U32* predictPtr = bt + 2*((matchIndex-1) & btMask);   /* written this way, as bt is a roll buffer */
496*3117ece4Schristos         if (matchIndex == predictedSmall) {
497*3117ece4Schristos             /* no need to check length, result known */
498*3117ece4Schristos             *smallerPtr = matchIndex;
499*3117ece4Schristos             if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
500*3117ece4Schristos             smallerPtr = nextPtr+1;               /* new "smaller" => larger of match */
501*3117ece4Schristos             matchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
502*3117ece4Schristos             predictedSmall = predictPtr[1] + (predictPtr[1]>0);
503*3117ece4Schristos             continue;
504*3117ece4Schristos         }
505*3117ece4Schristos         if (matchIndex == predictedLarge) {
506*3117ece4Schristos             *largerPtr = matchIndex;
507*3117ece4Schristos             if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
508*3117ece4Schristos             largerPtr = nextPtr;
509*3117ece4Schristos             matchIndex = nextPtr[0];
510*3117ece4Schristos             predictedLarge = predictPtr[0] + (predictPtr[0]>0);
511*3117ece4Schristos             continue;
512*3117ece4Schristos         }
513*3117ece4Schristos #endif
514*3117ece4Schristos 
515*3117ece4Schristos         if (!extDict || (matchIndex+matchLength >= dictLimit)) {
516*3117ece4Schristos             assert(matchIndex+matchLength >= dictLimit);   /* might be wrong if actually extDict */
517*3117ece4Schristos             match = base + matchIndex;
518*3117ece4Schristos             matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
519*3117ece4Schristos         } else {
520*3117ece4Schristos             match = dictBase + matchIndex;
521*3117ece4Schristos             matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
522*3117ece4Schristos             if (matchIndex+matchLength >= dictLimit)
523*3117ece4Schristos                 match = base + matchIndex;   /* to prepare for next usage of match[matchLength] */
524*3117ece4Schristos         }
525*3117ece4Schristos 
526*3117ece4Schristos         if (matchLength > bestLength) {
527*3117ece4Schristos             bestLength = matchLength;
528*3117ece4Schristos             if (matchLength > matchEndIdx - matchIndex)
529*3117ece4Schristos                 matchEndIdx = matchIndex + (U32)matchLength;
530*3117ece4Schristos         }
531*3117ece4Schristos 
532*3117ece4Schristos         if (ip+matchLength == iend) {   /* equal : no way to know if inf or sup */
533*3117ece4Schristos             break;   /* drop, to guarantee consistency ; misses a bit of compression, but other solutions could corrupt the tree */
534*3117ece4Schristos         }
535*3117ece4Schristos 
536*3117ece4Schristos         if (match[matchLength] < ip[matchLength]) {  /* necessarily within buffer */
537*3117ece4Schristos             /* match is smaller than current */
538*3117ece4Schristos             *smallerPtr = matchIndex;             /* update smaller idx */
539*3117ece4Schristos             commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
540*3117ece4Schristos             if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
541*3117ece4Schristos             smallerPtr = nextPtr+1;               /* new "candidate" => larger than match, which was smaller than target */
542*3117ece4Schristos             matchIndex = nextPtr[1];              /* new matchIndex, larger than previous and closer to current */
543*3117ece4Schristos         } else {
544*3117ece4Schristos             /* match is larger than current */
545*3117ece4Schristos             *largerPtr = matchIndex;
546*3117ece4Schristos             commonLengthLarger = matchLength;
547*3117ece4Schristos             if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
548*3117ece4Schristos             largerPtr = nextPtr;
549*3117ece4Schristos             matchIndex = nextPtr[0];
550*3117ece4Schristos     }   }
551*3117ece4Schristos 
552*3117ece4Schristos     *smallerPtr = *largerPtr = 0;
553*3117ece4Schristos     {   U32 positions = 0;
554*3117ece4Schristos         if (bestLength > 384) positions = MIN(192, (U32)(bestLength - 384));   /* speed optimization */
555*3117ece4Schristos         assert(matchEndIdx > curr + 8);
556*3117ece4Schristos         return MAX(positions, matchEndIdx - (curr + 8));
557*3117ece4Schristos     }
558*3117ece4Schristos }
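
/* note : the chainTable doubles as a binary search tree over suffixes.
 * Each position p owns the pair bt[2*(p&btMask)] / bt[2*(p&btMask)+1],
 * pointing into the subtrees of earlier positions whose suffixes sort
 * lexicographically smaller / larger than p's suffix.
 * commonLengthSmaller / commonLengthLarger record the prefix length already
 * known to be shared on each side, so ZSTD_count() can resume from that
 * offset instead of re-comparing from the start. */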
559*3117ece4Schristos 
560*3117ece4Schristos FORCE_INLINE_TEMPLATE
561*3117ece4Schristos ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
562*3117ece4Schristos void ZSTD_updateTree_internal(
563*3117ece4Schristos                 ZSTD_matchState_t* ms,
564*3117ece4Schristos                 const BYTE* const ip, const BYTE* const iend,
565*3117ece4Schristos                 const U32 mls, const ZSTD_dictMode_e dictMode)
566*3117ece4Schristos {
567*3117ece4Schristos     const BYTE* const base = ms->window.base;
568*3117ece4Schristos     U32 const target = (U32)(ip - base);
569*3117ece4Schristos     U32 idx = ms->nextToUpdate;
570*3117ece4Schristos     DEBUGLOG(7, "ZSTD_updateTree_internal, from %u to %u  (dictMode:%u)",
571*3117ece4Schristos                 idx, target, dictMode);
572*3117ece4Schristos 
573*3117ece4Schristos     while(idx < target) {
574*3117ece4Schristos         U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, target, mls, dictMode == ZSTD_extDict);
575*3117ece4Schristos         assert(idx < (U32)(idx + forward));
576*3117ece4Schristos         idx += forward;
577*3117ece4Schristos     }
578*3117ece4Schristos     assert((size_t)(ip - base) <= (size_t)(U32)(-1));
579*3117ece4Schristos     assert((size_t)(iend - base) <= (size_t)(U32)(-1));
580*3117ece4Schristos     ms->nextToUpdate = target;
581*3117ece4Schristos }
582*3117ece4Schristos 
583*3117ece4Schristos void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) {
584*3117ece4Schristos     ZSTD_updateTree_internal(ms, ip, iend, ms->cParams.minMatch, ZSTD_noDict);
585*3117ece4Schristos }
586*3117ece4Schristos 
587*3117ece4Schristos FORCE_INLINE_TEMPLATE
588*3117ece4Schristos ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
589*3117ece4Schristos U32
590*3117ece4Schristos ZSTD_insertBtAndGetAllMatches (
591*3117ece4Schristos                 ZSTD_match_t* matches,  /* store result (found matches) in this table (presumed large enough) */
592*3117ece4Schristos                 ZSTD_matchState_t* ms,
593*3117ece4Schristos                 U32* nextToUpdate3,
594*3117ece4Schristos                 const BYTE* const ip, const BYTE* const iLimit,
595*3117ece4Schristos                 const ZSTD_dictMode_e dictMode,
596*3117ece4Schristos                 const U32 rep[ZSTD_REP_NUM],
597*3117ece4Schristos                 const U32 ll0,  /* tells if associated literal length is 0 or not. This value must be 0 or 1 */
598*3117ece4Schristos                 const U32 lengthToBeat,
599*3117ece4Schristos                 const U32 mls /* template */)
600*3117ece4Schristos {
601*3117ece4Schristos     const ZSTD_compressionParameters* const cParams = &ms->cParams;
602*3117ece4Schristos     U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
603*3117ece4Schristos     const BYTE* const base = ms->window.base;
604*3117ece4Schristos     U32 const curr = (U32)(ip-base);
605*3117ece4Schristos     U32 const hashLog = cParams->hashLog;
606*3117ece4Schristos     U32 const minMatch = (mls==3) ? 3 : 4;
607*3117ece4Schristos     U32* const hashTable = ms->hashTable;
608*3117ece4Schristos     size_t const h  = ZSTD_hashPtr(ip, hashLog, mls);
609*3117ece4Schristos     U32 matchIndex  = hashTable[h];
610*3117ece4Schristos     U32* const bt   = ms->chainTable;
611*3117ece4Schristos     U32 const btLog = cParams->chainLog - 1;
612*3117ece4Schristos     U32 const btMask= (1U << btLog) - 1;
613*3117ece4Schristos     size_t commonLengthSmaller=0, commonLengthLarger=0;
614*3117ece4Schristos     const BYTE* const dictBase = ms->window.dictBase;
615*3117ece4Schristos     U32 const dictLimit = ms->window.dictLimit;
616*3117ece4Schristos     const BYTE* const dictEnd = dictBase + dictLimit;
617*3117ece4Schristos     const BYTE* const prefixStart = base + dictLimit;
618*3117ece4Schristos     U32 const btLow = (btMask >= curr) ? 0 : curr - btMask;
619*3117ece4Schristos     U32 const windowLow = ZSTD_getLowestMatchIndex(ms, curr, cParams->windowLog);
620*3117ece4Schristos     U32 const matchLow = windowLow ? windowLow : 1;
621*3117ece4Schristos     U32* smallerPtr = bt + 2*(curr&btMask);
622*3117ece4Schristos     U32* largerPtr  = bt + 2*(curr&btMask) + 1;
623*3117ece4Schristos     U32 matchEndIdx = curr+8+1;   /* farthest referenced position of any match => detects repetitive patterns */
624*3117ece4Schristos     U32 dummy32;   /* to be nullified at the end */
625*3117ece4Schristos     U32 mnum = 0;
626*3117ece4Schristos     U32 nbCompares = 1U << cParams->searchLog;
627*3117ece4Schristos 
628*3117ece4Schristos     const ZSTD_matchState_t* dms    = dictMode == ZSTD_dictMatchState ? ms->dictMatchState : NULL;
629*3117ece4Schristos     const ZSTD_compressionParameters* const dmsCParams =
630*3117ece4Schristos                                       dictMode == ZSTD_dictMatchState ? &dms->cParams : NULL;
631*3117ece4Schristos     const BYTE* const dmsBase       = dictMode == ZSTD_dictMatchState ? dms->window.base : NULL;
632*3117ece4Schristos     const BYTE* const dmsEnd        = dictMode == ZSTD_dictMatchState ? dms->window.nextSrc : NULL;
633*3117ece4Schristos     U32         const dmsHighLimit  = dictMode == ZSTD_dictMatchState ? (U32)(dmsEnd - dmsBase) : 0;
634*3117ece4Schristos     U32         const dmsLowLimit   = dictMode == ZSTD_dictMatchState ? dms->window.lowLimit : 0;
635*3117ece4Schristos     U32         const dmsIndexDelta = dictMode == ZSTD_dictMatchState ? windowLow - dmsHighLimit : 0;
636*3117ece4Schristos     U32         const dmsHashLog    = dictMode == ZSTD_dictMatchState ? dmsCParams->hashLog : hashLog;
637*3117ece4Schristos     U32         const dmsBtLog      = dictMode == ZSTD_dictMatchState ? dmsCParams->chainLog - 1 : btLog;
638*3117ece4Schristos     U32         const dmsBtMask     = dictMode == ZSTD_dictMatchState ? (1U << dmsBtLog) - 1 : 0;
639*3117ece4Schristos     U32         const dmsBtLow      = dictMode == ZSTD_dictMatchState && dmsBtMask < dmsHighLimit - dmsLowLimit ? dmsHighLimit - dmsBtMask : dmsLowLimit;
640*3117ece4Schristos 
641*3117ece4Schristos     size_t bestLength = lengthToBeat-1;
642*3117ece4Schristos     DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", curr);
643*3117ece4Schristos 
644*3117ece4Schristos     /* check repCode */
645*3117ece4Schristos     assert(ll0 <= 1);   /* necessarily 1 or 0 */
646*3117ece4Schristos     {   U32 const lastR = ZSTD_REP_NUM + ll0;
647*3117ece4Schristos         U32 repCode;
648*3117ece4Schristos         for (repCode = ll0; repCode < lastR; repCode++) {
649*3117ece4Schristos             U32 const repOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
650*3117ece4Schristos             U32 const repIndex = curr - repOffset;
651*3117ece4Schristos             U32 repLen = 0;
652*3117ece4Schristos             assert(curr >= dictLimit);
653*3117ece4Schristos             if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < curr-dictLimit) {  /* equivalent to `curr > repIndex >= dictLimit` */
654*3117ece4Schristos                 /* We must validate the repcode offset because when we're using a dictionary the
655*3117ece4Schristos                  * valid offset range shrinks when the dictionary goes out of bounds.
656*3117ece4Schristos                  */
657*3117ece4Schristos                 if ((repIndex >= windowLow) & (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch))) {
658*3117ece4Schristos                     repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch;
659*3117ece4Schristos                 }
660*3117ece4Schristos             } else {  /* repIndex < dictLimit || repIndex >= curr */
661*3117ece4Schristos                 const BYTE* const repMatch = dictMode == ZSTD_dictMatchState ?
662*3117ece4Schristos                                              dmsBase + repIndex - dmsIndexDelta :
663*3117ece4Schristos                                              dictBase + repIndex;
664*3117ece4Schristos                 assert(curr >= windowLow);
665*3117ece4Schristos                 if ( dictMode == ZSTD_extDict
666*3117ece4Schristos                   && ( ((repOffset-1) /*intentional overflow*/ < curr - windowLow)  /* equivalent to `curr > repIndex >= windowLow` */
667*3117ece4Schristos                      & (((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */)
668*3117ece4Schristos                   && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
669*3117ece4Schristos                     repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dictEnd, prefixStart) + minMatch;
670*3117ece4Schristos                 }
671*3117ece4Schristos                 if (dictMode == ZSTD_dictMatchState
672*3117ece4Schristos                   && ( ((repOffset-1) /*intentional overflow*/ < curr - (dmsLowLimit + dmsIndexDelta))  /* equivalent to `curr > repIndex >= dmsLowLimit` */
673*3117ece4Schristos                      & ((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */
674*3117ece4Schristos                   && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
675*3117ece4Schristos                     repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dmsEnd, prefixStart) + minMatch;
676*3117ece4Schristos             }   }
677*3117ece4Schristos             /* save longer solution */
678*3117ece4Schristos             if (repLen > bestLength) {
679*3117ece4Schristos                 DEBUGLOG(8, "found repCode %u (ll0:%u, offset:%u) of length %u",
680*3117ece4Schristos                             repCode, ll0, repOffset, repLen);
681*3117ece4Schristos                 bestLength = repLen;
682*3117ece4Schristos                 matches[mnum].off = REPCODE_TO_OFFBASE(repCode - ll0 + 1);  /* expect value between 1 and 3 */
683*3117ece4Schristos                 matches[mnum].len = (U32)repLen;
684*3117ece4Schristos                 mnum++;
685*3117ece4Schristos                 if ( (repLen > sufficient_len)
686*3117ece4Schristos                    | (ip+repLen == iLimit) ) {  /* best possible */
687*3117ece4Schristos                     return mnum;
688*3117ece4Schristos     }   }   }   }
689*3117ece4Schristos 
690*3117ece4Schristos     /* HC3 match finder */
691*3117ece4Schristos     if ((mls == 3) /*static*/ && (bestLength < mls)) {
692*3117ece4Schristos         U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, nextToUpdate3, ip);
693*3117ece4Schristos         if ((matchIndex3 >= matchLow)
694*3117ece4Schristos           & (curr - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) {
695*3117ece4Schristos             size_t mlen;
696*3117ece4Schristos             if ((dictMode == ZSTD_noDict) /*static*/ || (dictMode == ZSTD_dictMatchState) /*static*/ || (matchIndex3 >= dictLimit)) {
697*3117ece4Schristos                 const BYTE* const match = base + matchIndex3;
698*3117ece4Schristos                 mlen = ZSTD_count(ip, match, iLimit);
699*3117ece4Schristos             } else {
700*3117ece4Schristos                 const BYTE* const match = dictBase + matchIndex3;
701*3117ece4Schristos                 mlen = ZSTD_count_2segments(ip, match, iLimit, dictEnd, prefixStart);
702*3117ece4Schristos             }
703*3117ece4Schristos 
704*3117ece4Schristos             /* save best solution */
705*3117ece4Schristos             if (mlen >= mls /* == 3 > bestLength */) {
706*3117ece4Schristos                 DEBUGLOG(8, "found small match with hlog3, of length %u",
707*3117ece4Schristos                             (U32)mlen);
708*3117ece4Schristos                 bestLength = mlen;
709*3117ece4Schristos                 assert(curr > matchIndex3);
710*3117ece4Schristos                 assert(mnum==0);  /* no prior solution */
711*3117ece4Schristos                 matches[0].off = OFFSET_TO_OFFBASE(curr - matchIndex3);
712*3117ece4Schristos                 matches[0].len = (U32)mlen;
713*3117ece4Schristos                 mnum = 1;
714*3117ece4Schristos                 if ( (mlen > sufficient_len) |
715*3117ece4Schristos                      (ip+mlen == iLimit) ) {  /* best possible length */
716*3117ece4Schristos                     ms->nextToUpdate = curr+1;  /* skip insertion */
717*3117ece4Schristos                     return 1;
718*3117ece4Schristos         }   }   }
719*3117ece4Schristos         /* no dictMatchState lookup: dicts don't have a populated HC3 table */
720*3117ece4Schristos     }  /* if (mls == 3) */
721*3117ece4Schristos 
722*3117ece4Schristos     hashTable[h] = curr;   /* Update Hash Table */
723*3117ece4Schristos 
724*3117ece4Schristos     for (; nbCompares && (matchIndex >= matchLow); --nbCompares) {
725*3117ece4Schristos         U32* const nextPtr = bt + 2*(matchIndex & btMask);
726*3117ece4Schristos         const BYTE* match;
727*3117ece4Schristos         size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
728*3117ece4Schristos         assert(curr > matchIndex);
729*3117ece4Schristos 
730*3117ece4Schristos         if ((dictMode == ZSTD_noDict) || (dictMode == ZSTD_dictMatchState) || (matchIndex+matchLength >= dictLimit)) {
731*3117ece4Schristos             assert(matchIndex+matchLength >= dictLimit);  /* ensure the condition is correct when !extDict */
732*3117ece4Schristos             match = base + matchIndex;
733*3117ece4Schristos             if (matchIndex >= dictLimit) assert(memcmp(match, ip, matchLength) == 0);  /* ensure early section of match is equal as expected */
734*3117ece4Schristos             matchLength += ZSTD_count(ip+matchLength, match+matchLength, iLimit);
735*3117ece4Schristos         } else {
736*3117ece4Schristos             match = dictBase + matchIndex;
737*3117ece4Schristos             assert(memcmp(match, ip, matchLength) == 0);  /* ensure early section of match is equal as expected */
738*3117ece4Schristos             matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart);
739*3117ece4Schristos             if (matchIndex+matchLength >= dictLimit)
740*3117ece4Schristos                 match = base + matchIndex;   /* prepare for match[matchLength] read */
741*3117ece4Schristos         }
742*3117ece4Schristos 
743*3117ece4Schristos         if (matchLength > bestLength) {
744*3117ece4Schristos             DEBUGLOG(8, "found match of length %u at distance %u (offBase=%u)",
745*3117ece4Schristos                     (U32)matchLength, curr - matchIndex, OFFSET_TO_OFFBASE(curr - matchIndex));
746*3117ece4Schristos             assert(matchEndIdx > matchIndex);
747*3117ece4Schristos             if (matchLength > matchEndIdx - matchIndex)
748*3117ece4Schristos                 matchEndIdx = matchIndex + (U32)matchLength;
749*3117ece4Schristos             bestLength = matchLength;
750*3117ece4Schristos             matches[mnum].off = OFFSET_TO_OFFBASE(curr - matchIndex);
751*3117ece4Schristos             matches[mnum].len = (U32)matchLength;
752*3117ece4Schristos             mnum++;
753*3117ece4Schristos             if ( (matchLength > ZSTD_OPT_NUM)
754*3117ece4Schristos                | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
755*3117ece4Schristos                 if (dictMode == ZSTD_dictMatchState) nbCompares = 0; /* break should also skip searching dms */
756*3117ece4Schristos                 break; /* drop, to preserve bt consistency (miss a little bit of compression) */
757*3117ece4Schristos         }   }
758*3117ece4Schristos 
759*3117ece4Schristos         if (match[matchLength] < ip[matchLength]) {
760*3117ece4Schristos             /* match smaller than current */
761*3117ece4Schristos             *smallerPtr = matchIndex;             /* update smaller idx */
762*3117ece4Schristos             commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
763*3117ece4Schristos             if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
764*3117ece4Schristos             smallerPtr = nextPtr+1;               /* new candidate => larger than match, which was smaller than current */
765*3117ece4Schristos             matchIndex = nextPtr[1];              /* new matchIndex, larger than previous, closer to current */
766*3117ece4Schristos         } else {
767*3117ece4Schristos             *largerPtr = matchIndex;
768*3117ece4Schristos             commonLengthLarger = matchLength;
769*3117ece4Schristos             if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
770*3117ece4Schristos             largerPtr = nextPtr;
771*3117ece4Schristos             matchIndex = nextPtr[0];
772*3117ece4Schristos     }   }
773*3117ece4Schristos 
774*3117ece4Schristos     *smallerPtr = *largerPtr = 0;
775*3117ece4Schristos 
776*3117ece4Schristos     assert(nbCompares <= (1U << ZSTD_SEARCHLOG_MAX)); /* Check we haven't underflowed. */
777*3117ece4Schristos     if (dictMode == ZSTD_dictMatchState && nbCompares) {
778*3117ece4Schristos         size_t const dmsH = ZSTD_hashPtr(ip, dmsHashLog, mls);
779*3117ece4Schristos         U32 dictMatchIndex = dms->hashTable[dmsH];
780*3117ece4Schristos         const U32* const dmsBt = dms->chainTable;
781*3117ece4Schristos         commonLengthSmaller = commonLengthLarger = 0;
782*3117ece4Schristos         for (; nbCompares && (dictMatchIndex > dmsLowLimit); --nbCompares) {
783*3117ece4Schristos             const U32* const nextPtr = dmsBt + 2*(dictMatchIndex & dmsBtMask);
784*3117ece4Schristos             size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
785*3117ece4Schristos             const BYTE* match = dmsBase + dictMatchIndex;
786*3117ece4Schristos             matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dmsEnd, prefixStart);
787*3117ece4Schristos             if (dictMatchIndex+matchLength >= dmsHighLimit)
788*3117ece4Schristos                 match = base + dictMatchIndex + dmsIndexDelta;   /* to prepare for next usage of match[matchLength] */
789*3117ece4Schristos 
790*3117ece4Schristos             if (matchLength > bestLength) {
791*3117ece4Schristos                 matchIndex = dictMatchIndex + dmsIndexDelta;
792*3117ece4Schristos                 DEBUGLOG(8, "found dms match of length %u at distance %u (offBase=%u)",
793*3117ece4Schristos                         (U32)matchLength, curr - matchIndex, OFFSET_TO_OFFBASE(curr - matchIndex));
794*3117ece4Schristos                 if (matchLength > matchEndIdx - matchIndex)
795*3117ece4Schristos                     matchEndIdx = matchIndex + (U32)matchLength;
796*3117ece4Schristos                 bestLength = matchLength;
797*3117ece4Schristos                 matches[mnum].off = OFFSET_TO_OFFBASE(curr - matchIndex);
798*3117ece4Schristos                 matches[mnum].len = (U32)matchLength;
799*3117ece4Schristos                 mnum++;
800*3117ece4Schristos                 if ( (matchLength > ZSTD_OPT_NUM)
801*3117ece4Schristos                    | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
802*3117ece4Schristos                     break;   /* drop, to guarantee consistency (miss a little bit of compression) */
803*3117ece4Schristos             }   }
804*3117ece4Schristos 
805*3117ece4Schristos             if (dictMatchIndex <= dmsBtLow) { break; }   /* beyond tree size, stop the search */
806*3117ece4Schristos             if (match[matchLength] < ip[matchLength]) {
807*3117ece4Schristos                 commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
808*3117ece4Schristos                 dictMatchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
809*3117ece4Schristos             } else {
810*3117ece4Schristos                 /* match is larger than current */
811*3117ece4Schristos                 commonLengthLarger = matchLength;
812*3117ece4Schristos                 dictMatchIndex = nextPtr[0];
813*3117ece4Schristos     }   }   }  /* if (dictMode == ZSTD_dictMatchState) */
814*3117ece4Schristos 
815*3117ece4Schristos     assert(matchEndIdx > curr+8);
816*3117ece4Schristos     ms->nextToUpdate = matchEndIdx - 8;  /* skip repetitive patterns */
817*3117ece4Schristos     return mnum;
818*3117ece4Schristos }
819*3117ece4Schristos 
820*3117ece4Schristos typedef U32 (*ZSTD_getAllMatchesFn)(
821*3117ece4Schristos     ZSTD_match_t*,
822*3117ece4Schristos     ZSTD_matchState_t*,
823*3117ece4Schristos     U32*,
824*3117ece4Schristos     const BYTE*,
825*3117ece4Schristos     const BYTE*,
826*3117ece4Schristos     const U32 rep[ZSTD_REP_NUM],
827*3117ece4Schristos     U32 const ll0,
828*3117ece4Schristos     U32 const lengthToBeat);
829*3117ece4Schristos 
830*3117ece4Schristos FORCE_INLINE_TEMPLATE
831*3117ece4Schristos ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
832*3117ece4Schristos U32 ZSTD_btGetAllMatches_internal(
833*3117ece4Schristos         ZSTD_match_t* matches,
834*3117ece4Schristos         ZSTD_matchState_t* ms,
835*3117ece4Schristos         U32* nextToUpdate3,
836*3117ece4Schristos         const BYTE* ip,
837*3117ece4Schristos         const BYTE* const iHighLimit,
838*3117ece4Schristos         const U32 rep[ZSTD_REP_NUM],
839*3117ece4Schristos         U32 const ll0,
840*3117ece4Schristos         U32 const lengthToBeat,
841*3117ece4Schristos         const ZSTD_dictMode_e dictMode,
842*3117ece4Schristos         const U32 mls)
843*3117ece4Schristos {
844*3117ece4Schristos     assert(BOUNDED(3, ms->cParams.minMatch, 6) == mls);
845*3117ece4Schristos     DEBUGLOG(8, "ZSTD_BtGetAllMatches(dictMode=%d, mls=%u)", (int)dictMode, mls);
846*3117ece4Schristos     if (ip < ms->window.base + ms->nextToUpdate)
847*3117ece4Schristos         return 0;   /* skipped area */
848*3117ece4Schristos     ZSTD_updateTree_internal(ms, ip, iHighLimit, mls, dictMode);
849*3117ece4Schristos     return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, mls);
850*3117ece4Schristos }
851*3117ece4Schristos 
852*3117ece4Schristos #define ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls) ZSTD_btGetAllMatches_##dictMode##_##mls
853*3117ece4Schristos 
854*3117ece4Schristos #define GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, mls)            \
855*3117ece4Schristos     static U32 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls)(      \
856*3117ece4Schristos             ZSTD_match_t* matches,                             \
857*3117ece4Schristos             ZSTD_matchState_t* ms,                             \
858*3117ece4Schristos             U32* nextToUpdate3,                                \
859*3117ece4Schristos             const BYTE* ip,                                    \
860*3117ece4Schristos             const BYTE* const iHighLimit,                      \
861*3117ece4Schristos             const U32 rep[ZSTD_REP_NUM],                       \
862*3117ece4Schristos             U32 const ll0,                                     \
863*3117ece4Schristos             U32 const lengthToBeat)                            \
864*3117ece4Schristos     {                                                          \
865*3117ece4Schristos         return ZSTD_btGetAllMatches_internal(                  \
866*3117ece4Schristos                 matches, ms, nextToUpdate3, ip, iHighLimit,    \
867*3117ece4Schristos                 rep, ll0, lengthToBeat, ZSTD_##dictMode, mls); \
868*3117ece4Schristos     }
869*3117ece4Schristos 
870*3117ece4Schristos #define GEN_ZSTD_BT_GET_ALL_MATCHES(dictMode)  \
871*3117ece4Schristos     GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 3)  \
872*3117ece4Schristos     GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 4)  \
873*3117ece4Schristos     GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 5)  \
874*3117ece4Schristos     GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 6)
875*3117ece4Schristos 
876*3117ece4Schristos GEN_ZSTD_BT_GET_ALL_MATCHES(noDict)
877*3117ece4Schristos GEN_ZSTD_BT_GET_ALL_MATCHES(extDict)
878*3117ece4Schristos GEN_ZSTD_BT_GET_ALL_MATCHES(dictMatchState)
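/* For illustration only (not part of the build): each GEN_ZSTD_BT_GET_ALL_MATCHES(dictMode)
 * instantiation above generates four wrappers, one per mls in [3..6]. The noDict/mls==4
 * wrapper, for example, expands to roughly:
 *
 *   static U32 ZSTD_btGetAllMatches_noDict_4(
 *           ZSTD_match_t* matches, ZSTD_matchState_t* ms, U32* nextToUpdate3,
 *           const BYTE* ip, const BYTE* const iHighLimit,
 *           const U32 rep[ZSTD_REP_NUM], U32 const ll0, U32 const lengthToBeat)
 *   {
 *       return ZSTD_btGetAllMatches_internal(
 *               matches, ms, nextToUpdate3, ip, iHighLimit,
 *               rep, ll0, lengthToBeat, ZSTD_noDict, 4);
 *   }
 */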
879*3117ece4Schristos 
880*3117ece4Schristos #define ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMode)  \
881*3117ece4Schristos     {                                            \
882*3117ece4Schristos         ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 3), \
883*3117ece4Schristos         ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 4), \
884*3117ece4Schristos         ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 5), \
885*3117ece4Schristos         ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 6)  \
886*3117ece4Schristos     }
887*3117ece4Schristos 
888*3117ece4Schristos static ZSTD_getAllMatchesFn
889*3117ece4Schristos ZSTD_selectBtGetAllMatches(ZSTD_matchState_t const* ms, ZSTD_dictMode_e const dictMode)
890*3117ece4Schristos {
891*3117ece4Schristos     ZSTD_getAllMatchesFn const getAllMatchesFns[3][4] = {
892*3117ece4Schristos         ZSTD_BT_GET_ALL_MATCHES_ARRAY(noDict),
893*3117ece4Schristos         ZSTD_BT_GET_ALL_MATCHES_ARRAY(extDict),
894*3117ece4Schristos         ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMatchState)
895*3117ece4Schristos     };
896*3117ece4Schristos     U32 const mls = BOUNDED(3, ms->cParams.minMatch, 6);
897*3117ece4Schristos     assert((U32)dictMode < 3);
898*3117ece4Schristos     assert(mls - 3 < 4);
899*3117ece4Schristos     return getAllMatchesFns[(int)dictMode][mls - 3];
900*3117ece4Schristos }
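/* For illustration: the dispatch above indexes the table by (dictMode, mls).
 * For example, with dictMode==ZSTD_extDict and cParams.minMatch==5 it returns
 * ZSTD_BT_GET_ALL_MATCHES_FN(extDict, 5), i.e. ZSTD_btGetAllMatches_extDict_5;
 * a minMatch outside [3..6] is clamped by BOUNDED() before indexing. */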
901*3117ece4Schristos 
902*3117ece4Schristos /*************************
903*3117ece4Schristos *  LDM helper functions  *
904*3117ece4Schristos *************************/
905*3117ece4Schristos 
906*3117ece4Schristos /* Struct containing the info needed to decide whether to include an ldm candidate */
907*3117ece4Schristos typedef struct {
908*3117ece4Schristos     rawSeqStore_t seqStore;   /* External match candidates store for this block */
909*3117ece4Schristos     U32 startPosInBlock;      /* Start position of the current match candidate */
910*3117ece4Schristos     U32 endPosInBlock;        /* End position of the current match candidate */
911*3117ece4Schristos     U32 offset;               /* Offset of the match candidate */
912*3117ece4Schristos } ZSTD_optLdm_t;
913*3117ece4Schristos 
914*3117ece4Schristos /* ZSTD_optLdm_skipRawSeqStoreBytes():
915*3117ece4Schristos  * Moves forward in @rawSeqStore by @nbBytes,
916*3117ece4Schristos  * which will update the fields 'pos' and 'posInSequence'.
917*3117ece4Schristos  */
918*3117ece4Schristos static void ZSTD_optLdm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes)
919*3117ece4Schristos {
920*3117ece4Schristos     U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes);
921*3117ece4Schristos     while (currPos && rawSeqStore->pos < rawSeqStore->size) {
922*3117ece4Schristos         rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos];
923*3117ece4Schristos         if (currPos >= currSeq.litLength + currSeq.matchLength) {
924*3117ece4Schristos             currPos -= currSeq.litLength + currSeq.matchLength;
925*3117ece4Schristos             rawSeqStore->pos++;
926*3117ece4Schristos         } else {
927*3117ece4Schristos             rawSeqStore->posInSequence = currPos;
928*3117ece4Schristos             break;
929*3117ece4Schristos         }
930*3117ece4Schristos     }
931*3117ece4Schristos     if (currPos == 0 || rawSeqStore->pos == rawSeqStore->size) {
932*3117ece4Schristos         rawSeqStore->posInSequence = 0;
933*3117ece4Schristos     }
934*3117ece4Schristos }
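/* Worked example (illustrative values): starting from pos=0, posInSequence=0,
 * with seq[0]={litLength=5, matchLength=20} and nbBytes=30:
 * currPos becomes 30, the first sequence (25 bytes) is fully consumed (pos=1, currPos=5),
 * then, assuming seq[1] is longer than 5 bytes, posInSequence is set to 5 and the loop stops. */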
935*3117ece4Schristos 
936*3117ece4Schristos /* ZSTD_opt_getNextMatchAndUpdateSeqStore():
937*3117ece4Schristos  * Calculates the beginning and end of the next match in the current block.
938*3117ece4Schristos  * Updates 'pos' and 'posInSequence' of the ldmSeqStore.
939*3117ece4Schristos  */
940*3117ece4Schristos static void
941*3117ece4Schristos ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 currPosInBlock,
942*3117ece4Schristos                                        U32 blockBytesRemaining)
943*3117ece4Schristos {
944*3117ece4Schristos     rawSeq currSeq;
945*3117ece4Schristos     U32 currBlockEndPos;
946*3117ece4Schristos     U32 literalsBytesRemaining;
947*3117ece4Schristos     U32 matchBytesRemaining;
948*3117ece4Schristos 
949*3117ece4Schristos     /* Setting match end position to MAX to ensure we never use an LDM during this block */
950*3117ece4Schristos     if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
951*3117ece4Schristos         optLdm->startPosInBlock = UINT_MAX;
952*3117ece4Schristos         optLdm->endPosInBlock = UINT_MAX;
953*3117ece4Schristos         return;
954*3117ece4Schristos     }
955*3117ece4Schristos     /* Calculate appropriate bytes left in matchLength and litLength
956*3117ece4Schristos      * after adjusting based on ldmSeqStore->posInSequence */
957*3117ece4Schristos     currSeq = optLdm->seqStore.seq[optLdm->seqStore.pos];
958*3117ece4Schristos     assert(optLdm->seqStore.posInSequence <= currSeq.litLength + currSeq.matchLength);
959*3117ece4Schristos     currBlockEndPos = currPosInBlock + blockBytesRemaining;
960*3117ece4Schristos     literalsBytesRemaining = (optLdm->seqStore.posInSequence < currSeq.litLength) ?
961*3117ece4Schristos             currSeq.litLength - (U32)optLdm->seqStore.posInSequence :
962*3117ece4Schristos             0;
963*3117ece4Schristos     matchBytesRemaining = (literalsBytesRemaining == 0) ?
964*3117ece4Schristos             currSeq.matchLength - ((U32)optLdm->seqStore.posInSequence - currSeq.litLength) :
965*3117ece4Schristos             currSeq.matchLength;
966*3117ece4Schristos 
967*3117ece4Schristos     /* If there are more literal bytes than bytes remaining in block, no ldm is possible */
968*3117ece4Schristos     if (literalsBytesRemaining >= blockBytesRemaining) {
969*3117ece4Schristos         optLdm->startPosInBlock = UINT_MAX;
970*3117ece4Schristos         optLdm->endPosInBlock = UINT_MAX;
971*3117ece4Schristos         ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, blockBytesRemaining);
972*3117ece4Schristos         return;
973*3117ece4Schristos     }
974*3117ece4Schristos 
975*3117ece4Schristos     /* Matches may be < MINMATCH by this process. In that case, we will reject them
976*3117ece4Schristos        later, when deciding whether or not to add the ldm */
977*3117ece4Schristos     optLdm->startPosInBlock = currPosInBlock + literalsBytesRemaining;
978*3117ece4Schristos     optLdm->endPosInBlock = optLdm->startPosInBlock + matchBytesRemaining;
979*3117ece4Schristos     optLdm->offset = currSeq.offset;
980*3117ece4Schristos 
981*3117ece4Schristos     if (optLdm->endPosInBlock > currBlockEndPos) {
982*3117ece4Schristos         /* Match ends after the block ends, we can't use the whole match */
983*3117ece4Schristos         optLdm->endPosInBlock = currBlockEndPos;
984*3117ece4Schristos         ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, currBlockEndPos - currPosInBlock);
985*3117ece4Schristos     } else {
986*3117ece4Schristos             /* Consume a number of bytes equal to the remaining size of the sequence */
987*3117ece4Schristos         ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, literalsBytesRemaining + matchBytesRemaining);
988*3117ece4Schristos     }
989*3117ece4Schristos }
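/* Worked example (illustrative values): with currPosInBlock=100, blockBytesRemaining=900,
 * posInSequence=0 and currSeq={litLength=10, matchLength=50, offset=1234}:
 * literalsBytesRemaining=10 and matchBytesRemaining=50, so startPosInBlock=110,
 * endPosInBlock=160, offset=1234, and 60 bytes are consumed from the raw seq store. */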
990*3117ece4Schristos 
991*3117ece4Schristos /* ZSTD_optLdm_maybeAddMatch():
992*3117ece4Schristos  * Adds a match if it's long enough,
993*3117ece4Schristos  * based on its 'matchStartPosInBlock' and 'matchEndPosInBlock',
994*3117ece4Schristos  * into 'matches'. Maintains the correct ordering of 'matches'.
995*3117ece4Schristos  */
996*3117ece4Schristos static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
997*3117ece4Schristos                                       const ZSTD_optLdm_t* optLdm, U32 currPosInBlock)
998*3117ece4Schristos {
999*3117ece4Schristos     U32 const posDiff = currPosInBlock - optLdm->startPosInBlock;
1000*3117ece4Schristos     /* Note: ZSTD_match_t actually contains offBase and matchLength (before subtracting MINMATCH) */
1001*3117ece4Schristos     U32 const candidateMatchLength = optLdm->endPosInBlock - optLdm->startPosInBlock - posDiff;
1002*3117ece4Schristos 
1003*3117ece4Schristos     /* Ensure that current block position is not outside of the match */
1004*3117ece4Schristos     if (currPosInBlock < optLdm->startPosInBlock
1005*3117ece4Schristos       || currPosInBlock >= optLdm->endPosInBlock
1006*3117ece4Schristos       || candidateMatchLength < MINMATCH) {
1007*3117ece4Schristos         return;
1008*3117ece4Schristos     }
1009*3117ece4Schristos 
1010*3117ece4Schristos     if (*nbMatches == 0 || ((candidateMatchLength > matches[*nbMatches-1].len) && *nbMatches < ZSTD_OPT_NUM)) {
1011*3117ece4Schristos         U32 const candidateOffBase = OFFSET_TO_OFFBASE(optLdm->offset);
1012*3117ece4Schristos         DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (offBase: %u matchLength %u) at block position=%u",
1013*3117ece4Schristos                  candidateOffBase, candidateMatchLength, currPosInBlock);
1014*3117ece4Schristos         matches[*nbMatches].len = candidateMatchLength;
1015*3117ece4Schristos         matches[*nbMatches].off = candidateOffBase;
1016*3117ece4Schristos         (*nbMatches)++;
1017*3117ece4Schristos     }
1018*3117ece4Schristos }
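/* Worked example (illustrative values): with startPosInBlock=110, endPosInBlock=160
 * and currPosInBlock=120: posDiff=10 and candidateMatchLength=40 (the part of the ldm
 * still ahead of the current position); it is appended only if 'matches' is empty,
 * or if it is longer than the current longest match and *nbMatches < ZSTD_OPT_NUM. */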
1019*3117ece4Schristos 
1020*3117ece4Schristos /* ZSTD_optLdm_processMatchCandidate():
1021*3117ece4Schristos  * Wrapper function to update ldm seq store and call ldm functions as necessary.
1022*3117ece4Schristos  */
1023*3117ece4Schristos static void
1024*3117ece4Schristos ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm,
1025*3117ece4Schristos                                   ZSTD_match_t* matches, U32* nbMatches,
1026*3117ece4Schristos                                   U32 currPosInBlock, U32 remainingBytes)
1027*3117ece4Schristos {
1028*3117ece4Schristos     if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
1029*3117ece4Schristos         return;
1030*3117ece4Schristos     }
1031*3117ece4Schristos 
1032*3117ece4Schristos     if (currPosInBlock >= optLdm->endPosInBlock) {
1033*3117ece4Schristos         if (currPosInBlock > optLdm->endPosInBlock) {
1034*3117ece4Schristos             /* The position at which ZSTD_optLdm_processMatchCandidate() is called is not necessarily
1035*3117ece4Schristos              * at the end of a match from the ldm seq store, and will often be some bytes
1036*3117ece4Schristos              * beyond matchEndPosInBlock. As such, we need to correct for these "overshoots"
1037*3117ece4Schristos              */
1038*3117ece4Schristos             U32 const posOvershoot = currPosInBlock - optLdm->endPosInBlock;
1039*3117ece4Schristos             ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, posOvershoot);
1040*3117ece4Schristos         }
1041*3117ece4Schristos         ZSTD_opt_getNextMatchAndUpdateSeqStore(optLdm, currPosInBlock, remainingBytes);
1042*3117ece4Schristos     }
1043*3117ece4Schristos     ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock);
1044*3117ece4Schristos }
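/* For illustration: if the previous candidate ended at endPosInBlock=160 but the parser
 * only calls back in at currPosInBlock=165, the 5-byte overshoot is first skipped in the
 * raw seq store, the next candidate is fetched, and only then is a match possibly added. */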
1045*3117ece4Schristos 
1046*3117ece4Schristos 
1047*3117ece4Schristos /*-*******************************
1048*3117ece4Schristos *  Optimal parser
1049*3117ece4Schristos *********************************/
1050*3117ece4Schristos 
1051*3117ece4Schristos #if 0 /* debug */
1052*3117ece4Schristos 
1053*3117ece4Schristos static void
1054*3117ece4Schristos listStats(const U32* table, int lastEltID)
1055*3117ece4Schristos {
1056*3117ece4Schristos     int const nbElts = lastEltID + 1;
1057*3117ece4Schristos     int enb;
1058*3117ece4Schristos     for (enb=0; enb < nbElts; enb++) {
1059*3117ece4Schristos         (void)table;
1060*3117ece4Schristos         /* RAWLOG(2, "%3i:%3i,  ", enb, table[enb]); */
1061*3117ece4Schristos         RAWLOG(2, "%4i,", table[enb]);
1062*3117ece4Schristos     }
1063*3117ece4Schristos     RAWLOG(2, " \n");
1064*3117ece4Schristos }
1065*3117ece4Schristos 
1066*3117ece4Schristos #endif
1067*3117ece4Schristos 
1068*3117ece4Schristos #define LIT_PRICE(_p) (int)ZSTD_rawLiteralsCost(_p, 1, optStatePtr, optLevel)
1069*3117ece4Schristos #define LL_PRICE(_l) (int)ZSTD_litLengthPrice(_l, optStatePtr, optLevel)
1070*3117ece4Schristos #define LL_INCPRICE(_l) (LL_PRICE(_l) - LL_PRICE(_l-1))
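/* For illustration: LL_INCPRICE(_l) is the marginal cost of growing a literals run
 * from (_l - 1) to _l literals. It can be negative: the parser below checks
 * LL_INCPRICE(1) < 0 to detect when a litLength of 1 is cheaper than a litLength of 0. */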
1071*3117ece4Schristos 
1072*3117ece4Schristos FORCE_INLINE_TEMPLATE
1073*3117ece4Schristos ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
1074*3117ece4Schristos size_t
1075*3117ece4Schristos ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
1076*3117ece4Schristos                                seqStore_t* seqStore,
1077*3117ece4Schristos                                U32 rep[ZSTD_REP_NUM],
1078*3117ece4Schristos                          const void* src, size_t srcSize,
1079*3117ece4Schristos                          const int optLevel,
1080*3117ece4Schristos                          const ZSTD_dictMode_e dictMode)
1081*3117ece4Schristos {
1082*3117ece4Schristos     optState_t* const optStatePtr = &ms->opt;
1083*3117ece4Schristos     const BYTE* const istart = (const BYTE*)src;
1084*3117ece4Schristos     const BYTE* ip = istart;
1085*3117ece4Schristos     const BYTE* anchor = istart;
1086*3117ece4Schristos     const BYTE* const iend = istart + srcSize;
1087*3117ece4Schristos     const BYTE* const ilimit = iend - 8;
1088*3117ece4Schristos     const BYTE* const base = ms->window.base;
1089*3117ece4Schristos     const BYTE* const prefixStart = base + ms->window.dictLimit;
1090*3117ece4Schristos     const ZSTD_compressionParameters* const cParams = &ms->cParams;
1091*3117ece4Schristos 
1092*3117ece4Schristos     ZSTD_getAllMatchesFn getAllMatches = ZSTD_selectBtGetAllMatches(ms, dictMode);
1093*3117ece4Schristos 
1094*3117ece4Schristos     U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
1095*3117ece4Schristos     U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4;
1096*3117ece4Schristos     U32 nextToUpdate3 = ms->nextToUpdate;
1097*3117ece4Schristos 
1098*3117ece4Schristos     ZSTD_optimal_t* const opt = optStatePtr->priceTable;
1099*3117ece4Schristos     ZSTD_match_t* const matches = optStatePtr->matchTable;
1100*3117ece4Schristos     ZSTD_optimal_t lastStretch;
1101*3117ece4Schristos     ZSTD_optLdm_t optLdm;
1102*3117ece4Schristos 
1103*3117ece4Schristos     ZSTD_memset(&lastStretch, 0, sizeof(ZSTD_optimal_t));
1104*3117ece4Schristos 
1105*3117ece4Schristos     optLdm.seqStore = ms->ldmSeqStore ? *ms->ldmSeqStore : kNullRawSeqStore;
1106*3117ece4Schristos     optLdm.endPosInBlock = optLdm.startPosInBlock = optLdm.offset = 0;
1107*3117ece4Schristos     ZSTD_opt_getNextMatchAndUpdateSeqStore(&optLdm, (U32)(ip-istart), (U32)(iend-ip));
1108*3117ece4Schristos 
1109*3117ece4Schristos     /* init */
1110*3117ece4Schristos     DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u",
1111*3117ece4Schristos                 (U32)(ip - base), ms->window.dictLimit, ms->nextToUpdate);
1112*3117ece4Schristos     assert(optLevel <= 2);
1113*3117ece4Schristos     ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize, optLevel);
1114*3117ece4Schristos     ip += (ip==prefixStart);
1115*3117ece4Schristos 
1116*3117ece4Schristos     /* Match Loop */
1117*3117ece4Schristos     while (ip < ilimit) {
1118*3117ece4Schristos         U32 cur, last_pos = 0;
1119*3117ece4Schristos 
1120*3117ece4Schristos         /* find first match */
1121*3117ece4Schristos         {   U32 const litlen = (U32)(ip - anchor);
1122*3117ece4Schristos             U32 const ll0 = !litlen;
1123*3117ece4Schristos             U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, ip, iend, rep, ll0, minMatch);
1124*3117ece4Schristos             ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
1125*3117ece4Schristos                                               (U32)(ip-istart), (U32)(iend-ip));
1126*3117ece4Schristos             if (!nbMatches) {
1127*3117ece4Schristos                 DEBUGLOG(8, "no match found at cPos %u", (unsigned)(ip-istart));
1128*3117ece4Schristos                 ip++;
1129*3117ece4Schristos                 continue;
1130*3117ece4Schristos             }
1131*3117ece4Schristos 
1132*3117ece4Schristos             /* Match found: let's store this solution, and possibly find more candidates.
1133*3117ece4Schristos              * During this forward pass, @opt is used to store stretches,
1134*3117ece4Schristos              * defined as "a match followed by N literals".
1135*3117ece4Schristos              * Note how this is different from a Sequence, which is "N literals followed by a match".
1136*3117ece4Schristos              * Storing stretches allows us to record a different match predecessor
1137*3117ece4Schristos              * for each literal position within a literals run. */
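            /* Illustrative example: if the optimal path over this region is
             *   [3 literals][match A][2 literals][match B],
             * the forward pass represents it as opt[0]={mlen=0, litlen=3} plus the stretches
             * (match A followed by 2 literals) and (match B followed by 0 literals);
             * the reverse traversal at _shortestPath later rewrites these as the sequences
             * (3 literals + match A) then (2 literals + match B). */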
1138*3117ece4Schristos 
1139*3117ece4Schristos             /* initialize opt[0] */
1140*3117ece4Schristos             opt[0].mlen = 0;  /* there are only literals so far */
1141*3117ece4Schristos             opt[0].litlen = litlen;
1142*3117ece4Schristos             /* No need to include the actual price of the literals before the first match
1143*3117ece4Schristos              * because it is static for the duration of the forward pass, and is included
1144*3117ece4Schristos              * in every subsequent price. But we do include the literal length, because
1145*3117ece4Schristos              * the marginal cost of additional literals depends on the current value of litlen.
1146*3117ece4Schristos              */
1147*3117ece4Schristos             opt[0].price = LL_PRICE(litlen);
1148*3117ece4Schristos             ZSTD_STATIC_ASSERT(sizeof(opt[0].rep[0]) == sizeof(rep[0]));
1149*3117ece4Schristos             ZSTD_memcpy(&opt[0].rep, rep, sizeof(opt[0].rep));
1150*3117ece4Schristos 
1151*3117ece4Schristos             /* large match -> immediate encoding */
1152*3117ece4Schristos             {   U32 const maxML = matches[nbMatches-1].len;
1153*3117ece4Schristos                 U32 const maxOffBase = matches[nbMatches-1].off;
1154*3117ece4Schristos                 DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffBase=%u at cPos=%u => start new series",
1155*3117ece4Schristos                             nbMatches, maxML, maxOffBase, (U32)(ip-prefixStart));
1156*3117ece4Schristos 
1157*3117ece4Schristos                 if (maxML > sufficient_len) {
1158*3117ece4Schristos                     lastStretch.litlen = 0;
1159*3117ece4Schristos                     lastStretch.mlen = maxML;
1160*3117ece4Schristos                     lastStretch.off = maxOffBase;
1161*3117ece4Schristos                     DEBUGLOG(6, "large match (%u>%u) => immediate encoding",
1162*3117ece4Schristos                                 maxML, sufficient_len);
1163*3117ece4Schristos                     cur = 0;
1164*3117ece4Schristos                     last_pos = maxML;
1165*3117ece4Schristos                     goto _shortestPath;
1166*3117ece4Schristos             }   }
1167*3117ece4Schristos 
1168*3117ece4Schristos             /* set prices for first matches starting position == 0 */
1169*3117ece4Schristos             assert(opt[0].price >= 0);
1170*3117ece4Schristos             {   U32 pos;
1171*3117ece4Schristos                 U32 matchNb;
1172*3117ece4Schristos                 for (pos = 1; pos < minMatch; pos++) {
1173*3117ece4Schristos                     opt[pos].price = ZSTD_MAX_PRICE;
1174*3117ece4Schristos                     opt[pos].mlen = 0;
1175*3117ece4Schristos                     opt[pos].litlen = litlen + pos;
1176*3117ece4Schristos                 }
1177*3117ece4Schristos                 for (matchNb = 0; matchNb < nbMatches; matchNb++) {
1178*3117ece4Schristos                     U32 const offBase = matches[matchNb].off;
1179*3117ece4Schristos                     U32 const end = matches[matchNb].len;
1180*3117ece4Schristos                     for ( ; pos <= end ; pos++ ) {
1181*3117ece4Schristos                         int const matchPrice = (int)ZSTD_getMatchPrice(offBase, pos, optStatePtr, optLevel);
1182*3117ece4Schristos                         int const sequencePrice = opt[0].price + matchPrice;
1183*3117ece4Schristos                         DEBUGLOG(7, "rPos:%u => set initial price : %.2f",
1184*3117ece4Schristos                                     pos, ZSTD_fCost(sequencePrice));
1185*3117ece4Schristos                         opt[pos].mlen = pos;
1186*3117ece4Schristos                         opt[pos].off = offBase;
1187*3117ece4Schristos                         opt[pos].litlen = 0; /* end of match */
1188*3117ece4Schristos                         opt[pos].price = sequencePrice + LL_PRICE(0);
1189*3117ece4Schristos                     }
1190*3117ece4Schristos                 }
1191*3117ece4Schristos                 last_pos = pos-1;
1192*3117ece4Schristos                 opt[pos].price = ZSTD_MAX_PRICE;
1193*3117ece4Schristos             }
1194*3117ece4Schristos         }
1195*3117ece4Schristos 
1196*3117ece4Schristos         /* check further positions */
1197*3117ece4Schristos         for (cur = 1; cur <= last_pos; cur++) {
1198*3117ece4Schristos             const BYTE* const inr = ip + cur;
1199*3117ece4Schristos             assert(cur <= ZSTD_OPT_NUM);
1200*3117ece4Schristos             DEBUGLOG(7, "cPos:%zi==rPos:%u", inr-istart, cur);
1201*3117ece4Schristos 
1202*3117ece4Schristos             /* Fix current position with one literal if cheaper */
1203*3117ece4Schristos             {   U32 const litlen = opt[cur-1].litlen + 1;
1204*3117ece4Schristos                 int const price = opt[cur-1].price
1205*3117ece4Schristos                                 + LIT_PRICE(ip+cur-1)
1206*3117ece4Schristos                                 + LL_INCPRICE(litlen);
1207*3117ece4Schristos                 assert(price < 1000000000); /* overflow check */
1208*3117ece4Schristos                 if (price <= opt[cur].price) {
1209*3117ece4Schristos                     ZSTD_optimal_t const prevMatch = opt[cur];
1210*3117ece4Schristos                     DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)",
1211*3117ece4Schristos                                 inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen,
1212*3117ece4Schristos                                 opt[cur-1].rep[0], opt[cur-1].rep[1], opt[cur-1].rep[2]);
1213*3117ece4Schristos                     opt[cur] = opt[cur-1];
1214*3117ece4Schristos                     opt[cur].litlen = litlen;
1215*3117ece4Schristos                     opt[cur].price = price;
1216*3117ece4Schristos                     if ( (optLevel >= 1) /* additional check only for higher modes */
1217*3117ece4Schristos                       && (prevMatch.litlen == 0) /* replace a match */
1218*3117ece4Schristos                       && (LL_INCPRICE(1) < 0) /* ll1 is cheaper than ll0 */
1219*3117ece4Schristos                       && LIKELY(ip + cur < iend)
1220*3117ece4Schristos                     ) {
1221*3117ece4Schristos                         /* check next position, in case it would be cheaper */
1222*3117ece4Schristos                         int with1literal = prevMatch.price + LIT_PRICE(ip+cur) + LL_INCPRICE(1);
1223*3117ece4Schristos                         int withMoreLiterals = price + LIT_PRICE(ip+cur) + LL_INCPRICE(litlen+1);
1224*3117ece4Schristos                         DEBUGLOG(7, "then at next rPos %u : match+1lit %.2f vs %ulits %.2f",
1225*3117ece4Schristos                                 cur+1, ZSTD_fCost(with1literal), litlen+1, ZSTD_fCost(withMoreLiterals));
1226*3117ece4Schristos                         if ( (with1literal < withMoreLiterals)
1227*3117ece4Schristos                           && (with1literal < opt[cur+1].price) ) {
1228*3117ece4Schristos                             /* update offset history - before it disappears */
1229*3117ece4Schristos                             U32 const prev = cur - prevMatch.mlen;
1230*3117ece4Schristos                             repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, prevMatch.off, opt[prev].litlen==0);
1231*3117ece4Schristos                             assert(cur >= prevMatch.mlen);
1232*3117ece4Schristos                             DEBUGLOG(7, "==> match+1lit is cheaper (%.2f < %.2f) (hist:%u,%u,%u) !",
1233*3117ece4Schristos                                         ZSTD_fCost(with1literal), ZSTD_fCost(withMoreLiterals),
1234*3117ece4Schristos                                         newReps.rep[0], newReps.rep[1], newReps.rep[2] );
1235*3117ece4Schristos                             opt[cur+1] = prevMatch;  /* mlen & offbase */
1236*3117ece4Schristos                             ZSTD_memcpy(opt[cur+1].rep, &newReps, sizeof(repcodes_t));
1237*3117ece4Schristos                             opt[cur+1].litlen = 1;
1238*3117ece4Schristos                             opt[cur+1].price = with1literal;
1239*3117ece4Schristos                             if (last_pos < cur+1) last_pos = cur+1;
1240*3117ece4Schristos                         }
1241*3117ece4Schristos                     }
1242*3117ece4Schristos                 } else {
1243*3117ece4Schristos                     DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f)",
1244*3117ece4Schristos                                 inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price));
1245*3117ece4Schristos                 }
1246*3117ece4Schristos             }
1247*3117ece4Schristos 
1248*3117ece4Schristos             /* Offset history is not updated during match comparison.
1249*3117ece4Schristos              * Do it here, now that the match is selected and confirmed.
1250*3117ece4Schristos              */
1251*3117ece4Schristos             ZSTD_STATIC_ASSERT(sizeof(opt[cur].rep) == sizeof(repcodes_t));
1252*3117ece4Schristos             assert(cur >= opt[cur].mlen);
1253*3117ece4Schristos             if (opt[cur].litlen == 0) {
1254*3117ece4Schristos                 /* just finished a match => alter offset history */
1255*3117ece4Schristos                 U32 const prev = cur - opt[cur].mlen;
1256*3117ece4Schristos                 repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, opt[cur].off, opt[prev].litlen==0);
1257*3117ece4Schristos                 ZSTD_memcpy(opt[cur].rep, &newReps, sizeof(repcodes_t));
1258*3117ece4Schristos             }
1259*3117ece4Schristos 
1260*3117ece4Schristos             /* last match must start at a minimum distance of 8 from oend */
1261*3117ece4Schristos             if (inr > ilimit) continue;
1262*3117ece4Schristos 
1263*3117ece4Schristos             if (cur == last_pos) break;
1264*3117ece4Schristos 
1265*3117ece4Schristos             if ( (optLevel==0) /*static_test*/
1266*3117ece4Schristos               && (opt[cur+1].price <= opt[cur].price + (BITCOST_MULTIPLIER/2)) ) {
1267*3117ece4Schristos                 DEBUGLOG(7, "skip current position : next rPos(%u) price is cheaper", cur+1);
1268*3117ece4Schristos                 continue;  /* skip unpromising positions; about ~+6% speed, -0.01 ratio */
1269*3117ece4Schristos             }
1270*3117ece4Schristos 
1271*3117ece4Schristos             assert(opt[cur].price >= 0);
1272*3117ece4Schristos             {   U32 const ll0 = (opt[cur].litlen == 0);
1273*3117ece4Schristos                 int const previousPrice = opt[cur].price;
1274*3117ece4Schristos                 int const basePrice = previousPrice + LL_PRICE(0);
1275*3117ece4Schristos                 U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, inr, iend, opt[cur].rep, ll0, minMatch);
1276*3117ece4Schristos                 U32 matchNb;
1277*3117ece4Schristos 
1278*3117ece4Schristos                 ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
1279*3117ece4Schristos                                                   (U32)(inr-istart), (U32)(iend-inr));
1280*3117ece4Schristos 
1281*3117ece4Schristos                 if (!nbMatches) {
1282*3117ece4Schristos                     DEBUGLOG(7, "rPos:%u : no match found", cur);
1283*3117ece4Schristos                     continue;
1284*3117ece4Schristos                 }
1285*3117ece4Schristos 
1286*3117ece4Schristos                 {   U32 const longestML = matches[nbMatches-1].len;
1287*3117ece4Schristos                     DEBUGLOG(7, "cPos:%zi==rPos:%u, found %u matches, of longest ML=%u",
1288*3117ece4Schristos                                 inr-istart, cur, nbMatches, longestML);
1289*3117ece4Schristos 
1290*3117ece4Schristos                     if ( (longestML > sufficient_len)
1291*3117ece4Schristos                       || (cur + longestML >= ZSTD_OPT_NUM)
1292*3117ece4Schristos                       || (ip + cur + longestML >= iend) ) {
1293*3117ece4Schristos                         lastStretch.mlen = longestML;
1294*3117ece4Schristos                         lastStretch.off = matches[nbMatches-1].off;
1295*3117ece4Schristos                         lastStretch.litlen = 0;
1296*3117ece4Schristos                         last_pos = cur + longestML;
1297*3117ece4Schristos                         goto _shortestPath;
1298*3117ece4Schristos                 }   }
1299*3117ece4Schristos 
1300*3117ece4Schristos                 /* set prices using matches found at position == cur */
1301*3117ece4Schristos                 for (matchNb = 0; matchNb < nbMatches; matchNb++) {
1302*3117ece4Schristos                     U32 const offset = matches[matchNb].off;
1303*3117ece4Schristos                     U32 const lastML = matches[matchNb].len;
1304*3117ece4Schristos                     U32 const startML = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch;
1305*3117ece4Schristos                     U32 mlen;
1306*3117ece4Schristos 
1307*3117ece4Schristos                     DEBUGLOG(7, "testing match %u => offBase=%4u, mlen=%2u, llen=%2u",
1308*3117ece4Schristos                                 matchNb, matches[matchNb].off, lastML, opt[cur].litlen);
1309*3117ece4Schristos 
1310*3117ece4Schristos                     for (mlen = lastML; mlen >= startML; mlen--) {  /* scan downward */
1311*3117ece4Schristos                         U32 const pos = cur + mlen;
1312*3117ece4Schristos                         int const price = basePrice + (int)ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
1313*3117ece4Schristos 
1314*3117ece4Schristos                         if ((pos > last_pos) || (price < opt[pos].price)) {
1315*3117ece4Schristos                             DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)",
1316*3117ece4Schristos                                         pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
1317*3117ece4Schristos                             while (last_pos < pos) {
1318*3117ece4Schristos                                 /* fill empty positions, for future comparisons */
1319*3117ece4Schristos                                 last_pos++;
1320*3117ece4Schristos                                 opt[last_pos].price = ZSTD_MAX_PRICE;
1321*3117ece4Schristos                                 opt[last_pos].litlen = !0;  /* just needs to be != 0, to mean "not an end of match" */
1322*3117ece4Schristos                             }
1323*3117ece4Schristos                             opt[pos].mlen = mlen;
1324*3117ece4Schristos                             opt[pos].off = offset;
1325*3117ece4Schristos                             opt[pos].litlen = 0;
1326*3117ece4Schristos                             opt[pos].price = price;
1327*3117ece4Schristos                         } else {
1328*3117ece4Schristos                             DEBUGLOG(7, "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)",
1329*3117ece4Schristos                                         pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
1330*3117ece4Schristos                             if (optLevel==0) break;  /* early update abort; gets ~+10% speed for about -0.01 ratio loss */
1331*3117ece4Schristos                         }
1332*3117ece4Schristos             }   }   }
1333*3117ece4Schristos             opt[last_pos+1].price = ZSTD_MAX_PRICE;
1334*3117ece4Schristos         }  /* for (cur = 1; cur <= last_pos; cur++) */
1335*3117ece4Schristos 
1336*3117ece4Schristos         lastStretch = opt[last_pos];
1337*3117ece4Schristos         assert(cur >= lastStretch.mlen);
1338*3117ece4Schristos         cur = last_pos - lastStretch.mlen;
1339*3117ece4Schristos 
1340*3117ece4Schristos _shortestPath:   /* cur, last_pos, best_mlen, best_off have to be set */
1341*3117ece4Schristos         assert(opt[0].mlen == 0);
1342*3117ece4Schristos         assert(last_pos >= lastStretch.mlen);
1343*3117ece4Schristos         assert(cur == last_pos - lastStretch.mlen);
1344*3117ece4Schristos 
1345*3117ece4Schristos         if (lastStretch.mlen==0) {
1346*3117ece4Schristos             /* no solution : all matches have been converted into literals */
1347*3117ece4Schristos             assert(lastStretch.litlen == (ip - anchor) + last_pos);
1348*3117ece4Schristos             ip += last_pos;
1349*3117ece4Schristos             continue;
1350*3117ece4Schristos         }
1351*3117ece4Schristos         assert(lastStretch.off > 0);
1352*3117ece4Schristos 
1353*3117ece4Schristos         /* Update offset history */
1354*3117ece4Schristos         if (lastStretch.litlen == 0) {
1355*3117ece4Schristos             /* finishing on a match : update offset history */
1356*3117ece4Schristos             repcodes_t const reps = ZSTD_newRep(opt[cur].rep, lastStretch.off, opt[cur].litlen==0);
1357*3117ece4Schristos             ZSTD_memcpy(rep, &reps, sizeof(repcodes_t));
1358*3117ece4Schristos         } else {
1359*3117ece4Schristos             ZSTD_memcpy(rep, lastStretch.rep, sizeof(repcodes_t));
1360*3117ece4Schristos             assert(cur >= lastStretch.litlen);
1361*3117ece4Schristos             cur -= lastStretch.litlen;
1362*3117ece4Schristos         }
1363*3117ece4Schristos 
1364*3117ece4Schristos         /* Let's write the shortest path solution.
1365*3117ece4Schristos          * It is stored in @opt in reverse order,
1366*3117ece4Schristos          * starting from @storeEnd (==cur+2),
1367*3117ece4Schristos          * effectively overwriting part of @opt.
1368*3117ece4Schristos          * Content is changed too:
1369*3117ece4Schristos          * - So far, @opt stored stretches, aka a match followed by literals
1370*3117ece4Schristos          * - Now, it will store sequences, aka literals followed by a match
1371*3117ece4Schristos          */
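        /* Illustrative sketch of the layout below: the last stretch lands in opt[storeEnd]
         * (or in opt[storeEnd-1], with a trailing pure-literals entry kept in opt[storeEnd]),
         * and each earlier stretch is stacked one slot below while stretchPos walks backward
         * through the forward-pass entries; opt[storeStart..storeEnd] then holds the
         * sequences in forward order, ready to be sent to the seqStore. */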
1372*3117ece4Schristos         {   U32 const storeEnd = cur + 2;
1373*3117ece4Schristos             U32 storeStart = storeEnd;
1374*3117ece4Schristos             U32 stretchPos = cur;
1375*3117ece4Schristos 
1376*3117ece4Schristos             DEBUGLOG(6, "start reverse traversal (last_pos:%u, cur:%u)",
1377*3117ece4Schristos                         last_pos, cur); (void)last_pos;
1378*3117ece4Schristos             assert(storeEnd < ZSTD_OPT_SIZE);
1379*3117ece4Schristos             DEBUGLOG(6, "last stretch copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
1380*3117ece4Schristos                         storeEnd, lastStretch.litlen, lastStretch.mlen, lastStretch.off);
1381*3117ece4Schristos             if (lastStretch.litlen > 0) {
1382*3117ece4Schristos                 /* last "sequence" is unfinished: just a bunch of literals */
1383*3117ece4Schristos                 opt[storeEnd].litlen = lastStretch.litlen;
1384*3117ece4Schristos                 opt[storeEnd].mlen = 0;
1385*3117ece4Schristos                 storeStart = storeEnd-1;
1386*3117ece4Schristos                 opt[storeStart] = lastStretch;
1387*3117ece4Schristos             } else {
1388*3117ece4Schristos                 opt[storeEnd] = lastStretch;  /* note: litlen will be fixed */
1389*3117ece4Schristos                 storeStart = storeEnd;
1390*3117ece4Schristos             }
1391*3117ece4Schristos             while (1) {
1392*3117ece4Schristos                 ZSTD_optimal_t nextStretch = opt[stretchPos];
1393*3117ece4Schristos                 opt[storeStart].litlen = nextStretch.litlen;
1394*3117ece4Schristos                 DEBUGLOG(6, "selected sequence (llen=%u,mlen=%u,ofc=%u)",
1395*3117ece4Schristos                             opt[storeStart].litlen, opt[storeStart].mlen, opt[storeStart].off);
1396*3117ece4Schristos                 if (nextStretch.mlen == 0) {
1397*3117ece4Schristos                     /* reaching beginning of segment */
1398*3117ece4Schristos                     break;
1399*3117ece4Schristos                 }
1400*3117ece4Schristos                 storeStart--;
1401*3117ece4Schristos                 opt[storeStart] = nextStretch; /* note: litlen will be fixed */
1402*3117ece4Schristos                 assert(nextStretch.litlen + nextStretch.mlen <= stretchPos);
1403*3117ece4Schristos                 stretchPos -= nextStretch.litlen + nextStretch.mlen;
1404*3117ece4Schristos             }
1405*3117ece4Schristos 
1406*3117ece4Schristos             /* save sequences */
1407*3117ece4Schristos             DEBUGLOG(6, "sending selected sequences into seqStore");
1408*3117ece4Schristos             {   U32 storePos;
1409*3117ece4Schristos                 for (storePos=storeStart; storePos <= storeEnd; storePos++) {
1410*3117ece4Schristos                     U32 const llen = opt[storePos].litlen;
1411*3117ece4Schristos                     U32 const mlen = opt[storePos].mlen;
1412*3117ece4Schristos                     U32 const offBase = opt[storePos].off;
1413*3117ece4Schristos                     U32 const advance = llen + mlen;
1414*3117ece4Schristos                     DEBUGLOG(6, "considering seq starting at %zi, llen=%u, mlen=%u",
1415*3117ece4Schristos                                 anchor - istart, (unsigned)llen, (unsigned)mlen);
1416*3117ece4Schristos 
1417*3117ece4Schristos                     if (mlen==0) {  /* only literals => must be last "sequence", actually starting a new stream of sequences */
1418*3117ece4Schristos                         assert(storePos == storeEnd);   /* must be last sequence */
1419*3117ece4Schristos                         ip = anchor + llen;     /* last "sequence" is a bunch of literals => don't progress anchor */
1420*3117ece4Schristos                         continue;   /* will finish */
1421*3117ece4Schristos                     }
1422*3117ece4Schristos 
1423*3117ece4Schristos                     assert(anchor + llen <= iend);
1424*3117ece4Schristos                     ZSTD_updateStats(optStatePtr, llen, anchor, offBase, mlen);
1425*3117ece4Schristos                     ZSTD_storeSeq(seqStore, llen, anchor, iend, offBase, mlen);
1426*3117ece4Schristos                     anchor += advance;
1427*3117ece4Schristos                     ip = anchor;
1428*3117ece4Schristos             }   }
1429*3117ece4Schristos             DEBUGLOG(7, "new offset history : %u, %u, %u", rep[0], rep[1], rep[2]);
1430*3117ece4Schristos 
1431*3117ece4Schristos             /* update all costs */
1432*3117ece4Schristos             ZSTD_setBasePrices(optStatePtr, optLevel);
1433*3117ece4Schristos         }
1434*3117ece4Schristos     }   /* while (ip < ilimit) */
1435*3117ece4Schristos 
1436*3117ece4Schristos     /* Return the last literals size */
1437*3117ece4Schristos     return (size_t)(iend - anchor);
1438*3117ece4Schristos }
1439*3117ece4Schristos #endif /* build exclusions */
1440*3117ece4Schristos 
1441*3117ece4Schristos #ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
1442*3117ece4Schristos static size_t ZSTD_compressBlock_opt0(
1443*3117ece4Schristos         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1444*3117ece4Schristos         const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
1445*3117ece4Schristos {
1446*3117ece4Schristos     return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /* optLevel */, dictMode);
1447*3117ece4Schristos }
1448*3117ece4Schristos #endif
1449*3117ece4Schristos 
1450*3117ece4Schristos #ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
1451*3117ece4Schristos static size_t ZSTD_compressBlock_opt2(
1452*3117ece4Schristos         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1453*3117ece4Schristos         const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
1454*3117ece4Schristos {
1455*3117ece4Schristos     return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /* optLevel */, dictMode);
1456*3117ece4Schristos }
1457*3117ece4Schristos #endif
1458*3117ece4Schristos 
1459*3117ece4Schristos #ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
1460*3117ece4Schristos size_t ZSTD_compressBlock_btopt(
1461*3117ece4Schristos         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1462*3117ece4Schristos         const void* src, size_t srcSize)
1463*3117ece4Schristos {
1464*3117ece4Schristos     DEBUGLOG(5, "ZSTD_compressBlock_btopt");
1465*3117ece4Schristos     return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
1466*3117ece4Schristos }
1467*3117ece4Schristos #endif
1468*3117ece4Schristos 
1469*3117ece4Schristos 
1470*3117ece4Schristos 
1471*3117ece4Schristos 
1472*3117ece4Schristos #ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
1473*3117ece4Schristos /* ZSTD_initStats_ultra():
1474*3117ece4Schristos  * make a first compression pass, just to seed stats with more accurate starting values.
1475*3117ece4Schristos  * only works on the first block, with no dictionary and no ldm.
1476*3117ece4Schristos  * this function cannot error out; its narrow contract must be respected.
1477*3117ece4Schristos  */
1478*3117ece4Schristos static
1479*3117ece4Schristos ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
1480*3117ece4Schristos void ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
1481*3117ece4Schristos                           seqStore_t* seqStore,
1482*3117ece4Schristos                           U32 rep[ZSTD_REP_NUM],
1483*3117ece4Schristos                     const void* src, size_t srcSize)
1484*3117ece4Schristos {
1485*3117ece4Schristos     U32 tmpRep[ZSTD_REP_NUM];  /* updated rep codes will sink here */
1486*3117ece4Schristos     ZSTD_memcpy(tmpRep, rep, sizeof(tmpRep));
1487*3117ece4Schristos 
1488*3117ece4Schristos     DEBUGLOG(4, "ZSTD_initStats_ultra (srcSize=%zu)", srcSize);
1489*3117ece4Schristos     assert(ms->opt.litLengthSum == 0);    /* first block */
1490*3117ece4Schristos     assert(seqStore->sequences == seqStore->sequencesStart);   /* no ldm */
1491*3117ece4Schristos     assert(ms->window.dictLimit == ms->window.lowLimit);   /* no dictionary */
1492*3117ece4Schristos     assert(ms->window.dictLimit - ms->nextToUpdate <= 1);  /* no prefix (note: intentional overflow, defined as two's complement) */
1493*3117ece4Schristos 
1494*3117ece4Schristos     ZSTD_compressBlock_opt2(ms, seqStore, tmpRep, src, srcSize, ZSTD_noDict);   /* generate stats into ms->opt*/
1495*3117ece4Schristos 
1496*3117ece4Schristos     /* invalidate first scan from history, only keep entropy stats */
1497*3117ece4Schristos     ZSTD_resetSeqStore(seqStore);
1498*3117ece4Schristos     ms->window.base -= srcSize;
1499*3117ece4Schristos     ms->window.dictLimit += (U32)srcSize;
1500*3117ece4Schristos     ms->window.lowLimit = ms->window.dictLimit;
1501*3117ece4Schristos     ms->nextToUpdate = ms->window.dictLimit;
1502*3117ece4Schristos 
1503*3117ece4Schristos }
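/* For illustration (hypothetical sizes): after the stats pass over a 128 KB first block,
 * the window manipulation above moves base back by 128 KB and advances dictLimit,
 * lowLimit and nextToUpdate by 128 KB, so the real compression pass that follows sees the
 * block as not-yet-indexed while ms->opt keeps the entropy statistics just collected. */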
1504*3117ece4Schristos 
1505*3117ece4Schristos size_t ZSTD_compressBlock_btultra(
1506*3117ece4Schristos         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1507*3117ece4Schristos         const void* src, size_t srcSize)
1508*3117ece4Schristos {
1509*3117ece4Schristos     DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize);
1510*3117ece4Schristos     return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
1511*3117ece4Schristos }
1512*3117ece4Schristos 
1513*3117ece4Schristos size_t ZSTD_compressBlock_btultra2(
1514*3117ece4Schristos         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1515*3117ece4Schristos         const void* src, size_t srcSize)
1516*3117ece4Schristos {
1517*3117ece4Schristos     U32 const curr = (U32)((const BYTE*)src - ms->window.base);
1518*3117ece4Schristos     DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize);
1519*3117ece4Schristos 
1520*3117ece4Schristos     /* 2-passes strategy:
1521*3117ece4Schristos      * this strategy makes a first pass over first block to collect statistics
1522*3117ece4Schristos      * in order to seed next round's statistics with it.
1523*3117ece4Schristos      * After 1st pass, function forgets history, and starts a new block.
1524*3117ece4Schristos      * Consequently, this can only work if no data has been previously loaded in tables,
1525*3117ece4Schristos      * aka, no dictionary, no prefix, no ldm preprocessing.
1526*3117ece4Schristos      * The compression ratio gain is generally small (~0.5% on first block),
1527*3117ece4Schristos      * the cost is 2x cpu time on first block. */
1528*3117ece4Schristos     assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
1529*3117ece4Schristos     if ( (ms->opt.litLengthSum==0)   /* first block */
1530*3117ece4Schristos       && (seqStore->sequences == seqStore->sequencesStart)  /* no ldm */
1531*3117ece4Schristos       && (ms->window.dictLimit == ms->window.lowLimit)   /* no dictionary */
1532*3117ece4Schristos       && (curr == ms->window.dictLimit)    /* start of frame, nothing already loaded nor skipped */
1533*3117ece4Schristos       && (srcSize > ZSTD_PREDEF_THRESHOLD) /* input large enough to not employ default stats */
1534*3117ece4Schristos       ) {
1535*3117ece4Schristos         ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize);
1536*3117ece4Schristos     }
1537*3117ece4Schristos 
1538*3117ece4Schristos     return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
1539*3117ece4Schristos }
1540*3117ece4Schristos #endif
1541*3117ece4Schristos 
1542*3117ece4Schristos #ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
1543*3117ece4Schristos size_t ZSTD_compressBlock_btopt_dictMatchState(
1544*3117ece4Schristos         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1545*3117ece4Schristos         const void* src, size_t srcSize)
1546*3117ece4Schristos {
1547*3117ece4Schristos     return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
1548*3117ece4Schristos }
1549*3117ece4Schristos 
1550*3117ece4Schristos size_t ZSTD_compressBlock_btopt_extDict(
1551*3117ece4Schristos         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1552*3117ece4Schristos         const void* src, size_t srcSize)
1553*3117ece4Schristos {
1554*3117ece4Schristos     return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
1555*3117ece4Schristos }
1556*3117ece4Schristos #endif
1557*3117ece4Schristos 
1558*3117ece4Schristos #ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
1559*3117ece4Schristos size_t ZSTD_compressBlock_btultra_dictMatchState(
1560*3117ece4Schristos         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1561*3117ece4Schristos         const void* src, size_t srcSize)
1562*3117ece4Schristos {
1563*3117ece4Schristos     return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
1564*3117ece4Schristos }
1565*3117ece4Schristos 
1566*3117ece4Schristos size_t ZSTD_compressBlock_btultra_extDict(
1567*3117ece4Schristos         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1568*3117ece4Schristos         const void* src, size_t srcSize)
1569*3117ece4Schristos {
1570*3117ece4Schristos     return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
1571*3117ece4Schristos }
1572*3117ece4Schristos #endif
1573*3117ece4Schristos 
1574*3117ece4Schristos /* note : no btultra2 variant for extDict nor dictMatchState,
1575*3117ece4Schristos  * because btultra2 is not meant to work with dictionaries
1576*3117ece4Schristos  * and is specific to the first block (no prefix) */
1577