xref: /freebsd-src/sys/contrib/openzfs/module/zstd/lib/compress/zstd_lazy.c (revision c9539b89010900499a200cdd6c0265ea5d950875)
1c03c5b1cSMartin Matuska /*
2c03c5b1cSMartin Matuska  * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
3c03c5b1cSMartin Matuska  * All rights reserved.
4c03c5b1cSMartin Matuska  *
5c03c5b1cSMartin Matuska  * This source code is licensed under both the BSD-style license (found in the
6c03c5b1cSMartin Matuska  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7c03c5b1cSMartin Matuska  * in the COPYING file in the root directory of this source tree).
8c03c5b1cSMartin Matuska  * You may select, at your option, one of the above-listed licenses.
9c03c5b1cSMartin Matuska  */
10c03c5b1cSMartin Matuska 
11c03c5b1cSMartin Matuska #include "zstd_compress_internal.h"
12c03c5b1cSMartin Matuska #include "zstd_lazy.h"
13c03c5b1cSMartin Matuska 
14c03c5b1cSMartin Matuska 
15c03c5b1cSMartin Matuska /*-*************************************
16c03c5b1cSMartin Matuska *  Binary Tree search
17c03c5b1cSMartin Matuska ***************************************/
18c03c5b1cSMartin Matuska 
19c03c5b1cSMartin Matuska static void
ZSTD_updateDUBT(ZSTD_matchState_t * ms,const BYTE * ip,const BYTE * iend,U32 mls)20c03c5b1cSMartin Matuska ZSTD_updateDUBT(ZSTD_matchState_t* ms,
21c03c5b1cSMartin Matuska                 const BYTE* ip, const BYTE* iend,
22c03c5b1cSMartin Matuska                 U32 mls)
23c03c5b1cSMartin Matuska {
24c03c5b1cSMartin Matuska     const ZSTD_compressionParameters* const cParams = &ms->cParams;
25c03c5b1cSMartin Matuska     U32* const hashTable = ms->hashTable;
26c03c5b1cSMartin Matuska     U32  const hashLog = cParams->hashLog;
27c03c5b1cSMartin Matuska 
28c03c5b1cSMartin Matuska     U32* const bt = ms->chainTable;
29c03c5b1cSMartin Matuska     U32  const btLog  = cParams->chainLog - 1;
30c03c5b1cSMartin Matuska     U32  const btMask = (1 << btLog) - 1;
31c03c5b1cSMartin Matuska 
32c03c5b1cSMartin Matuska     const BYTE* const base = ms->window.base;
33c03c5b1cSMartin Matuska     U32 const target = (U32)(ip - base);
34c03c5b1cSMartin Matuska     U32 idx = ms->nextToUpdate;
35c03c5b1cSMartin Matuska 
36c03c5b1cSMartin Matuska     if (idx != target)
37c03c5b1cSMartin Matuska         DEBUGLOG(7, "ZSTD_updateDUBT, from %u to %u (dictLimit:%u)",
38c03c5b1cSMartin Matuska                     idx, target, ms->window.dictLimit);
39c03c5b1cSMartin Matuska     assert(ip + 8 <= iend);   /* condition for ZSTD_hashPtr */
40c03c5b1cSMartin Matuska     (void)iend;
41c03c5b1cSMartin Matuska 
42c03c5b1cSMartin Matuska     assert(idx >= ms->window.dictLimit);   /* condition for valid base+idx */
43c03c5b1cSMartin Matuska     for ( ; idx < target ; idx++) {
44c03c5b1cSMartin Matuska         size_t const h  = ZSTD_hashPtr(base + idx, hashLog, mls);   /* assumption : ip + 8 <= iend */
45c03c5b1cSMartin Matuska         U32    const matchIndex = hashTable[h];
46c03c5b1cSMartin Matuska 
47c03c5b1cSMartin Matuska         U32*   const nextCandidatePtr = bt + 2*(idx&btMask);
48c03c5b1cSMartin Matuska         U32*   const sortMarkPtr  = nextCandidatePtr + 1;
49c03c5b1cSMartin Matuska 
50c03c5b1cSMartin Matuska         DEBUGLOG(8, "ZSTD_updateDUBT: insert %u", idx);
51c03c5b1cSMartin Matuska         hashTable[h] = idx;   /* Update Hash Table */
52c03c5b1cSMartin Matuska         *nextCandidatePtr = matchIndex;   /* update BT like a chain */
53c03c5b1cSMartin Matuska         *sortMarkPtr = ZSTD_DUBT_UNSORTED_MARK;
54c03c5b1cSMartin Matuska     }
55c03c5b1cSMartin Matuska     ms->nextToUpdate = target;
56c03c5b1cSMartin Matuska }
57c03c5b1cSMartin Matuska 
58c03c5b1cSMartin Matuska 
59c03c5b1cSMartin Matuska /** ZSTD_insertDUBT1() :
60c03c5b1cSMartin Matuska  *  sort one already inserted but unsorted position
61c03c5b1cSMartin Matuska  *  assumption : current >= btlow == (current - btmask)
62c03c5b1cSMartin Matuska  *  doesn't fail */
63c03c5b1cSMartin Matuska static void
ZSTD_insertDUBT1(ZSTD_matchState_t * ms,U32 current,const BYTE * inputEnd,U32 nbCompares,U32 btLow,const ZSTD_dictMode_e dictMode)64c03c5b1cSMartin Matuska ZSTD_insertDUBT1(ZSTD_matchState_t* ms,
65c03c5b1cSMartin Matuska                  U32 current, const BYTE* inputEnd,
66c03c5b1cSMartin Matuska                  U32 nbCompares, U32 btLow,
67c03c5b1cSMartin Matuska                  const ZSTD_dictMode_e dictMode)
68c03c5b1cSMartin Matuska {
69c03c5b1cSMartin Matuska     const ZSTD_compressionParameters* const cParams = &ms->cParams;
70c03c5b1cSMartin Matuska     U32* const bt = ms->chainTable;
71c03c5b1cSMartin Matuska     U32  const btLog  = cParams->chainLog - 1;
72c03c5b1cSMartin Matuska     U32  const btMask = (1 << btLog) - 1;
73c03c5b1cSMartin Matuska     size_t commonLengthSmaller=0, commonLengthLarger=0;
74c03c5b1cSMartin Matuska     const BYTE* const base = ms->window.base;
75c03c5b1cSMartin Matuska     const BYTE* const dictBase = ms->window.dictBase;
76c03c5b1cSMartin Matuska     const U32 dictLimit = ms->window.dictLimit;
77c03c5b1cSMartin Matuska     const BYTE* const ip = (current>=dictLimit) ? base + current : dictBase + current;
78c03c5b1cSMartin Matuska     const BYTE* const iend = (current>=dictLimit) ? inputEnd : dictBase + dictLimit;
79c03c5b1cSMartin Matuska     const BYTE* const dictEnd = dictBase + dictLimit;
80c03c5b1cSMartin Matuska     const BYTE* const prefixStart = base + dictLimit;
81c03c5b1cSMartin Matuska     const BYTE* match;
82c03c5b1cSMartin Matuska     U32* smallerPtr = bt + 2*(current&btMask);
83c03c5b1cSMartin Matuska     U32* largerPtr  = smallerPtr + 1;
84c03c5b1cSMartin Matuska     U32 matchIndex = *smallerPtr;   /* this candidate is unsorted : next sorted candidate is reached through *smallerPtr, while *largerPtr contains previous unsorted candidate (which is already saved and can be overwritten) */
85c03c5b1cSMartin Matuska     U32 dummy32;   /* to be nullified at the end */
86c03c5b1cSMartin Matuska     U32 const windowValid = ms->window.lowLimit;
87c03c5b1cSMartin Matuska     U32 const maxDistance = 1U << cParams->windowLog;
88c03c5b1cSMartin Matuska     U32 const windowLow = (current - windowValid > maxDistance) ? current - maxDistance : windowValid;
89c03c5b1cSMartin Matuska 
90c03c5b1cSMartin Matuska 
91c03c5b1cSMartin Matuska     DEBUGLOG(8, "ZSTD_insertDUBT1(%u) (dictLimit=%u, lowLimit=%u)",
92c03c5b1cSMartin Matuska                 current, dictLimit, windowLow);
93c03c5b1cSMartin Matuska     assert(current >= btLow);
94c03c5b1cSMartin Matuska     assert(ip < iend);   /* condition for ZSTD_count */
95c03c5b1cSMartin Matuska 
96c03c5b1cSMartin Matuska     while (nbCompares-- && (matchIndex > windowLow)) {
97c03c5b1cSMartin Matuska         U32* const nextPtr = bt + 2*(matchIndex & btMask);
98c03c5b1cSMartin Matuska         size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
99c03c5b1cSMartin Matuska         assert(matchIndex < current);
100c03c5b1cSMartin Matuska         /* note : all candidates are now supposed sorted,
101c03c5b1cSMartin Matuska          * but it's still possible to have nextPtr[1] == ZSTD_DUBT_UNSORTED_MARK
102c03c5b1cSMartin Matuska          * when a real index has the same value as ZSTD_DUBT_UNSORTED_MARK */
103c03c5b1cSMartin Matuska 
104c03c5b1cSMartin Matuska         if ( (dictMode != ZSTD_extDict)
105c03c5b1cSMartin Matuska           || (matchIndex+matchLength >= dictLimit)  /* both in current segment*/
106c03c5b1cSMartin Matuska           || (current < dictLimit) /* both in extDict */) {
107c03c5b1cSMartin Matuska             const BYTE* const mBase = ( (dictMode != ZSTD_extDict)
108c03c5b1cSMartin Matuska                                      || (matchIndex+matchLength >= dictLimit)) ?
109c03c5b1cSMartin Matuska                                         base : dictBase;
110c03c5b1cSMartin Matuska             assert( (matchIndex+matchLength >= dictLimit)   /* might be wrong if extDict is incorrectly set to 0 */
111c03c5b1cSMartin Matuska                  || (current < dictLimit) );
112c03c5b1cSMartin Matuska             match = mBase + matchIndex;
113c03c5b1cSMartin Matuska             matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
114c03c5b1cSMartin Matuska         } else {
115c03c5b1cSMartin Matuska             match = dictBase + matchIndex;
116c03c5b1cSMartin Matuska             matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
117c03c5b1cSMartin Matuska             if (matchIndex+matchLength >= dictLimit)
118c03c5b1cSMartin Matuska                 match = base + matchIndex;   /* preparation for next read of match[matchLength] */
119c03c5b1cSMartin Matuska         }
120c03c5b1cSMartin Matuska 
121c03c5b1cSMartin Matuska         DEBUGLOG(8, "ZSTD_insertDUBT1: comparing %u with %u : found %u common bytes ",
122c03c5b1cSMartin Matuska                     current, matchIndex, (U32)matchLength);
123c03c5b1cSMartin Matuska 
124c03c5b1cSMartin Matuska         if (ip+matchLength == iend) {   /* equal : no way to know if inf or sup */
125c03c5b1cSMartin Matuska             break;   /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */
126c03c5b1cSMartin Matuska         }
127c03c5b1cSMartin Matuska 
128c03c5b1cSMartin Matuska         if (match[matchLength] < ip[matchLength]) {  /* necessarily within buffer */
129c03c5b1cSMartin Matuska             /* match is smaller than current */
130c03c5b1cSMartin Matuska             *smallerPtr = matchIndex;             /* update smaller idx */
131c03c5b1cSMartin Matuska             commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
132c03c5b1cSMartin Matuska             if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
133c03c5b1cSMartin Matuska             DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is smaller : next => %u",
134c03c5b1cSMartin Matuska                         matchIndex, btLow, nextPtr[1]);
135c03c5b1cSMartin Matuska             smallerPtr = nextPtr+1;               /* new "candidate" => larger than match, which was smaller than target */
136c03c5b1cSMartin Matuska             matchIndex = nextPtr[1];              /* new matchIndex, larger than previous and closer to current */
137c03c5b1cSMartin Matuska         } else {
138c03c5b1cSMartin Matuska             /* match is larger than current */
139c03c5b1cSMartin Matuska             *largerPtr = matchIndex;
140c03c5b1cSMartin Matuska             commonLengthLarger = matchLength;
141c03c5b1cSMartin Matuska             if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
142c03c5b1cSMartin Matuska             DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is larger => %u",
143c03c5b1cSMartin Matuska                         matchIndex, btLow, nextPtr[0]);
144c03c5b1cSMartin Matuska             largerPtr = nextPtr;
145c03c5b1cSMartin Matuska             matchIndex = nextPtr[0];
146c03c5b1cSMartin Matuska     }   }
147c03c5b1cSMartin Matuska 
148c03c5b1cSMartin Matuska     *smallerPtr = *largerPtr = 0;
149c03c5b1cSMartin Matuska }
150c03c5b1cSMartin Matuska 
151c03c5b1cSMartin Matuska 
152c03c5b1cSMartin Matuska static size_t
ZSTD_DUBT_findBetterDictMatch(ZSTD_matchState_t * ms,const BYTE * const ip,const BYTE * const iend,size_t * offsetPtr,size_t bestLength,U32 nbCompares,U32 const mls,const ZSTD_dictMode_e dictMode)153c03c5b1cSMartin Matuska ZSTD_DUBT_findBetterDictMatch (
154c03c5b1cSMartin Matuska         ZSTD_matchState_t* ms,
155c03c5b1cSMartin Matuska         const BYTE* const ip, const BYTE* const iend,
156c03c5b1cSMartin Matuska         size_t* offsetPtr,
157c03c5b1cSMartin Matuska         size_t bestLength,
158c03c5b1cSMartin Matuska         U32 nbCompares,
159c03c5b1cSMartin Matuska         U32 const mls,
160c03c5b1cSMartin Matuska         const ZSTD_dictMode_e dictMode)
161c03c5b1cSMartin Matuska {
162c03c5b1cSMartin Matuska     const ZSTD_matchState_t * const dms = ms->dictMatchState;
163c03c5b1cSMartin Matuska     const ZSTD_compressionParameters* const dmsCParams = &dms->cParams;
164c03c5b1cSMartin Matuska     const U32 * const dictHashTable = dms->hashTable;
165c03c5b1cSMartin Matuska     U32         const hashLog = dmsCParams->hashLog;
166c03c5b1cSMartin Matuska     size_t      const h  = ZSTD_hashPtr(ip, hashLog, mls);
167c03c5b1cSMartin Matuska     U32               dictMatchIndex = dictHashTable[h];
168c03c5b1cSMartin Matuska 
169c03c5b1cSMartin Matuska     const BYTE* const base = ms->window.base;
170c03c5b1cSMartin Matuska     const BYTE* const prefixStart = base + ms->window.dictLimit;
171c03c5b1cSMartin Matuska     U32         const current = (U32)(ip-base);
172c03c5b1cSMartin Matuska     const BYTE* const dictBase = dms->window.base;
173c03c5b1cSMartin Matuska     const BYTE* const dictEnd = dms->window.nextSrc;
174c03c5b1cSMartin Matuska     U32         const dictHighLimit = (U32)(dms->window.nextSrc - dms->window.base);
175c03c5b1cSMartin Matuska     U32         const dictLowLimit = dms->window.lowLimit;
176c03c5b1cSMartin Matuska     U32         const dictIndexDelta = ms->window.lowLimit - dictHighLimit;
177c03c5b1cSMartin Matuska 
178c03c5b1cSMartin Matuska     U32*        const dictBt = dms->chainTable;
179c03c5b1cSMartin Matuska     U32         const btLog  = dmsCParams->chainLog - 1;
180c03c5b1cSMartin Matuska     U32         const btMask = (1 << btLog) - 1;
181c03c5b1cSMartin Matuska     U32         const btLow = (btMask >= dictHighLimit - dictLowLimit) ? dictLowLimit : dictHighLimit - btMask;
182c03c5b1cSMartin Matuska 
183c03c5b1cSMartin Matuska     size_t commonLengthSmaller=0, commonLengthLarger=0;
184c03c5b1cSMartin Matuska 
185c03c5b1cSMartin Matuska     (void)dictMode;
186c03c5b1cSMartin Matuska     assert(dictMode == ZSTD_dictMatchState);
187c03c5b1cSMartin Matuska 
188c03c5b1cSMartin Matuska     while (nbCompares-- && (dictMatchIndex > dictLowLimit)) {
189c03c5b1cSMartin Matuska         U32* const nextPtr = dictBt + 2*(dictMatchIndex & btMask);
190c03c5b1cSMartin Matuska         size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
191c03c5b1cSMartin Matuska         const BYTE* match = dictBase + dictMatchIndex;
192c03c5b1cSMartin Matuska         matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
193c03c5b1cSMartin Matuska         if (dictMatchIndex+matchLength >= dictHighLimit)
194c03c5b1cSMartin Matuska             match = base + dictMatchIndex + dictIndexDelta;   /* to prepare for next usage of match[matchLength] */
195c03c5b1cSMartin Matuska 
196c03c5b1cSMartin Matuska         if (matchLength > bestLength) {
197c03c5b1cSMartin Matuska             U32 matchIndex = dictMatchIndex + dictIndexDelta;
198c03c5b1cSMartin Matuska             if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) {
199c03c5b1cSMartin Matuska                 DEBUGLOG(9, "ZSTD_DUBT_findBetterDictMatch(%u) : found better match length %u -> %u and offsetCode %u -> %u (dictMatchIndex %u, matchIndex %u)",
200c03c5b1cSMartin Matuska                     current, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, ZSTD_REP_MOVE + current - matchIndex, dictMatchIndex, matchIndex);
201c03c5b1cSMartin Matuska                 bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex;
202c03c5b1cSMartin Matuska             }
203c03c5b1cSMartin Matuska             if (ip+matchLength == iend) {   /* reached end of input : ip[matchLength] is not valid, no way to know if it's larger or smaller than match */
204c03c5b1cSMartin Matuska                 break;   /* drop, to guarantee consistency (miss a little bit of compression) */
205c03c5b1cSMartin Matuska             }
206c03c5b1cSMartin Matuska         }
207c03c5b1cSMartin Matuska 
208c03c5b1cSMartin Matuska         if (match[matchLength] < ip[matchLength]) {
209c03c5b1cSMartin Matuska             if (dictMatchIndex <= btLow) { break; }   /* beyond tree size, stop the search */
210c03c5b1cSMartin Matuska             commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
211c03c5b1cSMartin Matuska             dictMatchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
212c03c5b1cSMartin Matuska         } else {
213c03c5b1cSMartin Matuska             /* match is larger than current */
214c03c5b1cSMartin Matuska             if (dictMatchIndex <= btLow) { break; }   /* beyond tree size, stop the search */
215c03c5b1cSMartin Matuska             commonLengthLarger = matchLength;
216c03c5b1cSMartin Matuska             dictMatchIndex = nextPtr[0];
217c03c5b1cSMartin Matuska         }
218c03c5b1cSMartin Matuska     }
219c03c5b1cSMartin Matuska 
220c03c5b1cSMartin Matuska     if (bestLength >= MINMATCH) {
221c03c5b1cSMartin Matuska         U32 const mIndex = current - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex;
222c03c5b1cSMartin Matuska         DEBUGLOG(8, "ZSTD_DUBT_findBetterDictMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
223c03c5b1cSMartin Matuska                     current, (U32)bestLength, (U32)*offsetPtr, mIndex);
224c03c5b1cSMartin Matuska     }
225c03c5b1cSMartin Matuska     return bestLength;
226c03c5b1cSMartin Matuska 
227c03c5b1cSMartin Matuska }
228c03c5b1cSMartin Matuska 
229c03c5b1cSMartin Matuska 
230c03c5b1cSMartin Matuska static size_t
ZSTD_DUBT_findBestMatch(ZSTD_matchState_t * ms,const BYTE * const ip,const BYTE * const iend,size_t * offsetPtr,U32 const mls,const ZSTD_dictMode_e dictMode)231c03c5b1cSMartin Matuska ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
232c03c5b1cSMartin Matuska                         const BYTE* const ip, const BYTE* const iend,
233c03c5b1cSMartin Matuska                         size_t* offsetPtr,
234c03c5b1cSMartin Matuska                         U32 const mls,
235c03c5b1cSMartin Matuska                         const ZSTD_dictMode_e dictMode)
236c03c5b1cSMartin Matuska {
237c03c5b1cSMartin Matuska     const ZSTD_compressionParameters* const cParams = &ms->cParams;
238c03c5b1cSMartin Matuska     U32*   const hashTable = ms->hashTable;
239c03c5b1cSMartin Matuska     U32    const hashLog = cParams->hashLog;
240c03c5b1cSMartin Matuska     size_t const h  = ZSTD_hashPtr(ip, hashLog, mls);
241c03c5b1cSMartin Matuska     U32          matchIndex  = hashTable[h];
242c03c5b1cSMartin Matuska 
243c03c5b1cSMartin Matuska     const BYTE* const base = ms->window.base;
244c03c5b1cSMartin Matuska     U32    const current = (U32)(ip-base);
245c03c5b1cSMartin Matuska     U32    const windowLow = ZSTD_getLowestMatchIndex(ms, current, cParams->windowLog);
246c03c5b1cSMartin Matuska 
247c03c5b1cSMartin Matuska     U32*   const bt = ms->chainTable;
248c03c5b1cSMartin Matuska     U32    const btLog  = cParams->chainLog - 1;
249c03c5b1cSMartin Matuska     U32    const btMask = (1 << btLog) - 1;
250c03c5b1cSMartin Matuska     U32    const btLow = (btMask >= current) ? 0 : current - btMask;
251c03c5b1cSMartin Matuska     U32    const unsortLimit = MAX(btLow, windowLow);
252c03c5b1cSMartin Matuska 
253c03c5b1cSMartin Matuska     U32*         nextCandidate = bt + 2*(matchIndex&btMask);
254c03c5b1cSMartin Matuska     U32*         unsortedMark = bt + 2*(matchIndex&btMask) + 1;
255c03c5b1cSMartin Matuska     U32          nbCompares = 1U << cParams->searchLog;
256c03c5b1cSMartin Matuska     U32          nbCandidates = nbCompares;
257c03c5b1cSMartin Matuska     U32          previousCandidate = 0;
258c03c5b1cSMartin Matuska 
259c03c5b1cSMartin Matuska     DEBUGLOG(7, "ZSTD_DUBT_findBestMatch (%u) ", current);
260c03c5b1cSMartin Matuska     assert(ip <= iend-8);   /* required for h calculation */
261c03c5b1cSMartin Matuska 
262c03c5b1cSMartin Matuska     /* reach end of unsorted candidates list */
263c03c5b1cSMartin Matuska     while ( (matchIndex > unsortLimit)
264c03c5b1cSMartin Matuska          && (*unsortedMark == ZSTD_DUBT_UNSORTED_MARK)
265c03c5b1cSMartin Matuska          && (nbCandidates > 1) ) {
266c03c5b1cSMartin Matuska         DEBUGLOG(8, "ZSTD_DUBT_findBestMatch: candidate %u is unsorted",
267c03c5b1cSMartin Matuska                     matchIndex);
268c03c5b1cSMartin Matuska         *unsortedMark = previousCandidate;  /* the unsortedMark becomes a reversed chain, to move up back to original position */
269c03c5b1cSMartin Matuska         previousCandidate = matchIndex;
270c03c5b1cSMartin Matuska         matchIndex = *nextCandidate;
271c03c5b1cSMartin Matuska         nextCandidate = bt + 2*(matchIndex&btMask);
272c03c5b1cSMartin Matuska         unsortedMark = bt + 2*(matchIndex&btMask) + 1;
273c03c5b1cSMartin Matuska         nbCandidates --;
274c03c5b1cSMartin Matuska     }
275c03c5b1cSMartin Matuska 
276c03c5b1cSMartin Matuska     /* nullify last candidate if it's still unsorted
277c03c5b1cSMartin Matuska      * simplification, detrimental to compression ratio, beneficial for speed */
278c03c5b1cSMartin Matuska     if ( (matchIndex > unsortLimit)
279c03c5b1cSMartin Matuska       && (*unsortedMark==ZSTD_DUBT_UNSORTED_MARK) ) {
280c03c5b1cSMartin Matuska         DEBUGLOG(7, "ZSTD_DUBT_findBestMatch: nullify last unsorted candidate %u",
281c03c5b1cSMartin Matuska                     matchIndex);
282c03c5b1cSMartin Matuska         *nextCandidate = *unsortedMark = 0;
283c03c5b1cSMartin Matuska     }
284c03c5b1cSMartin Matuska 
285c03c5b1cSMartin Matuska     /* batch sort stacked candidates */
286c03c5b1cSMartin Matuska     matchIndex = previousCandidate;
287c03c5b1cSMartin Matuska     while (matchIndex) {  /* will end on matchIndex == 0 */
288c03c5b1cSMartin Matuska         U32* const nextCandidateIdxPtr = bt + 2*(matchIndex&btMask) + 1;
289c03c5b1cSMartin Matuska         U32 const nextCandidateIdx = *nextCandidateIdxPtr;
290c03c5b1cSMartin Matuska         ZSTD_insertDUBT1(ms, matchIndex, iend,
291c03c5b1cSMartin Matuska                          nbCandidates, unsortLimit, dictMode);
292c03c5b1cSMartin Matuska         matchIndex = nextCandidateIdx;
293c03c5b1cSMartin Matuska         nbCandidates++;
294c03c5b1cSMartin Matuska     }
295c03c5b1cSMartin Matuska 
296c03c5b1cSMartin Matuska     /* find longest match */
297c03c5b1cSMartin Matuska     {   size_t commonLengthSmaller = 0, commonLengthLarger = 0;
298c03c5b1cSMartin Matuska         const BYTE* const dictBase = ms->window.dictBase;
299c03c5b1cSMartin Matuska         const U32 dictLimit = ms->window.dictLimit;
300c03c5b1cSMartin Matuska         const BYTE* const dictEnd = dictBase + dictLimit;
301c03c5b1cSMartin Matuska         const BYTE* const prefixStart = base + dictLimit;
302c03c5b1cSMartin Matuska         U32* smallerPtr = bt + 2*(current&btMask);
303c03c5b1cSMartin Matuska         U32* largerPtr  = bt + 2*(current&btMask) + 1;
304c03c5b1cSMartin Matuska         U32 matchEndIdx = current + 8 + 1;
305c03c5b1cSMartin Matuska         U32 dummy32;   /* to be nullified at the end */
306c03c5b1cSMartin Matuska         size_t bestLength = 0;
307c03c5b1cSMartin Matuska 
308c03c5b1cSMartin Matuska         matchIndex  = hashTable[h];
309c03c5b1cSMartin Matuska         hashTable[h] = current;   /* Update Hash Table */
310c03c5b1cSMartin Matuska 
311c03c5b1cSMartin Matuska         while (nbCompares-- && (matchIndex > windowLow)) {
312c03c5b1cSMartin Matuska             U32* const nextPtr = bt + 2*(matchIndex & btMask);
313c03c5b1cSMartin Matuska             size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
314c03c5b1cSMartin Matuska             const BYTE* match;
315c03c5b1cSMartin Matuska 
316c03c5b1cSMartin Matuska             if ((dictMode != ZSTD_extDict) || (matchIndex+matchLength >= dictLimit)) {
317c03c5b1cSMartin Matuska                 match = base + matchIndex;
318c03c5b1cSMartin Matuska                 matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
319c03c5b1cSMartin Matuska             } else {
320c03c5b1cSMartin Matuska                 match = dictBase + matchIndex;
321c03c5b1cSMartin Matuska                 matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
322c03c5b1cSMartin Matuska                 if (matchIndex+matchLength >= dictLimit)
323c03c5b1cSMartin Matuska                     match = base + matchIndex;   /* to prepare for next usage of match[matchLength] */
324c03c5b1cSMartin Matuska             }
325c03c5b1cSMartin Matuska 
326c03c5b1cSMartin Matuska             if (matchLength > bestLength) {
327c03c5b1cSMartin Matuska                 if (matchLength > matchEndIdx - matchIndex)
328c03c5b1cSMartin Matuska                     matchEndIdx = matchIndex + (U32)matchLength;
329c03c5b1cSMartin Matuska                 if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) )
330c03c5b1cSMartin Matuska                     bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex;
331c03c5b1cSMartin Matuska                 if (ip+matchLength == iend) {   /* equal : no way to know if inf or sup */
332c03c5b1cSMartin Matuska                     if (dictMode == ZSTD_dictMatchState) {
333c03c5b1cSMartin Matuska                         nbCompares = 0; /* in addition to avoiding checking any
334c03c5b1cSMartin Matuska                                          * further in this loop, make sure we
335c03c5b1cSMartin Matuska                                          * skip checking in the dictionary. */
336c03c5b1cSMartin Matuska                     }
337c03c5b1cSMartin Matuska                     break;   /* drop, to guarantee consistency (miss a little bit of compression) */
338c03c5b1cSMartin Matuska                 }
339c03c5b1cSMartin Matuska             }
340c03c5b1cSMartin Matuska 
341c03c5b1cSMartin Matuska             if (match[matchLength] < ip[matchLength]) {
342c03c5b1cSMartin Matuska                 /* match is smaller than current */
343c03c5b1cSMartin Matuska                 *smallerPtr = matchIndex;             /* update smaller idx */
344c03c5b1cSMartin Matuska                 commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
345c03c5b1cSMartin Matuska                 if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
346c03c5b1cSMartin Matuska                 smallerPtr = nextPtr+1;               /* new "smaller" => larger of match */
347c03c5b1cSMartin Matuska                 matchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
348c03c5b1cSMartin Matuska             } else {
349c03c5b1cSMartin Matuska                 /* match is larger than current */
350c03c5b1cSMartin Matuska                 *largerPtr = matchIndex;
351c03c5b1cSMartin Matuska                 commonLengthLarger = matchLength;
352c03c5b1cSMartin Matuska                 if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
353c03c5b1cSMartin Matuska                 largerPtr = nextPtr;
354c03c5b1cSMartin Matuska                 matchIndex = nextPtr[0];
355c03c5b1cSMartin Matuska         }   }
356c03c5b1cSMartin Matuska 
357c03c5b1cSMartin Matuska         *smallerPtr = *largerPtr = 0;
358c03c5b1cSMartin Matuska 
359c03c5b1cSMartin Matuska         if (dictMode == ZSTD_dictMatchState && nbCompares) {
360c03c5b1cSMartin Matuska             bestLength = ZSTD_DUBT_findBetterDictMatch(
361c03c5b1cSMartin Matuska                     ms, ip, iend,
362c03c5b1cSMartin Matuska                     offsetPtr, bestLength, nbCompares,
363c03c5b1cSMartin Matuska                     mls, dictMode);
364c03c5b1cSMartin Matuska         }
365c03c5b1cSMartin Matuska 
366c03c5b1cSMartin Matuska         assert(matchEndIdx > current+8); /* ensure nextToUpdate is increased */
367c03c5b1cSMartin Matuska         ms->nextToUpdate = matchEndIdx - 8;   /* skip repetitive patterns */
368c03c5b1cSMartin Matuska         if (bestLength >= MINMATCH) {
369c03c5b1cSMartin Matuska             U32 const mIndex = current - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex;
370c03c5b1cSMartin Matuska             DEBUGLOG(8, "ZSTD_DUBT_findBestMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
371c03c5b1cSMartin Matuska                         current, (U32)bestLength, (U32)*offsetPtr, mIndex);
372c03c5b1cSMartin Matuska         }
373c03c5b1cSMartin Matuska         return bestLength;
374c03c5b1cSMartin Matuska     }
375c03c5b1cSMartin Matuska }
376c03c5b1cSMartin Matuska 
377c03c5b1cSMartin Matuska 
378c03c5b1cSMartin Matuska /** ZSTD_BtFindBestMatch() : Tree updater, providing best match */
379c03c5b1cSMartin Matuska FORCE_INLINE_TEMPLATE size_t
ZSTD_BtFindBestMatch(ZSTD_matchState_t * ms,const BYTE * const ip,const BYTE * const iLimit,size_t * offsetPtr,const U32 mls,const ZSTD_dictMode_e dictMode)380c03c5b1cSMartin Matuska ZSTD_BtFindBestMatch( ZSTD_matchState_t* ms,
381c03c5b1cSMartin Matuska                 const BYTE* const ip, const BYTE* const iLimit,
382c03c5b1cSMartin Matuska                       size_t* offsetPtr,
383c03c5b1cSMartin Matuska                 const U32 mls /* template */,
384c03c5b1cSMartin Matuska                 const ZSTD_dictMode_e dictMode)
385c03c5b1cSMartin Matuska {
386c03c5b1cSMartin Matuska     DEBUGLOG(7, "ZSTD_BtFindBestMatch");
387c03c5b1cSMartin Matuska     if (ip < ms->window.base + ms->nextToUpdate) return 0;   /* skipped area */
388c03c5b1cSMartin Matuska     ZSTD_updateDUBT(ms, ip, iLimit, mls);
389c03c5b1cSMartin Matuska     return ZSTD_DUBT_findBestMatch(ms, ip, iLimit, offsetPtr, mls, dictMode);
390c03c5b1cSMartin Matuska }
391c03c5b1cSMartin Matuska 
392c03c5b1cSMartin Matuska 
393c03c5b1cSMartin Matuska static size_t
ZSTD_BtFindBestMatch_selectMLS(ZSTD_matchState_t * ms,const BYTE * ip,const BYTE * const iLimit,size_t * offsetPtr)394c03c5b1cSMartin Matuska ZSTD_BtFindBestMatch_selectMLS (  ZSTD_matchState_t* ms,
395c03c5b1cSMartin Matuska                             const BYTE* ip, const BYTE* const iLimit,
396c03c5b1cSMartin Matuska                                   size_t* offsetPtr)
397c03c5b1cSMartin Matuska {
398c03c5b1cSMartin Matuska     switch(ms->cParams.minMatch)
399c03c5b1cSMartin Matuska     {
400c03c5b1cSMartin Matuska     default : /* includes case 3 */
401c03c5b1cSMartin Matuska     case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict);
402c03c5b1cSMartin Matuska     case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_noDict);
403c03c5b1cSMartin Matuska     case 7 :
404c03c5b1cSMartin Matuska     case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_noDict);
405c03c5b1cSMartin Matuska     }
406c03c5b1cSMartin Matuska }
407c03c5b1cSMartin Matuska 
408c03c5b1cSMartin Matuska 
ZSTD_BtFindBestMatch_dictMatchState_selectMLS(ZSTD_matchState_t * ms,const BYTE * ip,const BYTE * const iLimit,size_t * offsetPtr)409c03c5b1cSMartin Matuska static size_t ZSTD_BtFindBestMatch_dictMatchState_selectMLS (
410c03c5b1cSMartin Matuska                         ZSTD_matchState_t* ms,
411c03c5b1cSMartin Matuska                         const BYTE* ip, const BYTE* const iLimit,
412c03c5b1cSMartin Matuska                         size_t* offsetPtr)
413c03c5b1cSMartin Matuska {
414c03c5b1cSMartin Matuska     switch(ms->cParams.minMatch)
415c03c5b1cSMartin Matuska     {
416c03c5b1cSMartin Matuska     default : /* includes case 3 */
417c03c5b1cSMartin Matuska     case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState);
418c03c5b1cSMartin Matuska     case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState);
419c03c5b1cSMartin Matuska     case 7 :
420c03c5b1cSMartin Matuska     case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState);
421c03c5b1cSMartin Matuska     }
422c03c5b1cSMartin Matuska }
423c03c5b1cSMartin Matuska 
424c03c5b1cSMartin Matuska 
ZSTD_BtFindBestMatch_extDict_selectMLS(ZSTD_matchState_t * ms,const BYTE * ip,const BYTE * const iLimit,size_t * offsetPtr)425c03c5b1cSMartin Matuska static size_t ZSTD_BtFindBestMatch_extDict_selectMLS (
426c03c5b1cSMartin Matuska                         ZSTD_matchState_t* ms,
427c03c5b1cSMartin Matuska                         const BYTE* ip, const BYTE* const iLimit,
428c03c5b1cSMartin Matuska                         size_t* offsetPtr)
429c03c5b1cSMartin Matuska {
430c03c5b1cSMartin Matuska     switch(ms->cParams.minMatch)
431c03c5b1cSMartin Matuska     {
432c03c5b1cSMartin Matuska     default : /* includes case 3 */
433c03c5b1cSMartin Matuska     case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict);
434c03c5b1cSMartin Matuska     case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_extDict);
435c03c5b1cSMartin Matuska     case 7 :
436c03c5b1cSMartin Matuska     case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_extDict);
437c03c5b1cSMartin Matuska     }
438c03c5b1cSMartin Matuska }
439c03c5b1cSMartin Matuska 
440c03c5b1cSMartin Matuska 
441c03c5b1cSMartin Matuska 
442c03c5b1cSMartin Matuska /* *********************************
443c03c5b1cSMartin Matuska *  Hash Chain
444c03c5b1cSMartin Matuska ***********************************/
445c03c5b1cSMartin Matuska #define NEXT_IN_CHAIN(d, mask)   chainTable[(d) & (mask)]
446c03c5b1cSMartin Matuska 
447c03c5b1cSMartin Matuska /* Update chains up to ip (excluded)
448c03c5b1cSMartin Matuska    Assumption : always within prefix (i.e. not within extDict) */
ZSTD_insertAndFindFirstIndex_internal(ZSTD_matchState_t * ms,const ZSTD_compressionParameters * const cParams,const BYTE * ip,U32 const mls)449c03c5b1cSMartin Matuska static U32 ZSTD_insertAndFindFirstIndex_internal(
450c03c5b1cSMartin Matuska                         ZSTD_matchState_t* ms,
451c03c5b1cSMartin Matuska                         const ZSTD_compressionParameters* const cParams,
452c03c5b1cSMartin Matuska                         const BYTE* ip, U32 const mls)
453c03c5b1cSMartin Matuska {
454c03c5b1cSMartin Matuska     U32* const hashTable  = ms->hashTable;
455c03c5b1cSMartin Matuska     const U32 hashLog = cParams->hashLog;
456c03c5b1cSMartin Matuska     U32* const chainTable = ms->chainTable;
457c03c5b1cSMartin Matuska     const U32 chainMask = (1 << cParams->chainLog) - 1;
458c03c5b1cSMartin Matuska     const BYTE* const base = ms->window.base;
459c03c5b1cSMartin Matuska     const U32 target = (U32)(ip - base);
460c03c5b1cSMartin Matuska     U32 idx = ms->nextToUpdate;
461c03c5b1cSMartin Matuska 
462c03c5b1cSMartin Matuska     while(idx < target) { /* catch up */
463c03c5b1cSMartin Matuska         size_t const h = ZSTD_hashPtr(base+idx, hashLog, mls);
464c03c5b1cSMartin Matuska         NEXT_IN_CHAIN(idx, chainMask) = hashTable[h];
465c03c5b1cSMartin Matuska         hashTable[h] = idx;
466c03c5b1cSMartin Matuska         idx++;
467c03c5b1cSMartin Matuska     }
468c03c5b1cSMartin Matuska 
469c03c5b1cSMartin Matuska     ms->nextToUpdate = target;
470c03c5b1cSMartin Matuska     return hashTable[ZSTD_hashPtr(ip, hashLog, mls)];
471c03c5b1cSMartin Matuska }
472c03c5b1cSMartin Matuska 
ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t * ms,const BYTE * ip)473c03c5b1cSMartin Matuska U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) {
474c03c5b1cSMartin Matuska     const ZSTD_compressionParameters* const cParams = &ms->cParams;
475c03c5b1cSMartin Matuska     return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch);
476c03c5b1cSMartin Matuska }
477c03c5b1cSMartin Matuska 
478c03c5b1cSMartin Matuska 
479c03c5b1cSMartin Matuska /* inlining is important to hardwire a hot branch (template emulation) */
480c03c5b1cSMartin Matuska FORCE_INLINE_TEMPLATE
ZSTD_HcFindBestMatch_generic(ZSTD_matchState_t * ms,const BYTE * const ip,const BYTE * const iLimit,size_t * offsetPtr,const U32 mls,const ZSTD_dictMode_e dictMode)481c03c5b1cSMartin Matuska size_t ZSTD_HcFindBestMatch_generic (
482c03c5b1cSMartin Matuska                         ZSTD_matchState_t* ms,
483c03c5b1cSMartin Matuska                         const BYTE* const ip, const BYTE* const iLimit,
484c03c5b1cSMartin Matuska                         size_t* offsetPtr,
485c03c5b1cSMartin Matuska                         const U32 mls, const ZSTD_dictMode_e dictMode)
486c03c5b1cSMartin Matuska {
487c03c5b1cSMartin Matuska     const ZSTD_compressionParameters* const cParams = &ms->cParams;
488c03c5b1cSMartin Matuska     U32* const chainTable = ms->chainTable;
489c03c5b1cSMartin Matuska     const U32 chainSize = (1 << cParams->chainLog);
490c03c5b1cSMartin Matuska     const U32 chainMask = chainSize-1;
491c03c5b1cSMartin Matuska     const BYTE* const base = ms->window.base;
492c03c5b1cSMartin Matuska     const BYTE* const dictBase = ms->window.dictBase;
493c03c5b1cSMartin Matuska     const U32 dictLimit = ms->window.dictLimit;
494c03c5b1cSMartin Matuska     const BYTE* const prefixStart = base + dictLimit;
495c03c5b1cSMartin Matuska     const BYTE* const dictEnd = dictBase + dictLimit;
496c03c5b1cSMartin Matuska     const U32 current = (U32)(ip-base);
497c03c5b1cSMartin Matuska     const U32 maxDistance = 1U << cParams->windowLog;
498c03c5b1cSMartin Matuska     const U32 lowestValid = ms->window.lowLimit;
499c03c5b1cSMartin Matuska     const U32 withinMaxDistance = (current - lowestValid > maxDistance) ? current - maxDistance : lowestValid;
500c03c5b1cSMartin Matuska     const U32 isDictionary = (ms->loadedDictEnd != 0);
501c03c5b1cSMartin Matuska     const U32 lowLimit = isDictionary ? lowestValid : withinMaxDistance;
502c03c5b1cSMartin Matuska     const U32 minChain = current > chainSize ? current - chainSize : 0;
503c03c5b1cSMartin Matuska     U32 nbAttempts = 1U << cParams->searchLog;
504c03c5b1cSMartin Matuska     size_t ml=4-1;
505c03c5b1cSMartin Matuska 
506c03c5b1cSMartin Matuska     /* HC4 match finder */
507c03c5b1cSMartin Matuska     U32 matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls);
508c03c5b1cSMartin Matuska 
509c03c5b1cSMartin Matuska     for ( ; (matchIndex>lowLimit) & (nbAttempts>0) ; nbAttempts--) {
510c03c5b1cSMartin Matuska         size_t currentMl=0;
511c03c5b1cSMartin Matuska         if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) {
512c03c5b1cSMartin Matuska             const BYTE* const match = base + matchIndex;
513c03c5b1cSMartin Matuska             assert(matchIndex >= dictLimit);   /* ensures this is true if dictMode != ZSTD_extDict */
514c03c5b1cSMartin Matuska             if (match[ml] == ip[ml])   /* potentially better */
515c03c5b1cSMartin Matuska                 currentMl = ZSTD_count(ip, match, iLimit);
516c03c5b1cSMartin Matuska         } else {
517c03c5b1cSMartin Matuska             const BYTE* const match = dictBase + matchIndex;
518c03c5b1cSMartin Matuska             assert(match+4 <= dictEnd);
519c03c5b1cSMartin Matuska             if (MEM_read32(match) == MEM_read32(ip))   /* assumption : matchIndex <= dictLimit-4 (by table construction) */
520c03c5b1cSMartin Matuska                 currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dictEnd, prefixStart) + 4;
521c03c5b1cSMartin Matuska         }
522c03c5b1cSMartin Matuska 
523c03c5b1cSMartin Matuska         /* save best solution */
524c03c5b1cSMartin Matuska         if (currentMl > ml) {
525c03c5b1cSMartin Matuska             ml = currentMl;
526c03c5b1cSMartin Matuska             *offsetPtr = current - matchIndex + ZSTD_REP_MOVE;
527c03c5b1cSMartin Matuska             if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
528c03c5b1cSMartin Matuska         }
529c03c5b1cSMartin Matuska 
530c03c5b1cSMartin Matuska         if (matchIndex <= minChain) break;
531c03c5b1cSMartin Matuska         matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask);
532c03c5b1cSMartin Matuska     }
533c03c5b1cSMartin Matuska 
534c03c5b1cSMartin Matuska     if (dictMode == ZSTD_dictMatchState) {
535c03c5b1cSMartin Matuska         const ZSTD_matchState_t* const dms = ms->dictMatchState;
536c03c5b1cSMartin Matuska         const U32* const dmsChainTable = dms->chainTable;
537c03c5b1cSMartin Matuska         const U32 dmsChainSize         = (1 << dms->cParams.chainLog);
538c03c5b1cSMartin Matuska         const U32 dmsChainMask         = dmsChainSize - 1;
539c03c5b1cSMartin Matuska         const U32 dmsLowestIndex       = dms->window.dictLimit;
540c03c5b1cSMartin Matuska         const BYTE* const dmsBase      = dms->window.base;
541c03c5b1cSMartin Matuska         const BYTE* const dmsEnd       = dms->window.nextSrc;
542c03c5b1cSMartin Matuska         const U32 dmsSize              = (U32)(dmsEnd - dmsBase);
543c03c5b1cSMartin Matuska         const U32 dmsIndexDelta        = dictLimit - dmsSize;
544c03c5b1cSMartin Matuska         const U32 dmsMinChain = dmsSize > dmsChainSize ? dmsSize - dmsChainSize : 0;
545c03c5b1cSMartin Matuska 
546c03c5b1cSMartin Matuska         matchIndex = dms->hashTable[ZSTD_hashPtr(ip, dms->cParams.hashLog, mls)];
547c03c5b1cSMartin Matuska 
548c03c5b1cSMartin Matuska         for ( ; (matchIndex>dmsLowestIndex) & (nbAttempts>0) ; nbAttempts--) {
549c03c5b1cSMartin Matuska             size_t currentMl=0;
550c03c5b1cSMartin Matuska             const BYTE* const match = dmsBase + matchIndex;
551c03c5b1cSMartin Matuska             assert(match+4 <= dmsEnd);
552c03c5b1cSMartin Matuska             if (MEM_read32(match) == MEM_read32(ip))   /* assumption : matchIndex <= dictLimit-4 (by table construction) */
553c03c5b1cSMartin Matuska                 currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dmsEnd, prefixStart) + 4;
554c03c5b1cSMartin Matuska 
555c03c5b1cSMartin Matuska             /* save best solution */
556c03c5b1cSMartin Matuska             if (currentMl > ml) {
557c03c5b1cSMartin Matuska                 ml = currentMl;
558c03c5b1cSMartin Matuska                 *offsetPtr = current - (matchIndex + dmsIndexDelta) + ZSTD_REP_MOVE;
559c03c5b1cSMartin Matuska                 if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
560c03c5b1cSMartin Matuska             }
561c03c5b1cSMartin Matuska 
562c03c5b1cSMartin Matuska             if (matchIndex <= dmsMinChain) break;
563c03c5b1cSMartin Matuska             matchIndex = dmsChainTable[matchIndex & dmsChainMask];
564c03c5b1cSMartin Matuska         }
565c03c5b1cSMartin Matuska     }
566c03c5b1cSMartin Matuska 
567c03c5b1cSMartin Matuska     return ml;
568c03c5b1cSMartin Matuska }
569c03c5b1cSMartin Matuska 
570c03c5b1cSMartin Matuska 
ZSTD_HcFindBestMatch_selectMLS(ZSTD_matchState_t * ms,const BYTE * ip,const BYTE * const iLimit,size_t * offsetPtr)571c03c5b1cSMartin Matuska FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_selectMLS (
572c03c5b1cSMartin Matuska                         ZSTD_matchState_t* ms,
573c03c5b1cSMartin Matuska                         const BYTE* ip, const BYTE* const iLimit,
574c03c5b1cSMartin Matuska                         size_t* offsetPtr)
575c03c5b1cSMartin Matuska {
576c03c5b1cSMartin Matuska     switch(ms->cParams.minMatch)
577c03c5b1cSMartin Matuska     {
578c03c5b1cSMartin Matuska     default : /* includes case 3 */
579c03c5b1cSMartin Matuska     case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict);
580c03c5b1cSMartin Matuska     case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_noDict);
581c03c5b1cSMartin Matuska     case 7 :
582c03c5b1cSMartin Matuska     case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_noDict);
583c03c5b1cSMartin Matuska     }
584c03c5b1cSMartin Matuska }
585c03c5b1cSMartin Matuska 
586c03c5b1cSMartin Matuska 
ZSTD_HcFindBestMatch_dictMatchState_selectMLS(ZSTD_matchState_t * ms,const BYTE * ip,const BYTE * const iLimit,size_t * offsetPtr)587c03c5b1cSMartin Matuska static size_t ZSTD_HcFindBestMatch_dictMatchState_selectMLS (
588c03c5b1cSMartin Matuska                         ZSTD_matchState_t* ms,
589c03c5b1cSMartin Matuska                         const BYTE* ip, const BYTE* const iLimit,
590c03c5b1cSMartin Matuska                         size_t* offsetPtr)
591c03c5b1cSMartin Matuska {
592c03c5b1cSMartin Matuska     switch(ms->cParams.minMatch)
593c03c5b1cSMartin Matuska     {
594c03c5b1cSMartin Matuska     default : /* includes case 3 */
595c03c5b1cSMartin Matuska     case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState);
596c03c5b1cSMartin Matuska     case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState);
597c03c5b1cSMartin Matuska     case 7 :
598c03c5b1cSMartin Matuska     case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState);
599c03c5b1cSMartin Matuska     }
600c03c5b1cSMartin Matuska }
601c03c5b1cSMartin Matuska 
602c03c5b1cSMartin Matuska 
ZSTD_HcFindBestMatch_extDict_selectMLS(ZSTD_matchState_t * ms,const BYTE * ip,const BYTE * const iLimit,size_t * offsetPtr)603c03c5b1cSMartin Matuska FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS (
604c03c5b1cSMartin Matuska                         ZSTD_matchState_t* ms,
605c03c5b1cSMartin Matuska                         const BYTE* ip, const BYTE* const iLimit,
606c03c5b1cSMartin Matuska                         size_t* offsetPtr)
607c03c5b1cSMartin Matuska {
608c03c5b1cSMartin Matuska     switch(ms->cParams.minMatch)
609c03c5b1cSMartin Matuska     {
610c03c5b1cSMartin Matuska     default : /* includes case 3 */
611c03c5b1cSMartin Matuska     case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict);
612c03c5b1cSMartin Matuska     case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_extDict);
613c03c5b1cSMartin Matuska     case 7 :
614c03c5b1cSMartin Matuska     case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_extDict);
615c03c5b1cSMartin Matuska     }
616c03c5b1cSMartin Matuska }
617c03c5b1cSMartin Matuska 
618c03c5b1cSMartin Matuska 
619c03c5b1cSMartin Matuska /* *******************************
620c03c5b1cSMartin Matuska *  Common parser - lazy strategy
621c03c5b1cSMartin Matuska *********************************/
622c03c5b1cSMartin Matuska typedef enum { search_hashChain, search_binaryTree } searchMethod_e;
623c03c5b1cSMartin Matuska 
624c03c5b1cSMartin Matuska FORCE_INLINE_TEMPLATE size_t
ZSTD_compressBlock_lazy_generic(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],const void * src,size_t srcSize,const searchMethod_e searchMethod,const U32 depth,ZSTD_dictMode_e const dictMode)625c03c5b1cSMartin Matuska ZSTD_compressBlock_lazy_generic(
626c03c5b1cSMartin Matuska                         ZSTD_matchState_t* ms, seqStore_t* seqStore,
627c03c5b1cSMartin Matuska                         U32 rep[ZSTD_REP_NUM],
628c03c5b1cSMartin Matuska                         const void* src, size_t srcSize,
629c03c5b1cSMartin Matuska                         const searchMethod_e searchMethod, const U32 depth,
630c03c5b1cSMartin Matuska                         ZSTD_dictMode_e const dictMode)
631c03c5b1cSMartin Matuska {
632c03c5b1cSMartin Matuska     const BYTE* const istart = (const BYTE*)src;
633c03c5b1cSMartin Matuska     const BYTE* ip = istart;
634c03c5b1cSMartin Matuska     const BYTE* anchor = istart;
635c03c5b1cSMartin Matuska     const BYTE* const iend = istart + srcSize;
636c03c5b1cSMartin Matuska     const BYTE* const ilimit = iend - 8;
637c03c5b1cSMartin Matuska     const BYTE* const base = ms->window.base;
638c03c5b1cSMartin Matuska     const U32 prefixLowestIndex = ms->window.dictLimit;
639c03c5b1cSMartin Matuska     const BYTE* const prefixLowest = base + prefixLowestIndex;
640c03c5b1cSMartin Matuska 
641c03c5b1cSMartin Matuska     typedef size_t (*searchMax_f)(
642c03c5b1cSMartin Matuska                         ZSTD_matchState_t* ms,
643c03c5b1cSMartin Matuska                         const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
644c03c5b1cSMartin Matuska     searchMax_f const searchMax = dictMode == ZSTD_dictMatchState ?
645c03c5b1cSMartin Matuska         (searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_dictMatchState_selectMLS
646c03c5b1cSMartin Matuska                                          : ZSTD_HcFindBestMatch_dictMatchState_selectMLS) :
647c03c5b1cSMartin Matuska         (searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_selectMLS
648c03c5b1cSMartin Matuska                                          : ZSTD_HcFindBestMatch_selectMLS);
649c03c5b1cSMartin Matuska     U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0;
650c03c5b1cSMartin Matuska 
651c03c5b1cSMartin Matuska     const ZSTD_matchState_t* const dms = ms->dictMatchState;
652c03c5b1cSMartin Matuska     const U32 dictLowestIndex      = dictMode == ZSTD_dictMatchState ?
653c03c5b1cSMartin Matuska                                      dms->window.dictLimit : 0;
654c03c5b1cSMartin Matuska     const BYTE* const dictBase     = dictMode == ZSTD_dictMatchState ?
655c03c5b1cSMartin Matuska                                      dms->window.base : NULL;
656c03c5b1cSMartin Matuska     const BYTE* const dictLowest   = dictMode == ZSTD_dictMatchState ?
657c03c5b1cSMartin Matuska                                      dictBase + dictLowestIndex : NULL;
658c03c5b1cSMartin Matuska     const BYTE* const dictEnd      = dictMode == ZSTD_dictMatchState ?
659c03c5b1cSMartin Matuska                                      dms->window.nextSrc : NULL;
660c03c5b1cSMartin Matuska     const U32 dictIndexDelta       = dictMode == ZSTD_dictMatchState ?
661c03c5b1cSMartin Matuska                                      prefixLowestIndex - (U32)(dictEnd - dictBase) :
662c03c5b1cSMartin Matuska                                      0;
663c03c5b1cSMartin Matuska     const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictLowest));
664c03c5b1cSMartin Matuska 
665c03c5b1cSMartin Matuska     DEBUGLOG(5, "ZSTD_compressBlock_lazy_generic (dictMode=%u)", (U32)dictMode);
666c03c5b1cSMartin Matuska 
667c03c5b1cSMartin Matuska     /* init */
668c03c5b1cSMartin Matuska     ip += (dictAndPrefixLength == 0);
669c03c5b1cSMartin Matuska     if (dictMode == ZSTD_noDict) {
670c03c5b1cSMartin Matuska         U32 const current = (U32)(ip - base);
671c03c5b1cSMartin Matuska         U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, ms->cParams.windowLog);
672c03c5b1cSMartin Matuska         U32 const maxRep = current - windowLow;
673c03c5b1cSMartin Matuska         if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0;
674c03c5b1cSMartin Matuska         if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0;
675c03c5b1cSMartin Matuska     }
676c03c5b1cSMartin Matuska     if (dictMode == ZSTD_dictMatchState) {
677c03c5b1cSMartin Matuska         /* dictMatchState repCode checks don't currently handle repCode == 0
678c03c5b1cSMartin Matuska          * disabling. */
679c03c5b1cSMartin Matuska         assert(offset_1 <= dictAndPrefixLength);
680c03c5b1cSMartin Matuska         assert(offset_2 <= dictAndPrefixLength);
681c03c5b1cSMartin Matuska     }
682c03c5b1cSMartin Matuska 
683c03c5b1cSMartin Matuska     /* Match Loop */
684c03c5b1cSMartin Matuska #if defined(__GNUC__) && defined(__x86_64__)
685c03c5b1cSMartin Matuska     /* I've measured random a 5% speed loss on levels 5 & 6 (greedy) when the
686c03c5b1cSMartin Matuska      * code alignment is perturbed. To fix the instability align the loop on 32-bytes.
687c03c5b1cSMartin Matuska      */
688c03c5b1cSMartin Matuska     __asm__(".p2align 5");
689c03c5b1cSMartin Matuska #endif
690c03c5b1cSMartin Matuska     while (ip < ilimit) {
691c03c5b1cSMartin Matuska         size_t matchLength=0;
692c03c5b1cSMartin Matuska         size_t offset=0;
693c03c5b1cSMartin Matuska         const BYTE* start=ip+1;
694c03c5b1cSMartin Matuska 
695c03c5b1cSMartin Matuska         /* check repCode */
696c03c5b1cSMartin Matuska         if (dictMode == ZSTD_dictMatchState) {
697c03c5b1cSMartin Matuska             const U32 repIndex = (U32)(ip - base) + 1 - offset_1;
698c03c5b1cSMartin Matuska             const BYTE* repMatch = (dictMode == ZSTD_dictMatchState
699c03c5b1cSMartin Matuska                                 && repIndex < prefixLowestIndex) ?
700c03c5b1cSMartin Matuska                                    dictBase + (repIndex - dictIndexDelta) :
701c03c5b1cSMartin Matuska                                    base + repIndex;
702c03c5b1cSMartin Matuska             if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
703c03c5b1cSMartin Matuska                 && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
704c03c5b1cSMartin Matuska                 const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
705c03c5b1cSMartin Matuska                 matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
706c03c5b1cSMartin Matuska                 if (depth==0) goto _storeSequence;
707c03c5b1cSMartin Matuska             }
708c03c5b1cSMartin Matuska         }
709c03c5b1cSMartin Matuska         if ( dictMode == ZSTD_noDict
710c03c5b1cSMartin Matuska           && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) {
711c03c5b1cSMartin Matuska             matchLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
712c03c5b1cSMartin Matuska             if (depth==0) goto _storeSequence;
713c03c5b1cSMartin Matuska         }
714c03c5b1cSMartin Matuska 
715c03c5b1cSMartin Matuska         /* first search (depth 0) */
716c03c5b1cSMartin Matuska         {   size_t offsetFound = 999999999;
717c03c5b1cSMartin Matuska             size_t const ml2 = searchMax(ms, ip, iend, &offsetFound);
718c03c5b1cSMartin Matuska             if (ml2 > matchLength)
719c03c5b1cSMartin Matuska                 matchLength = ml2, start = ip, offset=offsetFound;
720c03c5b1cSMartin Matuska         }
721c03c5b1cSMartin Matuska 
722c03c5b1cSMartin Matuska         if (matchLength < 4) {
723c03c5b1cSMartin Matuska             ip += ((ip-anchor) >> kSearchStrength) + 1;   /* jump faster over incompressible sections */
724c03c5b1cSMartin Matuska             continue;
725c03c5b1cSMartin Matuska         }
726c03c5b1cSMartin Matuska 
727c03c5b1cSMartin Matuska         /* let's try to find a better solution */
728c03c5b1cSMartin Matuska         if (depth>=1)
729c03c5b1cSMartin Matuska         while (ip<ilimit) {
730c03c5b1cSMartin Matuska             ip ++;
731c03c5b1cSMartin Matuska             if ( (dictMode == ZSTD_noDict)
732c03c5b1cSMartin Matuska               && (offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
733c03c5b1cSMartin Matuska                 size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
734c03c5b1cSMartin Matuska                 int const gain2 = (int)(mlRep * 3);
735c03c5b1cSMartin Matuska                 int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
736c03c5b1cSMartin Matuska                 if ((mlRep >= 4) && (gain2 > gain1))
737c03c5b1cSMartin Matuska                     matchLength = mlRep, offset = 0, start = ip;
738c03c5b1cSMartin Matuska             }
739c03c5b1cSMartin Matuska             if (dictMode == ZSTD_dictMatchState) {
740c03c5b1cSMartin Matuska                 const U32 repIndex = (U32)(ip - base) - offset_1;
741c03c5b1cSMartin Matuska                 const BYTE* repMatch = repIndex < prefixLowestIndex ?
742c03c5b1cSMartin Matuska                                dictBase + (repIndex - dictIndexDelta) :
743c03c5b1cSMartin Matuska                                base + repIndex;
744c03c5b1cSMartin Matuska                 if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
745c03c5b1cSMartin Matuska                     && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
746c03c5b1cSMartin Matuska                     const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
747c03c5b1cSMartin Matuska                     size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
748c03c5b1cSMartin Matuska                     int const gain2 = (int)(mlRep * 3);
749c03c5b1cSMartin Matuska                     int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
750c03c5b1cSMartin Matuska                     if ((mlRep >= 4) && (gain2 > gain1))
751c03c5b1cSMartin Matuska                         matchLength = mlRep, offset = 0, start = ip;
752c03c5b1cSMartin Matuska                 }
753c03c5b1cSMartin Matuska             }
754c03c5b1cSMartin Matuska             {   size_t offset2=999999999;
755c03c5b1cSMartin Matuska                 size_t const ml2 = searchMax(ms, ip, iend, &offset2);
756c03c5b1cSMartin Matuska                 int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
757c03c5b1cSMartin Matuska                 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
758c03c5b1cSMartin Matuska                 if ((ml2 >= 4) && (gain2 > gain1)) {
759c03c5b1cSMartin Matuska                     matchLength = ml2, offset = offset2, start = ip;
760c03c5b1cSMartin Matuska                     continue;   /* search a better one */
761c03c5b1cSMartin Matuska             }   }
762c03c5b1cSMartin Matuska 
763c03c5b1cSMartin Matuska             /* let's find an even better one */
764c03c5b1cSMartin Matuska             if ((depth==2) && (ip<ilimit)) {
765c03c5b1cSMartin Matuska                 ip ++;
766c03c5b1cSMartin Matuska                 if ( (dictMode == ZSTD_noDict)
767c03c5b1cSMartin Matuska                   && (offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
768c03c5b1cSMartin Matuska                     size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
769c03c5b1cSMartin Matuska                     int const gain2 = (int)(mlRep * 4);
770c03c5b1cSMartin Matuska                     int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
771c03c5b1cSMartin Matuska                     if ((mlRep >= 4) && (gain2 > gain1))
772c03c5b1cSMartin Matuska                         matchLength = mlRep, offset = 0, start = ip;
773c03c5b1cSMartin Matuska                 }
774c03c5b1cSMartin Matuska                 if (dictMode == ZSTD_dictMatchState) {
775c03c5b1cSMartin Matuska                     const U32 repIndex = (U32)(ip - base) - offset_1;
776c03c5b1cSMartin Matuska                     const BYTE* repMatch = repIndex < prefixLowestIndex ?
777c03c5b1cSMartin Matuska                                    dictBase + (repIndex - dictIndexDelta) :
778c03c5b1cSMartin Matuska                                    base + repIndex;
779c03c5b1cSMartin Matuska                     if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
780c03c5b1cSMartin Matuska                         && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
781c03c5b1cSMartin Matuska                         const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
782c03c5b1cSMartin Matuska                         size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
783c03c5b1cSMartin Matuska                         int const gain2 = (int)(mlRep * 4);
784c03c5b1cSMartin Matuska                         int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
785c03c5b1cSMartin Matuska                         if ((mlRep >= 4) && (gain2 > gain1))
786c03c5b1cSMartin Matuska                             matchLength = mlRep, offset = 0, start = ip;
787c03c5b1cSMartin Matuska                     }
788c03c5b1cSMartin Matuska                 }
789c03c5b1cSMartin Matuska                 {   size_t offset2=999999999;
790c03c5b1cSMartin Matuska                     size_t const ml2 = searchMax(ms, ip, iend, &offset2);
791c03c5b1cSMartin Matuska                     int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
792c03c5b1cSMartin Matuska                     int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
793c03c5b1cSMartin Matuska                     if ((ml2 >= 4) && (gain2 > gain1)) {
794c03c5b1cSMartin Matuska                         matchLength = ml2, offset = offset2, start = ip;
795c03c5b1cSMartin Matuska                         continue;
796c03c5b1cSMartin Matuska             }   }   }
797c03c5b1cSMartin Matuska             break;  /* nothing found : store previous solution */
798c03c5b1cSMartin Matuska         }
799c03c5b1cSMartin Matuska 
800c03c5b1cSMartin Matuska         /* NOTE:
801c03c5b1cSMartin Matuska          * start[-offset+ZSTD_REP_MOVE-1] is undefined behavior.
802c03c5b1cSMartin Matuska          * (-offset+ZSTD_REP_MOVE-1) is unsigned, and is added to start, which
803c03c5b1cSMartin Matuska          * overflows the pointer, which is undefined behavior.
804c03c5b1cSMartin Matuska          */
805c03c5b1cSMartin Matuska         /* catch up */
806c03c5b1cSMartin Matuska         if (offset) {
807c03c5b1cSMartin Matuska             if (dictMode == ZSTD_noDict) {
808c03c5b1cSMartin Matuska                 while ( ((start > anchor) & (start - (offset-ZSTD_REP_MOVE) > prefixLowest))
809c03c5b1cSMartin Matuska                      && (start[-1] == (start-(offset-ZSTD_REP_MOVE))[-1]) )  /* only search for offset within prefix */
810c03c5b1cSMartin Matuska                     { start--; matchLength++; }
811c03c5b1cSMartin Matuska             }
812c03c5b1cSMartin Matuska             if (dictMode == ZSTD_dictMatchState) {
813c03c5b1cSMartin Matuska                 U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE));
814c03c5b1cSMartin Matuska                 const BYTE* match = (matchIndex < prefixLowestIndex) ? dictBase + matchIndex - dictIndexDelta : base + matchIndex;
815c03c5b1cSMartin Matuska                 const BYTE* const mStart = (matchIndex < prefixLowestIndex) ? dictLowest : prefixLowest;
816c03c5b1cSMartin Matuska                 while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; }  /* catch up */
817c03c5b1cSMartin Matuska             }
818c03c5b1cSMartin Matuska             offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
819c03c5b1cSMartin Matuska         }
820c03c5b1cSMartin Matuska         /* store sequence */
821c03c5b1cSMartin Matuska _storeSequence:
822c03c5b1cSMartin Matuska         {   size_t const litLength = start - anchor;
823c03c5b1cSMartin Matuska             ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH);
824c03c5b1cSMartin Matuska             anchor = ip = start + matchLength;
825c03c5b1cSMartin Matuska         }
826c03c5b1cSMartin Matuska 
827c03c5b1cSMartin Matuska         /* check immediate repcode */
828c03c5b1cSMartin Matuska         if (dictMode == ZSTD_dictMatchState) {
829c03c5b1cSMartin Matuska             while (ip <= ilimit) {
830c03c5b1cSMartin Matuska                 U32 const current2 = (U32)(ip-base);
831c03c5b1cSMartin Matuska                 U32 const repIndex = current2 - offset_2;
832c03c5b1cSMartin Matuska                 const BYTE* repMatch = dictMode == ZSTD_dictMatchState
833c03c5b1cSMartin Matuska                     && repIndex < prefixLowestIndex ?
834c03c5b1cSMartin Matuska                         dictBase - dictIndexDelta + repIndex :
835c03c5b1cSMartin Matuska                         base + repIndex;
836c03c5b1cSMartin Matuska                 if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex) >= 3 /* intentional overflow */)
837c03c5b1cSMartin Matuska                    && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
838c03c5b1cSMartin Matuska                     const BYTE* const repEnd2 = repIndex < prefixLowestIndex ? dictEnd : iend;
839c03c5b1cSMartin Matuska                     matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd2, prefixLowest) + 4;
840c03c5b1cSMartin Matuska                     offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset;   /* swap offset_2 <=> offset_1 */
841c03c5b1cSMartin Matuska                     ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
842c03c5b1cSMartin Matuska                     ip += matchLength;
843c03c5b1cSMartin Matuska                     anchor = ip;
844c03c5b1cSMartin Matuska                     continue;
845c03c5b1cSMartin Matuska                 }
846c03c5b1cSMartin Matuska                 break;
847c03c5b1cSMartin Matuska             }
848c03c5b1cSMartin Matuska         }
849c03c5b1cSMartin Matuska 
850c03c5b1cSMartin Matuska         if (dictMode == ZSTD_noDict) {
851c03c5b1cSMartin Matuska             while ( ((ip <= ilimit) & (offset_2>0))
852c03c5b1cSMartin Matuska                  && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) {
853c03c5b1cSMartin Matuska                 /* store sequence */
854c03c5b1cSMartin Matuska                 matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
855c03c5b1cSMartin Matuska                 offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */
856c03c5b1cSMartin Matuska                 ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
857c03c5b1cSMartin Matuska                 ip += matchLength;
858c03c5b1cSMartin Matuska                 anchor = ip;
859c03c5b1cSMartin Matuska                 continue;   /* faster when present ... (?) */
860c03c5b1cSMartin Matuska     }   }   }
861c03c5b1cSMartin Matuska 
862c03c5b1cSMartin Matuska     /* Save reps for next block */
863c03c5b1cSMartin Matuska     rep[0] = offset_1 ? offset_1 : savedOffset;
864c03c5b1cSMartin Matuska     rep[1] = offset_2 ? offset_2 : savedOffset;
865c03c5b1cSMartin Matuska 
866c03c5b1cSMartin Matuska     /* Return the last literals size */
867c03c5b1cSMartin Matuska     return (size_t)(iend - anchor);
868c03c5b1cSMartin Matuska }
869c03c5b1cSMartin Matuska 
870c03c5b1cSMartin Matuska 
ZSTD_compressBlock_btlazy2(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],void const * src,size_t srcSize)871c03c5b1cSMartin Matuska size_t ZSTD_compressBlock_btlazy2(
872c03c5b1cSMartin Matuska         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
873c03c5b1cSMartin Matuska         void const* src, size_t srcSize)
874c03c5b1cSMartin Matuska {
875c03c5b1cSMartin Matuska     return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_noDict);
876c03c5b1cSMartin Matuska }
877c03c5b1cSMartin Matuska 
ZSTD_compressBlock_lazy2(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],void const * src,size_t srcSize)878c03c5b1cSMartin Matuska size_t ZSTD_compressBlock_lazy2(
879c03c5b1cSMartin Matuska         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
880c03c5b1cSMartin Matuska         void const* src, size_t srcSize)
881c03c5b1cSMartin Matuska {
882c03c5b1cSMartin Matuska     return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_noDict);
883c03c5b1cSMartin Matuska }
884c03c5b1cSMartin Matuska 
ZSTD_compressBlock_lazy(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],void const * src,size_t srcSize)885c03c5b1cSMartin Matuska size_t ZSTD_compressBlock_lazy(
886c03c5b1cSMartin Matuska         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
887c03c5b1cSMartin Matuska         void const* src, size_t srcSize)
888c03c5b1cSMartin Matuska {
889c03c5b1cSMartin Matuska     return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_noDict);
890c03c5b1cSMartin Matuska }
891c03c5b1cSMartin Matuska 
ZSTD_compressBlock_greedy(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],void const * src,size_t srcSize)892c03c5b1cSMartin Matuska size_t ZSTD_compressBlock_greedy(
893c03c5b1cSMartin Matuska         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
894c03c5b1cSMartin Matuska         void const* src, size_t srcSize)
895c03c5b1cSMartin Matuska {
896c03c5b1cSMartin Matuska     return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_noDict);
897c03c5b1cSMartin Matuska }
898c03c5b1cSMartin Matuska 
ZSTD_compressBlock_btlazy2_dictMatchState(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],void const * src,size_t srcSize)899c03c5b1cSMartin Matuska size_t ZSTD_compressBlock_btlazy2_dictMatchState(
900c03c5b1cSMartin Matuska         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
901c03c5b1cSMartin Matuska         void const* src, size_t srcSize)
902c03c5b1cSMartin Matuska {
903c03c5b1cSMartin Matuska     return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_dictMatchState);
904c03c5b1cSMartin Matuska }
905c03c5b1cSMartin Matuska 
ZSTD_compressBlock_lazy2_dictMatchState(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],void const * src,size_t srcSize)906c03c5b1cSMartin Matuska size_t ZSTD_compressBlock_lazy2_dictMatchState(
907c03c5b1cSMartin Matuska         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
908c03c5b1cSMartin Matuska         void const* src, size_t srcSize)
909c03c5b1cSMartin Matuska {
910c03c5b1cSMartin Matuska     return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dictMatchState);
911c03c5b1cSMartin Matuska }
912c03c5b1cSMartin Matuska 
ZSTD_compressBlock_lazy_dictMatchState(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],void const * src,size_t srcSize)913c03c5b1cSMartin Matuska size_t ZSTD_compressBlock_lazy_dictMatchState(
914c03c5b1cSMartin Matuska         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
915c03c5b1cSMartin Matuska         void const* src, size_t srcSize)
916c03c5b1cSMartin Matuska {
917c03c5b1cSMartin Matuska     return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dictMatchState);
918c03c5b1cSMartin Matuska }
919c03c5b1cSMartin Matuska 
ZSTD_compressBlock_greedy_dictMatchState(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],void const * src,size_t srcSize)920c03c5b1cSMartin Matuska size_t ZSTD_compressBlock_greedy_dictMatchState(
921c03c5b1cSMartin Matuska         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
922c03c5b1cSMartin Matuska         void const* src, size_t srcSize)
923c03c5b1cSMartin Matuska {
924c03c5b1cSMartin Matuska     return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dictMatchState);
925c03c5b1cSMartin Matuska }
926c03c5b1cSMartin Matuska 
927c03c5b1cSMartin Matuska 
928c03c5b1cSMartin Matuska FORCE_INLINE_TEMPLATE
ZSTD_compressBlock_lazy_extDict_generic(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],const void * src,size_t srcSize,const searchMethod_e searchMethod,const U32 depth)929c03c5b1cSMartin Matuska size_t ZSTD_compressBlock_lazy_extDict_generic(
930c03c5b1cSMartin Matuska                         ZSTD_matchState_t* ms, seqStore_t* seqStore,
931c03c5b1cSMartin Matuska                         U32 rep[ZSTD_REP_NUM],
932c03c5b1cSMartin Matuska                         const void* src, size_t srcSize,
933c03c5b1cSMartin Matuska                         const searchMethod_e searchMethod, const U32 depth)
934c03c5b1cSMartin Matuska {
935c03c5b1cSMartin Matuska     const BYTE* const istart = (const BYTE*)src;
936c03c5b1cSMartin Matuska     const BYTE* ip = istart;
937c03c5b1cSMartin Matuska     const BYTE* anchor = istart;
938c03c5b1cSMartin Matuska     const BYTE* const iend = istart + srcSize;
939c03c5b1cSMartin Matuska     const BYTE* const ilimit = iend - 8;
940c03c5b1cSMartin Matuska     const BYTE* const base = ms->window.base;
941c03c5b1cSMartin Matuska     const U32 dictLimit = ms->window.dictLimit;
942c03c5b1cSMartin Matuska     const BYTE* const prefixStart = base + dictLimit;
943c03c5b1cSMartin Matuska     const BYTE* const dictBase = ms->window.dictBase;
944c03c5b1cSMartin Matuska     const BYTE* const dictEnd  = dictBase + dictLimit;
945c03c5b1cSMartin Matuska     const BYTE* const dictStart  = dictBase + ms->window.lowLimit;
946c03c5b1cSMartin Matuska     const U32 windowLog = ms->cParams.windowLog;
947c03c5b1cSMartin Matuska 
948c03c5b1cSMartin Matuska     typedef size_t (*searchMax_f)(
949c03c5b1cSMartin Matuska                         ZSTD_matchState_t* ms,
950c03c5b1cSMartin Matuska                         const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
951c03c5b1cSMartin Matuska     searchMax_f searchMax = searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_extDict_selectMLS : ZSTD_HcFindBestMatch_extDict_selectMLS;
952c03c5b1cSMartin Matuska 
953c03c5b1cSMartin Matuska     U32 offset_1 = rep[0], offset_2 = rep[1];
954c03c5b1cSMartin Matuska 
955c03c5b1cSMartin Matuska     DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic");
956c03c5b1cSMartin Matuska 
957c03c5b1cSMartin Matuska     /* init */
958c03c5b1cSMartin Matuska     ip += (ip == prefixStart);
959c03c5b1cSMartin Matuska 
960c03c5b1cSMartin Matuska     /* Match Loop */
961c03c5b1cSMartin Matuska #if defined(__GNUC__) && defined(__x86_64__)
962c03c5b1cSMartin Matuska     /* I've measured random a 5% speed loss on levels 5 & 6 (greedy) when the
963c03c5b1cSMartin Matuska      * code alignment is perturbed. To fix the instability align the loop on 32-bytes.
964c03c5b1cSMartin Matuska      */
965c03c5b1cSMartin Matuska     __asm__(".p2align 5");
966c03c5b1cSMartin Matuska #endif
967c03c5b1cSMartin Matuska     while (ip < ilimit) {
968c03c5b1cSMartin Matuska         size_t matchLength=0;
969c03c5b1cSMartin Matuska         size_t offset=0;
970c03c5b1cSMartin Matuska         const BYTE* start=ip+1;
971c03c5b1cSMartin Matuska         U32 current = (U32)(ip-base);
972c03c5b1cSMartin Matuska 
973c03c5b1cSMartin Matuska         /* check repCode */
974c03c5b1cSMartin Matuska         {   const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current+1, windowLog);
975c03c5b1cSMartin Matuska             const U32 repIndex = (U32)(current+1 - offset_1);
976c03c5b1cSMartin Matuska             const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
977c03c5b1cSMartin Matuska             const BYTE* const repMatch = repBase + repIndex;
978*c9539b89SMartin Matuska             if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow */
979*c9539b89SMartin Matuska                & (offset_1 < current+1 - windowLow) ) /* note: we are searching at current+1 */
980c03c5b1cSMartin Matuska             if (MEM_read32(ip+1) == MEM_read32(repMatch)) {
981c03c5b1cSMartin Matuska                 /* repcode detected we should take it */
982c03c5b1cSMartin Matuska                 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
983c03c5b1cSMartin Matuska                 matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repEnd, prefixStart) + 4;
984c03c5b1cSMartin Matuska                 if (depth==0) goto _storeSequence;
985c03c5b1cSMartin Matuska         }   }
986c03c5b1cSMartin Matuska 
987c03c5b1cSMartin Matuska         /* first search (depth 0) */
988c03c5b1cSMartin Matuska         {   size_t offsetFound = 999999999;
989c03c5b1cSMartin Matuska             size_t const ml2 = searchMax(ms, ip, iend, &offsetFound);
990c03c5b1cSMartin Matuska             if (ml2 > matchLength)
991c03c5b1cSMartin Matuska                 matchLength = ml2, start = ip, offset=offsetFound;
992c03c5b1cSMartin Matuska         }
993c03c5b1cSMartin Matuska 
994c03c5b1cSMartin Matuska          if (matchLength < 4) {
995c03c5b1cSMartin Matuska             ip += ((ip-anchor) >> kSearchStrength) + 1;   /* jump faster over incompressible sections */
996c03c5b1cSMartin Matuska             continue;
997c03c5b1cSMartin Matuska         }
998c03c5b1cSMartin Matuska 
999c03c5b1cSMartin Matuska         /* let's try to find a better solution */
1000c03c5b1cSMartin Matuska         if (depth>=1)
1001c03c5b1cSMartin Matuska         while (ip<ilimit) {
1002c03c5b1cSMartin Matuska             ip ++;
1003c03c5b1cSMartin Matuska             current++;
1004c03c5b1cSMartin Matuska             /* check repCode */
1005c03c5b1cSMartin Matuska             if (offset) {
1006c03c5b1cSMartin Matuska                 const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current, windowLog);
1007c03c5b1cSMartin Matuska                 const U32 repIndex = (U32)(current - offset_1);
1008c03c5b1cSMartin Matuska                 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
1009c03c5b1cSMartin Matuska                 const BYTE* const repMatch = repBase + repIndex;
1010*c9539b89SMartin Matuska                     if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow : do not test positions overlapping 2 memory segments  */
1011*c9539b89SMartin Matuska                        & (offset_1 < current - windowLow) ) /* equivalent to `current > repIndex >= windowLow` */
1012c03c5b1cSMartin Matuska                 if (MEM_read32(ip) == MEM_read32(repMatch)) {
1013c03c5b1cSMartin Matuska                     /* repcode detected */
1014c03c5b1cSMartin Matuska                     const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
1015c03c5b1cSMartin Matuska                     size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
1016c03c5b1cSMartin Matuska                     int const gain2 = (int)(repLength * 3);
1017c03c5b1cSMartin Matuska                     int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
1018c03c5b1cSMartin Matuska                     if ((repLength >= 4) && (gain2 > gain1))
1019c03c5b1cSMartin Matuska                         matchLength = repLength, offset = 0, start = ip;
1020c03c5b1cSMartin Matuska             }   }
1021c03c5b1cSMartin Matuska 
1022c03c5b1cSMartin Matuska             /* search match, depth 1 */
1023c03c5b1cSMartin Matuska             {   size_t offset2=999999999;
1024c03c5b1cSMartin Matuska                 size_t const ml2 = searchMax(ms, ip, iend, &offset2);
1025c03c5b1cSMartin Matuska                 int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
1026c03c5b1cSMartin Matuska                 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
1027c03c5b1cSMartin Matuska                 if ((ml2 >= 4) && (gain2 > gain1)) {
1028c03c5b1cSMartin Matuska                     matchLength = ml2, offset = offset2, start = ip;
1029c03c5b1cSMartin Matuska                     continue;   /* search a better one */
1030c03c5b1cSMartin Matuska             }   }
1031c03c5b1cSMartin Matuska 
1032c03c5b1cSMartin Matuska             /* let's find an even better one */
1033c03c5b1cSMartin Matuska             if ((depth==2) && (ip<ilimit)) {
1034c03c5b1cSMartin Matuska                 ip ++;
1035c03c5b1cSMartin Matuska                 current++;
1036c03c5b1cSMartin Matuska                 /* check repCode */
1037c03c5b1cSMartin Matuska                 if (offset) {
1038c03c5b1cSMartin Matuska                     const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current, windowLog);
1039c03c5b1cSMartin Matuska                     const U32 repIndex = (U32)(current - offset_1);
1040c03c5b1cSMartin Matuska                     const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
1041c03c5b1cSMartin Matuska                     const BYTE* const repMatch = repBase + repIndex;
1042*c9539b89SMartin Matuska                 if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow : do not test positions overlapping 2 memory segments  */
1043*c9539b89SMartin Matuska                    & (offset_1 < current - windowLow) ) /* equivalent to `current > repIndex >= windowLow` */
1044c03c5b1cSMartin Matuska                     if (MEM_read32(ip) == MEM_read32(repMatch)) {
1045c03c5b1cSMartin Matuska                         /* repcode detected */
1046c03c5b1cSMartin Matuska                         const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
1047c03c5b1cSMartin Matuska                         size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
1048c03c5b1cSMartin Matuska                         int const gain2 = (int)(repLength * 4);
1049c03c5b1cSMartin Matuska                         int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
1050c03c5b1cSMartin Matuska                         if ((repLength >= 4) && (gain2 > gain1))
1051c03c5b1cSMartin Matuska                             matchLength = repLength, offset = 0, start = ip;
1052c03c5b1cSMartin Matuska                 }   }
1053c03c5b1cSMartin Matuska 
1054c03c5b1cSMartin Matuska                 /* search match, depth 2 */
1055c03c5b1cSMartin Matuska                 {   size_t offset2=999999999;
1056c03c5b1cSMartin Matuska                     size_t const ml2 = searchMax(ms, ip, iend, &offset2);
1057c03c5b1cSMartin Matuska                     int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
1058c03c5b1cSMartin Matuska                     int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
1059c03c5b1cSMartin Matuska                     if ((ml2 >= 4) && (gain2 > gain1)) {
1060c03c5b1cSMartin Matuska                         matchLength = ml2, offset = offset2, start = ip;
1061c03c5b1cSMartin Matuska                         continue;
1062c03c5b1cSMartin Matuska             }   }   }
1063c03c5b1cSMartin Matuska             break;  /* nothing found : store previous solution */
1064c03c5b1cSMartin Matuska         }
1065c03c5b1cSMartin Matuska 
1066c03c5b1cSMartin Matuska         /* catch up */
1067c03c5b1cSMartin Matuska         if (offset) {
1068c03c5b1cSMartin Matuska             U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE));
1069c03c5b1cSMartin Matuska             const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex;
1070c03c5b1cSMartin Matuska             const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart;
1071c03c5b1cSMartin Matuska             while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; }  /* catch up */
1072c03c5b1cSMartin Matuska             offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
1073c03c5b1cSMartin Matuska         }
1074c03c5b1cSMartin Matuska 
1075c03c5b1cSMartin Matuska         /* store sequence */
1076c03c5b1cSMartin Matuska _storeSequence:
1077c03c5b1cSMartin Matuska         {   size_t const litLength = start - anchor;
1078c03c5b1cSMartin Matuska             ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH);
1079c03c5b1cSMartin Matuska             anchor = ip = start + matchLength;
1080c03c5b1cSMartin Matuska         }
1081c03c5b1cSMartin Matuska 
1082c03c5b1cSMartin Matuska         /* check immediate repcode */
1083c03c5b1cSMartin Matuska         while (ip <= ilimit) {
1084c03c5b1cSMartin Matuska             const U32 repCurrent = (U32)(ip-base);
1085c03c5b1cSMartin Matuska             const U32 windowLow = ZSTD_getLowestMatchIndex(ms, repCurrent, windowLog);
1086c03c5b1cSMartin Matuska             const U32 repIndex = repCurrent - offset_2;
1087c03c5b1cSMartin Matuska             const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
1088c03c5b1cSMartin Matuska             const BYTE* const repMatch = repBase + repIndex;
1089*c9539b89SMartin Matuska             if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow : do not test positions overlapping 2 memory segments  */
1090*c9539b89SMartin Matuska                & (offset_2 < repCurrent - windowLow) ) /* equivalent to `curr > repIndex >= windowLow` */
1091c03c5b1cSMartin Matuska             if (MEM_read32(ip) == MEM_read32(repMatch)) {
1092c03c5b1cSMartin Matuska                 /* repcode detected we should take it */
1093c03c5b1cSMartin Matuska                 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
1094c03c5b1cSMartin Matuska                 matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
1095c03c5b1cSMartin Matuska                 offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset;   /* swap offset history */
1096c03c5b1cSMartin Matuska                 ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
1097c03c5b1cSMartin Matuska                 ip += matchLength;
1098c03c5b1cSMartin Matuska                 anchor = ip;
1099c03c5b1cSMartin Matuska                 continue;   /* faster when present ... (?) */
1100c03c5b1cSMartin Matuska             }
1101c03c5b1cSMartin Matuska             break;
1102c03c5b1cSMartin Matuska     }   }
1103c03c5b1cSMartin Matuska 
1104c03c5b1cSMartin Matuska     /* Save reps for next block */
1105c03c5b1cSMartin Matuska     rep[0] = offset_1;
1106c03c5b1cSMartin Matuska     rep[1] = offset_2;
1107c03c5b1cSMartin Matuska 
1108c03c5b1cSMartin Matuska     /* Return the last literals size */
1109c03c5b1cSMartin Matuska     return (size_t)(iend - anchor);
1110c03c5b1cSMartin Matuska }
1111c03c5b1cSMartin Matuska 
1112c03c5b1cSMartin Matuska 
ZSTD_compressBlock_greedy_extDict(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],void const * src,size_t srcSize)1113c03c5b1cSMartin Matuska size_t ZSTD_compressBlock_greedy_extDict(
1114c03c5b1cSMartin Matuska         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1115c03c5b1cSMartin Matuska         void const* src, size_t srcSize)
1116c03c5b1cSMartin Matuska {
1117c03c5b1cSMartin Matuska     return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0);
1118c03c5b1cSMartin Matuska }
1119c03c5b1cSMartin Matuska 
ZSTD_compressBlock_lazy_extDict(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],void const * src,size_t srcSize)1120c03c5b1cSMartin Matuska size_t ZSTD_compressBlock_lazy_extDict(
1121c03c5b1cSMartin Matuska         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1122c03c5b1cSMartin Matuska         void const* src, size_t srcSize)
1123c03c5b1cSMartin Matuska 
1124c03c5b1cSMartin Matuska {
1125c03c5b1cSMartin Matuska     return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1);
1126c03c5b1cSMartin Matuska }
1127c03c5b1cSMartin Matuska 
ZSTD_compressBlock_lazy2_extDict(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],void const * src,size_t srcSize)1128c03c5b1cSMartin Matuska size_t ZSTD_compressBlock_lazy2_extDict(
1129c03c5b1cSMartin Matuska         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1130c03c5b1cSMartin Matuska         void const* src, size_t srcSize)
1131c03c5b1cSMartin Matuska 
1132c03c5b1cSMartin Matuska {
1133c03c5b1cSMartin Matuska     return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2);
1134c03c5b1cSMartin Matuska }
1135c03c5b1cSMartin Matuska 
ZSTD_compressBlock_btlazy2_extDict(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],void const * src,size_t srcSize)1136c03c5b1cSMartin Matuska size_t ZSTD_compressBlock_btlazy2_extDict(
1137c03c5b1cSMartin Matuska         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1138c03c5b1cSMartin Matuska         void const* src, size_t srcSize)
1139c03c5b1cSMartin Matuska 
1140c03c5b1cSMartin Matuska {
1141c03c5b1cSMartin Matuska     return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2);
1142c03c5b1cSMartin Matuska }
1143