xref: /netbsd-src/external/bsd/zstd/dist/lib/compress/zstd_double_fast.c (revision 3117ece4fc4a4ca4489ba793710b60b0d26bab6c)
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

#include "zstd_compress_internal.h"
#include "zstd_double_fast.h"

#ifndef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR

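/* The two fill variants below seed the long (8-byte) and short (minMatch-byte)
 * hash tables from the loaded prefix. The CDict variant reserves
 * ZSTD_SHORT_CACHE_TAG_BITS of each entry for a hash tag (ZSTD_writeTaggedIndex),
 * which the dictMatchState search further down compares via ZSTD_comparePackedTags
 * so that most non-matching candidates can be rejected before reading dictionary
 * content; the CCtx variant stores plain indices. */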
static
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
void ZSTD_fillDoubleHashTableForCDict(ZSTD_matchState_t* ms,
                              void const* end, ZSTD_dictTableLoadMethod_e dtlm)
{
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32* const hashLarge = ms->hashTable;
    U32  const hBitsL = cParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS;
    U32  const mls = cParams->minMatch;
    U32* const hashSmall = ms->chainTable;
    U32  const hBitsS = cParams->chainLog + ZSTD_SHORT_CACHE_TAG_BITS;
    const BYTE* const base = ms->window.base;
    const BYTE* ip = base + ms->nextToUpdate;
    const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
    const U32 fastHashFillStep = 3;

    /* Always insert every fastHashFillStep position into the hash tables.
     * Insert the other positions into the large hash table if their entry
     * is empty.
     */
    for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) {
        U32 const curr = (U32)(ip - base);
        U32 i;
        for (i = 0; i < fastHashFillStep; ++i) {
            size_t const smHashAndTag = ZSTD_hashPtr(ip + i, hBitsS, mls);
            size_t const lgHashAndTag = ZSTD_hashPtr(ip + i, hBitsL, 8);
            if (i == 0) {
                ZSTD_writeTaggedIndex(hashSmall, smHashAndTag, curr + i);
            }
            if (i == 0 || hashLarge[lgHashAndTag >> ZSTD_SHORT_CACHE_TAG_BITS] == 0) {
                ZSTD_writeTaggedIndex(hashLarge, lgHashAndTag, curr + i);
            }
            /* Only load extra positions for ZSTD_dtlm_full */
            if (dtlm == ZSTD_dtlm_fast)
                break;
    }   }
}

static
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
void ZSTD_fillDoubleHashTableForCCtx(ZSTD_matchState_t* ms,
                              void const* end, ZSTD_dictTableLoadMethod_e dtlm)
{
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32* const hashLarge = ms->hashTable;
    U32  const hBitsL = cParams->hashLog;
    U32  const mls = cParams->minMatch;
    U32* const hashSmall = ms->chainTable;
    U32  const hBitsS = cParams->chainLog;
    const BYTE* const base = ms->window.base;
    const BYTE* ip = base + ms->nextToUpdate;
    const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
    const U32 fastHashFillStep = 3;

    /* Always insert every fastHashFillStep position into the hash tables.
     * Insert the other positions into the large hash table if their entry
     * is empty.
     */
    for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) {
        U32 const curr = (U32)(ip - base);
        U32 i;
        for (i = 0; i < fastHashFillStep; ++i) {
            size_t const smHash = ZSTD_hashPtr(ip + i, hBitsS, mls);
            size_t const lgHash = ZSTD_hashPtr(ip + i, hBitsL, 8);
            if (i == 0)
                hashSmall[smHash] = curr + i;
            if (i == 0 || hashLarge[lgHash] == 0)
                hashLarge[lgHash] = curr + i;
            /* Only load extra positions for ZSTD_dtlm_full */
            if (dtlm == ZSTD_dtlm_fast)
                break;
        }   }
}

void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
                        const void* const end,
                        ZSTD_dictTableLoadMethod_e dtlm,
                        ZSTD_tableFillPurpose_e tfp)
{
    if (tfp == ZSTD_tfp_forCDict) {
        ZSTD_fillDoubleHashTableForCDict(ms, end, dtlm);
    } else {
        ZSTD_fillDoubleHashTableForCCtx(ms, end, dtlm);
    }
}


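/* noDict double-fast search: each probed position is looked up in the long table
 * with an 8-byte hash and in the short table with a minMatch-byte hash, preferring
 * the long candidate; a long match at ip+1 is also tried before settling for the
 * short one. When roughly kStepIncr positions go by without a match, the stride
 * between probed positions (step) grows, trading compression ratio for speed on
 * hard-to-compress data. */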
FORCE_INLINE_TEMPLATE
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t ZSTD_compressBlock_doubleFast_noDict_generic(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize, U32 const mls /* template */)
{
    ZSTD_compressionParameters const* cParams = &ms->cParams;
    U32* const hashLong = ms->hashTable;
    const U32 hBitsL = cParams->hashLog;
    U32* const hashSmall = ms->chainTable;
    const U32 hBitsS = cParams->chainLog;
    const BYTE* const base = ms->window.base;
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* anchor = istart;
    const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
    /* presumes that, if there is a dictionary, it must be using Attach mode */
    const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
    const BYTE* const prefixLowest = base + prefixLowestIndex;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - HASH_READ_SIZE;
    U32 offset_1=rep[0], offset_2=rep[1];
    U32 offsetSaved1 = 0, offsetSaved2 = 0;

    size_t mLength;
    U32 offset;
    U32 curr;

    /* how many positions to search before increasing step size */
    const size_t kStepIncr = 1 << kSearchStrength;
    /* the position at which to increment the step size if no match is found */
    const BYTE* nextStep;
    size_t step; /* the current step size */

    size_t hl0; /* the long hash at ip */
    size_t hl1; /* the long hash at ip1 */

    U32 idxl0; /* the long match index for ip */
    U32 idxl1; /* the long match index for ip1 */

    const BYTE* matchl0; /* the long match for ip */
    const BYTE* matchs0; /* the short match for ip */
    const BYTE* matchl1; /* the long match for ip1 */

    const BYTE* ip = istart; /* the current position */
    const BYTE* ip1; /* the next position */

    DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_noDict_generic");

    /* init */
    ip += ((ip - prefixLowest) == 0);
    {
        U32 const current = (U32)(ip - base);
        U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog);
        U32 const maxRep = current - windowLow;
        if (offset_2 > maxRep) offsetSaved2 = offset_2, offset_2 = 0;
        if (offset_1 > maxRep) offsetSaved1 = offset_1, offset_1 = 0;
    }
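    /* Note: rep offsets inherited from the previous block that cannot be valid
     * within the current window (larger than maxRep) are disabled (set to 0) for
     * this block's search, but kept in offsetSaved1/2 so they can still be written
     * back into rep[] at the end of the block if they were never replaced. */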

    /* Outer Loop: one iteration per match found and stored */
    while (1) {
        step = 1;
        nextStep = ip + kStepIncr;
        ip1 = ip + step;

        if (ip1 > ilimit) {
            goto _cleanup;
        }

        hl0 = ZSTD_hashPtr(ip, hBitsL, 8);
        idxl0 = hashLong[hl0];
        matchl0 = base + idxl0;

        /* Inner Loop: one iteration per search / position */
        do {
            const size_t hs0 = ZSTD_hashPtr(ip, hBitsS, mls);
            const U32 idxs0 = hashSmall[hs0];
            curr = (U32)(ip-base);
            matchs0 = base + idxs0;

            hashLong[hl0] = hashSmall[hs0] = curr;   /* update hash tables */

            /* check noDict repcode */
            if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) {
                mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
                ip++;
                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
                goto _match_stored;
            }

            hl1 = ZSTD_hashPtr(ip1, hBitsL, 8);

            if (idxl0 > prefixLowestIndex) {
                /* check prefix long match */
                if (MEM_read64(matchl0) == MEM_read64(ip)) {
                    mLength = ZSTD_count(ip+8, matchl0+8, iend) + 8;
                    offset = (U32)(ip-matchl0);
                    while (((ip>anchor) & (matchl0>prefixLowest)) && (ip[-1] == matchl0[-1])) { ip--; matchl0--; mLength++; } /* catch up */
                    goto _match_found;
                }
            }

            idxl1 = hashLong[hl1];
            matchl1 = base + idxl1;

            if (idxs0 > prefixLowestIndex) {
                /* check prefix short match */
                if (MEM_read32(matchs0) == MEM_read32(ip)) {
                    goto _search_next_long;
                }
            }

            if (ip1 >= nextStep) {
                PREFETCH_L1(ip1 + 64);
                PREFETCH_L1(ip1 + 128);
                step++;
                nextStep += kStepIncr;
            }
            ip = ip1;
            ip1 += step;

            hl0 = hl1;
            idxl0 = idxl1;
            matchl0 = matchl1;
    #if defined(__aarch64__)
            PREFETCH_L1(ip+256);
    #endif
        } while (ip1 <= ilimit);

_cleanup:
        /* If offset_1 started invalid (offsetSaved1 != 0) and became valid (offset_1 != 0),
         * rotate saved offsets. See comment in ZSTD_compressBlock_fast_noDict for more context. */
        offsetSaved2 = ((offsetSaved1 != 0) && (offset_1 != 0)) ? offsetSaved1 : offsetSaved2;

        /* save reps for next block */
        rep[0] = offset_1 ? offset_1 : offsetSaved1;
        rep[1] = offset_2 ? offset_2 : offsetSaved2;

        /* Return the last literals size */
        return (size_t)(iend - anchor);

_search_next_long:

        /* check prefix long +1 match */
        if (idxl1 > prefixLowestIndex) {
            if (MEM_read64(matchl1) == MEM_read64(ip1)) {
                ip = ip1;
                mLength = ZSTD_count(ip+8, matchl1+8, iend) + 8;
                offset = (U32)(ip-matchl1);
                while (((ip>anchor) & (matchl1>prefixLowest)) && (ip[-1] == matchl1[-1])) { ip--; matchl1--; mLength++; } /* catch up */
                goto _match_found;
            }
        }

        /* if no long +1 match, explore the short match we found */
        mLength = ZSTD_count(ip+4, matchs0+4, iend) + 4;
        offset = (U32)(ip - matchs0);
        while (((ip>anchor) & (matchs0>prefixLowest)) && (ip[-1] == matchs0[-1])) { ip--; matchs0--; mLength++; } /* catch up */

        /* fall-through */

_match_found: /* requires ip, offset, mLength */
        offset_2 = offset_1;
        offset_1 = offset;

        if (step < 4) {
            /* It is unsafe to write this value back to the hashtable when ip1 is
             * greater than or equal to the new ip we will have after we're done
             * processing this match. Rather than perform that test directly
             * (ip1 >= ip + mLength), which costs speed in practice, we use a
             * simpler, more predictable test: the minimum match length, even for
             * a short match, is 4 bytes, so as long as step (the initial distance
             * between ip and ip1) is less than 4, we know ip1 < new ip. */
            hashLong[hl1] = (U32)(ip1 - base);
        }

        ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);

_match_stored:
        /* match found */
        ip += mLength;
        anchor = ip;

        if (ip <= ilimit) {
            /* Complementary insertion */
            /* done after iLimit test, as candidates could be > iend-8 */
            {   U32 const indexToInsert = curr+2;
                hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
                hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
                hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
                hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
            }

            /* check immediate repcode */
            while ( (ip <= ilimit)
                 && ( (offset_2>0)
                    & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
                /* store sequence */
                size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
                U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff;  /* swap offset_2 <=> offset_1 */
                hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
                hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
                ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, rLength);
                ip += rLength;
                anchor = ip;
                continue;   /* faster when present ... (?) */
            }
        }
    }
}


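/* dictMatchState variant: the in-prefix search mirrors the noDict case, but when
 * a table entry points below prefixLowestIndex the attached CDict's tagged tables
 * are consulted instead. ZSTD_comparePackedTags filters dictionary candidates by
 * hash tag before any read into dictionary content, and matches that start inside
 * the dictionary are measured with ZSTD_count_2segments across the
 * dictionary/prefix boundary. */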
FORCE_INLINE_TEMPLATE
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize,
        U32 const mls /* template */)
{
    ZSTD_compressionParameters const* cParams = &ms->cParams;
    U32* const hashLong = ms->hashTable;
    const U32 hBitsL = cParams->hashLog;
    U32* const hashSmall = ms->chainTable;
    const U32 hBitsS = cParams->chainLog;
    const BYTE* const base = ms->window.base;
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* ip = istart;
    const BYTE* anchor = istart;
    const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
    /* presumes that, if there is a dictionary, it must be using Attach mode */
    const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
    const BYTE* const prefixLowest = base + prefixLowestIndex;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - HASH_READ_SIZE;
    U32 offset_1=rep[0], offset_2=rep[1];

    const ZSTD_matchState_t* const dms = ms->dictMatchState;
    const ZSTD_compressionParameters* const dictCParams = &dms->cParams;
    const U32* const dictHashLong  = dms->hashTable;
    const U32* const dictHashSmall = dms->chainTable;
    const U32 dictStartIndex       = dms->window.dictLimit;
    const BYTE* const dictBase     = dms->window.base;
    const BYTE* const dictStart    = dictBase + dictStartIndex;
    const BYTE* const dictEnd      = dms->window.nextSrc;
    const U32 dictIndexDelta       = prefixLowestIndex - (U32)(dictEnd - dictBase);
    const U32 dictHBitsL           = dictCParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS;
    const U32 dictHBitsS           = dictCParams->chainLog + ZSTD_SHORT_CACHE_TAG_BITS;
    const U32 dictAndPrefixLength  = (U32)((ip - prefixLowest) + (dictEnd - dictStart));

    DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_dictMatchState_generic");

    /* if a dictionary is attached, it must be within window range */
    assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex);

    if (ms->prefetchCDictTables) {
        size_t const hashTableBytes = (((size_t)1) << dictCParams->hashLog) * sizeof(U32);
        size_t const chainTableBytes = (((size_t)1) << dictCParams->chainLog) * sizeof(U32);
        PREFETCH_AREA(dictHashLong, hashTableBytes);
        PREFETCH_AREA(dictHashSmall, chainTableBytes);
    }

    /* init */
    ip += (dictAndPrefixLength == 0);

    /* dictMatchState repCode checks don't currently handle repCode == 0
     * disabling. */
    assert(offset_1 <= dictAndPrefixLength);
    assert(offset_2 <= dictAndPrefixLength);

    /* Main Search Loop */
    while (ip < ilimit) {   /* < instead of <=, because repcode check at (ip+1) */
        size_t mLength;
        U32 offset;
        size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8);
        size_t const h = ZSTD_hashPtr(ip, hBitsS, mls);
        size_t const dictHashAndTagL = ZSTD_hashPtr(ip, dictHBitsL, 8);
        size_t const dictHashAndTagS = ZSTD_hashPtr(ip, dictHBitsS, mls);
        U32 const dictMatchIndexAndTagL = dictHashLong[dictHashAndTagL >> ZSTD_SHORT_CACHE_TAG_BITS];
        U32 const dictMatchIndexAndTagS = dictHashSmall[dictHashAndTagS >> ZSTD_SHORT_CACHE_TAG_BITS];
        int const dictTagsMatchL = ZSTD_comparePackedTags(dictMatchIndexAndTagL, dictHashAndTagL);
        int const dictTagsMatchS = ZSTD_comparePackedTags(dictMatchIndexAndTagS, dictHashAndTagS);
        U32 const curr = (U32)(ip-base);
        U32 const matchIndexL = hashLong[h2];
        U32 matchIndexS = hashSmall[h];
        const BYTE* matchLong = base + matchIndexL;
        const BYTE* match = base + matchIndexS;
        const U32 repIndex = curr + 1 - offset_1;
        const BYTE* repMatch = (repIndex < prefixLowestIndex) ?
                               dictBase + (repIndex - dictIndexDelta) :
                               base + repIndex;
        hashLong[h2] = hashSmall[h] = curr;   /* update hash tables */

        /* check repcode */
        if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
            && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
            const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
            mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
            ip++;
            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
            goto _match_stored;
        }

        if (matchIndexL > prefixLowestIndex) {
            /* check prefix long match */
            if (MEM_read64(matchLong) == MEM_read64(ip)) {
                mLength = ZSTD_count(ip+8, matchLong+8, iend) + 8;
                offset = (U32)(ip-matchLong);
                while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
                goto _match_found;
            }
        } else if (dictTagsMatchL) {
            /* check dictMatchState long match */
            U32 const dictMatchIndexL = dictMatchIndexAndTagL >> ZSTD_SHORT_CACHE_TAG_BITS;
            const BYTE* dictMatchL = dictBase + dictMatchIndexL;
            assert(dictMatchL < dictEnd);

            if (dictMatchL > dictStart && MEM_read64(dictMatchL) == MEM_read64(ip)) {
                mLength = ZSTD_count_2segments(ip+8, dictMatchL+8, iend, dictEnd, prefixLowest) + 8;
                offset = (U32)(curr - dictMatchIndexL - dictIndexDelta);
                while (((ip>anchor) & (dictMatchL>dictStart)) && (ip[-1] == dictMatchL[-1])) { ip--; dictMatchL--; mLength++; } /* catch up */
                goto _match_found;
        }   }

        if (matchIndexS > prefixLowestIndex) {
            /* check prefix short match */
            if (MEM_read32(match) == MEM_read32(ip)) {
                goto _search_next_long;
            }
        } else if (dictTagsMatchS) {
            /* check dictMatchState short match */
            U32 const dictMatchIndexS = dictMatchIndexAndTagS >> ZSTD_SHORT_CACHE_TAG_BITS;
            match = dictBase + dictMatchIndexS;
            matchIndexS = dictMatchIndexS + dictIndexDelta;

            if (match > dictStart && MEM_read32(match) == MEM_read32(ip)) {
                goto _search_next_long;
        }   }

        ip += ((ip-anchor) >> kSearchStrength) + 1;
#if defined(__aarch64__)
        PREFETCH_L1(ip+256);
#endif
        continue;

_search_next_long:
        {   size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
            size_t const dictHashAndTagL3 = ZSTD_hashPtr(ip+1, dictHBitsL, 8);
            U32 const matchIndexL3 = hashLong[hl3];
            U32 const dictMatchIndexAndTagL3 = dictHashLong[dictHashAndTagL3 >> ZSTD_SHORT_CACHE_TAG_BITS];
            int const dictTagsMatchL3 = ZSTD_comparePackedTags(dictMatchIndexAndTagL3, dictHashAndTagL3);
            const BYTE* matchL3 = base + matchIndexL3;
            hashLong[hl3] = curr + 1;

            /* check prefix long +1 match */
            if (matchIndexL3 > prefixLowestIndex) {
                if (MEM_read64(matchL3) == MEM_read64(ip+1)) {
                    mLength = ZSTD_count(ip+9, matchL3+8, iend) + 8;
                    ip++;
                    offset = (U32)(ip-matchL3);
                    while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */
                    goto _match_found;
                }
            } else if (dictTagsMatchL3) {
                /* check dict long +1 match */
                U32 const dictMatchIndexL3 = dictMatchIndexAndTagL3 >> ZSTD_SHORT_CACHE_TAG_BITS;
                const BYTE* dictMatchL3 = dictBase + dictMatchIndexL3;
                assert(dictMatchL3 < dictEnd);
                if (dictMatchL3 > dictStart && MEM_read64(dictMatchL3) == MEM_read64(ip+1)) {
                    mLength = ZSTD_count_2segments(ip+1+8, dictMatchL3+8, iend, dictEnd, prefixLowest) + 8;
                    ip++;
                    offset = (U32)(curr + 1 - dictMatchIndexL3 - dictIndexDelta);
                    while (((ip>anchor) & (dictMatchL3>dictStart)) && (ip[-1] == dictMatchL3[-1])) { ip--; dictMatchL3--; mLength++; } /* catch up */
                    goto _match_found;
        }   }   }

        /* if no long +1 match, explore the short match we found */
        if (matchIndexS < prefixLowestIndex) {
            mLength = ZSTD_count_2segments(ip+4, match+4, iend, dictEnd, prefixLowest) + 4;
            offset = (U32)(curr - matchIndexS);
            while (((ip>anchor) & (match>dictStart)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
        } else {
            mLength = ZSTD_count(ip+4, match+4, iend) + 4;
            offset = (U32)(ip - match);
            while (((ip>anchor) & (match>prefixLowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
        }

_match_found:
        offset_2 = offset_1;
        offset_1 = offset;

        ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);

_match_stored:
        /* match found */
        ip += mLength;
        anchor = ip;

        if (ip <= ilimit) {
            /* Complementary insertion */
            /* done after iLimit test, as candidates could be > iend-8 */
            {   U32 const indexToInsert = curr+2;
                hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
                hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
                hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
                hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
            }

            /* check immediate repcode */
            while (ip <= ilimit) {
                U32 const current2 = (U32)(ip-base);
                U32 const repIndex2 = current2 - offset_2;
                const BYTE* repMatch2 = repIndex2 < prefixLowestIndex ?
                        dictBase + repIndex2 - dictIndexDelta :
                        base + repIndex2;
                if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
                   && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
                    const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend;
                    size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4;
                    U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
                    ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, repLength2);
                    hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
                    hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
                    ip += repLength2;
                    anchor = ip;
                    continue;
                }
                break;
            }
        }
    }   /* while (ip < ilimit) */

    /* save reps for next block */
    rep[0] = offset_1;
    rep[1] = offset_2;

    /* Return the last literals size */
    return (size_t)(iend - anchor);
}

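/* ZSTD_GEN_DFAST_FN emits one thin wrapper per (dictMode, mls) pair, so that mls
 * is a compile-time constant inside each generic body and the hash computations
 * can be specialized per minMatch value. */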
#define ZSTD_GEN_DFAST_FN(dictMode, mls)                                                                 \
    static size_t ZSTD_compressBlock_doubleFast_##dictMode##_##mls(                                      \
            ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],                          \
            void const* src, size_t srcSize)                                                             \
    {                                                                                                    \
        return ZSTD_compressBlock_doubleFast_##dictMode##_generic(ms, seqStore, rep, src, srcSize, mls); \
    }

ZSTD_GEN_DFAST_FN(noDict, 4)
ZSTD_GEN_DFAST_FN(noDict, 5)
ZSTD_GEN_DFAST_FN(noDict, 6)
ZSTD_GEN_DFAST_FN(noDict, 7)

ZSTD_GEN_DFAST_FN(dictMatchState, 4)
ZSTD_GEN_DFAST_FN(dictMatchState, 5)
ZSTD_GEN_DFAST_FN(dictMatchState, 6)
ZSTD_GEN_DFAST_FN(dictMatchState, 7)


size_t ZSTD_compressBlock_doubleFast(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    const U32 mls = ms->cParams.minMatch;
    switch(mls)
    {
    default: /* includes case 3 */
    case 4 :
        return ZSTD_compressBlock_doubleFast_noDict_4(ms, seqStore, rep, src, srcSize);
    case 5 :
        return ZSTD_compressBlock_doubleFast_noDict_5(ms, seqStore, rep, src, srcSize);
    case 6 :
        return ZSTD_compressBlock_doubleFast_noDict_6(ms, seqStore, rep, src, srcSize);
    case 7 :
        return ZSTD_compressBlock_doubleFast_noDict_7(ms, seqStore, rep, src, srcSize);
    }
}


size_t ZSTD_compressBlock_doubleFast_dictMatchState(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    const U32 mls = ms->cParams.minMatch;
    switch(mls)
    {
    default: /* includes case 3 */
    case 4 :
        return ZSTD_compressBlock_doubleFast_dictMatchState_4(ms, seqStore, rep, src, srcSize);
    case 5 :
        return ZSTD_compressBlock_doubleFast_dictMatchState_5(ms, seqStore, rep, src, srcSize);
    case 6 :
        return ZSTD_compressBlock_doubleFast_dictMatchState_6(ms, seqStore, rep, src, srcSize);
    case 7 :
        return ZSTD_compressBlock_doubleFast_dictMatchState_7(ms, seqStore, rep, src, srcSize);
    }
}


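/* extDict variant: table entries may reference the external (dictBase) segment
 * rather than the current prefix, so candidates are validated against
 * dictStartIndex/prefixStartIndex and match lengths that may cross the segment
 * boundary are measured with ZSTD_count_2segments. */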
static
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t ZSTD_compressBlock_doubleFast_extDict_generic(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize,
        U32 const mls /* template */)
{
    ZSTD_compressionParameters const* cParams = &ms->cParams;
    U32* const hashLong = ms->hashTable;
    U32  const hBitsL = cParams->hashLog;
    U32* const hashSmall = ms->chainTable;
    U32  const hBitsS = cParams->chainLog;
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* ip = istart;
    const BYTE* anchor = istart;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - 8;
    const BYTE* const base = ms->window.base;
    const U32   endIndex = (U32)((size_t)(istart - base) + srcSize);
    const U32   lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog);
    const U32   dictStartIndex = lowLimit;
    const U32   dictLimit = ms->window.dictLimit;
    const U32   prefixStartIndex = (dictLimit > lowLimit) ? dictLimit : lowLimit;
    const BYTE* const prefixStart = base + prefixStartIndex;
    const BYTE* const dictBase = ms->window.dictBase;
    const BYTE* const dictStart = dictBase + dictStartIndex;
    const BYTE* const dictEnd = dictBase + prefixStartIndex;
    U32 offset_1=rep[0], offset_2=rep[1];

    DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_extDict_generic (srcSize=%zu)", srcSize);

    /* if extDict is invalidated due to maxDistance, switch to "regular" variant */
    if (prefixStartIndex == dictStartIndex)
        return ZSTD_compressBlock_doubleFast(ms, seqStore, rep, src, srcSize);

    /* Search Loop */
    while (ip < ilimit) {  /* < instead of <=, because repcode check at (ip+1) */
        const size_t hSmall = ZSTD_hashPtr(ip, hBitsS, mls);
        const U32 matchIndex = hashSmall[hSmall];
        const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base;
        const BYTE* match = matchBase + matchIndex;

        const size_t hLong = ZSTD_hashPtr(ip, hBitsL, 8);
        const U32 matchLongIndex = hashLong[hLong];
        const BYTE* const matchLongBase = matchLongIndex < prefixStartIndex ? dictBase : base;
        const BYTE* matchLong = matchLongBase + matchLongIndex;

        const U32 curr = (U32)(ip-base);
        const U32 repIndex = curr + 1 - offset_1;   /* offset_1 expected <= curr +1 */
        const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
        const BYTE* const repMatch = repBase + repIndex;
        size_t mLength;
        hashSmall[hSmall] = hashLong[hLong] = curr;   /* update hash table */

        if ((((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex doesn't overlap dict + prefix */
            & (offset_1 <= curr+1 - dictStartIndex)) /* note: we are searching at curr+1 */
          && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
            const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
            mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
            ip++;
            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
        } else {
            if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
                const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend;
                const BYTE* const lowMatchPtr = matchLongIndex < prefixStartIndex ? dictStart : prefixStart;
                U32 offset;
                mLength = ZSTD_count_2segments(ip+8, matchLong+8, iend, matchEnd, prefixStart) + 8;
                offset = curr - matchLongIndex;
                while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; }   /* catch up */
                offset_2 = offset_1;
                offset_1 = offset;
                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);

            } else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) {
                size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
                U32 const matchIndex3 = hashLong[h3];
                const BYTE* const match3Base = matchIndex3 < prefixStartIndex ? dictBase : base;
                const BYTE* match3 = match3Base + matchIndex3;
                U32 offset;
                hashLong[h3] = curr + 1;
                if ( (matchIndex3 > dictStartIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) {
                    const BYTE* const matchEnd = matchIndex3 < prefixStartIndex ? dictEnd : iend;
                    const BYTE* const lowMatchPtr = matchIndex3 < prefixStartIndex ? dictStart : prefixStart;
                    mLength = ZSTD_count_2segments(ip+9, match3+8, iend, matchEnd, prefixStart) + 8;
                    ip++;
                    offset = curr+1 - matchIndex3;
                    while (((ip>anchor) & (match3>lowMatchPtr)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; } /* catch up */
                } else {
                    const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
                    const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
                    mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
                    offset = curr - matchIndex;
                    while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; }   /* catch up */
                }
                offset_2 = offset_1;
                offset_1 = offset;
                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);

            } else {
                ip += ((ip-anchor) >> kSearchStrength) + 1;
                continue;
        }   }

        /* move to next sequence start */
        ip += mLength;
        anchor = ip;

        if (ip <= ilimit) {
            /* Complementary insertion */
            /* done after iLimit test, as candidates could be > iend-8 */
            {   U32 const indexToInsert = curr+2;
                hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
                hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
                hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
                hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
            }

            /* check immediate repcode */
            while (ip <= ilimit) {
                U32 const current2 = (U32)(ip-base);
                U32 const repIndex2 = current2 - offset_2;
                const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
                if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3)   /* intentional overflow : ensure repIndex2 doesn't overlap dict + prefix */
                    & (offset_2 <= current2 - dictStartIndex))
                  && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
                    const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
                    size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
                    U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
                    ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, repLength2);
                    hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
                    hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
                    ip += repLength2;
                    anchor = ip;
                    continue;
                }
                break;
    }   }   }

    /* save reps for next block */
    rep[0] = offset_1;
    rep[1] = offset_2;

    /* Return the last literals size */
    return (size_t)(iend - anchor);
}

ZSTD_GEN_DFAST_FN(extDict, 4)
ZSTD_GEN_DFAST_FN(extDict, 5)
ZSTD_GEN_DFAST_FN(extDict, 6)
ZSTD_GEN_DFAST_FN(extDict, 7)

size_t ZSTD_compressBlock_doubleFast_extDict(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    U32 const mls = ms->cParams.minMatch;
    switch(mls)
    {
    default: /* includes case 3 */
    case 4 :
        return ZSTD_compressBlock_doubleFast_extDict_4(ms, seqStore, rep, src, srcSize);
    case 5 :
        return ZSTD_compressBlock_doubleFast_extDict_5(ms, seqStore, rep, src, srcSize);
    case 6 :
        return ZSTD_compressBlock_doubleFast_extDict_6(ms, seqStore, rep, src, srcSize);
    case 7 :
        return ZSTD_compressBlock_doubleFast_extDict_7(ms, seqStore, rep, src, srcSize);
    }
}

#endif /* ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR */