/*
 * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */
10c03c5b1cSMartin Matuska
#include "zstd_compress_internal.h"  /* ZSTD_hashPtr, ZSTD_count, ZSTD_storeSeq */
#include "zstd_fast.h"
13c03c5b1cSMartin Matuska
14c03c5b1cSMartin Matuska
/* ZSTD_fillHashTable() :
 * Inserts positions of already-present data into ms->hashTable, starting at
 * index ms->nextToUpdate and stopping HASH_READ_SIZE bytes short of `end`
 * (so that every hashed position can be read in full).
 *
 * ms   : match state; its cParams (hashLog, minMatch), window.base,
 *        nextToUpdate and hashTable are used. Only hashTable is written here;
 *        ms->nextToUpdate is not modified by this function.
 * end  : one-past-the-end pointer of the data to index.
 * dtlm : with ZSTD_dtlm_fast only one position out of every fastHashFillStep
 *        is inserted; otherwise the in-between positions are inserted too,
 *        but only into hash slots that are still 0.
 */
void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
                        const void* const end,
                        ZSTD_dictTableLoadMethod_e dtlm)
{
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32* const hashTable = ms->hashTable;
    U32  const hBits = cParams->hashLog;
    U32  const mls = cParams->minMatch;
    const BYTE* const base = ms->window.base;
    const BYTE* ip = base + ms->nextToUpdate;
    const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
    const U32 fastHashFillStep = 3;

    /* Always insert every fastHashFillStep position into the hash table.
     * Insert the other positions if their hash entry is empty.
     */
    for ( ; ip + fastHashFillStep < iend + 2; ip += fastHashFillStep) {
        U32 const current = (U32)(ip - base);
        size_t const hash0 = ZSTD_hashPtr(ip, hBits, mls);
        hashTable[hash0] = current;   /* unconditional: overwrites any previous entry */
        if (dtlm == ZSTD_dtlm_fast) continue;
        /* Only load extra positions for ZSTD_dtlm_full */
        {   U32 p;
            for (p = 1; p < fastHashFillStep; ++p) {
                size_t const hash = ZSTD_hashPtr(ip + p, hBits, mls);
                if (hashTable[hash] == 0) {  /* not yet filled */
                    hashTable[hash] = current + p;
    }   }   }   }
}
44c03c5b1cSMartin Matuska
45c03c5b1cSMartin Matuska
46c03c5b1cSMartin Matuska FORCE_INLINE_TEMPLATE size_t
ZSTD_compressBlock_fast_generic(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],void const * src,size_t srcSize,U32 const mls)47c03c5b1cSMartin Matuska ZSTD_compressBlock_fast_generic(
48c03c5b1cSMartin Matuska ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
49c03c5b1cSMartin Matuska void const* src, size_t srcSize,
50c03c5b1cSMartin Matuska U32 const mls)
51c03c5b1cSMartin Matuska {
52c03c5b1cSMartin Matuska const ZSTD_compressionParameters* const cParams = &ms->cParams;
53c03c5b1cSMartin Matuska U32* const hashTable = ms->hashTable;
54c03c5b1cSMartin Matuska U32 const hlog = cParams->hashLog;
55c03c5b1cSMartin Matuska /* support stepSize of 0 */
56c03c5b1cSMartin Matuska size_t const stepSize = cParams->targetLength + !(cParams->targetLength) + 1;
57c03c5b1cSMartin Matuska const BYTE* const base = ms->window.base;
58c03c5b1cSMartin Matuska const BYTE* const istart = (const BYTE*)src;
59c03c5b1cSMartin Matuska /* We check ip0 (ip + 0) and ip1 (ip + 1) each loop */
60c03c5b1cSMartin Matuska const BYTE* ip0 = istart;
61c03c5b1cSMartin Matuska const BYTE* ip1;
62c03c5b1cSMartin Matuska const BYTE* anchor = istart;
63c03c5b1cSMartin Matuska const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
64c03c5b1cSMartin Matuska const U32 prefixStartIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
65c03c5b1cSMartin Matuska const BYTE* const prefixStart = base + prefixStartIndex;
66c03c5b1cSMartin Matuska const BYTE* const iend = istart + srcSize;
67c03c5b1cSMartin Matuska const BYTE* const ilimit = iend - HASH_READ_SIZE;
68c03c5b1cSMartin Matuska U32 offset_1=rep[0], offset_2=rep[1];
69c03c5b1cSMartin Matuska U32 offsetSaved = 0;
70c03c5b1cSMartin Matuska
71c03c5b1cSMartin Matuska /* init */
72c03c5b1cSMartin Matuska DEBUGLOG(5, "ZSTD_compressBlock_fast_generic");
73c03c5b1cSMartin Matuska ip0 += (ip0 == prefixStart);
74c03c5b1cSMartin Matuska ip1 = ip0 + 1;
75c03c5b1cSMartin Matuska { U32 const current = (U32)(ip0 - base);
76c03c5b1cSMartin Matuska U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog);
77c03c5b1cSMartin Matuska U32 const maxRep = current - windowLow;
78c03c5b1cSMartin Matuska if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
79c03c5b1cSMartin Matuska if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
80c03c5b1cSMartin Matuska }
81c03c5b1cSMartin Matuska
82c03c5b1cSMartin Matuska /* Main Search Loop */
83c03c5b1cSMartin Matuska #ifdef __INTEL_COMPILER
84c03c5b1cSMartin Matuska /* From intel 'The vector pragma indicates that the loop should be
85c03c5b1cSMartin Matuska * vectorized if it is legal to do so'. Can be used together with
86c03c5b1cSMartin Matuska * #pragma ivdep (but have opted to exclude that because intel
87c03c5b1cSMartin Matuska * warns against using it).*/
88c03c5b1cSMartin Matuska #pragma vector always
89c03c5b1cSMartin Matuska #endif
90c03c5b1cSMartin Matuska while (ip1 < ilimit) { /* < instead of <=, because check at ip0+2 */
91c03c5b1cSMartin Matuska size_t mLength;
92c03c5b1cSMartin Matuska BYTE const* ip2 = ip0 + 2;
93c03c5b1cSMartin Matuska size_t const h0 = ZSTD_hashPtr(ip0, hlog, mls);
94c03c5b1cSMartin Matuska U32 const val0 = MEM_read32(ip0);
95c03c5b1cSMartin Matuska size_t const h1 = ZSTD_hashPtr(ip1, hlog, mls);
96c03c5b1cSMartin Matuska U32 const val1 = MEM_read32(ip1);
97c03c5b1cSMartin Matuska U32 const current0 = (U32)(ip0-base);
98c03c5b1cSMartin Matuska U32 const current1 = (U32)(ip1-base);
99c03c5b1cSMartin Matuska U32 const matchIndex0 = hashTable[h0];
100c03c5b1cSMartin Matuska U32 const matchIndex1 = hashTable[h1];
101c03c5b1cSMartin Matuska BYTE const* repMatch = ip2 - offset_1;
102c03c5b1cSMartin Matuska const BYTE* match0 = base + matchIndex0;
103c03c5b1cSMartin Matuska const BYTE* match1 = base + matchIndex1;
104c03c5b1cSMartin Matuska U32 offcode;
105c03c5b1cSMartin Matuska
106c03c5b1cSMartin Matuska #if defined(__aarch64__)
107c03c5b1cSMartin Matuska PREFETCH_L1(ip0+256);
108c03c5b1cSMartin Matuska #endif
109c03c5b1cSMartin Matuska
110c03c5b1cSMartin Matuska hashTable[h0] = current0; /* update hash table */
111c03c5b1cSMartin Matuska hashTable[h1] = current1; /* update hash table */
112c03c5b1cSMartin Matuska
113c03c5b1cSMartin Matuska assert(ip0 + 1 == ip1);
114c03c5b1cSMartin Matuska
115c03c5b1cSMartin Matuska if ((offset_1 > 0) & (MEM_read32(repMatch) == MEM_read32(ip2))) {
116c03c5b1cSMartin Matuska mLength = (ip2[-1] == repMatch[-1]) ? 1 : 0;
117c03c5b1cSMartin Matuska ip0 = ip2 - mLength;
118c03c5b1cSMartin Matuska match0 = repMatch - mLength;
119c03c5b1cSMartin Matuska mLength += 4;
120c03c5b1cSMartin Matuska offcode = 0;
121c03c5b1cSMartin Matuska goto _match;
122c03c5b1cSMartin Matuska }
123c03c5b1cSMartin Matuska if ((matchIndex0 > prefixStartIndex) && MEM_read32(match0) == val0) {
124c03c5b1cSMartin Matuska /* found a regular match */
125c03c5b1cSMartin Matuska goto _offset;
126c03c5b1cSMartin Matuska }
127c03c5b1cSMartin Matuska if ((matchIndex1 > prefixStartIndex) && MEM_read32(match1) == val1) {
128c03c5b1cSMartin Matuska /* found a regular match after one literal */
129c03c5b1cSMartin Matuska ip0 = ip1;
130c03c5b1cSMartin Matuska match0 = match1;
131c03c5b1cSMartin Matuska goto _offset;
132c03c5b1cSMartin Matuska }
133c03c5b1cSMartin Matuska { size_t const step = ((size_t)(ip0-anchor) >> (kSearchStrength - 1)) + stepSize;
134c03c5b1cSMartin Matuska assert(step >= 2);
135c03c5b1cSMartin Matuska ip0 += step;
136c03c5b1cSMartin Matuska ip1 += step;
137c03c5b1cSMartin Matuska continue;
138c03c5b1cSMartin Matuska }
139c03c5b1cSMartin Matuska _offset: /* Requires: ip0, match0 */
140c03c5b1cSMartin Matuska /* Compute the offset code */
141c03c5b1cSMartin Matuska offset_2 = offset_1;
142c03c5b1cSMartin Matuska offset_1 = (U32)(ip0-match0);
143c03c5b1cSMartin Matuska offcode = offset_1 + ZSTD_REP_MOVE;
144c03c5b1cSMartin Matuska mLength = 4;
145c03c5b1cSMartin Matuska /* Count the backwards match length */
146c03c5b1cSMartin Matuska while (((ip0>anchor) & (match0>prefixStart))
147c03c5b1cSMartin Matuska && (ip0[-1] == match0[-1])) { ip0--; match0--; mLength++; } /* catch up */
148c03c5b1cSMartin Matuska
149c03c5b1cSMartin Matuska _match: /* Requires: ip0, match0, offcode */
150c03c5b1cSMartin Matuska /* Count the forward length */
151c03c5b1cSMartin Matuska mLength += ZSTD_count(ip0+mLength, match0+mLength, iend);
152c03c5b1cSMartin Matuska ZSTD_storeSeq(seqStore, (size_t)(ip0-anchor), anchor, iend, offcode, mLength-MINMATCH);
153c03c5b1cSMartin Matuska /* match found */
154c03c5b1cSMartin Matuska ip0 += mLength;
155c03c5b1cSMartin Matuska anchor = ip0;
156c03c5b1cSMartin Matuska
157c03c5b1cSMartin Matuska if (ip0 <= ilimit) {
158c03c5b1cSMartin Matuska /* Fill Table */
159c03c5b1cSMartin Matuska assert(base+current0+2 > istart); /* check base overflow */
160c03c5b1cSMartin Matuska hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2; /* here because current+2 could be > iend-8 */
161c03c5b1cSMartin Matuska hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base);
162c03c5b1cSMartin Matuska
163c03c5b1cSMartin Matuska if (offset_2 > 0) { /* offset_2==0 means offset_2 is invalidated */
164c03c5b1cSMartin Matuska while ( (ip0 <= ilimit) && (MEM_read32(ip0) == MEM_read32(ip0 - offset_2)) ) {
165c03c5b1cSMartin Matuska /* store sequence */
166c03c5b1cSMartin Matuska size_t const rLength = ZSTD_count(ip0+4, ip0+4-offset_2, iend) + 4;
167c03c5b1cSMartin Matuska { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */
168c03c5b1cSMartin Matuska hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base);
169c03c5b1cSMartin Matuska ip0 += rLength;
170c03c5b1cSMartin Matuska ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, 0 /*offCode*/, rLength-MINMATCH);
171c03c5b1cSMartin Matuska anchor = ip0;
172c03c5b1cSMartin Matuska continue; /* faster when present (confirmed on gcc-8) ... (?) */
173c03c5b1cSMartin Matuska } } }
174c03c5b1cSMartin Matuska ip1 = ip0 + 1;
175c03c5b1cSMartin Matuska }
176c03c5b1cSMartin Matuska
177c03c5b1cSMartin Matuska /* save reps for next block */
178c03c5b1cSMartin Matuska rep[0] = offset_1 ? offset_1 : offsetSaved;
179c03c5b1cSMartin Matuska rep[1] = offset_2 ? offset_2 : offsetSaved;
180c03c5b1cSMartin Matuska
181c03c5b1cSMartin Matuska /* Return the last literals size */
182c03c5b1cSMartin Matuska return (size_t)(iend - anchor);
183c03c5b1cSMartin Matuska }
184c03c5b1cSMartin Matuska
185c03c5b1cSMartin Matuska
/* ZSTD_compressBlock_fast() :
 * Entry point of the "fast" strategy when no dictMatchState is attached
 * (asserted below). Dispatches on ms->cParams.minMatch so that
 * ZSTD_compressBlock_fast_generic() receives `mls` as a literal constant.
 * Any minMatch value other than 5, 6 or 7 uses the mls==4 variant.
 * @return : the value returned by the generic function (last literals size).
 */
size_t ZSTD_compressBlock_fast(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    U32 const mls = ms->cParams.minMatch;
    assert(ms->dictMatchState == NULL);
    switch(mls)
    {
    default: /* includes case 3 */
    case 4 :
        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 4);
    case 5 :
        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 5);
    case 6 :
        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 6);
    case 7 :
        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 7);
    }
}
205c03c5b1cSMartin Matuska
206c03c5b1cSMartin Matuska FORCE_INLINE_TEMPLATE
ZSTD_compressBlock_fast_dictMatchState_generic(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],void const * src,size_t srcSize,U32 const mls)207c03c5b1cSMartin Matuska size_t ZSTD_compressBlock_fast_dictMatchState_generic(
208c03c5b1cSMartin Matuska ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
209c03c5b1cSMartin Matuska void const* src, size_t srcSize, U32 const mls)
210c03c5b1cSMartin Matuska {
211c03c5b1cSMartin Matuska const ZSTD_compressionParameters* const cParams = &ms->cParams;
212c03c5b1cSMartin Matuska U32* const hashTable = ms->hashTable;
213c03c5b1cSMartin Matuska U32 const hlog = cParams->hashLog;
214c03c5b1cSMartin Matuska /* support stepSize of 0 */
215c03c5b1cSMartin Matuska U32 const stepSize = cParams->targetLength + !(cParams->targetLength);
216c03c5b1cSMartin Matuska const BYTE* const base = ms->window.base;
217c03c5b1cSMartin Matuska const BYTE* const istart = (const BYTE*)src;
218c03c5b1cSMartin Matuska const BYTE* ip = istart;
219c03c5b1cSMartin Matuska const BYTE* anchor = istart;
220c03c5b1cSMartin Matuska const U32 prefixStartIndex = ms->window.dictLimit;
221c03c5b1cSMartin Matuska const BYTE* const prefixStart = base + prefixStartIndex;
222c03c5b1cSMartin Matuska const BYTE* const iend = istart + srcSize;
223c03c5b1cSMartin Matuska const BYTE* const ilimit = iend - HASH_READ_SIZE;
224c03c5b1cSMartin Matuska U32 offset_1=rep[0], offset_2=rep[1];
225c03c5b1cSMartin Matuska U32 offsetSaved = 0;
226c03c5b1cSMartin Matuska
227c03c5b1cSMartin Matuska const ZSTD_matchState_t* const dms = ms->dictMatchState;
228c03c5b1cSMartin Matuska const ZSTD_compressionParameters* const dictCParams = &dms->cParams ;
229c03c5b1cSMartin Matuska const U32* const dictHashTable = dms->hashTable;
230c03c5b1cSMartin Matuska const U32 dictStartIndex = dms->window.dictLimit;
231c03c5b1cSMartin Matuska const BYTE* const dictBase = dms->window.base;
232c03c5b1cSMartin Matuska const BYTE* const dictStart = dictBase + dictStartIndex;
233c03c5b1cSMartin Matuska const BYTE* const dictEnd = dms->window.nextSrc;
234c03c5b1cSMartin Matuska const U32 dictIndexDelta = prefixStartIndex - (U32)(dictEnd - dictBase);
235c03c5b1cSMartin Matuska const U32 dictAndPrefixLength = (U32)(ip - prefixStart + dictEnd - dictStart);
236c03c5b1cSMartin Matuska const U32 dictHLog = dictCParams->hashLog;
237c03c5b1cSMartin Matuska
238c03c5b1cSMartin Matuska /* if a dictionary is still attached, it necessarily means that
239c03c5b1cSMartin Matuska * it is within window size. So we just check it. */
240c03c5b1cSMartin Matuska const U32 maxDistance = 1U << cParams->windowLog;
241c03c5b1cSMartin Matuska const U32 endIndex = (U32)((size_t)(ip - base) + srcSize);
242c03c5b1cSMartin Matuska assert(endIndex - prefixStartIndex <= maxDistance);
243c03c5b1cSMartin Matuska (void)maxDistance; (void)endIndex; /* these variables are not used when assert() is disabled */
244c03c5b1cSMartin Matuska
245c03c5b1cSMartin Matuska /* ensure there will be no no underflow
246c03c5b1cSMartin Matuska * when translating a dict index into a local index */
247c03c5b1cSMartin Matuska assert(prefixStartIndex >= (U32)(dictEnd - dictBase));
248c03c5b1cSMartin Matuska
249c03c5b1cSMartin Matuska /* init */
250c03c5b1cSMartin Matuska DEBUGLOG(5, "ZSTD_compressBlock_fast_dictMatchState_generic");
251c03c5b1cSMartin Matuska ip += (dictAndPrefixLength == 0);
252c03c5b1cSMartin Matuska /* dictMatchState repCode checks don't currently handle repCode == 0
253c03c5b1cSMartin Matuska * disabling. */
254c03c5b1cSMartin Matuska assert(offset_1 <= dictAndPrefixLength);
255c03c5b1cSMartin Matuska assert(offset_2 <= dictAndPrefixLength);
256c03c5b1cSMartin Matuska
257c03c5b1cSMartin Matuska /* Main Search Loop */
258c03c5b1cSMartin Matuska while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */
259c03c5b1cSMartin Matuska size_t mLength;
260c03c5b1cSMartin Matuska size_t const h = ZSTD_hashPtr(ip, hlog, mls);
261c03c5b1cSMartin Matuska U32 const current = (U32)(ip-base);
262c03c5b1cSMartin Matuska U32 const matchIndex = hashTable[h];
263c03c5b1cSMartin Matuska const BYTE* match = base + matchIndex;
264c03c5b1cSMartin Matuska const U32 repIndex = current + 1 - offset_1;
265c03c5b1cSMartin Matuska const BYTE* repMatch = (repIndex < prefixStartIndex) ?
266c03c5b1cSMartin Matuska dictBase + (repIndex - dictIndexDelta) :
267c03c5b1cSMartin Matuska base + repIndex;
268c03c5b1cSMartin Matuska hashTable[h] = current; /* update hash table */
269c03c5b1cSMartin Matuska
270c03c5b1cSMartin Matuska if ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex isn't overlapping dict + prefix */
271c03c5b1cSMartin Matuska && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
272c03c5b1cSMartin Matuska const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
273c03c5b1cSMartin Matuska mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
274c03c5b1cSMartin Matuska ip++;
275c03c5b1cSMartin Matuska ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
276c03c5b1cSMartin Matuska } else if ( (matchIndex <= prefixStartIndex) ) {
277c03c5b1cSMartin Matuska size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls);
278c03c5b1cSMartin Matuska U32 const dictMatchIndex = dictHashTable[dictHash];
279c03c5b1cSMartin Matuska const BYTE* dictMatch = dictBase + dictMatchIndex;
280c03c5b1cSMartin Matuska if (dictMatchIndex <= dictStartIndex ||
281c03c5b1cSMartin Matuska MEM_read32(dictMatch) != MEM_read32(ip)) {
282c03c5b1cSMartin Matuska assert(stepSize >= 1);
283c03c5b1cSMartin Matuska ip += ((ip-anchor) >> kSearchStrength) + stepSize;
284c03c5b1cSMartin Matuska continue;
285c03c5b1cSMartin Matuska } else {
286c03c5b1cSMartin Matuska /* found a dict match */
287c03c5b1cSMartin Matuska U32 const offset = (U32)(current-dictMatchIndex-dictIndexDelta);
288c03c5b1cSMartin Matuska mLength = ZSTD_count_2segments(ip+4, dictMatch+4, iend, dictEnd, prefixStart) + 4;
289c03c5b1cSMartin Matuska while (((ip>anchor) & (dictMatch>dictStart))
290c03c5b1cSMartin Matuska && (ip[-1] == dictMatch[-1])) {
291c03c5b1cSMartin Matuska ip--; dictMatch--; mLength++;
292c03c5b1cSMartin Matuska } /* catch up */
293c03c5b1cSMartin Matuska offset_2 = offset_1;
294c03c5b1cSMartin Matuska offset_1 = offset;
295c03c5b1cSMartin Matuska ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
296c03c5b1cSMartin Matuska }
297c03c5b1cSMartin Matuska } else if (MEM_read32(match) != MEM_read32(ip)) {
298c03c5b1cSMartin Matuska /* it's not a match, and we're not going to check the dictionary */
299c03c5b1cSMartin Matuska assert(stepSize >= 1);
300c03c5b1cSMartin Matuska ip += ((ip-anchor) >> kSearchStrength) + stepSize;
301c03c5b1cSMartin Matuska continue;
302c03c5b1cSMartin Matuska } else {
303c03c5b1cSMartin Matuska /* found a regular match */
304c03c5b1cSMartin Matuska U32 const offset = (U32)(ip-match);
305c03c5b1cSMartin Matuska mLength = ZSTD_count(ip+4, match+4, iend) + 4;
306c03c5b1cSMartin Matuska while (((ip>anchor) & (match>prefixStart))
307c03c5b1cSMartin Matuska && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
308c03c5b1cSMartin Matuska offset_2 = offset_1;
309c03c5b1cSMartin Matuska offset_1 = offset;
310c03c5b1cSMartin Matuska ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
311c03c5b1cSMartin Matuska }
312c03c5b1cSMartin Matuska
313c03c5b1cSMartin Matuska /* match found */
314c03c5b1cSMartin Matuska ip += mLength;
315c03c5b1cSMartin Matuska anchor = ip;
316c03c5b1cSMartin Matuska
317c03c5b1cSMartin Matuska if (ip <= ilimit) {
318c03c5b1cSMartin Matuska /* Fill Table */
319c03c5b1cSMartin Matuska assert(base+current+2 > istart); /* check base overflow */
320c03c5b1cSMartin Matuska hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2; /* here because current+2 could be > iend-8 */
321c03c5b1cSMartin Matuska hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base);
322c03c5b1cSMartin Matuska
323c03c5b1cSMartin Matuska /* check immediate repcode */
324c03c5b1cSMartin Matuska while (ip <= ilimit) {
325c03c5b1cSMartin Matuska U32 const current2 = (U32)(ip-base);
326c03c5b1cSMartin Matuska U32 const repIndex2 = current2 - offset_2;
327c03c5b1cSMartin Matuska const BYTE* repMatch2 = repIndex2 < prefixStartIndex ?
328c03c5b1cSMartin Matuska dictBase - dictIndexDelta + repIndex2 :
329c03c5b1cSMartin Matuska base + repIndex2;
330c03c5b1cSMartin Matuska if ( ((U32)((prefixStartIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
331c03c5b1cSMartin Matuska && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
332c03c5b1cSMartin Matuska const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
333c03c5b1cSMartin Matuska size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
334c03c5b1cSMartin Matuska U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
335c03c5b1cSMartin Matuska ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
336c03c5b1cSMartin Matuska hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
337c03c5b1cSMartin Matuska ip += repLength2;
338c03c5b1cSMartin Matuska anchor = ip;
339c03c5b1cSMartin Matuska continue;
340c03c5b1cSMartin Matuska }
341c03c5b1cSMartin Matuska break;
342c03c5b1cSMartin Matuska }
343c03c5b1cSMartin Matuska }
344c03c5b1cSMartin Matuska }
345c03c5b1cSMartin Matuska
346c03c5b1cSMartin Matuska /* save reps for next block */
347c03c5b1cSMartin Matuska rep[0] = offset_1 ? offset_1 : offsetSaved;
348c03c5b1cSMartin Matuska rep[1] = offset_2 ? offset_2 : offsetSaved;
349c03c5b1cSMartin Matuska
350c03c5b1cSMartin Matuska /* Return the last literals size */
351c03c5b1cSMartin Matuska return (size_t)(iend - anchor);
352c03c5b1cSMartin Matuska }
353c03c5b1cSMartin Matuska
/* ZSTD_compressBlock_fast_dictMatchState() :
 * Entry point of the "fast" strategy when a dictMatchState is attached
 * (asserted below). Dispatches on ms->cParams.minMatch so that the generic
 * function receives `mls` as a literal constant.
 * Any minMatch value other than 5, 6 or 7 uses the mls==4 variant.
 * @return : the value returned by the generic function (last literals size).
 */
size_t ZSTD_compressBlock_fast_dictMatchState(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    U32 const mls = ms->cParams.minMatch;
    assert(ms->dictMatchState != NULL);
    switch(mls)
    {
    default: /* includes case 3 */
    case 4 :
        return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 4);
    case 5 :
        return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 5);
    case 6 :
        return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 6);
    case 7 :
        return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 7);
    }
}
373c03c5b1cSMartin Matuska
374c03c5b1cSMartin Matuska
/* ZSTD_compressBlock_fast_extDict_generic() :
 * "fast" match finder for a window made of two segments: an older one
 * addressed through ms->window.dictBase (indices below prefixStartIndex)
 * and the current prefix addressed through ms->window.base.
 * When the older segment is entirely below the lowest usable index
 * (prefixStartIndex == dictStartIndex), the work is delegated to
 * ZSTD_compressBlock_fast_generic().
 *
 * ms       : match state (hash table, window, compression parameters).
 * seqStore : destination for the sequences found.
 * rep      : in/out repeat offsets (rep[0], rep[1]).
 * src      : start of the block to compress.
 * srcSize  : size of the block, in bytes.
 * mls      : minimum match length used for hashing.
 * @return  : number of trailing bytes not covered by any stored sequence.
 */
static size_t ZSTD_compressBlock_fast_extDict_generic(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize, U32 const mls)
{
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32* const hashTable = ms->hashTable;
    U32 const hlog = cParams->hashLog;
    /* support stepSize of 0 */
    U32 const stepSize = cParams->targetLength + !(cParams->targetLength);
    const BYTE* const base = ms->window.base;
    const BYTE* const dictBase = ms->window.dictBase;
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* ip = istart;
    const BYTE* anchor = istart;   /* start of the pending (not yet stored) literals */
    const U32   endIndex = (U32)((size_t)(istart - base) + srcSize);
    const U32   lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog);
    const U32   dictStartIndex = lowLimit;
    const BYTE* const dictStart = dictBase + dictStartIndex;
    const U32   dictLimit = ms->window.dictLimit;
    const U32   prefixStartIndex = dictLimit < lowLimit ? lowLimit : dictLimit;
    const BYTE* const prefixStart = base + prefixStartIndex;
    const BYTE* const dictEnd = dictBase + prefixStartIndex;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - 8;
    U32 offset_1=rep[0], offset_2=rep[1];

    DEBUGLOG(5, "ZSTD_compressBlock_fast_extDict_generic (offset_1=%u)", offset_1);

    /* switch to "regular" variant if extDict is invalidated due to maxDistance */
    if (prefixStartIndex == dictStartIndex)
        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, mls);

    /* Search Loop */
    while (ip < ilimit) {  /* < instead of <=, because (ip+1) */
        const size_t h = ZSTD_hashPtr(ip, hlog, mls);
        const U32    matchIndex = hashTable[h];
        /* indices below prefixStartIndex are resolved against dictBase */
        const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base;
        const BYTE*  match = matchBase + matchIndex;
        const U32    current = (U32)(ip-base);
        const U32    repIndex = current + 1 - offset_1;
        const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
        const BYTE* const repMatch = repBase + repIndex;
        hashTable[h] = current;   /* update hash table */
        DEBUGLOG(7, "offset_1 = %u , current = %u", offset_1, current);

        if ( ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */
             & (offset_1 < current+1 - dictStartIndex) ) /* note: we are searching at current+1 */
           && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
            /* repeat-offset match at ip+1 */
            const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
            size_t const rLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4;
            ip++;
            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, rLength-MINMATCH);
            ip += rLength;
            anchor = ip;
        } else {
            if ( (matchIndex < dictStartIndex) ||
                 (MEM_read32(match) != MEM_read32(ip)) ) {
                /* no usable candidate: advance, faster as literals accumulate */
                assert(stepSize >= 1);
                ip += ((ip-anchor) >> kSearchStrength) + stepSize;
                continue;
            }
            {   const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
                const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
                U32 const offset = current - matchIndex;
                size_t mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
                while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; }   /* catch up */
                offset_2 = offset_1; offset_1 = offset;  /* update offset history */
                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
                ip += mLength;
                anchor = ip;
        }   }

        if (ip <= ilimit) {
            /* Fill Table */
            hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2;
            hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base);
            /* check immediate repcode */
            while (ip <= ilimit) {
                U32 const current2 = (U32)(ip-base);
                U32 const repIndex2 = current2 - offset_2;
                const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
                /* NOTE(review): the offset_2 bound below uses `current` (index at
                 * the start of this outer iteration), not `current2`. */
                if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (offset_2 < current - dictStartIndex))  /* intentional overflow */
                   && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
                    const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
                    size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
                    { U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; }  /* swap offset_2 <=> offset_1 */
                    ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend, 0 /*offcode*/, repLength2-MINMATCH);
                    hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
                    ip += repLength2;
                    anchor = ip;
                    continue;
                }
                break;
    }   }   }

    /* save reps for next block */
    rep[0] = offset_1;
    rep[1] = offset_2;

    /* Return the last literals size */
    return (size_t)(iend - anchor);
}
477c03c5b1cSMartin Matuska
478c03c5b1cSMartin Matuska
/* ZSTD_compressBlock_fast_extDict() :
 * Entry point of the "fast" strategy for the two-segment (extDict) window.
 * Dispatches on ms->cParams.minMatch so that the generic function receives
 * `mls` as a literal constant.
 * Any minMatch value other than 5, 6 or 7 uses the mls==4 variant.
 * @return : the value returned by the generic function (last literals size).
 */
size_t ZSTD_compressBlock_fast_extDict(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    U32 const mls = ms->cParams.minMatch;
    switch(mls)
    {
    default: /* includes case 3 */
    case 4 :
        return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 4);
    case 5 :
        return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 5);
    case 6 :
        return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 6);
    case 7 :
        return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 7);
    }
}
497