1*3117ece4Schristos /* 2*3117ece4Schristos * Copyright (c) Meta Platforms, Inc. and affiliates. 3*3117ece4Schristos * All rights reserved. 4*3117ece4Schristos * 5*3117ece4Schristos * This source code is licensed under both the BSD-style license (found in the 6*3117ece4Schristos * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7*3117ece4Schristos * in the COPYING file in the root directory of this source tree). 8*3117ece4Schristos * You may select, at your option, one of the above-listed licenses. 9*3117ece4Schristos */ 10*3117ece4Schristos 11*3117ece4Schristos #include "zstd_ldm.h" 12*3117ece4Schristos 13*3117ece4Schristos #include "../common/debug.h" 14*3117ece4Schristos #include "../common/xxhash.h" 15*3117ece4Schristos #include "zstd_fast.h" /* ZSTD_fillHashTable() */ 16*3117ece4Schristos #include "zstd_double_fast.h" /* ZSTD_fillDoubleHashTable() */ 17*3117ece4Schristos #include "zstd_ldm_geartab.h" 18*3117ece4Schristos 19*3117ece4Schristos #define LDM_BUCKET_SIZE_LOG 3 20*3117ece4Schristos #define LDM_MIN_MATCH_LENGTH 64 21*3117ece4Schristos #define LDM_HASH_RLOG 7 22*3117ece4Schristos 23*3117ece4Schristos typedef struct { 24*3117ece4Schristos U64 rolling; 25*3117ece4Schristos U64 stopMask; 26*3117ece4Schristos } ldmRollingHashState_t; 27*3117ece4Schristos 28*3117ece4Schristos /** ZSTD_ldm_gear_init(): 29*3117ece4Schristos * 30*3117ece4Schristos * Initializes the rolling hash state such that it will honor the 31*3117ece4Schristos * settings in params. */ 32*3117ece4Schristos static void ZSTD_ldm_gear_init(ldmRollingHashState_t* state, ldmParams_t const* params) 33*3117ece4Schristos { 34*3117ece4Schristos unsigned maxBitsInMask = MIN(params->minMatchLength, 64); 35*3117ece4Schristos unsigned hashRateLog = params->hashRateLog; 36*3117ece4Schristos 37*3117ece4Schristos state->rolling = ~(U32)0; 38*3117ece4Schristos 39*3117ece4Schristos /* The choice of the splitting criterion is subject to two conditions: 40*3117ece4Schristos * 1. it has to trigger on average every 2^(hashRateLog) bytes; 41*3117ece4Schristos * 2. ideally, it has to depend on a window of minMatchLength bytes. 42*3117ece4Schristos * 43*3117ece4Schristos * In the gear hash algorithm, bit n depends on the last n bytes; 44*3117ece4Schristos * so in order to obtain a good quality splitting criterion it is 45*3117ece4Schristos * preferable to use bits with high weight. 46*3117ece4Schristos * 47*3117ece4Schristos * To match condition 1 we use a mask with hashRateLog bits set 48*3117ece4Schristos * and, because of the previous remark, we make sure these bits 49*3117ece4Schristos * have the highest possible weight while still respecting 50*3117ece4Schristos * condition 2. 51*3117ece4Schristos */ 52*3117ece4Schristos if (hashRateLog > 0 && hashRateLog <= maxBitsInMask) { 53*3117ece4Schristos state->stopMask = (((U64)1 << hashRateLog) - 1) << (maxBitsInMask - hashRateLog); 54*3117ece4Schristos } else { 55*3117ece4Schristos /* In this degenerate case we simply honor the hash rate. */ 56*3117ece4Schristos state->stopMask = ((U64)1 << hashRateLog) - 1; 57*3117ece4Schristos } 58*3117ece4Schristos } 59*3117ece4Schristos 60*3117ece4Schristos /** ZSTD_ldm_gear_reset() 61*3117ece4Schristos * Feeds [data, data + minMatchLength) into the hash without registering any 62*3117ece4Schristos * splits. This effectively resets the hash state. This is used when skipping 63*3117ece4Schristos * over data, either at the beginning of a block, or skipping sections. 64*3117ece4Schristos */ 65*3117ece4Schristos static void ZSTD_ldm_gear_reset(ldmRollingHashState_t* state, 66*3117ece4Schristos BYTE const* data, size_t minMatchLength) 67*3117ece4Schristos { 68*3117ece4Schristos U64 hash = state->rolling; 69*3117ece4Schristos size_t n = 0; 70*3117ece4Schristos 71*3117ece4Schristos #define GEAR_ITER_ONCE() do { \ 72*3117ece4Schristos hash = (hash << 1) + ZSTD_ldm_gearTab[data[n] & 0xff]; \ 73*3117ece4Schristos n += 1; \ 74*3117ece4Schristos } while (0) 75*3117ece4Schristos while (n + 3 < minMatchLength) { 76*3117ece4Schristos GEAR_ITER_ONCE(); 77*3117ece4Schristos GEAR_ITER_ONCE(); 78*3117ece4Schristos GEAR_ITER_ONCE(); 79*3117ece4Schristos GEAR_ITER_ONCE(); 80*3117ece4Schristos } 81*3117ece4Schristos while (n < minMatchLength) { 82*3117ece4Schristos GEAR_ITER_ONCE(); 83*3117ece4Schristos } 84*3117ece4Schristos #undef GEAR_ITER_ONCE 85*3117ece4Schristos } 86*3117ece4Schristos 87*3117ece4Schristos /** ZSTD_ldm_gear_feed(): 88*3117ece4Schristos * 89*3117ece4Schristos * Registers in the splits array all the split points found in the first 90*3117ece4Schristos * size bytes following the data pointer. This function terminates when 91*3117ece4Schristos * either all the data has been processed or LDM_BATCH_SIZE splits are 92*3117ece4Schristos * present in the splits array. 93*3117ece4Schristos * 94*3117ece4Schristos * Precondition: The splits array must not be full. 95*3117ece4Schristos * Returns: The number of bytes processed. */ 96*3117ece4Schristos static size_t ZSTD_ldm_gear_feed(ldmRollingHashState_t* state, 97*3117ece4Schristos BYTE const* data, size_t size, 98*3117ece4Schristos size_t* splits, unsigned* numSplits) 99*3117ece4Schristos { 100*3117ece4Schristos size_t n; 101*3117ece4Schristos U64 hash, mask; 102*3117ece4Schristos 103*3117ece4Schristos hash = state->rolling; 104*3117ece4Schristos mask = state->stopMask; 105*3117ece4Schristos n = 0; 106*3117ece4Schristos 107*3117ece4Schristos #define GEAR_ITER_ONCE() do { \ 108*3117ece4Schristos hash = (hash << 1) + ZSTD_ldm_gearTab[data[n] & 0xff]; \ 109*3117ece4Schristos n += 1; \ 110*3117ece4Schristos if (UNLIKELY((hash & mask) == 0)) { \ 111*3117ece4Schristos splits[*numSplits] = n; \ 112*3117ece4Schristos *numSplits += 1; \ 113*3117ece4Schristos if (*numSplits == LDM_BATCH_SIZE) \ 114*3117ece4Schristos goto done; \ 115*3117ece4Schristos } \ 116*3117ece4Schristos } while (0) 117*3117ece4Schristos 118*3117ece4Schristos while (n + 3 < size) { 119*3117ece4Schristos GEAR_ITER_ONCE(); 120*3117ece4Schristos GEAR_ITER_ONCE(); 121*3117ece4Schristos GEAR_ITER_ONCE(); 122*3117ece4Schristos GEAR_ITER_ONCE(); 123*3117ece4Schristos } 124*3117ece4Schristos while (n < size) { 125*3117ece4Schristos GEAR_ITER_ONCE(); 126*3117ece4Schristos } 127*3117ece4Schristos 128*3117ece4Schristos #undef GEAR_ITER_ONCE 129*3117ece4Schristos 130*3117ece4Schristos done: 131*3117ece4Schristos state->rolling = hash; 132*3117ece4Schristos return n; 133*3117ece4Schristos } 134*3117ece4Schristos 135*3117ece4Schristos void ZSTD_ldm_adjustParameters(ldmParams_t* params, 136*3117ece4Schristos ZSTD_compressionParameters const* cParams) 137*3117ece4Schristos { 138*3117ece4Schristos params->windowLog = cParams->windowLog; 139*3117ece4Schristos ZSTD_STATIC_ASSERT(LDM_BUCKET_SIZE_LOG <= ZSTD_LDM_BUCKETSIZELOG_MAX); 140*3117ece4Schristos DEBUGLOG(4, "ZSTD_ldm_adjustParameters"); 141*3117ece4Schristos if (!params->bucketSizeLog) params->bucketSizeLog = LDM_BUCKET_SIZE_LOG; 142*3117ece4Schristos if (!params->minMatchLength) params->minMatchLength = LDM_MIN_MATCH_LENGTH; 143*3117ece4Schristos if (params->hashLog == 0) { 144*3117ece4Schristos params->hashLog = MAX(ZSTD_HASHLOG_MIN, params->windowLog - LDM_HASH_RLOG); 145*3117ece4Schristos assert(params->hashLog <= ZSTD_HASHLOG_MAX); 146*3117ece4Schristos } 147*3117ece4Schristos if (params->hashRateLog == 0) { 148*3117ece4Schristos params->hashRateLog = params->windowLog < params->hashLog 149*3117ece4Schristos ? 0 150*3117ece4Schristos : params->windowLog - params->hashLog; 151*3117ece4Schristos } 152*3117ece4Schristos params->bucketSizeLog = MIN(params->bucketSizeLog, params->hashLog); 153*3117ece4Schristos } 154*3117ece4Schristos 155*3117ece4Schristos size_t ZSTD_ldm_getTableSize(ldmParams_t params) 156*3117ece4Schristos { 157*3117ece4Schristos size_t const ldmHSize = ((size_t)1) << params.hashLog; 158*3117ece4Schristos size_t const ldmBucketSizeLog = MIN(params.bucketSizeLog, params.hashLog); 159*3117ece4Schristos size_t const ldmBucketSize = ((size_t)1) << (params.hashLog - ldmBucketSizeLog); 160*3117ece4Schristos size_t const totalSize = ZSTD_cwksp_alloc_size(ldmBucketSize) 161*3117ece4Schristos + ZSTD_cwksp_alloc_size(ldmHSize * sizeof(ldmEntry_t)); 162*3117ece4Schristos return params.enableLdm == ZSTD_ps_enable ? totalSize : 0; 163*3117ece4Schristos } 164*3117ece4Schristos 165*3117ece4Schristos size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize) 166*3117ece4Schristos { 167*3117ece4Schristos return params.enableLdm == ZSTD_ps_enable ? (maxChunkSize / params.minMatchLength) : 0; 168*3117ece4Schristos } 169*3117ece4Schristos 170*3117ece4Schristos /** ZSTD_ldm_getBucket() : 171*3117ece4Schristos * Returns a pointer to the start of the bucket associated with hash. */ 172*3117ece4Schristos static ldmEntry_t* ZSTD_ldm_getBucket( 173*3117ece4Schristos ldmState_t* ldmState, size_t hash, ldmParams_t const ldmParams) 174*3117ece4Schristos { 175*3117ece4Schristos return ldmState->hashTable + (hash << ldmParams.bucketSizeLog); 176*3117ece4Schristos } 177*3117ece4Schristos 178*3117ece4Schristos /** ZSTD_ldm_insertEntry() : 179*3117ece4Schristos * Insert the entry with corresponding hash into the hash table */ 180*3117ece4Schristos static void ZSTD_ldm_insertEntry(ldmState_t* ldmState, 181*3117ece4Schristos size_t const hash, const ldmEntry_t entry, 182*3117ece4Schristos ldmParams_t const ldmParams) 183*3117ece4Schristos { 184*3117ece4Schristos BYTE* const pOffset = ldmState->bucketOffsets + hash; 185*3117ece4Schristos unsigned const offset = *pOffset; 186*3117ece4Schristos 187*3117ece4Schristos *(ZSTD_ldm_getBucket(ldmState, hash, ldmParams) + offset) = entry; 188*3117ece4Schristos *pOffset = (BYTE)((offset + 1) & ((1u << ldmParams.bucketSizeLog) - 1)); 189*3117ece4Schristos 190*3117ece4Schristos } 191*3117ece4Schristos 192*3117ece4Schristos /** ZSTD_ldm_countBackwardsMatch() : 193*3117ece4Schristos * Returns the number of bytes that match backwards before pIn and pMatch. 194*3117ece4Schristos * 195*3117ece4Schristos * We count only bytes where pMatch >= pBase and pIn >= pAnchor. */ 196*3117ece4Schristos static size_t ZSTD_ldm_countBackwardsMatch( 197*3117ece4Schristos const BYTE* pIn, const BYTE* pAnchor, 198*3117ece4Schristos const BYTE* pMatch, const BYTE* pMatchBase) 199*3117ece4Schristos { 200*3117ece4Schristos size_t matchLength = 0; 201*3117ece4Schristos while (pIn > pAnchor && pMatch > pMatchBase && pIn[-1] == pMatch[-1]) { 202*3117ece4Schristos pIn--; 203*3117ece4Schristos pMatch--; 204*3117ece4Schristos matchLength++; 205*3117ece4Schristos } 206*3117ece4Schristos return matchLength; 207*3117ece4Schristos } 208*3117ece4Schristos 209*3117ece4Schristos /** ZSTD_ldm_countBackwardsMatch_2segments() : 210*3117ece4Schristos * Returns the number of bytes that match backwards from pMatch, 211*3117ece4Schristos * even with the backwards match spanning 2 different segments. 212*3117ece4Schristos * 213*3117ece4Schristos * On reaching `pMatchBase`, start counting from mEnd */ 214*3117ece4Schristos static size_t ZSTD_ldm_countBackwardsMatch_2segments( 215*3117ece4Schristos const BYTE* pIn, const BYTE* pAnchor, 216*3117ece4Schristos const BYTE* pMatch, const BYTE* pMatchBase, 217*3117ece4Schristos const BYTE* pExtDictStart, const BYTE* pExtDictEnd) 218*3117ece4Schristos { 219*3117ece4Schristos size_t matchLength = ZSTD_ldm_countBackwardsMatch(pIn, pAnchor, pMatch, pMatchBase); 220*3117ece4Schristos if (pMatch - matchLength != pMatchBase || pMatchBase == pExtDictStart) { 221*3117ece4Schristos /* If backwards match is entirely in the extDict or prefix, immediately return */ 222*3117ece4Schristos return matchLength; 223*3117ece4Schristos } 224*3117ece4Schristos DEBUGLOG(7, "ZSTD_ldm_countBackwardsMatch_2segments: found 2-parts backwards match (length in prefix==%zu)", matchLength); 225*3117ece4Schristos matchLength += ZSTD_ldm_countBackwardsMatch(pIn - matchLength, pAnchor, pExtDictEnd, pExtDictStart); 226*3117ece4Schristos DEBUGLOG(7, "final backwards match length = %zu", matchLength); 227*3117ece4Schristos return matchLength; 228*3117ece4Schristos } 229*3117ece4Schristos 230*3117ece4Schristos /** ZSTD_ldm_fillFastTables() : 231*3117ece4Schristos * 232*3117ece4Schristos * Fills the relevant tables for the ZSTD_fast and ZSTD_dfast strategies. 233*3117ece4Schristos * This is similar to ZSTD_loadDictionaryContent. 234*3117ece4Schristos * 235*3117ece4Schristos * The tables for the other strategies are filled within their 236*3117ece4Schristos * block compressors. */ 237*3117ece4Schristos static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms, 238*3117ece4Schristos void const* end) 239*3117ece4Schristos { 240*3117ece4Schristos const BYTE* const iend = (const BYTE*)end; 241*3117ece4Schristos 242*3117ece4Schristos switch(ms->cParams.strategy) 243*3117ece4Schristos { 244*3117ece4Schristos case ZSTD_fast: 245*3117ece4Schristos ZSTD_fillHashTable(ms, iend, ZSTD_dtlm_fast, ZSTD_tfp_forCCtx); 246*3117ece4Schristos break; 247*3117ece4Schristos 248*3117ece4Schristos case ZSTD_dfast: 249*3117ece4Schristos #ifndef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR 250*3117ece4Schristos ZSTD_fillDoubleHashTable(ms, iend, ZSTD_dtlm_fast, ZSTD_tfp_forCCtx); 251*3117ece4Schristos #else 252*3117ece4Schristos assert(0); /* shouldn't be called: cparams should've been adjusted. */ 253*3117ece4Schristos #endif 254*3117ece4Schristos break; 255*3117ece4Schristos 256*3117ece4Schristos case ZSTD_greedy: 257*3117ece4Schristos case ZSTD_lazy: 258*3117ece4Schristos case ZSTD_lazy2: 259*3117ece4Schristos case ZSTD_btlazy2: 260*3117ece4Schristos case ZSTD_btopt: 261*3117ece4Schristos case ZSTD_btultra: 262*3117ece4Schristos case ZSTD_btultra2: 263*3117ece4Schristos break; 264*3117ece4Schristos default: 265*3117ece4Schristos assert(0); /* not possible : not a valid strategy id */ 266*3117ece4Schristos } 267*3117ece4Schristos 268*3117ece4Schristos return 0; 269*3117ece4Schristos } 270*3117ece4Schristos 271*3117ece4Schristos void ZSTD_ldm_fillHashTable( 272*3117ece4Schristos ldmState_t* ldmState, const BYTE* ip, 273*3117ece4Schristos const BYTE* iend, ldmParams_t const* params) 274*3117ece4Schristos { 275*3117ece4Schristos U32 const minMatchLength = params->minMatchLength; 276*3117ece4Schristos U32 const hBits = params->hashLog - params->bucketSizeLog; 277*3117ece4Schristos BYTE const* const base = ldmState->window.base; 278*3117ece4Schristos BYTE const* const istart = ip; 279*3117ece4Schristos ldmRollingHashState_t hashState; 280*3117ece4Schristos size_t* const splits = ldmState->splitIndices; 281*3117ece4Schristos unsigned numSplits; 282*3117ece4Schristos 283*3117ece4Schristos DEBUGLOG(5, "ZSTD_ldm_fillHashTable"); 284*3117ece4Schristos 285*3117ece4Schristos ZSTD_ldm_gear_init(&hashState, params); 286*3117ece4Schristos while (ip < iend) { 287*3117ece4Schristos size_t hashed; 288*3117ece4Schristos unsigned n; 289*3117ece4Schristos 290*3117ece4Schristos numSplits = 0; 291*3117ece4Schristos hashed = ZSTD_ldm_gear_feed(&hashState, ip, iend - ip, splits, &numSplits); 292*3117ece4Schristos 293*3117ece4Schristos for (n = 0; n < numSplits; n++) { 294*3117ece4Schristos if (ip + splits[n] >= istart + minMatchLength) { 295*3117ece4Schristos BYTE const* const split = ip + splits[n] - minMatchLength; 296*3117ece4Schristos U64 const xxhash = XXH64(split, minMatchLength, 0); 297*3117ece4Schristos U32 const hash = (U32)(xxhash & (((U32)1 << hBits) - 1)); 298*3117ece4Schristos ldmEntry_t entry; 299*3117ece4Schristos 300*3117ece4Schristos entry.offset = (U32)(split - base); 301*3117ece4Schristos entry.checksum = (U32)(xxhash >> 32); 302*3117ece4Schristos ZSTD_ldm_insertEntry(ldmState, hash, entry, *params); 303*3117ece4Schristos } 304*3117ece4Schristos } 305*3117ece4Schristos 306*3117ece4Schristos ip += hashed; 307*3117ece4Schristos } 308*3117ece4Schristos } 309*3117ece4Schristos 310*3117ece4Schristos 311*3117ece4Schristos /** ZSTD_ldm_limitTableUpdate() : 312*3117ece4Schristos * 313*3117ece4Schristos * Sets cctx->nextToUpdate to a position corresponding closer to anchor 314*3117ece4Schristos * if it is far way 315*3117ece4Schristos * (after a long match, only update tables a limited amount). */ 316*3117ece4Schristos static void ZSTD_ldm_limitTableUpdate(ZSTD_matchState_t* ms, const BYTE* anchor) 317*3117ece4Schristos { 318*3117ece4Schristos U32 const curr = (U32)(anchor - ms->window.base); 319*3117ece4Schristos if (curr > ms->nextToUpdate + 1024) { 320*3117ece4Schristos ms->nextToUpdate = 321*3117ece4Schristos curr - MIN(512, curr - ms->nextToUpdate - 1024); 322*3117ece4Schristos } 323*3117ece4Schristos } 324*3117ece4Schristos 325*3117ece4Schristos static 326*3117ece4Schristos ZSTD_ALLOW_POINTER_OVERFLOW_ATTR 327*3117ece4Schristos size_t ZSTD_ldm_generateSequences_internal( 328*3117ece4Schristos ldmState_t* ldmState, rawSeqStore_t* rawSeqStore, 329*3117ece4Schristos ldmParams_t const* params, void const* src, size_t srcSize) 330*3117ece4Schristos { 331*3117ece4Schristos /* LDM parameters */ 332*3117ece4Schristos int const extDict = ZSTD_window_hasExtDict(ldmState->window); 333*3117ece4Schristos U32 const minMatchLength = params->minMatchLength; 334*3117ece4Schristos U32 const entsPerBucket = 1U << params->bucketSizeLog; 335*3117ece4Schristos U32 const hBits = params->hashLog - params->bucketSizeLog; 336*3117ece4Schristos /* Prefix and extDict parameters */ 337*3117ece4Schristos U32 const dictLimit = ldmState->window.dictLimit; 338*3117ece4Schristos U32 const lowestIndex = extDict ? ldmState->window.lowLimit : dictLimit; 339*3117ece4Schristos BYTE const* const base = ldmState->window.base; 340*3117ece4Schristos BYTE const* const dictBase = extDict ? ldmState->window.dictBase : NULL; 341*3117ece4Schristos BYTE const* const dictStart = extDict ? dictBase + lowestIndex : NULL; 342*3117ece4Schristos BYTE const* const dictEnd = extDict ? dictBase + dictLimit : NULL; 343*3117ece4Schristos BYTE const* const lowPrefixPtr = base + dictLimit; 344*3117ece4Schristos /* Input bounds */ 345*3117ece4Schristos BYTE const* const istart = (BYTE const*)src; 346*3117ece4Schristos BYTE const* const iend = istart + srcSize; 347*3117ece4Schristos BYTE const* const ilimit = iend - HASH_READ_SIZE; 348*3117ece4Schristos /* Input positions */ 349*3117ece4Schristos BYTE const* anchor = istart; 350*3117ece4Schristos BYTE const* ip = istart; 351*3117ece4Schristos /* Rolling hash state */ 352*3117ece4Schristos ldmRollingHashState_t hashState; 353*3117ece4Schristos /* Arrays for staged-processing */ 354*3117ece4Schristos size_t* const splits = ldmState->splitIndices; 355*3117ece4Schristos ldmMatchCandidate_t* const candidates = ldmState->matchCandidates; 356*3117ece4Schristos unsigned numSplits; 357*3117ece4Schristos 358*3117ece4Schristos if (srcSize < minMatchLength) 359*3117ece4Schristos return iend - anchor; 360*3117ece4Schristos 361*3117ece4Schristos /* Initialize the rolling hash state with the first minMatchLength bytes */ 362*3117ece4Schristos ZSTD_ldm_gear_init(&hashState, params); 363*3117ece4Schristos ZSTD_ldm_gear_reset(&hashState, ip, minMatchLength); 364*3117ece4Schristos ip += minMatchLength; 365*3117ece4Schristos 366*3117ece4Schristos while (ip < ilimit) { 367*3117ece4Schristos size_t hashed; 368*3117ece4Schristos unsigned n; 369*3117ece4Schristos 370*3117ece4Schristos numSplits = 0; 371*3117ece4Schristos hashed = ZSTD_ldm_gear_feed(&hashState, ip, ilimit - ip, 372*3117ece4Schristos splits, &numSplits); 373*3117ece4Schristos 374*3117ece4Schristos for (n = 0; n < numSplits; n++) { 375*3117ece4Schristos BYTE const* const split = ip + splits[n] - minMatchLength; 376*3117ece4Schristos U64 const xxhash = XXH64(split, minMatchLength, 0); 377*3117ece4Schristos U32 const hash = (U32)(xxhash & (((U32)1 << hBits) - 1)); 378*3117ece4Schristos 379*3117ece4Schristos candidates[n].split = split; 380*3117ece4Schristos candidates[n].hash = hash; 381*3117ece4Schristos candidates[n].checksum = (U32)(xxhash >> 32); 382*3117ece4Schristos candidates[n].bucket = ZSTD_ldm_getBucket(ldmState, hash, *params); 383*3117ece4Schristos PREFETCH_L1(candidates[n].bucket); 384*3117ece4Schristos } 385*3117ece4Schristos 386*3117ece4Schristos for (n = 0; n < numSplits; n++) { 387*3117ece4Schristos size_t forwardMatchLength = 0, backwardMatchLength = 0, 388*3117ece4Schristos bestMatchLength = 0, mLength; 389*3117ece4Schristos U32 offset; 390*3117ece4Schristos BYTE const* const split = candidates[n].split; 391*3117ece4Schristos U32 const checksum = candidates[n].checksum; 392*3117ece4Schristos U32 const hash = candidates[n].hash; 393*3117ece4Schristos ldmEntry_t* const bucket = candidates[n].bucket; 394*3117ece4Schristos ldmEntry_t const* cur; 395*3117ece4Schristos ldmEntry_t const* bestEntry = NULL; 396*3117ece4Schristos ldmEntry_t newEntry; 397*3117ece4Schristos 398*3117ece4Schristos newEntry.offset = (U32)(split - base); 399*3117ece4Schristos newEntry.checksum = checksum; 400*3117ece4Schristos 401*3117ece4Schristos /* If a split point would generate a sequence overlapping with 402*3117ece4Schristos * the previous one, we merely register it in the hash table and 403*3117ece4Schristos * move on */ 404*3117ece4Schristos if (split < anchor) { 405*3117ece4Schristos ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params); 406*3117ece4Schristos continue; 407*3117ece4Schristos } 408*3117ece4Schristos 409*3117ece4Schristos for (cur = bucket; cur < bucket + entsPerBucket; cur++) { 410*3117ece4Schristos size_t curForwardMatchLength, curBackwardMatchLength, 411*3117ece4Schristos curTotalMatchLength; 412*3117ece4Schristos if (cur->checksum != checksum || cur->offset <= lowestIndex) { 413*3117ece4Schristos continue; 414*3117ece4Schristos } 415*3117ece4Schristos if (extDict) { 416*3117ece4Schristos BYTE const* const curMatchBase = 417*3117ece4Schristos cur->offset < dictLimit ? dictBase : base; 418*3117ece4Schristos BYTE const* const pMatch = curMatchBase + cur->offset; 419*3117ece4Schristos BYTE const* const matchEnd = 420*3117ece4Schristos cur->offset < dictLimit ? dictEnd : iend; 421*3117ece4Schristos BYTE const* const lowMatchPtr = 422*3117ece4Schristos cur->offset < dictLimit ? dictStart : lowPrefixPtr; 423*3117ece4Schristos curForwardMatchLength = 424*3117ece4Schristos ZSTD_count_2segments(split, pMatch, iend, matchEnd, lowPrefixPtr); 425*3117ece4Schristos if (curForwardMatchLength < minMatchLength) { 426*3117ece4Schristos continue; 427*3117ece4Schristos } 428*3117ece4Schristos curBackwardMatchLength = ZSTD_ldm_countBackwardsMatch_2segments( 429*3117ece4Schristos split, anchor, pMatch, lowMatchPtr, dictStart, dictEnd); 430*3117ece4Schristos } else { /* !extDict */ 431*3117ece4Schristos BYTE const* const pMatch = base + cur->offset; 432*3117ece4Schristos curForwardMatchLength = ZSTD_count(split, pMatch, iend); 433*3117ece4Schristos if (curForwardMatchLength < minMatchLength) { 434*3117ece4Schristos continue; 435*3117ece4Schristos } 436*3117ece4Schristos curBackwardMatchLength = 437*3117ece4Schristos ZSTD_ldm_countBackwardsMatch(split, anchor, pMatch, lowPrefixPtr); 438*3117ece4Schristos } 439*3117ece4Schristos curTotalMatchLength = curForwardMatchLength + curBackwardMatchLength; 440*3117ece4Schristos 441*3117ece4Schristos if (curTotalMatchLength > bestMatchLength) { 442*3117ece4Schristos bestMatchLength = curTotalMatchLength; 443*3117ece4Schristos forwardMatchLength = curForwardMatchLength; 444*3117ece4Schristos backwardMatchLength = curBackwardMatchLength; 445*3117ece4Schristos bestEntry = cur; 446*3117ece4Schristos } 447*3117ece4Schristos } 448*3117ece4Schristos 449*3117ece4Schristos /* No match found -- insert an entry into the hash table 450*3117ece4Schristos * and process the next candidate match */ 451*3117ece4Schristos if (bestEntry == NULL) { 452*3117ece4Schristos ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params); 453*3117ece4Schristos continue; 454*3117ece4Schristos } 455*3117ece4Schristos 456*3117ece4Schristos /* Match found */ 457*3117ece4Schristos offset = (U32)(split - base) - bestEntry->offset; 458*3117ece4Schristos mLength = forwardMatchLength + backwardMatchLength; 459*3117ece4Schristos { 460*3117ece4Schristos rawSeq* const seq = rawSeqStore->seq + rawSeqStore->size; 461*3117ece4Schristos 462*3117ece4Schristos /* Out of sequence storage */ 463*3117ece4Schristos if (rawSeqStore->size == rawSeqStore->capacity) 464*3117ece4Schristos return ERROR(dstSize_tooSmall); 465*3117ece4Schristos seq->litLength = (U32)(split - backwardMatchLength - anchor); 466*3117ece4Schristos seq->matchLength = (U32)mLength; 467*3117ece4Schristos seq->offset = offset; 468*3117ece4Schristos rawSeqStore->size++; 469*3117ece4Schristos } 470*3117ece4Schristos 471*3117ece4Schristos /* Insert the current entry into the hash table --- it must be 472*3117ece4Schristos * done after the previous block to avoid clobbering bestEntry */ 473*3117ece4Schristos ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params); 474*3117ece4Schristos 475*3117ece4Schristos anchor = split + forwardMatchLength; 476*3117ece4Schristos 477*3117ece4Schristos /* If we find a match that ends after the data that we've hashed 478*3117ece4Schristos * then we have a repeating, overlapping, pattern. E.g. all zeros. 479*3117ece4Schristos * If one repetition of the pattern matches our `stopMask` then all 480*3117ece4Schristos * repetitions will. We don't need to insert them all into out table, 481*3117ece4Schristos * only the first one. So skip over overlapping matches. 482*3117ece4Schristos * This is a major speed boost (20x) for compressing a single byte 483*3117ece4Schristos * repeated, when that byte ends up in the table. 484*3117ece4Schristos */ 485*3117ece4Schristos if (anchor > ip + hashed) { 486*3117ece4Schristos ZSTD_ldm_gear_reset(&hashState, anchor - minMatchLength, minMatchLength); 487*3117ece4Schristos /* Continue the outer loop at anchor (ip + hashed == anchor). */ 488*3117ece4Schristos ip = anchor - hashed; 489*3117ece4Schristos break; 490*3117ece4Schristos } 491*3117ece4Schristos } 492*3117ece4Schristos 493*3117ece4Schristos ip += hashed; 494*3117ece4Schristos } 495*3117ece4Schristos 496*3117ece4Schristos return iend - anchor; 497*3117ece4Schristos } 498*3117ece4Schristos 499*3117ece4Schristos /*! ZSTD_ldm_reduceTable() : 500*3117ece4Schristos * reduce table indexes by `reducerValue` */ 501*3117ece4Schristos static void ZSTD_ldm_reduceTable(ldmEntry_t* const table, U32 const size, 502*3117ece4Schristos U32 const reducerValue) 503*3117ece4Schristos { 504*3117ece4Schristos U32 u; 505*3117ece4Schristos for (u = 0; u < size; u++) { 506*3117ece4Schristos if (table[u].offset < reducerValue) table[u].offset = 0; 507*3117ece4Schristos else table[u].offset -= reducerValue; 508*3117ece4Schristos } 509*3117ece4Schristos } 510*3117ece4Schristos 511*3117ece4Schristos size_t ZSTD_ldm_generateSequences( 512*3117ece4Schristos ldmState_t* ldmState, rawSeqStore_t* sequences, 513*3117ece4Schristos ldmParams_t const* params, void const* src, size_t srcSize) 514*3117ece4Schristos { 515*3117ece4Schristos U32 const maxDist = 1U << params->windowLog; 516*3117ece4Schristos BYTE const* const istart = (BYTE const*)src; 517*3117ece4Schristos BYTE const* const iend = istart + srcSize; 518*3117ece4Schristos size_t const kMaxChunkSize = 1 << 20; 519*3117ece4Schristos size_t const nbChunks = (srcSize / kMaxChunkSize) + ((srcSize % kMaxChunkSize) != 0); 520*3117ece4Schristos size_t chunk; 521*3117ece4Schristos size_t leftoverSize = 0; 522*3117ece4Schristos 523*3117ece4Schristos assert(ZSTD_CHUNKSIZE_MAX >= kMaxChunkSize); 524*3117ece4Schristos /* Check that ZSTD_window_update() has been called for this chunk prior 525*3117ece4Schristos * to passing it to this function. 526*3117ece4Schristos */ 527*3117ece4Schristos assert(ldmState->window.nextSrc >= (BYTE const*)src + srcSize); 528*3117ece4Schristos /* The input could be very large (in zstdmt), so it must be broken up into 529*3117ece4Schristos * chunks to enforce the maximum distance and handle overflow correction. 530*3117ece4Schristos */ 531*3117ece4Schristos assert(sequences->pos <= sequences->size); 532*3117ece4Schristos assert(sequences->size <= sequences->capacity); 533*3117ece4Schristos for (chunk = 0; chunk < nbChunks && sequences->size < sequences->capacity; ++chunk) { 534*3117ece4Schristos BYTE const* const chunkStart = istart + chunk * kMaxChunkSize; 535*3117ece4Schristos size_t const remaining = (size_t)(iend - chunkStart); 536*3117ece4Schristos BYTE const *const chunkEnd = 537*3117ece4Schristos (remaining < kMaxChunkSize) ? iend : chunkStart + kMaxChunkSize; 538*3117ece4Schristos size_t const chunkSize = chunkEnd - chunkStart; 539*3117ece4Schristos size_t newLeftoverSize; 540*3117ece4Schristos size_t const prevSize = sequences->size; 541*3117ece4Schristos 542*3117ece4Schristos assert(chunkStart < iend); 543*3117ece4Schristos /* 1. Perform overflow correction if necessary. */ 544*3117ece4Schristos if (ZSTD_window_needOverflowCorrection(ldmState->window, 0, maxDist, ldmState->loadedDictEnd, chunkStart, chunkEnd)) { 545*3117ece4Schristos U32 const ldmHSize = 1U << params->hashLog; 546*3117ece4Schristos U32 const correction = ZSTD_window_correctOverflow( 547*3117ece4Schristos &ldmState->window, /* cycleLog */ 0, maxDist, chunkStart); 548*3117ece4Schristos ZSTD_ldm_reduceTable(ldmState->hashTable, ldmHSize, correction); 549*3117ece4Schristos /* invalidate dictionaries on overflow correction */ 550*3117ece4Schristos ldmState->loadedDictEnd = 0; 551*3117ece4Schristos } 552*3117ece4Schristos /* 2. We enforce the maximum offset allowed. 553*3117ece4Schristos * 554*3117ece4Schristos * kMaxChunkSize should be small enough that we don't lose too much of 555*3117ece4Schristos * the window through early invalidation. 556*3117ece4Schristos * TODO: * Test the chunk size. 557*3117ece4Schristos * * Try invalidation after the sequence generation and test the 558*3117ece4Schristos * offset against maxDist directly. 559*3117ece4Schristos * 560*3117ece4Schristos * NOTE: Because of dictionaries + sequence splitting we MUST make sure 561*3117ece4Schristos * that any offset used is valid at the END of the sequence, since it may 562*3117ece4Schristos * be split into two sequences. This condition holds when using 563*3117ece4Schristos * ZSTD_window_enforceMaxDist(), but if we move to checking offsets 564*3117ece4Schristos * against maxDist directly, we'll have to carefully handle that case. 565*3117ece4Schristos */ 566*3117ece4Schristos ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, &ldmState->loadedDictEnd, NULL); 567*3117ece4Schristos /* 3. Generate the sequences for the chunk, and get newLeftoverSize. */ 568*3117ece4Schristos newLeftoverSize = ZSTD_ldm_generateSequences_internal( 569*3117ece4Schristos ldmState, sequences, params, chunkStart, chunkSize); 570*3117ece4Schristos if (ZSTD_isError(newLeftoverSize)) 571*3117ece4Schristos return newLeftoverSize; 572*3117ece4Schristos /* 4. We add the leftover literals from previous iterations to the first 573*3117ece4Schristos * newly generated sequence, or add the `newLeftoverSize` if none are 574*3117ece4Schristos * generated. 575*3117ece4Schristos */ 576*3117ece4Schristos /* Prepend the leftover literals from the last call */ 577*3117ece4Schristos if (prevSize < sequences->size) { 578*3117ece4Schristos sequences->seq[prevSize].litLength += (U32)leftoverSize; 579*3117ece4Schristos leftoverSize = newLeftoverSize; 580*3117ece4Schristos } else { 581*3117ece4Schristos assert(newLeftoverSize == chunkSize); 582*3117ece4Schristos leftoverSize += chunkSize; 583*3117ece4Schristos } 584*3117ece4Schristos } 585*3117ece4Schristos return 0; 586*3117ece4Schristos } 587*3117ece4Schristos 588*3117ece4Schristos void 589*3117ece4Schristos ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, U32 const minMatch) 590*3117ece4Schristos { 591*3117ece4Schristos while (srcSize > 0 && rawSeqStore->pos < rawSeqStore->size) { 592*3117ece4Schristos rawSeq* seq = rawSeqStore->seq + rawSeqStore->pos; 593*3117ece4Schristos if (srcSize <= seq->litLength) { 594*3117ece4Schristos /* Skip past srcSize literals */ 595*3117ece4Schristos seq->litLength -= (U32)srcSize; 596*3117ece4Schristos return; 597*3117ece4Schristos } 598*3117ece4Schristos srcSize -= seq->litLength; 599*3117ece4Schristos seq->litLength = 0; 600*3117ece4Schristos if (srcSize < seq->matchLength) { 601*3117ece4Schristos /* Skip past the first srcSize of the match */ 602*3117ece4Schristos seq->matchLength -= (U32)srcSize; 603*3117ece4Schristos if (seq->matchLength < minMatch) { 604*3117ece4Schristos /* The match is too short, omit it */ 605*3117ece4Schristos if (rawSeqStore->pos + 1 < rawSeqStore->size) { 606*3117ece4Schristos seq[1].litLength += seq[0].matchLength; 607*3117ece4Schristos } 608*3117ece4Schristos rawSeqStore->pos++; 609*3117ece4Schristos } 610*3117ece4Schristos return; 611*3117ece4Schristos } 612*3117ece4Schristos srcSize -= seq->matchLength; 613*3117ece4Schristos seq->matchLength = 0; 614*3117ece4Schristos rawSeqStore->pos++; 615*3117ece4Schristos } 616*3117ece4Schristos } 617*3117ece4Schristos 618*3117ece4Schristos /** 619*3117ece4Schristos * If the sequence length is longer than remaining then the sequence is split 620*3117ece4Schristos * between this block and the next. 621*3117ece4Schristos * 622*3117ece4Schristos * Returns the current sequence to handle, or if the rest of the block should 623*3117ece4Schristos * be literals, it returns a sequence with offset == 0. 624*3117ece4Schristos */ 625*3117ece4Schristos static rawSeq maybeSplitSequence(rawSeqStore_t* rawSeqStore, 626*3117ece4Schristos U32 const remaining, U32 const minMatch) 627*3117ece4Schristos { 628*3117ece4Schristos rawSeq sequence = rawSeqStore->seq[rawSeqStore->pos]; 629*3117ece4Schristos assert(sequence.offset > 0); 630*3117ece4Schristos /* Likely: No partial sequence */ 631*3117ece4Schristos if (remaining >= sequence.litLength + sequence.matchLength) { 632*3117ece4Schristos rawSeqStore->pos++; 633*3117ece4Schristos return sequence; 634*3117ece4Schristos } 635*3117ece4Schristos /* Cut the sequence short (offset == 0 ==> rest is literals). */ 636*3117ece4Schristos if (remaining <= sequence.litLength) { 637*3117ece4Schristos sequence.offset = 0; 638*3117ece4Schristos } else if (remaining < sequence.litLength + sequence.matchLength) { 639*3117ece4Schristos sequence.matchLength = remaining - sequence.litLength; 640*3117ece4Schristos if (sequence.matchLength < minMatch) { 641*3117ece4Schristos sequence.offset = 0; 642*3117ece4Schristos } 643*3117ece4Schristos } 644*3117ece4Schristos /* Skip past `remaining` bytes for the future sequences. */ 645*3117ece4Schristos ZSTD_ldm_skipSequences(rawSeqStore, remaining, minMatch); 646*3117ece4Schristos return sequence; 647*3117ece4Schristos } 648*3117ece4Schristos 649*3117ece4Schristos void ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes) { 650*3117ece4Schristos U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes); 651*3117ece4Schristos while (currPos && rawSeqStore->pos < rawSeqStore->size) { 652*3117ece4Schristos rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos]; 653*3117ece4Schristos if (currPos >= currSeq.litLength + currSeq.matchLength) { 654*3117ece4Schristos currPos -= currSeq.litLength + currSeq.matchLength; 655*3117ece4Schristos rawSeqStore->pos++; 656*3117ece4Schristos } else { 657*3117ece4Schristos rawSeqStore->posInSequence = currPos; 658*3117ece4Schristos break; 659*3117ece4Schristos } 660*3117ece4Schristos } 661*3117ece4Schristos if (currPos == 0 || rawSeqStore->pos == rawSeqStore->size) { 662*3117ece4Schristos rawSeqStore->posInSequence = 0; 663*3117ece4Schristos } 664*3117ece4Schristos } 665*3117ece4Schristos 666*3117ece4Schristos size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, 667*3117ece4Schristos ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 668*3117ece4Schristos ZSTD_paramSwitch_e useRowMatchFinder, 669*3117ece4Schristos void const* src, size_t srcSize) 670*3117ece4Schristos { 671*3117ece4Schristos const ZSTD_compressionParameters* const cParams = &ms->cParams; 672*3117ece4Schristos unsigned const minMatch = cParams->minMatch; 673*3117ece4Schristos ZSTD_blockCompressor const blockCompressor = 674*3117ece4Schristos ZSTD_selectBlockCompressor(cParams->strategy, useRowMatchFinder, ZSTD_matchState_dictMode(ms)); 675*3117ece4Schristos /* Input bounds */ 676*3117ece4Schristos BYTE const* const istart = (BYTE const*)src; 677*3117ece4Schristos BYTE const* const iend = istart + srcSize; 678*3117ece4Schristos /* Input positions */ 679*3117ece4Schristos BYTE const* ip = istart; 680*3117ece4Schristos 681*3117ece4Schristos DEBUGLOG(5, "ZSTD_ldm_blockCompress: srcSize=%zu", srcSize); 682*3117ece4Schristos /* If using opt parser, use LDMs only as candidates rather than always accepting them */ 683*3117ece4Schristos if (cParams->strategy >= ZSTD_btopt) { 684*3117ece4Schristos size_t lastLLSize; 685*3117ece4Schristos ms->ldmSeqStore = rawSeqStore; 686*3117ece4Schristos lastLLSize = blockCompressor(ms, seqStore, rep, src, srcSize); 687*3117ece4Schristos ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore, srcSize); 688*3117ece4Schristos return lastLLSize; 689*3117ece4Schristos } 690*3117ece4Schristos 691*3117ece4Schristos assert(rawSeqStore->pos <= rawSeqStore->size); 692*3117ece4Schristos assert(rawSeqStore->size <= rawSeqStore->capacity); 693*3117ece4Schristos /* Loop through each sequence and apply the block compressor to the literals */ 694*3117ece4Schristos while (rawSeqStore->pos < rawSeqStore->size && ip < iend) { 695*3117ece4Schristos /* maybeSplitSequence updates rawSeqStore->pos */ 696*3117ece4Schristos rawSeq const sequence = maybeSplitSequence(rawSeqStore, 697*3117ece4Schristos (U32)(iend - ip), minMatch); 698*3117ece4Schristos /* End signal */ 699*3117ece4Schristos if (sequence.offset == 0) 700*3117ece4Schristos break; 701*3117ece4Schristos 702*3117ece4Schristos assert(ip + sequence.litLength + sequence.matchLength <= iend); 703*3117ece4Schristos 704*3117ece4Schristos /* Fill tables for block compressor */ 705*3117ece4Schristos ZSTD_ldm_limitTableUpdate(ms, ip); 706*3117ece4Schristos ZSTD_ldm_fillFastTables(ms, ip); 707*3117ece4Schristos /* Run the block compressor */ 708*3117ece4Schristos DEBUGLOG(5, "pos %u : calling block compressor on segment of size %u", (unsigned)(ip-istart), sequence.litLength); 709*3117ece4Schristos { 710*3117ece4Schristos int i; 711*3117ece4Schristos size_t const newLitLength = 712*3117ece4Schristos blockCompressor(ms, seqStore, rep, ip, sequence.litLength); 713*3117ece4Schristos ip += sequence.litLength; 714*3117ece4Schristos /* Update the repcodes */ 715*3117ece4Schristos for (i = ZSTD_REP_NUM - 1; i > 0; i--) 716*3117ece4Schristos rep[i] = rep[i-1]; 717*3117ece4Schristos rep[0] = sequence.offset; 718*3117ece4Schristos /* Store the sequence */ 719*3117ece4Schristos ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, iend, 720*3117ece4Schristos OFFSET_TO_OFFBASE(sequence.offset), 721*3117ece4Schristos sequence.matchLength); 722*3117ece4Schristos ip += sequence.matchLength; 723*3117ece4Schristos } 724*3117ece4Schristos } 725*3117ece4Schristos /* Fill the tables for the block compressor */ 726*3117ece4Schristos ZSTD_ldm_limitTableUpdate(ms, ip); 727*3117ece4Schristos ZSTD_ldm_fillFastTables(ms, ip); 728*3117ece4Schristos /* Compress the last literals */ 729*3117ece4Schristos return blockCompressor(ms, seqStore, rep, ip, iend - ip); 730*3117ece4Schristos } 731