1*c03c5b1cSMartin Matuska /* 2*c03c5b1cSMartin Matuska * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. 3*c03c5b1cSMartin Matuska * All rights reserved. 4*c03c5b1cSMartin Matuska * 5*c03c5b1cSMartin Matuska * This source code is licensed under both the BSD-style license (found in the 6*c03c5b1cSMartin Matuska * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7*c03c5b1cSMartin Matuska * in the COPYING file in the root directory of this source tree). 8*c03c5b1cSMartin Matuska * You may select, at your option, one of the above-listed licenses. 9*c03c5b1cSMartin Matuska */ 10*c03c5b1cSMartin Matuska 11*c03c5b1cSMartin Matuska #ifndef ZSTD_LDM_H 12*c03c5b1cSMartin Matuska #define ZSTD_LDM_H 13*c03c5b1cSMartin Matuska 14*c03c5b1cSMartin Matuska #if defined (__cplusplus) 15*c03c5b1cSMartin Matuska extern "C" { 16*c03c5b1cSMartin Matuska #endif 17*c03c5b1cSMartin Matuska 18*c03c5b1cSMartin Matuska #include "zstd_compress_internal.h" /* ldmParams_t, U32 */ 19*c03c5b1cSMartin Matuska #include "../zstd.h" /* ZSTD_CCtx, size_t */ 20*c03c5b1cSMartin Matuska 21*c03c5b1cSMartin Matuska /*-************************************* 22*c03c5b1cSMartin Matuska * Long distance matching 23*c03c5b1cSMartin Matuska ***************************************/ 24*c03c5b1cSMartin Matuska 25*c03c5b1cSMartin Matuska #define ZSTD_LDM_DEFAULT_WINDOW_LOG ZSTD_WINDOWLOG_LIMIT_DEFAULT 26*c03c5b1cSMartin Matuska 27*c03c5b1cSMartin Matuska void ZSTD_ldm_fillHashTable( 28*c03c5b1cSMartin Matuska ldmState_t* state, const BYTE* ip, 29*c03c5b1cSMartin Matuska const BYTE* iend, ldmParams_t const* params); 30*c03c5b1cSMartin Matuska 31*c03c5b1cSMartin Matuska /** 32*c03c5b1cSMartin Matuska * ZSTD_ldm_generateSequences(): 33*c03c5b1cSMartin Matuska * 34*c03c5b1cSMartin Matuska * Generates the sequences using the long distance match finder. 35*c03c5b1cSMartin Matuska * Generates long range matching sequences in `sequences`, which parse a prefix 36*c03c5b1cSMartin Matuska * of the source. `sequences` must be large enough to store every sequence, 37*c03c5b1cSMartin Matuska * which can be checked with `ZSTD_ldm_getMaxNbSeq()`. 38*c03c5b1cSMartin Matuska * @returns 0 or an error code. 39*c03c5b1cSMartin Matuska * 40*c03c5b1cSMartin Matuska * NOTE: The user must have called ZSTD_window_update() for all of the input 41*c03c5b1cSMartin Matuska * they have, even if they pass it to ZSTD_ldm_generateSequences() in chunks. 42*c03c5b1cSMartin Matuska * NOTE: This function returns an error if it runs out of space to store 43*c03c5b1cSMartin Matuska * sequences. 44*c03c5b1cSMartin Matuska */ 45*c03c5b1cSMartin Matuska size_t ZSTD_ldm_generateSequences( 46*c03c5b1cSMartin Matuska ldmState_t* ldms, rawSeqStore_t* sequences, 47*c03c5b1cSMartin Matuska ldmParams_t const* params, void const* src, size_t srcSize); 48*c03c5b1cSMartin Matuska 49*c03c5b1cSMartin Matuska /** 50*c03c5b1cSMartin Matuska * ZSTD_ldm_blockCompress(): 51*c03c5b1cSMartin Matuska * 52*c03c5b1cSMartin Matuska * Compresses a block using the predefined sequences, along with a secondary 53*c03c5b1cSMartin Matuska * block compressor. The literals section of every sequence is passed to the 54*c03c5b1cSMartin Matuska * secondary block compressor, and those sequences are interspersed with the 55*c03c5b1cSMartin Matuska * predefined sequences. Returns the length of the last literals. 56*c03c5b1cSMartin Matuska * Updates `rawSeqStore.pos` to indicate how many sequences have been consumed. 57*c03c5b1cSMartin Matuska * `rawSeqStore.seq` may also be updated to split the last sequence between two 58*c03c5b1cSMartin Matuska * blocks. 59*c03c5b1cSMartin Matuska * @return The length of the last literals. 60*c03c5b1cSMartin Matuska * 61*c03c5b1cSMartin Matuska * NOTE: The source must be at most the maximum block size, but the predefined 62*c03c5b1cSMartin Matuska * sequences can be any size, and may be longer than the block. In the case that 63*c03c5b1cSMartin Matuska * they are longer than the block, the last sequences may need to be split into 64*c03c5b1cSMartin Matuska * two. We handle that case correctly, and update `rawSeqStore` appropriately. 65*c03c5b1cSMartin Matuska * NOTE: This function does not return any errors. 66*c03c5b1cSMartin Matuska */ 67*c03c5b1cSMartin Matuska size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, 68*c03c5b1cSMartin Matuska ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 69*c03c5b1cSMartin Matuska void const* src, size_t srcSize); 70*c03c5b1cSMartin Matuska 71*c03c5b1cSMartin Matuska /** 72*c03c5b1cSMartin Matuska * ZSTD_ldm_skipSequences(): 73*c03c5b1cSMartin Matuska * 74*c03c5b1cSMartin Matuska * Skip past `srcSize` bytes worth of sequences in `rawSeqStore`. 75*c03c5b1cSMartin Matuska * Avoids emitting matches less than `minMatch` bytes. 76*c03c5b1cSMartin Matuska * Must be called for data with is not passed to ZSTD_ldm_blockCompress(). 77*c03c5b1cSMartin Matuska */ 78*c03c5b1cSMartin Matuska void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, 79*c03c5b1cSMartin Matuska U32 const minMatch); 80*c03c5b1cSMartin Matuska 81*c03c5b1cSMartin Matuska 82*c03c5b1cSMartin Matuska /** ZSTD_ldm_getTableSize() : 83*c03c5b1cSMartin Matuska * Estimate the space needed for long distance matching tables or 0 if LDM is 84*c03c5b1cSMartin Matuska * disabled. 85*c03c5b1cSMartin Matuska */ 86*c03c5b1cSMartin Matuska size_t ZSTD_ldm_getTableSize(ldmParams_t params); 87*c03c5b1cSMartin Matuska 88*c03c5b1cSMartin Matuska /** ZSTD_ldm_getSeqSpace() : 89*c03c5b1cSMartin Matuska * Return an upper bound on the number of sequences that can be produced by 90*c03c5b1cSMartin Matuska * the long distance matcher, or 0 if LDM is disabled. 91*c03c5b1cSMartin Matuska */ 92*c03c5b1cSMartin Matuska size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize); 93*c03c5b1cSMartin Matuska 94*c03c5b1cSMartin Matuska /** ZSTD_ldm_adjustParameters() : 95*c03c5b1cSMartin Matuska * If the params->hashRateLog is not set, set it to its default value based on 96*c03c5b1cSMartin Matuska * windowLog and params->hashLog. 97*c03c5b1cSMartin Matuska * 98*c03c5b1cSMartin Matuska * Ensures that params->bucketSizeLog is <= params->hashLog (setting it to 99*c03c5b1cSMartin Matuska * params->hashLog if it is not). 100*c03c5b1cSMartin Matuska * 101*c03c5b1cSMartin Matuska * Ensures that the minMatchLength >= targetLength during optimal parsing. 102*c03c5b1cSMartin Matuska */ 103*c03c5b1cSMartin Matuska void ZSTD_ldm_adjustParameters(ldmParams_t* params, 104*c03c5b1cSMartin Matuska ZSTD_compressionParameters const* cParams); 105*c03c5b1cSMartin Matuska 106*c03c5b1cSMartin Matuska #if defined (__cplusplus) 107*c03c5b1cSMartin Matuska } 108*c03c5b1cSMartin Matuska #endif 109*c03c5b1cSMartin Matuska 110*c03c5b1cSMartin Matuska #endif /* ZSTD_FAST_H */ 111