1a0483764SConrad Meyer /* 2*37f1f268SConrad Meyer * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. 3a0483764SConrad Meyer * All rights reserved. 4a0483764SConrad Meyer * 5a0483764SConrad Meyer * This source code is licensed under both the BSD-style license (found in the 6a0483764SConrad Meyer * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7a0483764SConrad Meyer * in the COPYING file in the root directory of this source tree). 8a0483764SConrad Meyer * You may select, at your option, one of the above-listed licenses. 9a0483764SConrad Meyer */ 10a0483764SConrad Meyer 11a0483764SConrad Meyer /* zstd_decompress_block : 12a0483764SConrad Meyer * this module takes care of decompressing _compressed_ block */ 13a0483764SConrad Meyer 14a0483764SConrad Meyer /*-******************************************************* 15a0483764SConrad Meyer * Dependencies 16a0483764SConrad Meyer *********************************************************/ 17a0483764SConrad Meyer #include <string.h> /* memcpy, memmove, memset */ 18*37f1f268SConrad Meyer #include "../common/compiler.h" /* prefetch */ 19*37f1f268SConrad Meyer #include "../common/cpu.h" /* bmi2 */ 20*37f1f268SConrad Meyer #include "../common/mem.h" /* low level memory routines */ 21a0483764SConrad Meyer #define FSE_STATIC_LINKING_ONLY 22*37f1f268SConrad Meyer #include "../common/fse.h" 23a0483764SConrad Meyer #define HUF_STATIC_LINKING_ONLY 24*37f1f268SConrad Meyer #include "../common/huf.h" 25*37f1f268SConrad Meyer #include "../common/zstd_internal.h" 26a0483764SConrad Meyer #include "zstd_decompress_internal.h" /* ZSTD_DCtx */ 27a0483764SConrad Meyer #include "zstd_ddict.h" /* ZSTD_DDictDictContent */ 28a0483764SConrad Meyer #include "zstd_decompress_block.h" 29a0483764SConrad Meyer 30a0483764SConrad Meyer /*_******************************************************* 31a0483764SConrad Meyer * Macros 32a0483764SConrad Meyer **********************************************************/ 33a0483764SConrad Meyer 34a0483764SConrad Meyer /* These two optional macros force the use one way or another of the two 35a0483764SConrad Meyer * ZSTD_decompressSequences implementations. You can't force in both directions 36a0483764SConrad Meyer * at the same time. 37a0483764SConrad Meyer */ 38a0483764SConrad Meyer #if defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ 39a0483764SConrad Meyer defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) 40a0483764SConrad Meyer #error "Cannot force the use of the short and the long ZSTD_decompressSequences variants!" 41a0483764SConrad Meyer #endif 42a0483764SConrad Meyer 43a0483764SConrad Meyer 44a0483764SConrad Meyer /*_******************************************************* 45a0483764SConrad Meyer * Memory operations 46a0483764SConrad Meyer **********************************************************/ 47a0483764SConrad Meyer static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); } 48a0483764SConrad Meyer 49a0483764SConrad Meyer 50a0483764SConrad Meyer /*-************************************************************* 51a0483764SConrad Meyer * Block decoding 52a0483764SConrad Meyer ***************************************************************/ 53a0483764SConrad Meyer 54a0483764SConrad Meyer /*! ZSTD_getcBlockSize() : 55a0483764SConrad Meyer * Provides the size of compressed block from block header `src` */ 56a0483764SConrad Meyer size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, 57a0483764SConrad Meyer blockProperties_t* bpPtr) 58a0483764SConrad Meyer { 59*37f1f268SConrad Meyer RETURN_ERROR_IF(srcSize < ZSTD_blockHeaderSize, srcSize_wrong, ""); 602b9c00cbSConrad Meyer 61a0483764SConrad Meyer { U32 const cBlockHeader = MEM_readLE24(src); 62a0483764SConrad Meyer U32 const cSize = cBlockHeader >> 3; 63a0483764SConrad Meyer bpPtr->lastBlock = cBlockHeader & 1; 64a0483764SConrad Meyer bpPtr->blockType = (blockType_e)((cBlockHeader >> 1) & 3); 65a0483764SConrad Meyer bpPtr->origSize = cSize; /* only useful for RLE */ 66a0483764SConrad Meyer if (bpPtr->blockType == bt_rle) return 1; 67*37f1f268SConrad Meyer RETURN_ERROR_IF(bpPtr->blockType == bt_reserved, corruption_detected, ""); 68a0483764SConrad Meyer return cSize; 69a0483764SConrad Meyer } 70a0483764SConrad Meyer } 71a0483764SConrad Meyer 72a0483764SConrad Meyer 73a0483764SConrad Meyer /* Hidden declaration for fullbench */ 74a0483764SConrad Meyer size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, 75a0483764SConrad Meyer const void* src, size_t srcSize); 76a0483764SConrad Meyer /*! ZSTD_decodeLiteralsBlock() : 77a0483764SConrad Meyer * @return : nb of bytes read from src (< srcSize ) 78a0483764SConrad Meyer * note : symbol not declared but exposed for fullbench */ 79a0483764SConrad Meyer size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, 80a0483764SConrad Meyer const void* src, size_t srcSize) /* note : srcSize < BLOCKSIZE */ 81a0483764SConrad Meyer { 829cbefe25SConrad Meyer DEBUGLOG(5, "ZSTD_decodeLiteralsBlock"); 83*37f1f268SConrad Meyer RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected, ""); 84a0483764SConrad Meyer 85a0483764SConrad Meyer { const BYTE* const istart = (const BYTE*) src; 86a0483764SConrad Meyer symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3); 87a0483764SConrad Meyer 88a0483764SConrad Meyer switch(litEncType) 89a0483764SConrad Meyer { 90a0483764SConrad Meyer case set_repeat: 919cbefe25SConrad Meyer DEBUGLOG(5, "set_repeat flag : re-using stats from previous compressed literals block"); 92*37f1f268SConrad Meyer RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted, ""); 93a0483764SConrad Meyer /* fall-through */ 94a0483764SConrad Meyer 95a0483764SConrad Meyer case set_compressed: 962b9c00cbSConrad Meyer RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3"); 97a0483764SConrad Meyer { size_t lhSize, litSize, litCSize; 98a0483764SConrad Meyer U32 singleStream=0; 99a0483764SConrad Meyer U32 const lhlCode = (istart[0] >> 2) & 3; 100a0483764SConrad Meyer U32 const lhc = MEM_readLE32(istart); 101a0483764SConrad Meyer size_t hufSuccess; 102a0483764SConrad Meyer switch(lhlCode) 103a0483764SConrad Meyer { 104a0483764SConrad Meyer case 0: case 1: default: /* note : default is impossible, since lhlCode into [0..3] */ 105a0483764SConrad Meyer /* 2 - 2 - 10 - 10 */ 106a0483764SConrad Meyer singleStream = !lhlCode; 107a0483764SConrad Meyer lhSize = 3; 108a0483764SConrad Meyer litSize = (lhc >> 4) & 0x3FF; 109a0483764SConrad Meyer litCSize = (lhc >> 14) & 0x3FF; 110a0483764SConrad Meyer break; 111a0483764SConrad Meyer case 2: 112a0483764SConrad Meyer /* 2 - 2 - 14 - 14 */ 113a0483764SConrad Meyer lhSize = 4; 114a0483764SConrad Meyer litSize = (lhc >> 4) & 0x3FFF; 115a0483764SConrad Meyer litCSize = lhc >> 18; 116a0483764SConrad Meyer break; 117a0483764SConrad Meyer case 3: 118a0483764SConrad Meyer /* 2 - 2 - 18 - 18 */ 119a0483764SConrad Meyer lhSize = 5; 120a0483764SConrad Meyer litSize = (lhc >> 4) & 0x3FFFF; 1219cbefe25SConrad Meyer litCSize = (lhc >> 22) + ((size_t)istart[4] << 10); 122a0483764SConrad Meyer break; 123a0483764SConrad Meyer } 124*37f1f268SConrad Meyer RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, ""); 125*37f1f268SConrad Meyer RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, ""); 126a0483764SConrad Meyer 127a0483764SConrad Meyer /* prefetch huffman table if cold */ 128a0483764SConrad Meyer if (dctx->ddictIsCold && (litSize > 768 /* heuristic */)) { 129a0483764SConrad Meyer PREFETCH_AREA(dctx->HUFptr, sizeof(dctx->entropy.hufTable)); 130a0483764SConrad Meyer } 131a0483764SConrad Meyer 132a0483764SConrad Meyer if (litEncType==set_repeat) { 133a0483764SConrad Meyer if (singleStream) { 134a0483764SConrad Meyer hufSuccess = HUF_decompress1X_usingDTable_bmi2( 135a0483764SConrad Meyer dctx->litBuffer, litSize, istart+lhSize, litCSize, 136a0483764SConrad Meyer dctx->HUFptr, dctx->bmi2); 137a0483764SConrad Meyer } else { 138a0483764SConrad Meyer hufSuccess = HUF_decompress4X_usingDTable_bmi2( 139a0483764SConrad Meyer dctx->litBuffer, litSize, istart+lhSize, litCSize, 140a0483764SConrad Meyer dctx->HUFptr, dctx->bmi2); 141a0483764SConrad Meyer } 142a0483764SConrad Meyer } else { 143a0483764SConrad Meyer if (singleStream) { 144a0483764SConrad Meyer #if defined(HUF_FORCE_DECOMPRESS_X2) 145a0483764SConrad Meyer hufSuccess = HUF_decompress1X_DCtx_wksp( 146a0483764SConrad Meyer dctx->entropy.hufTable, dctx->litBuffer, litSize, 147a0483764SConrad Meyer istart+lhSize, litCSize, dctx->workspace, 148a0483764SConrad Meyer sizeof(dctx->workspace)); 149a0483764SConrad Meyer #else 150a0483764SConrad Meyer hufSuccess = HUF_decompress1X1_DCtx_wksp_bmi2( 151a0483764SConrad Meyer dctx->entropy.hufTable, dctx->litBuffer, litSize, 152a0483764SConrad Meyer istart+lhSize, litCSize, dctx->workspace, 153a0483764SConrad Meyer sizeof(dctx->workspace), dctx->bmi2); 154a0483764SConrad Meyer #endif 155a0483764SConrad Meyer } else { 156a0483764SConrad Meyer hufSuccess = HUF_decompress4X_hufOnly_wksp_bmi2( 157a0483764SConrad Meyer dctx->entropy.hufTable, dctx->litBuffer, litSize, 158a0483764SConrad Meyer istart+lhSize, litCSize, dctx->workspace, 159a0483764SConrad Meyer sizeof(dctx->workspace), dctx->bmi2); 160a0483764SConrad Meyer } 161a0483764SConrad Meyer } 162a0483764SConrad Meyer 163*37f1f268SConrad Meyer RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected, ""); 164a0483764SConrad Meyer 165a0483764SConrad Meyer dctx->litPtr = dctx->litBuffer; 166a0483764SConrad Meyer dctx->litSize = litSize; 167a0483764SConrad Meyer dctx->litEntropy = 1; 168a0483764SConrad Meyer if (litEncType==set_compressed) dctx->HUFptr = dctx->entropy.hufTable; 169a0483764SConrad Meyer memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH); 170a0483764SConrad Meyer return litCSize + lhSize; 171a0483764SConrad Meyer } 172a0483764SConrad Meyer 173a0483764SConrad Meyer case set_basic: 174a0483764SConrad Meyer { size_t litSize, lhSize; 175a0483764SConrad Meyer U32 const lhlCode = ((istart[0]) >> 2) & 3; 176a0483764SConrad Meyer switch(lhlCode) 177a0483764SConrad Meyer { 178a0483764SConrad Meyer case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */ 179a0483764SConrad Meyer lhSize = 1; 180a0483764SConrad Meyer litSize = istart[0] >> 3; 181a0483764SConrad Meyer break; 182a0483764SConrad Meyer case 1: 183a0483764SConrad Meyer lhSize = 2; 184a0483764SConrad Meyer litSize = MEM_readLE16(istart) >> 4; 185a0483764SConrad Meyer break; 186a0483764SConrad Meyer case 3: 187a0483764SConrad Meyer lhSize = 3; 188a0483764SConrad Meyer litSize = MEM_readLE24(istart) >> 4; 189a0483764SConrad Meyer break; 190a0483764SConrad Meyer } 191a0483764SConrad Meyer 192a0483764SConrad Meyer if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */ 193*37f1f268SConrad Meyer RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected, ""); 194a0483764SConrad Meyer memcpy(dctx->litBuffer, istart+lhSize, litSize); 195a0483764SConrad Meyer dctx->litPtr = dctx->litBuffer; 196a0483764SConrad Meyer dctx->litSize = litSize; 197a0483764SConrad Meyer memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH); 198a0483764SConrad Meyer return lhSize+litSize; 199a0483764SConrad Meyer } 200a0483764SConrad Meyer /* direct reference into compressed stream */ 201a0483764SConrad Meyer dctx->litPtr = istart+lhSize; 202a0483764SConrad Meyer dctx->litSize = litSize; 203a0483764SConrad Meyer return lhSize+litSize; 204a0483764SConrad Meyer } 205a0483764SConrad Meyer 206a0483764SConrad Meyer case set_rle: 207a0483764SConrad Meyer { U32 const lhlCode = ((istart[0]) >> 2) & 3; 208a0483764SConrad Meyer size_t litSize, lhSize; 209a0483764SConrad Meyer switch(lhlCode) 210a0483764SConrad Meyer { 211a0483764SConrad Meyer case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */ 212a0483764SConrad Meyer lhSize = 1; 213a0483764SConrad Meyer litSize = istart[0] >> 3; 214a0483764SConrad Meyer break; 215a0483764SConrad Meyer case 1: 216a0483764SConrad Meyer lhSize = 2; 217a0483764SConrad Meyer litSize = MEM_readLE16(istart) >> 4; 218a0483764SConrad Meyer break; 219a0483764SConrad Meyer case 3: 220a0483764SConrad Meyer lhSize = 3; 221a0483764SConrad Meyer litSize = MEM_readLE24(istart) >> 4; 2222b9c00cbSConrad Meyer RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4"); 223a0483764SConrad Meyer break; 224a0483764SConrad Meyer } 225*37f1f268SConrad Meyer RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, ""); 226a0483764SConrad Meyer memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH); 227a0483764SConrad Meyer dctx->litPtr = dctx->litBuffer; 228a0483764SConrad Meyer dctx->litSize = litSize; 229a0483764SConrad Meyer return lhSize+1; 230a0483764SConrad Meyer } 231a0483764SConrad Meyer default: 2322b9c00cbSConrad Meyer RETURN_ERROR(corruption_detected, "impossible"); 233a0483764SConrad Meyer } 234a0483764SConrad Meyer } 235a0483764SConrad Meyer } 236a0483764SConrad Meyer 237a0483764SConrad Meyer /* Default FSE distribution tables. 238a0483764SConrad Meyer * These are pre-calculated FSE decoding tables using default distributions as defined in specification : 239a0483764SConrad Meyer * https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#default-distributions 240a0483764SConrad Meyer * They were generated programmatically with following method : 241a0483764SConrad Meyer * - start from default distributions, present in /lib/common/zstd_internal.h 242a0483764SConrad Meyer * - generate tables normally, using ZSTD_buildFSETable() 243a0483764SConrad Meyer * - printout the content of tables 244a0483764SConrad Meyer * - pretify output, report below, test with fuzzer to ensure it's correct */ 245a0483764SConrad Meyer 246a0483764SConrad Meyer /* Default FSE distribution table for Literal Lengths */ 247a0483764SConrad Meyer static const ZSTD_seqSymbol LL_defaultDTable[(1<<LL_DEFAULTNORMLOG)+1] = { 248a0483764SConrad Meyer { 1, 1, 1, LL_DEFAULTNORMLOG}, /* header : fastMode, tableLog */ 249a0483764SConrad Meyer /* nextState, nbAddBits, nbBits, baseVal */ 250a0483764SConrad Meyer { 0, 0, 4, 0}, { 16, 0, 4, 0}, 251a0483764SConrad Meyer { 32, 0, 5, 1}, { 0, 0, 5, 3}, 252a0483764SConrad Meyer { 0, 0, 5, 4}, { 0, 0, 5, 6}, 253a0483764SConrad Meyer { 0, 0, 5, 7}, { 0, 0, 5, 9}, 254a0483764SConrad Meyer { 0, 0, 5, 10}, { 0, 0, 5, 12}, 255a0483764SConrad Meyer { 0, 0, 6, 14}, { 0, 1, 5, 16}, 256a0483764SConrad Meyer { 0, 1, 5, 20}, { 0, 1, 5, 22}, 257a0483764SConrad Meyer { 0, 2, 5, 28}, { 0, 3, 5, 32}, 258a0483764SConrad Meyer { 0, 4, 5, 48}, { 32, 6, 5, 64}, 259a0483764SConrad Meyer { 0, 7, 5, 128}, { 0, 8, 6, 256}, 260a0483764SConrad Meyer { 0, 10, 6, 1024}, { 0, 12, 6, 4096}, 261a0483764SConrad Meyer { 32, 0, 4, 0}, { 0, 0, 4, 1}, 262a0483764SConrad Meyer { 0, 0, 5, 2}, { 32, 0, 5, 4}, 263a0483764SConrad Meyer { 0, 0, 5, 5}, { 32, 0, 5, 7}, 264a0483764SConrad Meyer { 0, 0, 5, 8}, { 32, 0, 5, 10}, 265a0483764SConrad Meyer { 0, 0, 5, 11}, { 0, 0, 6, 13}, 266a0483764SConrad Meyer { 32, 1, 5, 16}, { 0, 1, 5, 18}, 267a0483764SConrad Meyer { 32, 1, 5, 22}, { 0, 2, 5, 24}, 268a0483764SConrad Meyer { 32, 3, 5, 32}, { 0, 3, 5, 40}, 269a0483764SConrad Meyer { 0, 6, 4, 64}, { 16, 6, 4, 64}, 270a0483764SConrad Meyer { 32, 7, 5, 128}, { 0, 9, 6, 512}, 271a0483764SConrad Meyer { 0, 11, 6, 2048}, { 48, 0, 4, 0}, 272a0483764SConrad Meyer { 16, 0, 4, 1}, { 32, 0, 5, 2}, 273a0483764SConrad Meyer { 32, 0, 5, 3}, { 32, 0, 5, 5}, 274a0483764SConrad Meyer { 32, 0, 5, 6}, { 32, 0, 5, 8}, 275a0483764SConrad Meyer { 32, 0, 5, 9}, { 32, 0, 5, 11}, 276a0483764SConrad Meyer { 32, 0, 5, 12}, { 0, 0, 6, 15}, 277a0483764SConrad Meyer { 32, 1, 5, 18}, { 32, 1, 5, 20}, 278a0483764SConrad Meyer { 32, 2, 5, 24}, { 32, 2, 5, 28}, 279a0483764SConrad Meyer { 32, 3, 5, 40}, { 32, 4, 5, 48}, 280a0483764SConrad Meyer { 0, 16, 6,65536}, { 0, 15, 6,32768}, 281a0483764SConrad Meyer { 0, 14, 6,16384}, { 0, 13, 6, 8192}, 282a0483764SConrad Meyer }; /* LL_defaultDTable */ 283a0483764SConrad Meyer 284a0483764SConrad Meyer /* Default FSE distribution table for Offset Codes */ 285a0483764SConrad Meyer static const ZSTD_seqSymbol OF_defaultDTable[(1<<OF_DEFAULTNORMLOG)+1] = { 286a0483764SConrad Meyer { 1, 1, 1, OF_DEFAULTNORMLOG}, /* header : fastMode, tableLog */ 287a0483764SConrad Meyer /* nextState, nbAddBits, nbBits, baseVal */ 288a0483764SConrad Meyer { 0, 0, 5, 0}, { 0, 6, 4, 61}, 289a0483764SConrad Meyer { 0, 9, 5, 509}, { 0, 15, 5,32765}, 290a0483764SConrad Meyer { 0, 21, 5,2097149}, { 0, 3, 5, 5}, 291a0483764SConrad Meyer { 0, 7, 4, 125}, { 0, 12, 5, 4093}, 292a0483764SConrad Meyer { 0, 18, 5,262141}, { 0, 23, 5,8388605}, 293a0483764SConrad Meyer { 0, 5, 5, 29}, { 0, 8, 4, 253}, 294a0483764SConrad Meyer { 0, 14, 5,16381}, { 0, 20, 5,1048573}, 295a0483764SConrad Meyer { 0, 2, 5, 1}, { 16, 7, 4, 125}, 296a0483764SConrad Meyer { 0, 11, 5, 2045}, { 0, 17, 5,131069}, 297a0483764SConrad Meyer { 0, 22, 5,4194301}, { 0, 4, 5, 13}, 298a0483764SConrad Meyer { 16, 8, 4, 253}, { 0, 13, 5, 8189}, 299a0483764SConrad Meyer { 0, 19, 5,524285}, { 0, 1, 5, 1}, 300a0483764SConrad Meyer { 16, 6, 4, 61}, { 0, 10, 5, 1021}, 301a0483764SConrad Meyer { 0, 16, 5,65533}, { 0, 28, 5,268435453}, 302a0483764SConrad Meyer { 0, 27, 5,134217725}, { 0, 26, 5,67108861}, 303a0483764SConrad Meyer { 0, 25, 5,33554429}, { 0, 24, 5,16777213}, 304a0483764SConrad Meyer }; /* OF_defaultDTable */ 305a0483764SConrad Meyer 306a0483764SConrad Meyer 307a0483764SConrad Meyer /* Default FSE distribution table for Match Lengths */ 308a0483764SConrad Meyer static const ZSTD_seqSymbol ML_defaultDTable[(1<<ML_DEFAULTNORMLOG)+1] = { 309a0483764SConrad Meyer { 1, 1, 1, ML_DEFAULTNORMLOG}, /* header : fastMode, tableLog */ 310a0483764SConrad Meyer /* nextState, nbAddBits, nbBits, baseVal */ 311a0483764SConrad Meyer { 0, 0, 6, 3}, { 0, 0, 4, 4}, 312a0483764SConrad Meyer { 32, 0, 5, 5}, { 0, 0, 5, 6}, 313a0483764SConrad Meyer { 0, 0, 5, 8}, { 0, 0, 5, 9}, 314a0483764SConrad Meyer { 0, 0, 5, 11}, { 0, 0, 6, 13}, 315a0483764SConrad Meyer { 0, 0, 6, 16}, { 0, 0, 6, 19}, 316a0483764SConrad Meyer { 0, 0, 6, 22}, { 0, 0, 6, 25}, 317a0483764SConrad Meyer { 0, 0, 6, 28}, { 0, 0, 6, 31}, 318a0483764SConrad Meyer { 0, 0, 6, 34}, { 0, 1, 6, 37}, 319a0483764SConrad Meyer { 0, 1, 6, 41}, { 0, 2, 6, 47}, 320a0483764SConrad Meyer { 0, 3, 6, 59}, { 0, 4, 6, 83}, 321a0483764SConrad Meyer { 0, 7, 6, 131}, { 0, 9, 6, 515}, 322a0483764SConrad Meyer { 16, 0, 4, 4}, { 0, 0, 4, 5}, 323a0483764SConrad Meyer { 32, 0, 5, 6}, { 0, 0, 5, 7}, 324a0483764SConrad Meyer { 32, 0, 5, 9}, { 0, 0, 5, 10}, 325a0483764SConrad Meyer { 0, 0, 6, 12}, { 0, 0, 6, 15}, 326a0483764SConrad Meyer { 0, 0, 6, 18}, { 0, 0, 6, 21}, 327a0483764SConrad Meyer { 0, 0, 6, 24}, { 0, 0, 6, 27}, 328a0483764SConrad Meyer { 0, 0, 6, 30}, { 0, 0, 6, 33}, 329a0483764SConrad Meyer { 0, 1, 6, 35}, { 0, 1, 6, 39}, 330a0483764SConrad Meyer { 0, 2, 6, 43}, { 0, 3, 6, 51}, 331a0483764SConrad Meyer { 0, 4, 6, 67}, { 0, 5, 6, 99}, 332a0483764SConrad Meyer { 0, 8, 6, 259}, { 32, 0, 4, 4}, 333a0483764SConrad Meyer { 48, 0, 4, 4}, { 16, 0, 4, 5}, 334a0483764SConrad Meyer { 32, 0, 5, 7}, { 32, 0, 5, 8}, 335a0483764SConrad Meyer { 32, 0, 5, 10}, { 32, 0, 5, 11}, 336a0483764SConrad Meyer { 0, 0, 6, 14}, { 0, 0, 6, 17}, 337a0483764SConrad Meyer { 0, 0, 6, 20}, { 0, 0, 6, 23}, 338a0483764SConrad Meyer { 0, 0, 6, 26}, { 0, 0, 6, 29}, 339a0483764SConrad Meyer { 0, 0, 6, 32}, { 0, 16, 6,65539}, 340a0483764SConrad Meyer { 0, 15, 6,32771}, { 0, 14, 6,16387}, 341a0483764SConrad Meyer { 0, 13, 6, 8195}, { 0, 12, 6, 4099}, 342a0483764SConrad Meyer { 0, 11, 6, 2051}, { 0, 10, 6, 1027}, 343a0483764SConrad Meyer }; /* ML_defaultDTable */ 344a0483764SConrad Meyer 345a0483764SConrad Meyer 346a0483764SConrad Meyer static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U32 nbAddBits) 347a0483764SConrad Meyer { 348a0483764SConrad Meyer void* ptr = dt; 349a0483764SConrad Meyer ZSTD_seqSymbol_header* const DTableH = (ZSTD_seqSymbol_header*)ptr; 350a0483764SConrad Meyer ZSTD_seqSymbol* const cell = dt + 1; 351a0483764SConrad Meyer 352a0483764SConrad Meyer DTableH->tableLog = 0; 353a0483764SConrad Meyer DTableH->fastMode = 0; 354a0483764SConrad Meyer 355a0483764SConrad Meyer cell->nbBits = 0; 356a0483764SConrad Meyer cell->nextState = 0; 357a0483764SConrad Meyer assert(nbAddBits < 255); 358a0483764SConrad Meyer cell->nbAdditionalBits = (BYTE)nbAddBits; 359a0483764SConrad Meyer cell->baseValue = baseValue; 360a0483764SConrad Meyer } 361a0483764SConrad Meyer 362a0483764SConrad Meyer 363a0483764SConrad Meyer /* ZSTD_buildFSETable() : 364a0483764SConrad Meyer * generate FSE decoding table for one symbol (ll, ml or off) 365a0483764SConrad Meyer * cannot fail if input is valid => 366a0483764SConrad Meyer * all inputs are presumed validated at this stage */ 367a0483764SConrad Meyer void 368a0483764SConrad Meyer ZSTD_buildFSETable(ZSTD_seqSymbol* dt, 369a0483764SConrad Meyer const short* normalizedCounter, unsigned maxSymbolValue, 370a0483764SConrad Meyer const U32* baseValue, const U32* nbAdditionalBits, 371a0483764SConrad Meyer unsigned tableLog) 372a0483764SConrad Meyer { 373a0483764SConrad Meyer ZSTD_seqSymbol* const tableDecode = dt+1; 374a0483764SConrad Meyer U16 symbolNext[MaxSeq+1]; 375a0483764SConrad Meyer 376a0483764SConrad Meyer U32 const maxSV1 = maxSymbolValue + 1; 377a0483764SConrad Meyer U32 const tableSize = 1 << tableLog; 378a0483764SConrad Meyer U32 highThreshold = tableSize-1; 379a0483764SConrad Meyer 380a0483764SConrad Meyer /* Sanity Checks */ 381a0483764SConrad Meyer assert(maxSymbolValue <= MaxSeq); 382a0483764SConrad Meyer assert(tableLog <= MaxFSELog); 383a0483764SConrad Meyer 384a0483764SConrad Meyer /* Init, lay down lowprob symbols */ 385a0483764SConrad Meyer { ZSTD_seqSymbol_header DTableH; 386a0483764SConrad Meyer DTableH.tableLog = tableLog; 387a0483764SConrad Meyer DTableH.fastMode = 1; 388a0483764SConrad Meyer { S16 const largeLimit= (S16)(1 << (tableLog-1)); 389a0483764SConrad Meyer U32 s; 390a0483764SConrad Meyer for (s=0; s<maxSV1; s++) { 391a0483764SConrad Meyer if (normalizedCounter[s]==-1) { 392a0483764SConrad Meyer tableDecode[highThreshold--].baseValue = s; 393a0483764SConrad Meyer symbolNext[s] = 1; 394a0483764SConrad Meyer } else { 395a0483764SConrad Meyer if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0; 3969cbefe25SConrad Meyer assert(normalizedCounter[s]>=0); 3979cbefe25SConrad Meyer symbolNext[s] = (U16)normalizedCounter[s]; 398a0483764SConrad Meyer } } } 399a0483764SConrad Meyer memcpy(dt, &DTableH, sizeof(DTableH)); 400a0483764SConrad Meyer } 401a0483764SConrad Meyer 402a0483764SConrad Meyer /* Spread symbols */ 403a0483764SConrad Meyer { U32 const tableMask = tableSize-1; 404a0483764SConrad Meyer U32 const step = FSE_TABLESTEP(tableSize); 405a0483764SConrad Meyer U32 s, position = 0; 406a0483764SConrad Meyer for (s=0; s<maxSV1; s++) { 407a0483764SConrad Meyer int i; 408a0483764SConrad Meyer for (i=0; i<normalizedCounter[s]; i++) { 409a0483764SConrad Meyer tableDecode[position].baseValue = s; 410a0483764SConrad Meyer position = (position + step) & tableMask; 411a0483764SConrad Meyer while (position > highThreshold) position = (position + step) & tableMask; /* lowprob area */ 412a0483764SConrad Meyer } } 413a0483764SConrad Meyer assert(position == 0); /* position must reach all cells once, otherwise normalizedCounter is incorrect */ 414a0483764SConrad Meyer } 415a0483764SConrad Meyer 416a0483764SConrad Meyer /* Build Decoding table */ 417a0483764SConrad Meyer { U32 u; 418a0483764SConrad Meyer for (u=0; u<tableSize; u++) { 419a0483764SConrad Meyer U32 const symbol = tableDecode[u].baseValue; 420a0483764SConrad Meyer U32 const nextState = symbolNext[symbol]++; 421a0483764SConrad Meyer tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) ); 422a0483764SConrad Meyer tableDecode[u].nextState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize); 423a0483764SConrad Meyer assert(nbAdditionalBits[symbol] < 255); 424a0483764SConrad Meyer tableDecode[u].nbAdditionalBits = (BYTE)nbAdditionalBits[symbol]; 425a0483764SConrad Meyer tableDecode[u].baseValue = baseValue[symbol]; 426a0483764SConrad Meyer } } 427a0483764SConrad Meyer } 428a0483764SConrad Meyer 429a0483764SConrad Meyer 430a0483764SConrad Meyer /*! ZSTD_buildSeqTable() : 431a0483764SConrad Meyer * @return : nb bytes read from src, 432a0483764SConrad Meyer * or an error code if it fails */ 433a0483764SConrad Meyer static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymbol** DTablePtr, 434a0483764SConrad Meyer symbolEncodingType_e type, unsigned max, U32 maxLog, 435a0483764SConrad Meyer const void* src, size_t srcSize, 436a0483764SConrad Meyer const U32* baseValue, const U32* nbAdditionalBits, 437a0483764SConrad Meyer const ZSTD_seqSymbol* defaultTable, U32 flagRepeatTable, 438a0483764SConrad Meyer int ddictIsCold, int nbSeq) 439a0483764SConrad Meyer { 440a0483764SConrad Meyer switch(type) 441a0483764SConrad Meyer { 442a0483764SConrad Meyer case set_rle : 443*37f1f268SConrad Meyer RETURN_ERROR_IF(!srcSize, srcSize_wrong, ""); 444*37f1f268SConrad Meyer RETURN_ERROR_IF((*(const BYTE*)src) > max, corruption_detected, ""); 445a0483764SConrad Meyer { U32 const symbol = *(const BYTE*)src; 446a0483764SConrad Meyer U32 const baseline = baseValue[symbol]; 447a0483764SConrad Meyer U32 const nbBits = nbAdditionalBits[symbol]; 448a0483764SConrad Meyer ZSTD_buildSeqTable_rle(DTableSpace, baseline, nbBits); 449a0483764SConrad Meyer } 450a0483764SConrad Meyer *DTablePtr = DTableSpace; 451a0483764SConrad Meyer return 1; 452a0483764SConrad Meyer case set_basic : 453a0483764SConrad Meyer *DTablePtr = defaultTable; 454a0483764SConrad Meyer return 0; 455a0483764SConrad Meyer case set_repeat: 456*37f1f268SConrad Meyer RETURN_ERROR_IF(!flagRepeatTable, corruption_detected, ""); 457a0483764SConrad Meyer /* prefetch FSE table if used */ 458a0483764SConrad Meyer if (ddictIsCold && (nbSeq > 24 /* heuristic */)) { 459a0483764SConrad Meyer const void* const pStart = *DTablePtr; 460a0483764SConrad Meyer size_t const pSize = sizeof(ZSTD_seqSymbol) * (SEQSYMBOL_TABLE_SIZE(maxLog)); 461a0483764SConrad Meyer PREFETCH_AREA(pStart, pSize); 462a0483764SConrad Meyer } 463a0483764SConrad Meyer return 0; 464a0483764SConrad Meyer case set_compressed : 465a0483764SConrad Meyer { unsigned tableLog; 466a0483764SConrad Meyer S16 norm[MaxSeq+1]; 467a0483764SConrad Meyer size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize); 468*37f1f268SConrad Meyer RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected, ""); 469*37f1f268SConrad Meyer RETURN_ERROR_IF(tableLog > maxLog, corruption_detected, ""); 470a0483764SConrad Meyer ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog); 471a0483764SConrad Meyer *DTablePtr = DTableSpace; 472a0483764SConrad Meyer return headerSize; 473a0483764SConrad Meyer } 4742b9c00cbSConrad Meyer default : 475a0483764SConrad Meyer assert(0); 4762b9c00cbSConrad Meyer RETURN_ERROR(GENERIC, "impossible"); 477a0483764SConrad Meyer } 478a0483764SConrad Meyer } 479a0483764SConrad Meyer 480a0483764SConrad Meyer size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, 481a0483764SConrad Meyer const void* src, size_t srcSize) 482a0483764SConrad Meyer { 483a0483764SConrad Meyer const BYTE* const istart = (const BYTE* const)src; 484a0483764SConrad Meyer const BYTE* const iend = istart + srcSize; 485a0483764SConrad Meyer const BYTE* ip = istart; 486a0483764SConrad Meyer int nbSeq; 487a0483764SConrad Meyer DEBUGLOG(5, "ZSTD_decodeSeqHeaders"); 488a0483764SConrad Meyer 489a0483764SConrad Meyer /* check */ 490*37f1f268SConrad Meyer RETURN_ERROR_IF(srcSize < MIN_SEQUENCES_SIZE, srcSize_wrong, ""); 491a0483764SConrad Meyer 492a0483764SConrad Meyer /* SeqHead */ 493a0483764SConrad Meyer nbSeq = *ip++; 494a0483764SConrad Meyer if (!nbSeq) { 495a0483764SConrad Meyer *nbSeqPtr=0; 496*37f1f268SConrad Meyer RETURN_ERROR_IF(srcSize != 1, srcSize_wrong, ""); 497a0483764SConrad Meyer return 1; 498a0483764SConrad Meyer } 499a0483764SConrad Meyer if (nbSeq > 0x7F) { 500a0483764SConrad Meyer if (nbSeq == 0xFF) { 501*37f1f268SConrad Meyer RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong, ""); 502a0483764SConrad Meyer nbSeq = MEM_readLE16(ip) + LONGNBSEQ, ip+=2; 503a0483764SConrad Meyer } else { 504*37f1f268SConrad Meyer RETURN_ERROR_IF(ip >= iend, srcSize_wrong, ""); 505a0483764SConrad Meyer nbSeq = ((nbSeq-0x80)<<8) + *ip++; 506a0483764SConrad Meyer } 507a0483764SConrad Meyer } 508a0483764SConrad Meyer *nbSeqPtr = nbSeq; 509a0483764SConrad Meyer 510a0483764SConrad Meyer /* FSE table descriptors */ 511*37f1f268SConrad Meyer RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong, ""); /* minimum possible size: 1 byte for symbol encoding types */ 512a0483764SConrad Meyer { symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6); 513a0483764SConrad Meyer symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3); 514a0483764SConrad Meyer symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3); 515a0483764SConrad Meyer ip++; 516a0483764SConrad Meyer 517a0483764SConrad Meyer /* Build DTables */ 518a0483764SConrad Meyer { size_t const llhSize = ZSTD_buildSeqTable(dctx->entropy.LLTable, &dctx->LLTptr, 519a0483764SConrad Meyer LLtype, MaxLL, LLFSELog, 520a0483764SConrad Meyer ip, iend-ip, 521a0483764SConrad Meyer LL_base, LL_bits, 522a0483764SConrad Meyer LL_defaultDTable, dctx->fseEntropy, 523a0483764SConrad Meyer dctx->ddictIsCold, nbSeq); 524*37f1f268SConrad Meyer RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected, "ZSTD_buildSeqTable failed"); 525a0483764SConrad Meyer ip += llhSize; 526a0483764SConrad Meyer } 527a0483764SConrad Meyer 528a0483764SConrad Meyer { size_t const ofhSize = ZSTD_buildSeqTable(dctx->entropy.OFTable, &dctx->OFTptr, 529a0483764SConrad Meyer OFtype, MaxOff, OffFSELog, 530a0483764SConrad Meyer ip, iend-ip, 531a0483764SConrad Meyer OF_base, OF_bits, 532a0483764SConrad Meyer OF_defaultDTable, dctx->fseEntropy, 533a0483764SConrad Meyer dctx->ddictIsCold, nbSeq); 534*37f1f268SConrad Meyer RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected, "ZSTD_buildSeqTable failed"); 535a0483764SConrad Meyer ip += ofhSize; 536a0483764SConrad Meyer } 537a0483764SConrad Meyer 538a0483764SConrad Meyer { size_t const mlhSize = ZSTD_buildSeqTable(dctx->entropy.MLTable, &dctx->MLTptr, 539a0483764SConrad Meyer MLtype, MaxML, MLFSELog, 540a0483764SConrad Meyer ip, iend-ip, 541a0483764SConrad Meyer ML_base, ML_bits, 542a0483764SConrad Meyer ML_defaultDTable, dctx->fseEntropy, 543a0483764SConrad Meyer dctx->ddictIsCold, nbSeq); 544*37f1f268SConrad Meyer RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected, "ZSTD_buildSeqTable failed"); 545a0483764SConrad Meyer ip += mlhSize; 546a0483764SConrad Meyer } 547a0483764SConrad Meyer } 548a0483764SConrad Meyer 549a0483764SConrad Meyer return ip-istart; 550a0483764SConrad Meyer } 551a0483764SConrad Meyer 552a0483764SConrad Meyer 553a0483764SConrad Meyer typedef struct { 554a0483764SConrad Meyer size_t litLength; 555a0483764SConrad Meyer size_t matchLength; 556a0483764SConrad Meyer size_t offset; 557a0483764SConrad Meyer const BYTE* match; 558a0483764SConrad Meyer } seq_t; 559a0483764SConrad Meyer 560a0483764SConrad Meyer typedef struct { 561a0483764SConrad Meyer size_t state; 562a0483764SConrad Meyer const ZSTD_seqSymbol* table; 563a0483764SConrad Meyer } ZSTD_fseState; 564a0483764SConrad Meyer 565a0483764SConrad Meyer typedef struct { 566a0483764SConrad Meyer BIT_DStream_t DStream; 567a0483764SConrad Meyer ZSTD_fseState stateLL; 568a0483764SConrad Meyer ZSTD_fseState stateOffb; 569a0483764SConrad Meyer ZSTD_fseState stateML; 570a0483764SConrad Meyer size_t prevOffset[ZSTD_REP_NUM]; 571a0483764SConrad Meyer const BYTE* prefixStart; 572a0483764SConrad Meyer const BYTE* dictEnd; 573a0483764SConrad Meyer size_t pos; 574a0483764SConrad Meyer } seqState_t; 575a0483764SConrad Meyer 5769cbefe25SConrad Meyer /*! ZSTD_overlapCopy8() : 5779cbefe25SConrad Meyer * Copies 8 bytes from ip to op and updates op and ip where ip <= op. 5789cbefe25SConrad Meyer * If the offset is < 8 then the offset is spread to at least 8 bytes. 5799cbefe25SConrad Meyer * 5809cbefe25SConrad Meyer * Precondition: *ip <= *op 5819cbefe25SConrad Meyer * Postcondition: *op - *op >= 8 5829cbefe25SConrad Meyer */ 583*37f1f268SConrad Meyer HINT_INLINE void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) { 5849cbefe25SConrad Meyer assert(*ip <= *op); 5859cbefe25SConrad Meyer if (offset < 8) { 5869cbefe25SConrad Meyer /* close range match, overlap */ 5879cbefe25SConrad Meyer static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */ 5889cbefe25SConrad Meyer static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */ 5899cbefe25SConrad Meyer int const sub2 = dec64table[offset]; 5909cbefe25SConrad Meyer (*op)[0] = (*ip)[0]; 5919cbefe25SConrad Meyer (*op)[1] = (*ip)[1]; 5929cbefe25SConrad Meyer (*op)[2] = (*ip)[2]; 5939cbefe25SConrad Meyer (*op)[3] = (*ip)[3]; 5949cbefe25SConrad Meyer *ip += dec32table[offset]; 5959cbefe25SConrad Meyer ZSTD_copy4(*op+4, *ip); 5969cbefe25SConrad Meyer *ip -= sub2; 5979cbefe25SConrad Meyer } else { 5989cbefe25SConrad Meyer ZSTD_copy8(*op, *ip); 5999cbefe25SConrad Meyer } 6009cbefe25SConrad Meyer *ip += 8; 6019cbefe25SConrad Meyer *op += 8; 6029cbefe25SConrad Meyer assert(*op - *ip >= 8); 6039cbefe25SConrad Meyer } 604a0483764SConrad Meyer 6059cbefe25SConrad Meyer /*! ZSTD_safecopy() : 6069cbefe25SConrad Meyer * Specialized version of memcpy() that is allowed to READ up to WILDCOPY_OVERLENGTH past the input buffer 6079cbefe25SConrad Meyer * and write up to 16 bytes past oend_w (op >= oend_w is allowed). 6089cbefe25SConrad Meyer * This function is only called in the uncommon case where the sequence is near the end of the block. It 6099cbefe25SConrad Meyer * should be fast for a single long sequence, but can be slow for several short sequences. 6109cbefe25SConrad Meyer * 6119cbefe25SConrad Meyer * @param ovtype controls the overlap detection 6129cbefe25SConrad Meyer * - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart. 6139cbefe25SConrad Meyer * - ZSTD_overlap_src_before_dst: The src and dst may overlap and may be any distance apart. 6149cbefe25SConrad Meyer * The src buffer must be before the dst buffer. 6159cbefe25SConrad Meyer */ 6169cbefe25SConrad Meyer static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) { 6179cbefe25SConrad Meyer ptrdiff_t const diff = op - ip; 6189cbefe25SConrad Meyer BYTE* const oend = op + length; 6199cbefe25SConrad Meyer 6209cbefe25SConrad Meyer assert((ovtype == ZSTD_no_overlap && (diff <= -8 || diff >= 8 || op >= oend_w)) || 6219cbefe25SConrad Meyer (ovtype == ZSTD_overlap_src_before_dst && diff >= 0)); 6229cbefe25SConrad Meyer 6239cbefe25SConrad Meyer if (length < 8) { 6249cbefe25SConrad Meyer /* Handle short lengths. */ 6259cbefe25SConrad Meyer while (op < oend) *op++ = *ip++; 6269cbefe25SConrad Meyer return; 6279cbefe25SConrad Meyer } 6289cbefe25SConrad Meyer if (ovtype == ZSTD_overlap_src_before_dst) { 6299cbefe25SConrad Meyer /* Copy 8 bytes and ensure the offset >= 8 when there can be overlap. */ 6309cbefe25SConrad Meyer assert(length >= 8); 6319cbefe25SConrad Meyer ZSTD_overlapCopy8(&op, &ip, diff); 6329cbefe25SConrad Meyer assert(op - ip >= 8); 6339cbefe25SConrad Meyer assert(op <= oend); 6349cbefe25SConrad Meyer } 6359cbefe25SConrad Meyer 6369cbefe25SConrad Meyer if (oend <= oend_w) { 6379cbefe25SConrad Meyer /* No risk of overwrite. */ 6389cbefe25SConrad Meyer ZSTD_wildcopy(op, ip, length, ovtype); 6399cbefe25SConrad Meyer return; 6409cbefe25SConrad Meyer } 6419cbefe25SConrad Meyer if (op <= oend_w) { 6429cbefe25SConrad Meyer /* Wildcopy until we get close to the end. */ 6439cbefe25SConrad Meyer assert(oend > oend_w); 6449cbefe25SConrad Meyer ZSTD_wildcopy(op, ip, oend_w - op, ovtype); 6459cbefe25SConrad Meyer ip += oend_w - op; 6469cbefe25SConrad Meyer op = oend_w; 6479cbefe25SConrad Meyer } 6489cbefe25SConrad Meyer /* Handle the leftovers. */ 6499cbefe25SConrad Meyer while (op < oend) *op++ = *ip++; 6509cbefe25SConrad Meyer } 6519cbefe25SConrad Meyer 6529cbefe25SConrad Meyer /* ZSTD_execSequenceEnd(): 6539cbefe25SConrad Meyer * This version handles cases that are near the end of the output buffer. It requires 6549cbefe25SConrad Meyer * more careful checks to make sure there is no overflow. By separating out these hard 6559cbefe25SConrad Meyer * and unlikely cases, we can speed up the common cases. 6569cbefe25SConrad Meyer * 6579cbefe25SConrad Meyer * NOTE: This function needs to be fast for a single long sequence, but doesn't need 6589cbefe25SConrad Meyer * to be optimized for many small sequences, since those fall into ZSTD_execSequence(). 6599cbefe25SConrad Meyer */ 660a0483764SConrad Meyer FORCE_NOINLINE 6619cbefe25SConrad Meyer size_t ZSTD_execSequenceEnd(BYTE* op, 662a0483764SConrad Meyer BYTE* const oend, seq_t sequence, 663a0483764SConrad Meyer const BYTE** litPtr, const BYTE* const litLimit, 6649cbefe25SConrad Meyer const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd) 665a0483764SConrad Meyer { 666a0483764SConrad Meyer BYTE* const oLitEnd = op + sequence.litLength; 667a0483764SConrad Meyer size_t const sequenceLength = sequence.litLength + sequence.matchLength; 668a0483764SConrad Meyer const BYTE* const iLitEnd = *litPtr + sequence.litLength; 669a0483764SConrad Meyer const BYTE* match = oLitEnd - sequence.offset; 6709cbefe25SConrad Meyer BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; 671a0483764SConrad Meyer 672*37f1f268SConrad Meyer /* bounds checks : careful of address space overflow in 32-bit mode */ 673*37f1f268SConrad Meyer RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer"); 674*37f1f268SConrad Meyer RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer"); 675*37f1f268SConrad Meyer assert(op < op + sequenceLength); 676*37f1f268SConrad Meyer assert(oLitEnd < op + sequenceLength); 677a0483764SConrad Meyer 678a0483764SConrad Meyer /* copy literals */ 6799cbefe25SConrad Meyer ZSTD_safecopy(op, oend_w, *litPtr, sequence.litLength, ZSTD_no_overlap); 6809cbefe25SConrad Meyer op = oLitEnd; 6819cbefe25SConrad Meyer *litPtr = iLitEnd; 682a0483764SConrad Meyer 683a0483764SConrad Meyer /* copy Match */ 6849cbefe25SConrad Meyer if (sequence.offset > (size_t)(oLitEnd - prefixStart)) { 685a0483764SConrad Meyer /* offset beyond prefix */ 686*37f1f268SConrad Meyer RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, ""); 6879cbefe25SConrad Meyer match = dictEnd - (prefixStart-match); 688a0483764SConrad Meyer if (match + sequence.matchLength <= dictEnd) { 689a0483764SConrad Meyer memmove(oLitEnd, match, sequence.matchLength); 690a0483764SConrad Meyer return sequenceLength; 691a0483764SConrad Meyer } 692a0483764SConrad Meyer /* span extDict & currentPrefixSegment */ 693a0483764SConrad Meyer { size_t const length1 = dictEnd - match; 694a0483764SConrad Meyer memmove(oLitEnd, match, length1); 695a0483764SConrad Meyer op = oLitEnd + length1; 696a0483764SConrad Meyer sequence.matchLength -= length1; 6979cbefe25SConrad Meyer match = prefixStart; 698a0483764SConrad Meyer } } 6999cbefe25SConrad Meyer ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst); 700a0483764SConrad Meyer return sequenceLength; 701a0483764SConrad Meyer } 702a0483764SConrad Meyer 703a0483764SConrad Meyer HINT_INLINE 704a0483764SConrad Meyer size_t ZSTD_execSequence(BYTE* op, 705a0483764SConrad Meyer BYTE* const oend, seq_t sequence, 706a0483764SConrad Meyer const BYTE** litPtr, const BYTE* const litLimit, 707a0483764SConrad Meyer const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd) 708a0483764SConrad Meyer { 709a0483764SConrad Meyer BYTE* const oLitEnd = op + sequence.litLength; 710a0483764SConrad Meyer size_t const sequenceLength = sequence.litLength + sequence.matchLength; 711a0483764SConrad Meyer BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */ 712*37f1f268SConrad Meyer BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; /* risk : address space underflow on oend=NULL */ 713a0483764SConrad Meyer const BYTE* const iLitEnd = *litPtr + sequence.litLength; 714a0483764SConrad Meyer const BYTE* match = oLitEnd - sequence.offset; 715a0483764SConrad Meyer 716*37f1f268SConrad Meyer assert(op != NULL /* Precondition */); 717*37f1f268SConrad Meyer assert(oend_w < oend /* No underflow */); 718*37f1f268SConrad Meyer /* Handle edge cases in a slow path: 719*37f1f268SConrad Meyer * - Read beyond end of literals 720*37f1f268SConrad Meyer * - Match end is within WILDCOPY_OVERLIMIT of oend 721*37f1f268SConrad Meyer * - 32-bit mode and the match length overflows 722*37f1f268SConrad Meyer */ 723*37f1f268SConrad Meyer if (UNLIKELY( 724*37f1f268SConrad Meyer iLitEnd > litLimit || 725*37f1f268SConrad Meyer oMatchEnd > oend_w || 726*37f1f268SConrad Meyer (MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH))) 7279cbefe25SConrad Meyer return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd); 728a0483764SConrad Meyer 7299cbefe25SConrad Meyer /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */ 730*37f1f268SConrad Meyer assert(op <= oLitEnd /* No overflow */); 731*37f1f268SConrad Meyer assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */); 732*37f1f268SConrad Meyer assert(oMatchEnd <= oend /* No underflow */); 7339cbefe25SConrad Meyer assert(iLitEnd <= litLimit /* Literal length is in bounds */); 7349cbefe25SConrad Meyer assert(oLitEnd <= oend_w /* Can wildcopy literals */); 7359cbefe25SConrad Meyer assert(oMatchEnd <= oend_w /* Can wildcopy matches */); 7369cbefe25SConrad Meyer 7379cbefe25SConrad Meyer /* Copy Literals: 7389cbefe25SConrad Meyer * Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9. 7399cbefe25SConrad Meyer * We likely don't need the full 32-byte wildcopy. 7409cbefe25SConrad Meyer */ 7419cbefe25SConrad Meyer assert(WILDCOPY_OVERLENGTH >= 16); 7429cbefe25SConrad Meyer ZSTD_copy16(op, (*litPtr)); 743*37f1f268SConrad Meyer if (UNLIKELY(sequence.litLength > 16)) { 7449cbefe25SConrad Meyer ZSTD_wildcopy(op+16, (*litPtr)+16, sequence.litLength-16, ZSTD_no_overlap); 7459cbefe25SConrad Meyer } 746a0483764SConrad Meyer op = oLitEnd; 747a0483764SConrad Meyer *litPtr = iLitEnd; /* update for next sequence */ 748a0483764SConrad Meyer 7499cbefe25SConrad Meyer /* Copy Match */ 750a0483764SConrad Meyer if (sequence.offset > (size_t)(oLitEnd - prefixStart)) { 751a0483764SConrad Meyer /* offset beyond prefix -> go into extDict */ 752*37f1f268SConrad Meyer RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, ""); 753a0483764SConrad Meyer match = dictEnd + (match - prefixStart); 754a0483764SConrad Meyer if (match + sequence.matchLength <= dictEnd) { 755a0483764SConrad Meyer memmove(oLitEnd, match, sequence.matchLength); 756a0483764SConrad Meyer return sequenceLength; 757a0483764SConrad Meyer } 758a0483764SConrad Meyer /* span extDict & currentPrefixSegment */ 759a0483764SConrad Meyer { size_t const length1 = dictEnd - match; 760a0483764SConrad Meyer memmove(oLitEnd, match, length1); 761a0483764SConrad Meyer op = oLitEnd + length1; 762a0483764SConrad Meyer sequence.matchLength -= length1; 763a0483764SConrad Meyer match = prefixStart; 764a0483764SConrad Meyer } } 7659cbefe25SConrad Meyer /* Match within prefix of 1 or more bytes */ 7669cbefe25SConrad Meyer assert(op <= oMatchEnd); 7679cbefe25SConrad Meyer assert(oMatchEnd <= oend_w); 7689cbefe25SConrad Meyer assert(match >= prefixStart); 7699cbefe25SConrad Meyer assert(sequence.matchLength >= 1); 770a0483764SConrad Meyer 7719cbefe25SConrad Meyer /* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy 7729cbefe25SConrad Meyer * without overlap checking. 7739cbefe25SConrad Meyer */ 774*37f1f268SConrad Meyer if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) { 7759cbefe25SConrad Meyer /* We bet on a full wildcopy for matches, since we expect matches to be 7769cbefe25SConrad Meyer * longer than literals (in general). In silesia, ~10% of matches are longer 7779cbefe25SConrad Meyer * than 16 bytes. 7789cbefe25SConrad Meyer */ 7799cbefe25SConrad Meyer ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap); 780a0483764SConrad Meyer return sequenceLength; 781a0483764SConrad Meyer } 7829cbefe25SConrad Meyer assert(sequence.offset < WILDCOPY_VECLEN); 783a0483764SConrad Meyer 7849cbefe25SConrad Meyer /* Copy 8 bytes and spread the offset to be >= 8. */ 7859cbefe25SConrad Meyer ZSTD_overlapCopy8(&op, &match, sequence.offset); 786a0483764SConrad Meyer 7879cbefe25SConrad Meyer /* If the match length is > 8 bytes, then continue with the wildcopy. */ 7889cbefe25SConrad Meyer if (sequence.matchLength > 8) { 7899cbefe25SConrad Meyer assert(op < oMatchEnd); 7909cbefe25SConrad Meyer ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst); 791a0483764SConrad Meyer } 792a0483764SConrad Meyer return sequenceLength; 793a0483764SConrad Meyer } 794a0483764SConrad Meyer 795a0483764SConrad Meyer static void 796a0483764SConrad Meyer ZSTD_initFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqSymbol* dt) 797a0483764SConrad Meyer { 798a0483764SConrad Meyer const void* ptr = dt; 799a0483764SConrad Meyer const ZSTD_seqSymbol_header* const DTableH = (const ZSTD_seqSymbol_header*)ptr; 800a0483764SConrad Meyer DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog); 801a0483764SConrad Meyer DEBUGLOG(6, "ZSTD_initFseState : val=%u using %u bits", 802a0483764SConrad Meyer (U32)DStatePtr->state, DTableH->tableLog); 803a0483764SConrad Meyer BIT_reloadDStream(bitD); 804a0483764SConrad Meyer DStatePtr->table = dt + 1; 805a0483764SConrad Meyer } 806a0483764SConrad Meyer 807a0483764SConrad Meyer FORCE_INLINE_TEMPLATE void 808a0483764SConrad Meyer ZSTD_updateFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD) 809a0483764SConrad Meyer { 810a0483764SConrad Meyer ZSTD_seqSymbol const DInfo = DStatePtr->table[DStatePtr->state]; 811a0483764SConrad Meyer U32 const nbBits = DInfo.nbBits; 812a0483764SConrad Meyer size_t const lowBits = BIT_readBits(bitD, nbBits); 813a0483764SConrad Meyer DStatePtr->state = DInfo.nextState + lowBits; 814a0483764SConrad Meyer } 815a0483764SConrad Meyer 816*37f1f268SConrad Meyer FORCE_INLINE_TEMPLATE void 817*37f1f268SConrad Meyer ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, ZSTD_seqSymbol const DInfo) 818*37f1f268SConrad Meyer { 819*37f1f268SConrad Meyer U32 const nbBits = DInfo.nbBits; 820*37f1f268SConrad Meyer size_t const lowBits = BIT_readBits(bitD, nbBits); 821*37f1f268SConrad Meyer DStatePtr->state = DInfo.nextState + lowBits; 822*37f1f268SConrad Meyer } 823*37f1f268SConrad Meyer 824a0483764SConrad Meyer /* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum 825a0483764SConrad Meyer * offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1) 826a0483764SConrad Meyer * bits before reloading. This value is the maximum number of bytes we read 8272b9c00cbSConrad Meyer * after reloading when we are decoding long offsets. 828a0483764SConrad Meyer */ 829a0483764SConrad Meyer #define LONG_OFFSETS_MAX_EXTRA_BITS_32 \ 830a0483764SConrad Meyer (ZSTD_WINDOWLOG_MAX_32 > STREAM_ACCUMULATOR_MIN_32 \ 831a0483764SConrad Meyer ? ZSTD_WINDOWLOG_MAX_32 - STREAM_ACCUMULATOR_MIN_32 \ 832a0483764SConrad Meyer : 0) 833a0483764SConrad Meyer 834a0483764SConrad Meyer typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e; 835*37f1f268SConrad Meyer typedef enum { ZSTD_p_noPrefetch=0, ZSTD_p_prefetch=1 } ZSTD_prefetch_e; 836a0483764SConrad Meyer 837a0483764SConrad Meyer FORCE_INLINE_TEMPLATE seq_t 838*37f1f268SConrad Meyer ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, const ZSTD_prefetch_e prefetch) 839a0483764SConrad Meyer { 840a0483764SConrad Meyer seq_t seq; 841*37f1f268SConrad Meyer ZSTD_seqSymbol const llDInfo = seqState->stateLL.table[seqState->stateLL.state]; 842*37f1f268SConrad Meyer ZSTD_seqSymbol const mlDInfo = seqState->stateML.table[seqState->stateML.state]; 843*37f1f268SConrad Meyer ZSTD_seqSymbol const ofDInfo = seqState->stateOffb.table[seqState->stateOffb.state]; 844*37f1f268SConrad Meyer U32 const llBase = llDInfo.baseValue; 845*37f1f268SConrad Meyer U32 const mlBase = mlDInfo.baseValue; 846*37f1f268SConrad Meyer U32 const ofBase = ofDInfo.baseValue; 847*37f1f268SConrad Meyer BYTE const llBits = llDInfo.nbAdditionalBits; 848*37f1f268SConrad Meyer BYTE const mlBits = mlDInfo.nbAdditionalBits; 849*37f1f268SConrad Meyer BYTE const ofBits = ofDInfo.nbAdditionalBits; 850*37f1f268SConrad Meyer BYTE const totalBits = llBits+mlBits+ofBits; 851a0483764SConrad Meyer 852a0483764SConrad Meyer /* sequence */ 853a0483764SConrad Meyer { size_t offset; 854*37f1f268SConrad Meyer if (ofBits > 1) { 855a0483764SConrad Meyer ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1); 856a0483764SConrad Meyer ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5); 857a0483764SConrad Meyer assert(ofBits <= MaxOff); 858a0483764SConrad Meyer if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) { 859a0483764SConrad Meyer U32 const extraBits = ofBits - MIN(ofBits, 32 - seqState->DStream.bitsConsumed); 860a0483764SConrad Meyer offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits); 861a0483764SConrad Meyer BIT_reloadDStream(&seqState->DStream); 862a0483764SConrad Meyer if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits); 863a0483764SConrad Meyer assert(extraBits <= LONG_OFFSETS_MAX_EXTRA_BITS_32); /* to avoid another reload */ 864a0483764SConrad Meyer } else { 865a0483764SConrad Meyer offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */ 866a0483764SConrad Meyer if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); 867a0483764SConrad Meyer } 868*37f1f268SConrad Meyer seqState->prevOffset[2] = seqState->prevOffset[1]; 869*37f1f268SConrad Meyer seqState->prevOffset[1] = seqState->prevOffset[0]; 870*37f1f268SConrad Meyer seqState->prevOffset[0] = offset; 871*37f1f268SConrad Meyer } else { 872*37f1f268SConrad Meyer U32 const ll0 = (llBase == 0); 873*37f1f268SConrad Meyer if (LIKELY((ofBits == 0))) { 874*37f1f268SConrad Meyer if (LIKELY(!ll0)) 875*37f1f268SConrad Meyer offset = seqState->prevOffset[0]; 876*37f1f268SConrad Meyer else { 877*37f1f268SConrad Meyer offset = seqState->prevOffset[1]; 878*37f1f268SConrad Meyer seqState->prevOffset[1] = seqState->prevOffset[0]; 879*37f1f268SConrad Meyer seqState->prevOffset[0] = offset; 880a0483764SConrad Meyer } 881*37f1f268SConrad Meyer } else { 882*37f1f268SConrad Meyer offset = ofBase + ll0 + BIT_readBitsFast(&seqState->DStream, 1); 883*37f1f268SConrad Meyer { size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset]; 884a0483764SConrad Meyer temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */ 885a0483764SConrad Meyer if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1]; 886a0483764SConrad Meyer seqState->prevOffset[1] = seqState->prevOffset[0]; 887a0483764SConrad Meyer seqState->prevOffset[0] = offset = temp; 888*37f1f268SConrad Meyer } } } 889a0483764SConrad Meyer seq.offset = offset; 890a0483764SConrad Meyer } 891a0483764SConrad Meyer 892*37f1f268SConrad Meyer seq.matchLength = mlBase; 893*37f1f268SConrad Meyer if (mlBits > 0) 894*37f1f268SConrad Meyer seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/); 895*37f1f268SConrad Meyer 896a0483764SConrad Meyer if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32)) 897a0483764SConrad Meyer BIT_reloadDStream(&seqState->DStream); 898*37f1f268SConrad Meyer if (MEM_64bits() && UNLIKELY(totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog))) 899a0483764SConrad Meyer BIT_reloadDStream(&seqState->DStream); 900a0483764SConrad Meyer /* Ensure there are enough bits to read the rest of data in 64-bit mode. */ 901a0483764SConrad Meyer ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64); 902a0483764SConrad Meyer 903*37f1f268SConrad Meyer seq.litLength = llBase; 904*37f1f268SConrad Meyer if (llBits > 0) 905*37f1f268SConrad Meyer seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/); 906*37f1f268SConrad Meyer 907a0483764SConrad Meyer if (MEM_32bits()) 908a0483764SConrad Meyer BIT_reloadDStream(&seqState->DStream); 909a0483764SConrad Meyer 910a0483764SConrad Meyer DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u", 911a0483764SConrad Meyer (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset); 912a0483764SConrad Meyer 913*37f1f268SConrad Meyer if (prefetch == ZSTD_p_prefetch) { 914*37f1f268SConrad Meyer size_t const pos = seqState->pos + seq.litLength; 915*37f1f268SConrad Meyer const BYTE* const matchBase = (seq.offset > pos) ? seqState->dictEnd : seqState->prefixStart; 916*37f1f268SConrad Meyer seq.match = matchBase + pos - seq.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted. 917*37f1f268SConrad Meyer * No consequence though : no memory access will occur, offset is only used for prefetching */ 918*37f1f268SConrad Meyer seqState->pos = pos + seq.matchLength; 919*37f1f268SConrad Meyer } 920*37f1f268SConrad Meyer 921*37f1f268SConrad Meyer /* ANS state update 922*37f1f268SConrad Meyer * gcc-9.0.0 does 2.5% worse with ZSTD_updateFseStateWithDInfo(). 923*37f1f268SConrad Meyer * clang-9.2.0 does 7% worse with ZSTD_updateFseState(). 924*37f1f268SConrad Meyer * Naturally it seems like ZSTD_updateFseStateWithDInfo() should be the 925*37f1f268SConrad Meyer * better option, so it is the default for other compilers. But, if you 926*37f1f268SConrad Meyer * measure that it is worse, please put up a pull request. 927*37f1f268SConrad Meyer */ 928*37f1f268SConrad Meyer { 929*37f1f268SConrad Meyer #if defined(__GNUC__) && !defined(__clang__) 930*37f1f268SConrad Meyer const int kUseUpdateFseState = 1; 931*37f1f268SConrad Meyer #else 932*37f1f268SConrad Meyer const int kUseUpdateFseState = 0; 933*37f1f268SConrad Meyer #endif 934*37f1f268SConrad Meyer if (kUseUpdateFseState) { 935a0483764SConrad Meyer ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */ 936a0483764SConrad Meyer ZSTD_updateFseState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */ 937a0483764SConrad Meyer if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */ 938a0483764SConrad Meyer ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */ 939*37f1f268SConrad Meyer } else { 940*37f1f268SConrad Meyer ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llDInfo); /* <= 9 bits */ 941*37f1f268SConrad Meyer ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlDInfo); /* <= 9 bits */ 942*37f1f268SConrad Meyer if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */ 943*37f1f268SConrad Meyer ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofDInfo); /* <= 8 bits */ 944*37f1f268SConrad Meyer } 945*37f1f268SConrad Meyer } 946a0483764SConrad Meyer 947a0483764SConrad Meyer return seq; 948a0483764SConrad Meyer } 949a0483764SConrad Meyer 950*37f1f268SConrad Meyer #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION 951*37f1f268SConrad Meyer static int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd) 952*37f1f268SConrad Meyer { 953*37f1f268SConrad Meyer size_t const windowSize = dctx->fParams.windowSize; 954*37f1f268SConrad Meyer /* No dictionary used. */ 955*37f1f268SConrad Meyer if (dctx->dictContentEndForFuzzing == NULL) return 0; 956*37f1f268SConrad Meyer /* Dictionary is our prefix. */ 957*37f1f268SConrad Meyer if (prefixStart == dctx->dictContentBeginForFuzzing) return 1; 958*37f1f268SConrad Meyer /* Dictionary is not our ext-dict. */ 959*37f1f268SConrad Meyer if (dctx->dictEnd != dctx->dictContentEndForFuzzing) return 0; 960*37f1f268SConrad Meyer /* Dictionary is not within our window size. */ 961*37f1f268SConrad Meyer if ((size_t)(oLitEnd - prefixStart) >= windowSize) return 0; 962*37f1f268SConrad Meyer /* Dictionary is active. */ 963*37f1f268SConrad Meyer return 1; 964*37f1f268SConrad Meyer } 965*37f1f268SConrad Meyer 966*37f1f268SConrad Meyer MEM_STATIC void ZSTD_assertValidSequence( 967*37f1f268SConrad Meyer ZSTD_DCtx const* dctx, 968*37f1f268SConrad Meyer BYTE const* op, BYTE const* oend, 969*37f1f268SConrad Meyer seq_t const seq, 970*37f1f268SConrad Meyer BYTE const* prefixStart, BYTE const* virtualStart) 971*37f1f268SConrad Meyer { 972*37f1f268SConrad Meyer size_t const windowSize = dctx->fParams.windowSize; 973*37f1f268SConrad Meyer size_t const sequenceSize = seq.litLength + seq.matchLength; 974*37f1f268SConrad Meyer BYTE const* const oLitEnd = op + seq.litLength; 975*37f1f268SConrad Meyer DEBUGLOG(6, "Checking sequence: litL=%u matchL=%u offset=%u", 976*37f1f268SConrad Meyer (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset); 977*37f1f268SConrad Meyer assert(op <= oend); 978*37f1f268SConrad Meyer assert((size_t)(oend - op) >= sequenceSize); 979*37f1f268SConrad Meyer assert(sequenceSize <= ZSTD_BLOCKSIZE_MAX); 980*37f1f268SConrad Meyer if (ZSTD_dictionaryIsActive(dctx, prefixStart, oLitEnd)) { 981*37f1f268SConrad Meyer size_t const dictSize = (size_t)((char const*)dctx->dictContentEndForFuzzing - (char const*)dctx->dictContentBeginForFuzzing); 982*37f1f268SConrad Meyer /* Offset must be within the dictionary. */ 983*37f1f268SConrad Meyer assert(seq.offset <= (size_t)(oLitEnd - virtualStart)); 984*37f1f268SConrad Meyer assert(seq.offset <= windowSize + dictSize); 985*37f1f268SConrad Meyer } else { 986*37f1f268SConrad Meyer /* Offset must be within our window. */ 987*37f1f268SConrad Meyer assert(seq.offset <= windowSize); 988*37f1f268SConrad Meyer } 989*37f1f268SConrad Meyer } 990*37f1f268SConrad Meyer #endif 991*37f1f268SConrad Meyer 992*37f1f268SConrad Meyer #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG 993a0483764SConrad Meyer FORCE_INLINE_TEMPLATE size_t 9944d3f1eafSConrad Meyer DONT_VECTORIZE 995a0483764SConrad Meyer ZSTD_decompressSequences_body( ZSTD_DCtx* dctx, 996a0483764SConrad Meyer void* dst, size_t maxDstSize, 997a0483764SConrad Meyer const void* seqStart, size_t seqSize, int nbSeq, 998*37f1f268SConrad Meyer const ZSTD_longOffset_e isLongOffset, 999*37f1f268SConrad Meyer const int frame) 1000a0483764SConrad Meyer { 1001a0483764SConrad Meyer const BYTE* ip = (const BYTE*)seqStart; 1002a0483764SConrad Meyer const BYTE* const iend = ip + seqSize; 1003a0483764SConrad Meyer BYTE* const ostart = (BYTE* const)dst; 1004a0483764SConrad Meyer BYTE* const oend = ostart + maxDstSize; 1005a0483764SConrad Meyer BYTE* op = ostart; 1006a0483764SConrad Meyer const BYTE* litPtr = dctx->litPtr; 1007a0483764SConrad Meyer const BYTE* const litEnd = litPtr + dctx->litSize; 1008a0483764SConrad Meyer const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart); 1009a0483764SConrad Meyer const BYTE* const vBase = (const BYTE*) (dctx->virtualStart); 1010a0483764SConrad Meyer const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd); 1011a0483764SConrad Meyer DEBUGLOG(5, "ZSTD_decompressSequences_body"); 1012*37f1f268SConrad Meyer (void)frame; 1013a0483764SConrad Meyer 1014a0483764SConrad Meyer /* Regen sequences */ 1015a0483764SConrad Meyer if (nbSeq) { 1016a0483764SConrad Meyer seqState_t seqState; 1017*37f1f268SConrad Meyer size_t error = 0; 1018a0483764SConrad Meyer dctx->fseEntropy = 1; 1019a0483764SConrad Meyer { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; } 10202b9c00cbSConrad Meyer RETURN_ERROR_IF( 10212b9c00cbSConrad Meyer ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)), 1022*37f1f268SConrad Meyer corruption_detected, ""); 1023a0483764SConrad Meyer ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr); 1024a0483764SConrad Meyer ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr); 1025a0483764SConrad Meyer ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr); 1026*37f1f268SConrad Meyer assert(dst != NULL); 1027a0483764SConrad Meyer 10284d3f1eafSConrad Meyer ZSTD_STATIC_ASSERT( 10294d3f1eafSConrad Meyer BIT_DStream_unfinished < BIT_DStream_completed && 10304d3f1eafSConrad Meyer BIT_DStream_endOfBuffer < BIT_DStream_completed && 10314d3f1eafSConrad Meyer BIT_DStream_completed < BIT_DStream_overflow); 10324d3f1eafSConrad Meyer 1033*37f1f268SConrad Meyer #if defined(__GNUC__) && defined(__x86_64__) 1034*37f1f268SConrad Meyer /* Align the decompression loop to 32 + 16 bytes. 1035*37f1f268SConrad Meyer * 1036*37f1f268SConrad Meyer * zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression 1037*37f1f268SConrad Meyer * speed swings based on the alignment of the decompression loop. This 1038*37f1f268SConrad Meyer * performance swing is caused by parts of the decompression loop falling 1039*37f1f268SConrad Meyer * out of the DSB. The entire decompression loop should fit in the DSB, 1040*37f1f268SConrad Meyer * when it can't we get much worse performance. You can measure if you've 1041*37f1f268SConrad Meyer * hit the good case or the bad case with this perf command for some 1042*37f1f268SConrad Meyer * compressed file test.zst: 1043*37f1f268SConrad Meyer * 1044*37f1f268SConrad Meyer * perf stat -e cycles -e instructions -e idq.all_dsb_cycles_any_uops \ 1045*37f1f268SConrad Meyer * -e idq.all_mite_cycles_any_uops -- ./zstd -tq test.zst 1046*37f1f268SConrad Meyer * 1047*37f1f268SConrad Meyer * If you see most cycles served out of the MITE you've hit the bad case. 1048*37f1f268SConrad Meyer * If you see most cycles served out of the DSB you've hit the good case. 1049*37f1f268SConrad Meyer * If it is pretty even then you may be in an okay case. 1050*37f1f268SConrad Meyer * 1051*37f1f268SConrad Meyer * I've been able to reproduce this issue on the following CPUs: 1052*37f1f268SConrad Meyer * - Kabylake: Macbook Pro (15-inch, 2019) 2.4 GHz Intel Core i9 1053*37f1f268SConrad Meyer * Use Instruments->Counters to get DSB/MITE cycles. 1054*37f1f268SConrad Meyer * I never got performance swings, but I was able to 1055*37f1f268SConrad Meyer * go from the good case of mostly DSB to half of the 1056*37f1f268SConrad Meyer * cycles served from MITE. 1057*37f1f268SConrad Meyer * - Coffeelake: Intel i9-9900k 1058*37f1f268SConrad Meyer * 1059*37f1f268SConrad Meyer * I haven't been able to reproduce the instability or DSB misses on any 1060*37f1f268SConrad Meyer * of the following CPUS: 1061*37f1f268SConrad Meyer * - Haswell 1062*37f1f268SConrad Meyer * - Broadwell: Intel(R) Xeon(R) CPU E5-2680 v4 @ 2.40GH 1063*37f1f268SConrad Meyer * - Skylake 1064*37f1f268SConrad Meyer * 1065*37f1f268SConrad Meyer * If you are seeing performance stability this script can help test. 1066*37f1f268SConrad Meyer * It tests on 4 commits in zstd where I saw performance change. 1067*37f1f268SConrad Meyer * 1068*37f1f268SConrad Meyer * https://gist.github.com/terrelln/9889fc06a423fd5ca6e99351564473f4 1069*37f1f268SConrad Meyer */ 1070*37f1f268SConrad Meyer __asm__(".p2align 5"); 1071*37f1f268SConrad Meyer __asm__("nop"); 1072*37f1f268SConrad Meyer __asm__(".p2align 4"); 1073*37f1f268SConrad Meyer #endif 1074*37f1f268SConrad Meyer for ( ; ; ) { 1075*37f1f268SConrad Meyer seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_noPrefetch); 1076a0483764SConrad Meyer size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd); 1077*37f1f268SConrad Meyer #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) 1078*37f1f268SConrad Meyer assert(!ZSTD_isError(oneSeqSize)); 1079*37f1f268SConrad Meyer if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase); 1080*37f1f268SConrad Meyer #endif 1081a0483764SConrad Meyer DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize); 1082*37f1f268SConrad Meyer BIT_reloadDStream(&(seqState.DStream)); 1083*37f1f268SConrad Meyer /* gcc and clang both don't like early returns in this loop. 1084*37f1f268SConrad Meyer * gcc doesn't like early breaks either. 1085*37f1f268SConrad Meyer * Instead save an error and report it at the end. 1086*37f1f268SConrad Meyer * When there is an error, don't increment op, so we don't 1087*37f1f268SConrad Meyer * overwrite. 1088*37f1f268SConrad Meyer */ 1089*37f1f268SConrad Meyer if (UNLIKELY(ZSTD_isError(oneSeqSize))) error = oneSeqSize; 1090*37f1f268SConrad Meyer else op += oneSeqSize; 1091*37f1f268SConrad Meyer if (UNLIKELY(!--nbSeq)) break; 1092*37f1f268SConrad Meyer } 1093a0483764SConrad Meyer 1094a0483764SConrad Meyer /* check if reached exact end */ 1095a0483764SConrad Meyer DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq); 1096*37f1f268SConrad Meyer if (ZSTD_isError(error)) return error; 1097*37f1f268SConrad Meyer RETURN_ERROR_IF(nbSeq, corruption_detected, ""); 1098*37f1f268SConrad Meyer RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, ""); 1099a0483764SConrad Meyer /* save reps for next block */ 1100a0483764SConrad Meyer { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); } 1101a0483764SConrad Meyer } 1102a0483764SConrad Meyer 1103a0483764SConrad Meyer /* last literal segment */ 1104a0483764SConrad Meyer { size_t const lastLLSize = litEnd - litPtr; 1105*37f1f268SConrad Meyer RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, ""); 1106*37f1f268SConrad Meyer if (op != NULL) { 1107a0483764SConrad Meyer memcpy(op, litPtr, lastLLSize); 1108a0483764SConrad Meyer op += lastLLSize; 1109a0483764SConrad Meyer } 1110*37f1f268SConrad Meyer } 1111a0483764SConrad Meyer 1112a0483764SConrad Meyer return op-ostart; 1113a0483764SConrad Meyer } 1114a0483764SConrad Meyer 1115a0483764SConrad Meyer static size_t 1116a0483764SConrad Meyer ZSTD_decompressSequences_default(ZSTD_DCtx* dctx, 1117a0483764SConrad Meyer void* dst, size_t maxDstSize, 1118a0483764SConrad Meyer const void* seqStart, size_t seqSize, int nbSeq, 1119*37f1f268SConrad Meyer const ZSTD_longOffset_e isLongOffset, 1120*37f1f268SConrad Meyer const int frame) 1121a0483764SConrad Meyer { 1122*37f1f268SConrad Meyer return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); 1123a0483764SConrad Meyer } 1124a0483764SConrad Meyer #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ 1125a0483764SConrad Meyer 1126a0483764SConrad Meyer #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT 1127a0483764SConrad Meyer FORCE_INLINE_TEMPLATE size_t 1128a0483764SConrad Meyer ZSTD_decompressSequencesLong_body( 1129a0483764SConrad Meyer ZSTD_DCtx* dctx, 1130a0483764SConrad Meyer void* dst, size_t maxDstSize, 1131a0483764SConrad Meyer const void* seqStart, size_t seqSize, int nbSeq, 1132*37f1f268SConrad Meyer const ZSTD_longOffset_e isLongOffset, 1133*37f1f268SConrad Meyer const int frame) 1134a0483764SConrad Meyer { 1135a0483764SConrad Meyer const BYTE* ip = (const BYTE*)seqStart; 1136a0483764SConrad Meyer const BYTE* const iend = ip + seqSize; 1137a0483764SConrad Meyer BYTE* const ostart = (BYTE* const)dst; 1138a0483764SConrad Meyer BYTE* const oend = ostart + maxDstSize; 1139a0483764SConrad Meyer BYTE* op = ostart; 1140a0483764SConrad Meyer const BYTE* litPtr = dctx->litPtr; 1141a0483764SConrad Meyer const BYTE* const litEnd = litPtr + dctx->litSize; 1142a0483764SConrad Meyer const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart); 1143a0483764SConrad Meyer const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart); 1144a0483764SConrad Meyer const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd); 1145*37f1f268SConrad Meyer (void)frame; 1146a0483764SConrad Meyer 1147a0483764SConrad Meyer /* Regen sequences */ 1148a0483764SConrad Meyer if (nbSeq) { 1149a0483764SConrad Meyer #define STORED_SEQS 4 1150a0483764SConrad Meyer #define STORED_SEQS_MASK (STORED_SEQS-1) 1151a0483764SConrad Meyer #define ADVANCED_SEQS 4 1152a0483764SConrad Meyer seq_t sequences[STORED_SEQS]; 1153a0483764SConrad Meyer int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS); 1154a0483764SConrad Meyer seqState_t seqState; 1155a0483764SConrad Meyer int seqNb; 1156a0483764SConrad Meyer dctx->fseEntropy = 1; 1157a0483764SConrad Meyer { int i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; } 1158a0483764SConrad Meyer seqState.prefixStart = prefixStart; 1159a0483764SConrad Meyer seqState.pos = (size_t)(op-prefixStart); 1160a0483764SConrad Meyer seqState.dictEnd = dictEnd; 1161*37f1f268SConrad Meyer assert(dst != NULL); 1162a0483764SConrad Meyer assert(iend >= ip); 11632b9c00cbSConrad Meyer RETURN_ERROR_IF( 11642b9c00cbSConrad Meyer ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)), 1165*37f1f268SConrad Meyer corruption_detected, ""); 1166a0483764SConrad Meyer ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr); 1167a0483764SConrad Meyer ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr); 1168a0483764SConrad Meyer ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr); 1169a0483764SConrad Meyer 1170a0483764SConrad Meyer /* prepare in advance */ 1171a0483764SConrad Meyer for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNb<seqAdvance); seqNb++) { 1172*37f1f268SConrad Meyer sequences[seqNb] = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_prefetch); 1173a0483764SConrad Meyer PREFETCH_L1(sequences[seqNb].match); PREFETCH_L1(sequences[seqNb].match + sequences[seqNb].matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */ 1174a0483764SConrad Meyer } 1175*37f1f268SConrad Meyer RETURN_ERROR_IF(seqNb<seqAdvance, corruption_detected, ""); 1176a0483764SConrad Meyer 1177a0483764SConrad Meyer /* decode and decompress */ 1178a0483764SConrad Meyer for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb<nbSeq) ; seqNb++) { 1179*37f1f268SConrad Meyer seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_prefetch); 11809cbefe25SConrad Meyer size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd); 1181*37f1f268SConrad Meyer #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) 1182*37f1f268SConrad Meyer assert(!ZSTD_isError(oneSeqSize)); 1183*37f1f268SConrad Meyer if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart); 1184*37f1f268SConrad Meyer #endif 1185a0483764SConrad Meyer if (ZSTD_isError(oneSeqSize)) return oneSeqSize; 1186a0483764SConrad Meyer PREFETCH_L1(sequence.match); PREFETCH_L1(sequence.match + sequence.matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */ 1187a0483764SConrad Meyer sequences[seqNb & STORED_SEQS_MASK] = sequence; 1188a0483764SConrad Meyer op += oneSeqSize; 1189a0483764SConrad Meyer } 1190*37f1f268SConrad Meyer RETURN_ERROR_IF(seqNb<nbSeq, corruption_detected, ""); 1191a0483764SConrad Meyer 1192a0483764SConrad Meyer /* finish queue */ 1193a0483764SConrad Meyer seqNb -= seqAdvance; 1194a0483764SConrad Meyer for ( ; seqNb<nbSeq ; seqNb++) { 11959cbefe25SConrad Meyer size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[seqNb&STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd); 1196*37f1f268SConrad Meyer #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) 1197*37f1f268SConrad Meyer assert(!ZSTD_isError(oneSeqSize)); 1198*37f1f268SConrad Meyer if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart); 1199*37f1f268SConrad Meyer #endif 1200a0483764SConrad Meyer if (ZSTD_isError(oneSeqSize)) return oneSeqSize; 1201a0483764SConrad Meyer op += oneSeqSize; 1202a0483764SConrad Meyer } 1203a0483764SConrad Meyer 1204a0483764SConrad Meyer /* save reps for next block */ 1205a0483764SConrad Meyer { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); } 1206a0483764SConrad Meyer } 1207a0483764SConrad Meyer 1208a0483764SConrad Meyer /* last literal segment */ 1209a0483764SConrad Meyer { size_t const lastLLSize = litEnd - litPtr; 1210*37f1f268SConrad Meyer RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, ""); 1211*37f1f268SConrad Meyer if (op != NULL) { 1212a0483764SConrad Meyer memcpy(op, litPtr, lastLLSize); 1213a0483764SConrad Meyer op += lastLLSize; 1214a0483764SConrad Meyer } 1215*37f1f268SConrad Meyer } 1216a0483764SConrad Meyer 1217a0483764SConrad Meyer return op-ostart; 1218a0483764SConrad Meyer } 1219a0483764SConrad Meyer 1220a0483764SConrad Meyer static size_t 1221a0483764SConrad Meyer ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx, 1222a0483764SConrad Meyer void* dst, size_t maxDstSize, 1223a0483764SConrad Meyer const void* seqStart, size_t seqSize, int nbSeq, 1224*37f1f268SConrad Meyer const ZSTD_longOffset_e isLongOffset, 1225*37f1f268SConrad Meyer const int frame) 1226a0483764SConrad Meyer { 1227*37f1f268SConrad Meyer return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); 1228a0483764SConrad Meyer } 1229a0483764SConrad Meyer #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */ 1230a0483764SConrad Meyer 1231a0483764SConrad Meyer 1232a0483764SConrad Meyer 1233a0483764SConrad Meyer #if DYNAMIC_BMI2 1234a0483764SConrad Meyer 1235a0483764SConrad Meyer #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG 1236a0483764SConrad Meyer static TARGET_ATTRIBUTE("bmi2") size_t 12374d3f1eafSConrad Meyer DONT_VECTORIZE 1238a0483764SConrad Meyer ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx, 1239a0483764SConrad Meyer void* dst, size_t maxDstSize, 1240a0483764SConrad Meyer const void* seqStart, size_t seqSize, int nbSeq, 1241*37f1f268SConrad Meyer const ZSTD_longOffset_e isLongOffset, 1242*37f1f268SConrad Meyer const int frame) 1243a0483764SConrad Meyer { 1244*37f1f268SConrad Meyer return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); 1245a0483764SConrad Meyer } 1246a0483764SConrad Meyer #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ 1247a0483764SConrad Meyer 1248a0483764SConrad Meyer #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT 1249a0483764SConrad Meyer static TARGET_ATTRIBUTE("bmi2") size_t 1250a0483764SConrad Meyer ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx, 1251a0483764SConrad Meyer void* dst, size_t maxDstSize, 1252a0483764SConrad Meyer const void* seqStart, size_t seqSize, int nbSeq, 1253*37f1f268SConrad Meyer const ZSTD_longOffset_e isLongOffset, 1254*37f1f268SConrad Meyer const int frame) 1255a0483764SConrad Meyer { 1256*37f1f268SConrad Meyer return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); 1257a0483764SConrad Meyer } 1258a0483764SConrad Meyer #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */ 1259a0483764SConrad Meyer 1260a0483764SConrad Meyer #endif /* DYNAMIC_BMI2 */ 1261a0483764SConrad Meyer 1262a0483764SConrad Meyer typedef size_t (*ZSTD_decompressSequences_t)( 1263a0483764SConrad Meyer ZSTD_DCtx* dctx, 1264a0483764SConrad Meyer void* dst, size_t maxDstSize, 1265a0483764SConrad Meyer const void* seqStart, size_t seqSize, int nbSeq, 1266*37f1f268SConrad Meyer const ZSTD_longOffset_e isLongOffset, 1267*37f1f268SConrad Meyer const int frame); 1268a0483764SConrad Meyer 1269a0483764SConrad Meyer #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG 1270a0483764SConrad Meyer static size_t 1271a0483764SConrad Meyer ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, 1272a0483764SConrad Meyer const void* seqStart, size_t seqSize, int nbSeq, 1273*37f1f268SConrad Meyer const ZSTD_longOffset_e isLongOffset, 1274*37f1f268SConrad Meyer const int frame) 1275a0483764SConrad Meyer { 1276a0483764SConrad Meyer DEBUGLOG(5, "ZSTD_decompressSequences"); 1277a0483764SConrad Meyer #if DYNAMIC_BMI2 1278a0483764SConrad Meyer if (dctx->bmi2) { 1279*37f1f268SConrad Meyer return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); 1280a0483764SConrad Meyer } 1281a0483764SConrad Meyer #endif 1282*37f1f268SConrad Meyer return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); 1283a0483764SConrad Meyer } 1284a0483764SConrad Meyer #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ 1285a0483764SConrad Meyer 1286a0483764SConrad Meyer 1287a0483764SConrad Meyer #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT 1288a0483764SConrad Meyer /* ZSTD_decompressSequencesLong() : 1289a0483764SConrad Meyer * decompression function triggered when a minimum share of offsets is considered "long", 1290a0483764SConrad Meyer * aka out of cache. 12912b9c00cbSConrad Meyer * note : "long" definition seems overloaded here, sometimes meaning "wider than bitstream register", and sometimes meaning "farther than memory cache distance". 1292a0483764SConrad Meyer * This function will try to mitigate main memory latency through the use of prefetching */ 1293a0483764SConrad Meyer static size_t 1294a0483764SConrad Meyer ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx, 1295a0483764SConrad Meyer void* dst, size_t maxDstSize, 1296a0483764SConrad Meyer const void* seqStart, size_t seqSize, int nbSeq, 1297*37f1f268SConrad Meyer const ZSTD_longOffset_e isLongOffset, 1298*37f1f268SConrad Meyer const int frame) 1299a0483764SConrad Meyer { 1300a0483764SConrad Meyer DEBUGLOG(5, "ZSTD_decompressSequencesLong"); 1301a0483764SConrad Meyer #if DYNAMIC_BMI2 1302a0483764SConrad Meyer if (dctx->bmi2) { 1303*37f1f268SConrad Meyer return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); 1304a0483764SConrad Meyer } 1305a0483764SConrad Meyer #endif 1306*37f1f268SConrad Meyer return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); 1307a0483764SConrad Meyer } 1308a0483764SConrad Meyer #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */ 1309a0483764SConrad Meyer 1310a0483764SConrad Meyer 1311a0483764SConrad Meyer 1312a0483764SConrad Meyer #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ 1313a0483764SConrad Meyer !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) 1314a0483764SConrad Meyer /* ZSTD_getLongOffsetsShare() : 1315a0483764SConrad Meyer * condition : offTable must be valid 1316a0483764SConrad Meyer * @return : "share" of long offsets (arbitrarily defined as > (1<<23)) 1317a0483764SConrad Meyer * compared to maximum possible of (1<<OffFSELog) */ 1318a0483764SConrad Meyer static unsigned 1319a0483764SConrad Meyer ZSTD_getLongOffsetsShare(const ZSTD_seqSymbol* offTable) 1320a0483764SConrad Meyer { 1321a0483764SConrad Meyer const void* ptr = offTable; 1322a0483764SConrad Meyer U32 const tableLog = ((const ZSTD_seqSymbol_header*)ptr)[0].tableLog; 1323a0483764SConrad Meyer const ZSTD_seqSymbol* table = offTable + 1; 1324a0483764SConrad Meyer U32 const max = 1 << tableLog; 1325a0483764SConrad Meyer U32 u, total = 0; 1326a0483764SConrad Meyer DEBUGLOG(5, "ZSTD_getLongOffsetsShare: (tableLog=%u)", tableLog); 1327a0483764SConrad Meyer 1328a0483764SConrad Meyer assert(max <= (1 << OffFSELog)); /* max not too large */ 1329a0483764SConrad Meyer for (u=0; u<max; u++) { 1330a0483764SConrad Meyer if (table[u].nbAdditionalBits > 22) total += 1; 1331a0483764SConrad Meyer } 1332a0483764SConrad Meyer 1333a0483764SConrad Meyer assert(tableLog <= OffFSELog); 1334a0483764SConrad Meyer total <<= (OffFSELog - tableLog); /* scale to OffFSELog */ 1335a0483764SConrad Meyer 1336a0483764SConrad Meyer return total; 1337a0483764SConrad Meyer } 1338a0483764SConrad Meyer #endif 1339a0483764SConrad Meyer 1340a0483764SConrad Meyer size_t 1341a0483764SConrad Meyer ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, 1342a0483764SConrad Meyer void* dst, size_t dstCapacity, 1343a0483764SConrad Meyer const void* src, size_t srcSize, const int frame) 1344a0483764SConrad Meyer { /* blockType == blockCompressed */ 1345a0483764SConrad Meyer const BYTE* ip = (const BYTE*)src; 1346a0483764SConrad Meyer /* isLongOffset must be true if there are long offsets. 1347a0483764SConrad Meyer * Offsets are long if they are larger than 2^STREAM_ACCUMULATOR_MIN. 1348a0483764SConrad Meyer * We don't expect that to be the case in 64-bit mode. 1349a0483764SConrad Meyer * In block mode, window size is not known, so we have to be conservative. 1350a0483764SConrad Meyer * (note: but it could be evaluated from current-lowLimit) 1351a0483764SConrad Meyer */ 1352a0483764SConrad Meyer ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || (dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN)))); 1353a0483764SConrad Meyer DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize); 1354a0483764SConrad Meyer 1355*37f1f268SConrad Meyer RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong, ""); 1356a0483764SConrad Meyer 1357a0483764SConrad Meyer /* Decode literals section */ 1358a0483764SConrad Meyer { size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize); 1359a0483764SConrad Meyer DEBUGLOG(5, "ZSTD_decodeLiteralsBlock : %u", (U32)litCSize); 1360a0483764SConrad Meyer if (ZSTD_isError(litCSize)) return litCSize; 1361a0483764SConrad Meyer ip += litCSize; 1362a0483764SConrad Meyer srcSize -= litCSize; 1363a0483764SConrad Meyer } 1364a0483764SConrad Meyer 1365a0483764SConrad Meyer /* Build Decoding Tables */ 1366a0483764SConrad Meyer { 1367a0483764SConrad Meyer /* These macros control at build-time which decompressor implementation 1368a0483764SConrad Meyer * we use. If neither is defined, we do some inspection and dispatch at 1369a0483764SConrad Meyer * runtime. 1370a0483764SConrad Meyer */ 1371a0483764SConrad Meyer #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ 1372a0483764SConrad Meyer !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) 1373a0483764SConrad Meyer int usePrefetchDecoder = dctx->ddictIsCold; 1374a0483764SConrad Meyer #endif 1375a0483764SConrad Meyer int nbSeq; 1376a0483764SConrad Meyer size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, srcSize); 1377a0483764SConrad Meyer if (ZSTD_isError(seqHSize)) return seqHSize; 1378a0483764SConrad Meyer ip += seqHSize; 1379a0483764SConrad Meyer srcSize -= seqHSize; 1380a0483764SConrad Meyer 1381*37f1f268SConrad Meyer RETURN_ERROR_IF(dst == NULL && nbSeq > 0, dstSize_tooSmall, "NULL not handled"); 1382*37f1f268SConrad Meyer 1383a0483764SConrad Meyer #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ 1384a0483764SConrad Meyer !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) 1385a0483764SConrad Meyer if ( !usePrefetchDecoder 1386a0483764SConrad Meyer && (!frame || (dctx->fParams.windowSize > (1<<24))) 1387a0483764SConrad Meyer && (nbSeq>ADVANCED_SEQS) ) { /* could probably use a larger nbSeq limit */ 1388a0483764SConrad Meyer U32 const shareLongOffsets = ZSTD_getLongOffsetsShare(dctx->OFTptr); 1389a0483764SConrad Meyer U32 const minShare = MEM_64bits() ? 7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */ 1390a0483764SConrad Meyer usePrefetchDecoder = (shareLongOffsets >= minShare); 1391a0483764SConrad Meyer } 1392a0483764SConrad Meyer #endif 1393a0483764SConrad Meyer 1394a0483764SConrad Meyer dctx->ddictIsCold = 0; 1395a0483764SConrad Meyer 1396a0483764SConrad Meyer #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ 1397a0483764SConrad Meyer !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) 1398a0483764SConrad Meyer if (usePrefetchDecoder) 1399a0483764SConrad Meyer #endif 1400a0483764SConrad Meyer #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT 1401*37f1f268SConrad Meyer return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame); 1402a0483764SConrad Meyer #endif 1403a0483764SConrad Meyer 1404a0483764SConrad Meyer #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG 1405a0483764SConrad Meyer /* else */ 1406*37f1f268SConrad Meyer return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame); 1407a0483764SConrad Meyer #endif 1408a0483764SConrad Meyer } 1409a0483764SConrad Meyer } 1410a0483764SConrad Meyer 1411a0483764SConrad Meyer 1412*37f1f268SConrad Meyer void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst) 1413*37f1f268SConrad Meyer { 1414*37f1f268SConrad Meyer if (dst != dctx->previousDstEnd) { /* not contiguous */ 1415*37f1f268SConrad Meyer dctx->dictEnd = dctx->previousDstEnd; 1416*37f1f268SConrad Meyer dctx->virtualStart = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart)); 1417*37f1f268SConrad Meyer dctx->prefixStart = dst; 1418*37f1f268SConrad Meyer dctx->previousDstEnd = dst; 1419*37f1f268SConrad Meyer } 1420*37f1f268SConrad Meyer } 1421*37f1f268SConrad Meyer 1422*37f1f268SConrad Meyer 1423a0483764SConrad Meyer size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, 1424a0483764SConrad Meyer void* dst, size_t dstCapacity, 1425a0483764SConrad Meyer const void* src, size_t srcSize) 1426a0483764SConrad Meyer { 1427a0483764SConrad Meyer size_t dSize; 1428a0483764SConrad Meyer ZSTD_checkContinuity(dctx, dst); 1429a0483764SConrad Meyer dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 0); 1430a0483764SConrad Meyer dctx->previousDstEnd = (char*)dst + dSize; 1431a0483764SConrad Meyer return dSize; 1432a0483764SConrad Meyer } 1433