1 /* 2 * Copyright (c) Meta Platforms, Inc. and affiliates. 3 * All rights reserved. 4 * 5 * This source code is licensed under both the BSD-style license (found in the 6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7 * in the COPYING file in the root directory of this source tree). 8 * You may select, at your option, one of the above-listed licenses. 9 */ 10 11 /*-************************************* 12 * Dependencies 13 ***************************************/ 14 #include "zstd_compress_literals.h" 15 16 17 /* ************************************************************** 18 * Debug Traces 19 ****************************************************************/ 20 #if DEBUGLEVEL >= 2 21 22 static size_t showHexa(const void* src, size_t srcSize) 23 { 24 const BYTE* const ip = (const BYTE*)src; 25 size_t u; 26 for (u=0; u<srcSize; u++) { 27 RAWLOG(5, " %02X", ip[u]); (void)ip; 28 } 29 RAWLOG(5, " \n"); 30 return srcSize; 31 } 32 33 #endif 34 35 36 /* ************************************************************** 37 * Literals compression - special cases 38 ****************************************************************/ 39 size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize) 40 { 41 BYTE* const ostart = (BYTE*)dst; 42 U32 const flSize = 1 + (srcSize>31) + (srcSize>4095); 43 44 DEBUGLOG(5, "ZSTD_noCompressLiterals: srcSize=%zu, dstCapacity=%zu", srcSize, dstCapacity); 45 46 RETURN_ERROR_IF(srcSize + flSize > dstCapacity, dstSize_tooSmall, ""); 47 48 switch(flSize) 49 { 50 case 1: /* 2 - 1 - 5 */ 51 ostart[0] = (BYTE)((U32)set_basic + (srcSize<<3)); 52 break; 53 case 2: /* 2 - 2 - 12 */ 54 MEM_writeLE16(ostart, (U16)((U32)set_basic + (1<<2) + (srcSize<<4))); 55 break; 56 case 3: /* 2 - 2 - 20 */ 57 MEM_writeLE32(ostart, (U32)((U32)set_basic + (3<<2) + (srcSize<<4))); 58 break; 59 default: /* not necessary : flSize is {1,2,3} */ 60 assert(0); 61 } 62 63 ZSTD_memcpy(ostart + flSize, src, srcSize); 64 DEBUGLOG(5, "Raw (uncompressed) literals: %u -> %u", (U32)srcSize, (U32)(srcSize + flSize)); 65 return srcSize + flSize; 66 } 67 68 static int allBytesIdentical(const void* src, size_t srcSize) 69 { 70 assert(srcSize >= 1); 71 assert(src != NULL); 72 { const BYTE b = ((const BYTE*)src)[0]; 73 size_t p; 74 for (p=1; p<srcSize; p++) { 75 if (((const BYTE*)src)[p] != b) return 0; 76 } 77 return 1; 78 } 79 } 80 81 size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize) 82 { 83 BYTE* const ostart = (BYTE*)dst; 84 U32 const flSize = 1 + (srcSize>31) + (srcSize>4095); 85 86 assert(dstCapacity >= 4); (void)dstCapacity; 87 assert(allBytesIdentical(src, srcSize)); 88 89 switch(flSize) 90 { 91 case 1: /* 2 - 1 - 5 */ 92 ostart[0] = (BYTE)((U32)set_rle + (srcSize<<3)); 93 break; 94 case 2: /* 2 - 2 - 12 */ 95 MEM_writeLE16(ostart, (U16)((U32)set_rle + (1<<2) + (srcSize<<4))); 96 break; 97 case 3: /* 2 - 2 - 20 */ 98 MEM_writeLE32(ostart, (U32)((U32)set_rle + (3<<2) + (srcSize<<4))); 99 break; 100 default: /* not necessary : flSize is {1,2,3} */ 101 assert(0); 102 } 103 104 ostart[flSize] = *(const BYTE*)src; 105 DEBUGLOG(5, "RLE : Repeated Literal (%02X: %u times) -> %u bytes encoded", ((const BYTE*)src)[0], (U32)srcSize, (U32)flSize + 1); 106 return flSize+1; 107 } 108 109 /* ZSTD_minLiteralsToCompress() : 110 * returns minimal amount of literals 111 * for literal compression to even be attempted. 112 * Minimum is made tighter as compression strategy increases. 113 */ 114 static size_t 115 ZSTD_minLiteralsToCompress(ZSTD_strategy strategy, HUF_repeat huf_repeat) 116 { 117 assert((int)strategy >= 0); 118 assert((int)strategy <= 9); 119 /* btultra2 : min 8 bytes; 120 * then 2x larger for each successive compression strategy 121 * max threshold 64 bytes */ 122 { int const shift = MIN(9-(int)strategy, 3); 123 size_t const mintc = (huf_repeat == HUF_repeat_valid) ? 6 : (size_t)8 << shift; 124 DEBUGLOG(7, "minLiteralsToCompress = %zu", mintc); 125 return mintc; 126 } 127 } 128 129 size_t ZSTD_compressLiterals ( 130 void* dst, size_t dstCapacity, 131 const void* src, size_t srcSize, 132 void* entropyWorkspace, size_t entropyWorkspaceSize, 133 const ZSTD_hufCTables_t* prevHuf, 134 ZSTD_hufCTables_t* nextHuf, 135 ZSTD_strategy strategy, 136 int disableLiteralCompression, 137 int suspectUncompressible, 138 int bmi2) 139 { 140 size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB); 141 BYTE* const ostart = (BYTE*)dst; 142 U32 singleStream = srcSize < 256; 143 symbolEncodingType_e hType = set_compressed; 144 size_t cLitSize; 145 146 DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i, srcSize=%u, dstCapacity=%zu)", 147 disableLiteralCompression, (U32)srcSize, dstCapacity); 148 149 DEBUGLOG(6, "Completed literals listing (%zu bytes)", showHexa(src, srcSize)); 150 151 /* Prepare nextEntropy assuming reusing the existing table */ 152 ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); 153 154 if (disableLiteralCompression) 155 return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); 156 157 /* if too small, don't even attempt compression (speed opt) */ 158 if (srcSize < ZSTD_minLiteralsToCompress(strategy, prevHuf->repeatMode)) 159 return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); 160 161 RETURN_ERROR_IF(dstCapacity < lhSize+1, dstSize_tooSmall, "not enough space for compression"); 162 { HUF_repeat repeat = prevHuf->repeatMode; 163 int const flags = 0 164 | (bmi2 ? HUF_flags_bmi2 : 0) 165 | (strategy < ZSTD_lazy && srcSize <= 1024 ? HUF_flags_preferRepeat : 0) 166 | (strategy >= HUF_OPTIMAL_DEPTH_THRESHOLD ? HUF_flags_optimalDepth : 0) 167 | (suspectUncompressible ? HUF_flags_suspectUncompressible : 0); 168 169 typedef size_t (*huf_compress_f)(void*, size_t, const void*, size_t, unsigned, unsigned, void*, size_t, HUF_CElt*, HUF_repeat*, int); 170 huf_compress_f huf_compress; 171 if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1; 172 huf_compress = singleStream ? HUF_compress1X_repeat : HUF_compress4X_repeat; 173 cLitSize = huf_compress(ostart+lhSize, dstCapacity-lhSize, 174 src, srcSize, 175 HUF_SYMBOLVALUE_MAX, LitHufLog, 176 entropyWorkspace, entropyWorkspaceSize, 177 (HUF_CElt*)nextHuf->CTable, 178 &repeat, flags); 179 DEBUGLOG(5, "%zu literals compressed into %zu bytes (before header)", srcSize, cLitSize); 180 if (repeat != HUF_repeat_none) { 181 /* reused the existing table */ 182 DEBUGLOG(5, "reusing statistics from previous huffman block"); 183 hType = set_repeat; 184 } 185 } 186 187 { size_t const minGain = ZSTD_minGain(srcSize, strategy); 188 if ((cLitSize==0) || (cLitSize >= srcSize - minGain) || ERR_isError(cLitSize)) { 189 ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); 190 return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); 191 } } 192 if (cLitSize==1) { 193 /* A return value of 1 signals that the alphabet consists of a single symbol. 194 * However, in some rare circumstances, it could be the compressed size (a single byte). 195 * For that outcome to have a chance to happen, it's necessary that `srcSize < 8`. 196 * (it's also necessary to not generate statistics). 197 * Therefore, in such a case, actively check that all bytes are identical. */ 198 if ((srcSize >= 8) || allBytesIdentical(src, srcSize)) { 199 ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); 200 return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize); 201 } } 202 203 if (hType == set_compressed) { 204 /* using a newly constructed table */ 205 nextHuf->repeatMode = HUF_repeat_check; 206 } 207 208 /* Build header */ 209 switch(lhSize) 210 { 211 case 3: /* 2 - 2 - 10 - 10 */ 212 if (!singleStream) assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS); 213 { U32 const lhc = hType + ((U32)(!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14); 214 MEM_writeLE24(ostart, lhc); 215 break; 216 } 217 case 4: /* 2 - 2 - 14 - 14 */ 218 assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS); 219 { U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18); 220 MEM_writeLE32(ostart, lhc); 221 break; 222 } 223 case 5: /* 2 - 2 - 18 - 18 */ 224 assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS); 225 { U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22); 226 MEM_writeLE32(ostart, lhc); 227 ostart[4] = (BYTE)(cLitSize >> 10); 228 break; 229 } 230 default: /* not possible : lhSize is {3,4,5} */ 231 assert(0); 232 } 233 DEBUGLOG(5, "Compressed literals: %u -> %u", (U32)srcSize, (U32)(lhSize+cLitSize)); 234 return lhSize+cLitSize; 235 } 236