14d3f1eafSConrad Meyer /*
2*5ff13fbcSAllan Jude * Copyright (c) Yann Collet, Facebook, Inc.
34d3f1eafSConrad Meyer * All rights reserved.
44d3f1eafSConrad Meyer *
54d3f1eafSConrad Meyer * This source code is licensed under both the BSD-style license (found in the
64d3f1eafSConrad Meyer * LICENSE file in the root directory of this source tree) and the GPLv2 (found
74d3f1eafSConrad Meyer * in the COPYING file in the root directory of this source tree).
84d3f1eafSConrad Meyer * You may select, at your option, one of the above-listed licenses.
94d3f1eafSConrad Meyer */
104d3f1eafSConrad Meyer
114d3f1eafSConrad Meyer /*-*************************************
124d3f1eafSConrad Meyer * Dependencies
134d3f1eafSConrad Meyer ***************************************/
144d3f1eafSConrad Meyer #include "zstd_compress_sequences.h"
154d3f1eafSConrad Meyer
164d3f1eafSConrad Meyer /**
174d3f1eafSConrad Meyer * -log2(x / 256) lookup table for x in [0, 256).
184d3f1eafSConrad Meyer * If x == 0: Return 0
194d3f1eafSConrad Meyer * Else: Return floor(-log2(x / 256) * 256)
204d3f1eafSConrad Meyer */
214d3f1eafSConrad Meyer static unsigned const kInverseProbabilityLog256[256] = {
224d3f1eafSConrad Meyer 0, 2048, 1792, 1642, 1536, 1453, 1386, 1329, 1280, 1236, 1197, 1162,
234d3f1eafSConrad Meyer 1130, 1100, 1073, 1047, 1024, 1001, 980, 960, 941, 923, 906, 889,
244d3f1eafSConrad Meyer 874, 859, 844, 830, 817, 804, 791, 779, 768, 756, 745, 734,
254d3f1eafSConrad Meyer 724, 714, 704, 694, 685, 676, 667, 658, 650, 642, 633, 626,
264d3f1eafSConrad Meyer 618, 610, 603, 595, 588, 581, 574, 567, 561, 554, 548, 542,
274d3f1eafSConrad Meyer 535, 529, 523, 517, 512, 506, 500, 495, 489, 484, 478, 473,
284d3f1eafSConrad Meyer 468, 463, 458, 453, 448, 443, 438, 434, 429, 424, 420, 415,
294d3f1eafSConrad Meyer 411, 407, 402, 398, 394, 390, 386, 382, 377, 373, 370, 366,
304d3f1eafSConrad Meyer 362, 358, 354, 350, 347, 343, 339, 336, 332, 329, 325, 322,
314d3f1eafSConrad Meyer 318, 315, 311, 308, 305, 302, 298, 295, 292, 289, 286, 282,
324d3f1eafSConrad Meyer 279, 276, 273, 270, 267, 264, 261, 258, 256, 253, 250, 247,
334d3f1eafSConrad Meyer 244, 241, 239, 236, 233, 230, 228, 225, 222, 220, 217, 215,
344d3f1eafSConrad Meyer 212, 209, 207, 204, 202, 199, 197, 194, 192, 190, 187, 185,
354d3f1eafSConrad Meyer 182, 180, 178, 175, 173, 171, 168, 166, 164, 162, 159, 157,
364d3f1eafSConrad Meyer 155, 153, 151, 149, 146, 144, 142, 140, 138, 136, 134, 132,
374d3f1eafSConrad Meyer 130, 128, 126, 123, 121, 119, 117, 115, 114, 112, 110, 108,
384d3f1eafSConrad Meyer 106, 104, 102, 100, 98, 96, 94, 93, 91, 89, 87, 85,
394d3f1eafSConrad Meyer 83, 82, 80, 78, 76, 74, 73, 71, 69, 67, 66, 64,
404d3f1eafSConrad Meyer 62, 61, 59, 57, 55, 54, 52, 50, 49, 47, 46, 44,
414d3f1eafSConrad Meyer 42, 41, 39, 37, 36, 34, 33, 31, 30, 28, 26, 25,
424d3f1eafSConrad Meyer 23, 22, 20, 19, 17, 16, 14, 13, 11, 10, 8, 7,
434d3f1eafSConrad Meyer 5, 4, 2, 1,
444d3f1eafSConrad Meyer };
454d3f1eafSConrad Meyer
ZSTD_getFSEMaxSymbolValue(FSE_CTable const * ctable)464d3f1eafSConrad Meyer static unsigned ZSTD_getFSEMaxSymbolValue(FSE_CTable const* ctable) {
474d3f1eafSConrad Meyer void const* ptr = ctable;
484d3f1eafSConrad Meyer U16 const* u16ptr = (U16 const*)ptr;
494d3f1eafSConrad Meyer U32 const maxSymbolValue = MEM_read16(u16ptr + 1);
504d3f1eafSConrad Meyer return maxSymbolValue;
514d3f1eafSConrad Meyer }
524d3f1eafSConrad Meyer
534d3f1eafSConrad Meyer /**
54f7cd7fe5SConrad Meyer * Returns true if we should use ncount=-1 else we should
55f7cd7fe5SConrad Meyer * use ncount=1 for low probability symbols instead.
56f7cd7fe5SConrad Meyer */
ZSTD_useLowProbCount(size_t const nbSeq)57f7cd7fe5SConrad Meyer static unsigned ZSTD_useLowProbCount(size_t const nbSeq)
58f7cd7fe5SConrad Meyer {
59f7cd7fe5SConrad Meyer /* Heuristic: This should cover most blocks <= 16K and
60f7cd7fe5SConrad Meyer * start to fade out after 16K to about 32K depending on
61f7cd7fe5SConrad Meyer * comprssibility.
62f7cd7fe5SConrad Meyer */
63f7cd7fe5SConrad Meyer return nbSeq >= 2048;
64f7cd7fe5SConrad Meyer }
65f7cd7fe5SConrad Meyer
66f7cd7fe5SConrad Meyer /**
674d3f1eafSConrad Meyer * Returns the cost in bytes of encoding the normalized count header.
684d3f1eafSConrad Meyer * Returns an error if any of the helper functions return an error.
694d3f1eafSConrad Meyer */
ZSTD_NCountCost(unsigned const * count,unsigned const max,size_t const nbSeq,unsigned const FSELog)704d3f1eafSConrad Meyer static size_t ZSTD_NCountCost(unsigned const* count, unsigned const max,
714d3f1eafSConrad Meyer size_t const nbSeq, unsigned const FSELog)
724d3f1eafSConrad Meyer {
734d3f1eafSConrad Meyer BYTE wksp[FSE_NCOUNTBOUND];
744d3f1eafSConrad Meyer S16 norm[MaxSeq + 1];
754d3f1eafSConrad Meyer const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max);
76f7cd7fe5SConrad Meyer FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq, max, ZSTD_useLowProbCount(nbSeq)), "");
774d3f1eafSConrad Meyer return FSE_writeNCount(wksp, sizeof(wksp), norm, max, tableLog);
784d3f1eafSConrad Meyer }
794d3f1eafSConrad Meyer
804d3f1eafSConrad Meyer /**
814d3f1eafSConrad Meyer * Returns the cost in bits of encoding the distribution described by count
824d3f1eafSConrad Meyer * using the entropy bound.
834d3f1eafSConrad Meyer */
ZSTD_entropyCost(unsigned const * count,unsigned const max,size_t const total)844d3f1eafSConrad Meyer static size_t ZSTD_entropyCost(unsigned const* count, unsigned const max, size_t const total)
854d3f1eafSConrad Meyer {
864d3f1eafSConrad Meyer unsigned cost = 0;
874d3f1eafSConrad Meyer unsigned s;
88*5ff13fbcSAllan Jude
89*5ff13fbcSAllan Jude assert(total > 0);
904d3f1eafSConrad Meyer for (s = 0; s <= max; ++s) {
914d3f1eafSConrad Meyer unsigned norm = (unsigned)((256 * count[s]) / total);
924d3f1eafSConrad Meyer if (count[s] != 0 && norm == 0)
934d3f1eafSConrad Meyer norm = 1;
944d3f1eafSConrad Meyer assert(count[s] < total);
954d3f1eafSConrad Meyer cost += count[s] * kInverseProbabilityLog256[norm];
964d3f1eafSConrad Meyer }
974d3f1eafSConrad Meyer return cost >> 8;
984d3f1eafSConrad Meyer }
994d3f1eafSConrad Meyer
1004d3f1eafSConrad Meyer /**
1014d3f1eafSConrad Meyer * Returns the cost in bits of encoding the distribution in count using ctable.
1024d3f1eafSConrad Meyer * Returns an error if ctable cannot represent all the symbols in count.
1034d3f1eafSConrad Meyer */
ZSTD_fseBitCost(FSE_CTable const * ctable,unsigned const * count,unsigned const max)10437f1f268SConrad Meyer size_t ZSTD_fseBitCost(
1054d3f1eafSConrad Meyer FSE_CTable const* ctable,
1064d3f1eafSConrad Meyer unsigned const* count,
1074d3f1eafSConrad Meyer unsigned const max)
1084d3f1eafSConrad Meyer {
1094d3f1eafSConrad Meyer unsigned const kAccuracyLog = 8;
1104d3f1eafSConrad Meyer size_t cost = 0;
1114d3f1eafSConrad Meyer unsigned s;
1124d3f1eafSConrad Meyer FSE_CState_t cstate;
1134d3f1eafSConrad Meyer FSE_initCState(&cstate, ctable);
11437f1f268SConrad Meyer if (ZSTD_getFSEMaxSymbolValue(ctable) < max) {
11537f1f268SConrad Meyer DEBUGLOG(5, "Repeat FSE_CTable has maxSymbolValue %u < %u",
1164d3f1eafSConrad Meyer ZSTD_getFSEMaxSymbolValue(ctable), max);
11737f1f268SConrad Meyer return ERROR(GENERIC);
11837f1f268SConrad Meyer }
1194d3f1eafSConrad Meyer for (s = 0; s <= max; ++s) {
1204d3f1eafSConrad Meyer unsigned const tableLog = cstate.stateLog;
1214d3f1eafSConrad Meyer unsigned const badCost = (tableLog + 1) << kAccuracyLog;
1224d3f1eafSConrad Meyer unsigned const bitCost = FSE_bitCost(cstate.symbolTT, tableLog, s, kAccuracyLog);
1234d3f1eafSConrad Meyer if (count[s] == 0)
1244d3f1eafSConrad Meyer continue;
12537f1f268SConrad Meyer if (bitCost >= badCost) {
12637f1f268SConrad Meyer DEBUGLOG(5, "Repeat FSE_CTable has Prob[%u] == 0", s);
12737f1f268SConrad Meyer return ERROR(GENERIC);
12837f1f268SConrad Meyer }
12937f1f268SConrad Meyer cost += (size_t)count[s] * bitCost;
1304d3f1eafSConrad Meyer }
1314d3f1eafSConrad Meyer return cost >> kAccuracyLog;
1324d3f1eafSConrad Meyer }
1334d3f1eafSConrad Meyer
1344d3f1eafSConrad Meyer /**
1354d3f1eafSConrad Meyer * Returns the cost in bits of encoding the distribution in count using the
1364d3f1eafSConrad Meyer * table described by norm. The max symbol support by norm is assumed >= max.
1374d3f1eafSConrad Meyer * norm must be valid for every symbol with non-zero probability in count.
1384d3f1eafSConrad Meyer */
ZSTD_crossEntropyCost(short const * norm,unsigned accuracyLog,unsigned const * count,unsigned const max)13937f1f268SConrad Meyer size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog,
1404d3f1eafSConrad Meyer unsigned const* count, unsigned const max)
1414d3f1eafSConrad Meyer {
1424d3f1eafSConrad Meyer unsigned const shift = 8 - accuracyLog;
1434d3f1eafSConrad Meyer size_t cost = 0;
1444d3f1eafSConrad Meyer unsigned s;
1454d3f1eafSConrad Meyer assert(accuracyLog <= 8);
1464d3f1eafSConrad Meyer for (s = 0; s <= max; ++s) {
14737f1f268SConrad Meyer unsigned const normAcc = (norm[s] != -1) ? (unsigned)norm[s] : 1;
1484d3f1eafSConrad Meyer unsigned const norm256 = normAcc << shift;
1494d3f1eafSConrad Meyer assert(norm256 > 0);
1504d3f1eafSConrad Meyer assert(norm256 < 256);
1514d3f1eafSConrad Meyer cost += count[s] * kInverseProbabilityLog256[norm256];
1524d3f1eafSConrad Meyer }
1534d3f1eafSConrad Meyer return cost >> 8;
1544d3f1eafSConrad Meyer }
1554d3f1eafSConrad Meyer
1564d3f1eafSConrad Meyer symbolEncodingType_e
ZSTD_selectEncodingType(FSE_repeat * repeatMode,unsigned const * count,unsigned const max,size_t const mostFrequent,size_t nbSeq,unsigned const FSELog,FSE_CTable const * prevCTable,short const * defaultNorm,U32 defaultNormLog,ZSTD_defaultPolicy_e const isDefaultAllowed,ZSTD_strategy const strategy)1574d3f1eafSConrad Meyer ZSTD_selectEncodingType(
1584d3f1eafSConrad Meyer FSE_repeat* repeatMode, unsigned const* count, unsigned const max,
1594d3f1eafSConrad Meyer size_t const mostFrequent, size_t nbSeq, unsigned const FSELog,
1604d3f1eafSConrad Meyer FSE_CTable const* prevCTable,
1614d3f1eafSConrad Meyer short const* defaultNorm, U32 defaultNormLog,
1624d3f1eafSConrad Meyer ZSTD_defaultPolicy_e const isDefaultAllowed,
1634d3f1eafSConrad Meyer ZSTD_strategy const strategy)
1644d3f1eafSConrad Meyer {
1654d3f1eafSConrad Meyer ZSTD_STATIC_ASSERT(ZSTD_defaultDisallowed == 0 && ZSTD_defaultAllowed != 0);
1664d3f1eafSConrad Meyer if (mostFrequent == nbSeq) {
1674d3f1eafSConrad Meyer *repeatMode = FSE_repeat_none;
1684d3f1eafSConrad Meyer if (isDefaultAllowed && nbSeq <= 2) {
1694d3f1eafSConrad Meyer /* Prefer set_basic over set_rle when there are 2 or less symbols,
1704d3f1eafSConrad Meyer * since RLE uses 1 byte, but set_basic uses 5-6 bits per symbol.
1714d3f1eafSConrad Meyer * If basic encoding isn't possible, always choose RLE.
1724d3f1eafSConrad Meyer */
1734d3f1eafSConrad Meyer DEBUGLOG(5, "Selected set_basic");
1744d3f1eafSConrad Meyer return set_basic;
1754d3f1eafSConrad Meyer }
1764d3f1eafSConrad Meyer DEBUGLOG(5, "Selected set_rle");
1774d3f1eafSConrad Meyer return set_rle;
1784d3f1eafSConrad Meyer }
1794d3f1eafSConrad Meyer if (strategy < ZSTD_lazy) {
1804d3f1eafSConrad Meyer if (isDefaultAllowed) {
1814d3f1eafSConrad Meyer size_t const staticFse_nbSeq_max = 1000;
1824d3f1eafSConrad Meyer size_t const mult = 10 - strategy;
1834d3f1eafSConrad Meyer size_t const baseLog = 3;
1844d3f1eafSConrad Meyer size_t const dynamicFse_nbSeq_min = (((size_t)1 << defaultNormLog) * mult) >> baseLog; /* 28-36 for offset, 56-72 for lengths */
1854d3f1eafSConrad Meyer assert(defaultNormLog >= 5 && defaultNormLog <= 6); /* xx_DEFAULTNORMLOG */
1864d3f1eafSConrad Meyer assert(mult <= 9 && mult >= 7);
1874d3f1eafSConrad Meyer if ( (*repeatMode == FSE_repeat_valid)
1884d3f1eafSConrad Meyer && (nbSeq < staticFse_nbSeq_max) ) {
1894d3f1eafSConrad Meyer DEBUGLOG(5, "Selected set_repeat");
1904d3f1eafSConrad Meyer return set_repeat;
1914d3f1eafSConrad Meyer }
1924d3f1eafSConrad Meyer if ( (nbSeq < dynamicFse_nbSeq_min)
1934d3f1eafSConrad Meyer || (mostFrequent < (nbSeq >> (defaultNormLog-1))) ) {
1944d3f1eafSConrad Meyer DEBUGLOG(5, "Selected set_basic");
1954d3f1eafSConrad Meyer /* The format allows default tables to be repeated, but it isn't useful.
1964d3f1eafSConrad Meyer * When using simple heuristics to select encoding type, we don't want
1974d3f1eafSConrad Meyer * to confuse these tables with dictionaries. When running more careful
1984d3f1eafSConrad Meyer * analysis, we don't need to waste time checking both repeating tables
1994d3f1eafSConrad Meyer * and default tables.
2004d3f1eafSConrad Meyer */
2014d3f1eafSConrad Meyer *repeatMode = FSE_repeat_none;
2024d3f1eafSConrad Meyer return set_basic;
2034d3f1eafSConrad Meyer }
2044d3f1eafSConrad Meyer }
2054d3f1eafSConrad Meyer } else {
2064d3f1eafSConrad Meyer size_t const basicCost = isDefaultAllowed ? ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, count, max) : ERROR(GENERIC);
2074d3f1eafSConrad Meyer size_t const repeatCost = *repeatMode != FSE_repeat_none ? ZSTD_fseBitCost(prevCTable, count, max) : ERROR(GENERIC);
2084d3f1eafSConrad Meyer size_t const NCountCost = ZSTD_NCountCost(count, max, nbSeq, FSELog);
2094d3f1eafSConrad Meyer size_t const compressedCost = (NCountCost << 3) + ZSTD_entropyCost(count, max, nbSeq);
2104d3f1eafSConrad Meyer
2114d3f1eafSConrad Meyer if (isDefaultAllowed) {
2124d3f1eafSConrad Meyer assert(!ZSTD_isError(basicCost));
2134d3f1eafSConrad Meyer assert(!(*repeatMode == FSE_repeat_valid && ZSTD_isError(repeatCost)));
2144d3f1eafSConrad Meyer }
2154d3f1eafSConrad Meyer assert(!ZSTD_isError(NCountCost));
2164d3f1eafSConrad Meyer assert(compressedCost < ERROR(maxCode));
2174d3f1eafSConrad Meyer DEBUGLOG(5, "Estimated bit costs: basic=%u\trepeat=%u\tcompressed=%u",
2184d3f1eafSConrad Meyer (unsigned)basicCost, (unsigned)repeatCost, (unsigned)compressedCost);
2194d3f1eafSConrad Meyer if (basicCost <= repeatCost && basicCost <= compressedCost) {
2204d3f1eafSConrad Meyer DEBUGLOG(5, "Selected set_basic");
2214d3f1eafSConrad Meyer assert(isDefaultAllowed);
2224d3f1eafSConrad Meyer *repeatMode = FSE_repeat_none;
2234d3f1eafSConrad Meyer return set_basic;
2244d3f1eafSConrad Meyer }
2254d3f1eafSConrad Meyer if (repeatCost <= compressedCost) {
2264d3f1eafSConrad Meyer DEBUGLOG(5, "Selected set_repeat");
2274d3f1eafSConrad Meyer assert(!ZSTD_isError(repeatCost));
2284d3f1eafSConrad Meyer return set_repeat;
2294d3f1eafSConrad Meyer }
2304d3f1eafSConrad Meyer assert(compressedCost < basicCost && compressedCost < repeatCost);
2314d3f1eafSConrad Meyer }
2324d3f1eafSConrad Meyer DEBUGLOG(5, "Selected set_compressed");
2334d3f1eafSConrad Meyer *repeatMode = FSE_repeat_check;
2344d3f1eafSConrad Meyer return set_compressed;
2354d3f1eafSConrad Meyer }
2364d3f1eafSConrad Meyer
237*5ff13fbcSAllan Jude typedef struct {
238*5ff13fbcSAllan Jude S16 norm[MaxSeq + 1];
239*5ff13fbcSAllan Jude U32 wksp[FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(MaxSeq, MaxFSELog)];
240*5ff13fbcSAllan Jude } ZSTD_BuildCTableWksp;
241*5ff13fbcSAllan Jude
2424d3f1eafSConrad Meyer size_t
ZSTD_buildCTable(void * dst,size_t dstCapacity,FSE_CTable * nextCTable,U32 FSELog,symbolEncodingType_e type,unsigned * count,U32 max,const BYTE * codeTable,size_t nbSeq,const S16 * defaultNorm,U32 defaultNormLog,U32 defaultMax,const FSE_CTable * prevCTable,size_t prevCTableSize,void * entropyWorkspace,size_t entropyWorkspaceSize)2434d3f1eafSConrad Meyer ZSTD_buildCTable(void* dst, size_t dstCapacity,
2444d3f1eafSConrad Meyer FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type,
2454d3f1eafSConrad Meyer unsigned* count, U32 max,
2464d3f1eafSConrad Meyer const BYTE* codeTable, size_t nbSeq,
2474d3f1eafSConrad Meyer const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax,
2484d3f1eafSConrad Meyer const FSE_CTable* prevCTable, size_t prevCTableSize,
2499cbefe25SConrad Meyer void* entropyWorkspace, size_t entropyWorkspaceSize)
2504d3f1eafSConrad Meyer {
2514d3f1eafSConrad Meyer BYTE* op = (BYTE*)dst;
2524d3f1eafSConrad Meyer const BYTE* const oend = op + dstCapacity;
2534d3f1eafSConrad Meyer DEBUGLOG(6, "ZSTD_buildCTable (dstCapacity=%u)", (unsigned)dstCapacity);
2544d3f1eafSConrad Meyer
2554d3f1eafSConrad Meyer switch (type) {
2564d3f1eafSConrad Meyer case set_rle:
25737f1f268SConrad Meyer FORWARD_IF_ERROR(FSE_buildCTable_rle(nextCTable, (BYTE)max), "");
25837f1f268SConrad Meyer RETURN_ERROR_IF(dstCapacity==0, dstSize_tooSmall, "not enough space");
2594d3f1eafSConrad Meyer *op = codeTable[0];
2604d3f1eafSConrad Meyer return 1;
2614d3f1eafSConrad Meyer case set_repeat:
262f7cd7fe5SConrad Meyer ZSTD_memcpy(nextCTable, prevCTable, prevCTableSize);
2634d3f1eafSConrad Meyer return 0;
2644d3f1eafSConrad Meyer case set_basic:
26537f1f268SConrad Meyer FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, entropyWorkspace, entropyWorkspaceSize), ""); /* note : could be pre-calculated */
2664d3f1eafSConrad Meyer return 0;
2674d3f1eafSConrad Meyer case set_compressed: {
268*5ff13fbcSAllan Jude ZSTD_BuildCTableWksp* wksp = (ZSTD_BuildCTableWksp*)entropyWorkspace;
2694d3f1eafSConrad Meyer size_t nbSeq_1 = nbSeq;
2704d3f1eafSConrad Meyer const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max);
2714d3f1eafSConrad Meyer if (count[codeTable[nbSeq-1]] > 1) {
2724d3f1eafSConrad Meyer count[codeTable[nbSeq-1]]--;
2734d3f1eafSConrad Meyer nbSeq_1--;
2744d3f1eafSConrad Meyer }
2754d3f1eafSConrad Meyer assert(nbSeq_1 > 1);
276*5ff13fbcSAllan Jude assert(entropyWorkspaceSize >= sizeof(ZSTD_BuildCTableWksp));
277*5ff13fbcSAllan Jude (void)entropyWorkspaceSize;
278*5ff13fbcSAllan Jude FORWARD_IF_ERROR(FSE_normalizeCount(wksp->norm, tableLog, count, nbSeq_1, max, ZSTD_useLowProbCount(nbSeq_1)), "FSE_normalizeCount failed");
279*5ff13fbcSAllan Jude assert(oend >= op);
280*5ff13fbcSAllan Jude { size_t const NCountSize = FSE_writeNCount(op, (size_t)(oend - op), wksp->norm, max, tableLog); /* overflow protected */
28137f1f268SConrad Meyer FORWARD_IF_ERROR(NCountSize, "FSE_writeNCount failed");
282*5ff13fbcSAllan Jude FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, wksp->norm, max, tableLog, wksp->wksp, sizeof(wksp->wksp)), "FSE_buildCTable_wksp failed");
2834d3f1eafSConrad Meyer return NCountSize;
2844d3f1eafSConrad Meyer }
2854d3f1eafSConrad Meyer }
28637f1f268SConrad Meyer default: assert(0); RETURN_ERROR(GENERIC, "impossible to reach");
2874d3f1eafSConrad Meyer }
2884d3f1eafSConrad Meyer }
2894d3f1eafSConrad Meyer
2904d3f1eafSConrad Meyer FORCE_INLINE_TEMPLATE size_t
ZSTD_encodeSequences_body(void * dst,size_t dstCapacity,FSE_CTable const * CTable_MatchLength,BYTE const * mlCodeTable,FSE_CTable const * CTable_OffsetBits,BYTE const * ofCodeTable,FSE_CTable const * CTable_LitLength,BYTE const * llCodeTable,seqDef const * sequences,size_t nbSeq,int longOffsets)2914d3f1eafSConrad Meyer ZSTD_encodeSequences_body(
2924d3f1eafSConrad Meyer void* dst, size_t dstCapacity,
2934d3f1eafSConrad Meyer FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
2944d3f1eafSConrad Meyer FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
2954d3f1eafSConrad Meyer FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
2964d3f1eafSConrad Meyer seqDef const* sequences, size_t nbSeq, int longOffsets)
2974d3f1eafSConrad Meyer {
2984d3f1eafSConrad Meyer BIT_CStream_t blockStream;
2994d3f1eafSConrad Meyer FSE_CState_t stateMatchLength;
3004d3f1eafSConrad Meyer FSE_CState_t stateOffsetBits;
3014d3f1eafSConrad Meyer FSE_CState_t stateLitLength;
3024d3f1eafSConrad Meyer
3034d3f1eafSConrad Meyer RETURN_ERROR_IF(
3044d3f1eafSConrad Meyer ERR_isError(BIT_initCStream(&blockStream, dst, dstCapacity)),
3054d3f1eafSConrad Meyer dstSize_tooSmall, "not enough space remaining");
3064d3f1eafSConrad Meyer DEBUGLOG(6, "available space for bitstream : %i (dstCapacity=%u)",
3074d3f1eafSConrad Meyer (int)(blockStream.endPtr - blockStream.startPtr),
3084d3f1eafSConrad Meyer (unsigned)dstCapacity);
3094d3f1eafSConrad Meyer
3104d3f1eafSConrad Meyer /* first symbols */
3114d3f1eafSConrad Meyer FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]);
3124d3f1eafSConrad Meyer FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, ofCodeTable[nbSeq-1]);
3134d3f1eafSConrad Meyer FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]);
3144d3f1eafSConrad Meyer BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]);
3154d3f1eafSConrad Meyer if (MEM_32bits()) BIT_flushBits(&blockStream);
316*5ff13fbcSAllan Jude BIT_addBits(&blockStream, sequences[nbSeq-1].mlBase, ML_bits[mlCodeTable[nbSeq-1]]);
3174d3f1eafSConrad Meyer if (MEM_32bits()) BIT_flushBits(&blockStream);
3184d3f1eafSConrad Meyer if (longOffsets) {
3194d3f1eafSConrad Meyer U32 const ofBits = ofCodeTable[nbSeq-1];
32037f1f268SConrad Meyer unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
3214d3f1eafSConrad Meyer if (extraBits) {
322*5ff13fbcSAllan Jude BIT_addBits(&blockStream, sequences[nbSeq-1].offBase, extraBits);
3234d3f1eafSConrad Meyer BIT_flushBits(&blockStream);
3244d3f1eafSConrad Meyer }
325*5ff13fbcSAllan Jude BIT_addBits(&blockStream, sequences[nbSeq-1].offBase >> extraBits,
3264d3f1eafSConrad Meyer ofBits - extraBits);
3274d3f1eafSConrad Meyer } else {
328*5ff13fbcSAllan Jude BIT_addBits(&blockStream, sequences[nbSeq-1].offBase, ofCodeTable[nbSeq-1]);
3294d3f1eafSConrad Meyer }
3304d3f1eafSConrad Meyer BIT_flushBits(&blockStream);
3314d3f1eafSConrad Meyer
3324d3f1eafSConrad Meyer { size_t n;
3334d3f1eafSConrad Meyer for (n=nbSeq-2 ; n<nbSeq ; n--) { /* intentional underflow */
3344d3f1eafSConrad Meyer BYTE const llCode = llCodeTable[n];
3354d3f1eafSConrad Meyer BYTE const ofCode = ofCodeTable[n];
3364d3f1eafSConrad Meyer BYTE const mlCode = mlCodeTable[n];
3374d3f1eafSConrad Meyer U32 const llBits = LL_bits[llCode];
3384d3f1eafSConrad Meyer U32 const ofBits = ofCode;
3394d3f1eafSConrad Meyer U32 const mlBits = ML_bits[mlCode];
3404d3f1eafSConrad Meyer DEBUGLOG(6, "encoding: litlen:%2u - matchlen:%2u - offCode:%7u",
3414d3f1eafSConrad Meyer (unsigned)sequences[n].litLength,
342*5ff13fbcSAllan Jude (unsigned)sequences[n].mlBase + MINMATCH,
343*5ff13fbcSAllan Jude (unsigned)sequences[n].offBase);
3444d3f1eafSConrad Meyer /* 32b*/ /* 64b*/
3454d3f1eafSConrad Meyer /* (7)*/ /* (7)*/
3464d3f1eafSConrad Meyer FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode); /* 15 */ /* 15 */
3474d3f1eafSConrad Meyer FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode); /* 24 */ /* 24 */
3484d3f1eafSConrad Meyer if (MEM_32bits()) BIT_flushBits(&blockStream); /* (7)*/
3494d3f1eafSConrad Meyer FSE_encodeSymbol(&blockStream, &stateLitLength, llCode); /* 16 */ /* 33 */
3504d3f1eafSConrad Meyer if (MEM_32bits() || (ofBits+mlBits+llBits >= 64-7-(LLFSELog+MLFSELog+OffFSELog)))
3514d3f1eafSConrad Meyer BIT_flushBits(&blockStream); /* (7)*/
3524d3f1eafSConrad Meyer BIT_addBits(&blockStream, sequences[n].litLength, llBits);
3534d3f1eafSConrad Meyer if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream);
354*5ff13fbcSAllan Jude BIT_addBits(&blockStream, sequences[n].mlBase, mlBits);
3554d3f1eafSConrad Meyer if (MEM_32bits() || (ofBits+mlBits+llBits > 56)) BIT_flushBits(&blockStream);
3564d3f1eafSConrad Meyer if (longOffsets) {
35737f1f268SConrad Meyer unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
3584d3f1eafSConrad Meyer if (extraBits) {
359*5ff13fbcSAllan Jude BIT_addBits(&blockStream, sequences[n].offBase, extraBits);
3604d3f1eafSConrad Meyer BIT_flushBits(&blockStream); /* (7)*/
3614d3f1eafSConrad Meyer }
362*5ff13fbcSAllan Jude BIT_addBits(&blockStream, sequences[n].offBase >> extraBits,
3634d3f1eafSConrad Meyer ofBits - extraBits); /* 31 */
3644d3f1eafSConrad Meyer } else {
365*5ff13fbcSAllan Jude BIT_addBits(&blockStream, sequences[n].offBase, ofBits); /* 31 */
3664d3f1eafSConrad Meyer }
3674d3f1eafSConrad Meyer BIT_flushBits(&blockStream); /* (7)*/
3684d3f1eafSConrad Meyer DEBUGLOG(7, "remaining space : %i", (int)(blockStream.endPtr - blockStream.ptr));
3694d3f1eafSConrad Meyer } }
3704d3f1eafSConrad Meyer
3714d3f1eafSConrad Meyer DEBUGLOG(6, "ZSTD_encodeSequences: flushing ML state with %u bits", stateMatchLength.stateLog);
3724d3f1eafSConrad Meyer FSE_flushCState(&blockStream, &stateMatchLength);
3734d3f1eafSConrad Meyer DEBUGLOG(6, "ZSTD_encodeSequences: flushing Off state with %u bits", stateOffsetBits.stateLog);
3744d3f1eafSConrad Meyer FSE_flushCState(&blockStream, &stateOffsetBits);
3754d3f1eafSConrad Meyer DEBUGLOG(6, "ZSTD_encodeSequences: flushing LL state with %u bits", stateLitLength.stateLog);
3764d3f1eafSConrad Meyer FSE_flushCState(&blockStream, &stateLitLength);
3774d3f1eafSConrad Meyer
3784d3f1eafSConrad Meyer { size_t const streamSize = BIT_closeCStream(&blockStream);
3794d3f1eafSConrad Meyer RETURN_ERROR_IF(streamSize==0, dstSize_tooSmall, "not enough space");
3804d3f1eafSConrad Meyer return streamSize;
3814d3f1eafSConrad Meyer }
3824d3f1eafSConrad Meyer }
3834d3f1eafSConrad Meyer
3844d3f1eafSConrad Meyer static size_t
ZSTD_encodeSequences_default(void * dst,size_t dstCapacity,FSE_CTable const * CTable_MatchLength,BYTE const * mlCodeTable,FSE_CTable const * CTable_OffsetBits,BYTE const * ofCodeTable,FSE_CTable const * CTable_LitLength,BYTE const * llCodeTable,seqDef const * sequences,size_t nbSeq,int longOffsets)3854d3f1eafSConrad Meyer ZSTD_encodeSequences_default(
3864d3f1eafSConrad Meyer void* dst, size_t dstCapacity,
3874d3f1eafSConrad Meyer FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
3884d3f1eafSConrad Meyer FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
3894d3f1eafSConrad Meyer FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
3904d3f1eafSConrad Meyer seqDef const* sequences, size_t nbSeq, int longOffsets)
3914d3f1eafSConrad Meyer {
3924d3f1eafSConrad Meyer return ZSTD_encodeSequences_body(dst, dstCapacity,
3934d3f1eafSConrad Meyer CTable_MatchLength, mlCodeTable,
3944d3f1eafSConrad Meyer CTable_OffsetBits, ofCodeTable,
3954d3f1eafSConrad Meyer CTable_LitLength, llCodeTable,
3964d3f1eafSConrad Meyer sequences, nbSeq, longOffsets);
3974d3f1eafSConrad Meyer }
3984d3f1eafSConrad Meyer
3994d3f1eafSConrad Meyer
4004d3f1eafSConrad Meyer #if DYNAMIC_BMI2
4014d3f1eafSConrad Meyer
402*5ff13fbcSAllan Jude static BMI2_TARGET_ATTRIBUTE size_t
ZSTD_encodeSequences_bmi2(void * dst,size_t dstCapacity,FSE_CTable const * CTable_MatchLength,BYTE const * mlCodeTable,FSE_CTable const * CTable_OffsetBits,BYTE const * ofCodeTable,FSE_CTable const * CTable_LitLength,BYTE const * llCodeTable,seqDef const * sequences,size_t nbSeq,int longOffsets)4034d3f1eafSConrad Meyer ZSTD_encodeSequences_bmi2(
4044d3f1eafSConrad Meyer void* dst, size_t dstCapacity,
4054d3f1eafSConrad Meyer FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
4064d3f1eafSConrad Meyer FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
4074d3f1eafSConrad Meyer FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
4084d3f1eafSConrad Meyer seqDef const* sequences, size_t nbSeq, int longOffsets)
4094d3f1eafSConrad Meyer {
4104d3f1eafSConrad Meyer return ZSTD_encodeSequences_body(dst, dstCapacity,
4114d3f1eafSConrad Meyer CTable_MatchLength, mlCodeTable,
4124d3f1eafSConrad Meyer CTable_OffsetBits, ofCodeTable,
4134d3f1eafSConrad Meyer CTable_LitLength, llCodeTable,
4144d3f1eafSConrad Meyer sequences, nbSeq, longOffsets);
4154d3f1eafSConrad Meyer }
4164d3f1eafSConrad Meyer
4174d3f1eafSConrad Meyer #endif
4184d3f1eafSConrad Meyer
ZSTD_encodeSequences(void * dst,size_t dstCapacity,FSE_CTable const * CTable_MatchLength,BYTE const * mlCodeTable,FSE_CTable const * CTable_OffsetBits,BYTE const * ofCodeTable,FSE_CTable const * CTable_LitLength,BYTE const * llCodeTable,seqDef const * sequences,size_t nbSeq,int longOffsets,int bmi2)4194d3f1eafSConrad Meyer size_t ZSTD_encodeSequences(
4204d3f1eafSConrad Meyer void* dst, size_t dstCapacity,
4214d3f1eafSConrad Meyer FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
4224d3f1eafSConrad Meyer FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
4234d3f1eafSConrad Meyer FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
4244d3f1eafSConrad Meyer seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2)
4254d3f1eafSConrad Meyer {
4264d3f1eafSConrad Meyer DEBUGLOG(5, "ZSTD_encodeSequences: dstCapacity = %u", (unsigned)dstCapacity);
4274d3f1eafSConrad Meyer #if DYNAMIC_BMI2
4284d3f1eafSConrad Meyer if (bmi2) {
4294d3f1eafSConrad Meyer return ZSTD_encodeSequences_bmi2(dst, dstCapacity,
4304d3f1eafSConrad Meyer CTable_MatchLength, mlCodeTable,
4314d3f1eafSConrad Meyer CTable_OffsetBits, ofCodeTable,
4324d3f1eafSConrad Meyer CTable_LitLength, llCodeTable,
4334d3f1eafSConrad Meyer sequences, nbSeq, longOffsets);
4344d3f1eafSConrad Meyer }
4354d3f1eafSConrad Meyer #endif
4364d3f1eafSConrad Meyer (void)bmi2;
4374d3f1eafSConrad Meyer return ZSTD_encodeSequences_default(dst, dstCapacity,
4384d3f1eafSConrad Meyer CTable_MatchLength, mlCodeTable,
4394d3f1eafSConrad Meyer CTable_OffsetBits, ofCodeTable,
4404d3f1eafSConrad Meyer CTable_LitLength, llCodeTable,
4414d3f1eafSConrad Meyer sequences, nbSeq, longOffsets);
4424d3f1eafSConrad Meyer }
443