xref: /freebsd-src/sys/contrib/zstd/lib/compress/zstd_compress_sequences.c (revision 5ff13fbc199bdf5f0572845351c68ee5ca828e71)
14d3f1eafSConrad Meyer /*
2*5ff13fbcSAllan Jude  * Copyright (c) Yann Collet, Facebook, Inc.
34d3f1eafSConrad Meyer  * All rights reserved.
44d3f1eafSConrad Meyer  *
54d3f1eafSConrad Meyer  * This source code is licensed under both the BSD-style license (found in the
64d3f1eafSConrad Meyer  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
74d3f1eafSConrad Meyer  * in the COPYING file in the root directory of this source tree).
84d3f1eafSConrad Meyer  * You may select, at your option, one of the above-listed licenses.
94d3f1eafSConrad Meyer  */
104d3f1eafSConrad Meyer 
114d3f1eafSConrad Meyer  /*-*************************************
124d3f1eafSConrad Meyer  *  Dependencies
134d3f1eafSConrad Meyer  ***************************************/
144d3f1eafSConrad Meyer #include "zstd_compress_sequences.h"
154d3f1eafSConrad Meyer 
164d3f1eafSConrad Meyer /**
174d3f1eafSConrad Meyer  * -log2(x / 256) lookup table for x in [0, 256).
184d3f1eafSConrad Meyer  * If x == 0: Return 0
194d3f1eafSConrad Meyer  * Else: Return floor(-log2(x / 256) * 256)
204d3f1eafSConrad Meyer  */
214d3f1eafSConrad Meyer static unsigned const kInverseProbabilityLog256[256] = {
224d3f1eafSConrad Meyer     0,    2048, 1792, 1642, 1536, 1453, 1386, 1329, 1280, 1236, 1197, 1162,
234d3f1eafSConrad Meyer     1130, 1100, 1073, 1047, 1024, 1001, 980,  960,  941,  923,  906,  889,
244d3f1eafSConrad Meyer     874,  859,  844,  830,  817,  804,  791,  779,  768,  756,  745,  734,
254d3f1eafSConrad Meyer     724,  714,  704,  694,  685,  676,  667,  658,  650,  642,  633,  626,
264d3f1eafSConrad Meyer     618,  610,  603,  595,  588,  581,  574,  567,  561,  554,  548,  542,
274d3f1eafSConrad Meyer     535,  529,  523,  517,  512,  506,  500,  495,  489,  484,  478,  473,
284d3f1eafSConrad Meyer     468,  463,  458,  453,  448,  443,  438,  434,  429,  424,  420,  415,
294d3f1eafSConrad Meyer     411,  407,  402,  398,  394,  390,  386,  382,  377,  373,  370,  366,
304d3f1eafSConrad Meyer     362,  358,  354,  350,  347,  343,  339,  336,  332,  329,  325,  322,
314d3f1eafSConrad Meyer     318,  315,  311,  308,  305,  302,  298,  295,  292,  289,  286,  282,
324d3f1eafSConrad Meyer     279,  276,  273,  270,  267,  264,  261,  258,  256,  253,  250,  247,
334d3f1eafSConrad Meyer     244,  241,  239,  236,  233,  230,  228,  225,  222,  220,  217,  215,
344d3f1eafSConrad Meyer     212,  209,  207,  204,  202,  199,  197,  194,  192,  190,  187,  185,
354d3f1eafSConrad Meyer     182,  180,  178,  175,  173,  171,  168,  166,  164,  162,  159,  157,
364d3f1eafSConrad Meyer     155,  153,  151,  149,  146,  144,  142,  140,  138,  136,  134,  132,
374d3f1eafSConrad Meyer     130,  128,  126,  123,  121,  119,  117,  115,  114,  112,  110,  108,
384d3f1eafSConrad Meyer     106,  104,  102,  100,  98,   96,   94,   93,   91,   89,   87,   85,
394d3f1eafSConrad Meyer     83,   82,   80,   78,   76,   74,   73,   71,   69,   67,   66,   64,
404d3f1eafSConrad Meyer     62,   61,   59,   57,   55,   54,   52,   50,   49,   47,   46,   44,
414d3f1eafSConrad Meyer     42,   41,   39,   37,   36,   34,   33,   31,   30,   28,   26,   25,
424d3f1eafSConrad Meyer     23,   22,   20,   19,   17,   16,   14,   13,   11,   10,   8,    7,
434d3f1eafSConrad Meyer     5,    4,    2,    1,
444d3f1eafSConrad Meyer };
454d3f1eafSConrad Meyer 
ZSTD_getFSEMaxSymbolValue(FSE_CTable const * ctable)464d3f1eafSConrad Meyer static unsigned ZSTD_getFSEMaxSymbolValue(FSE_CTable const* ctable) {
474d3f1eafSConrad Meyer   void const* ptr = ctable;
484d3f1eafSConrad Meyer   U16 const* u16ptr = (U16 const*)ptr;
494d3f1eafSConrad Meyer   U32 const maxSymbolValue = MEM_read16(u16ptr + 1);
504d3f1eafSConrad Meyer   return maxSymbolValue;
514d3f1eafSConrad Meyer }
524d3f1eafSConrad Meyer 
534d3f1eafSConrad Meyer /**
54f7cd7fe5SConrad Meyer  * Returns true if we should use ncount=-1 else we should
55f7cd7fe5SConrad Meyer  * use ncount=1 for low probability symbols instead.
56f7cd7fe5SConrad Meyer  */
ZSTD_useLowProbCount(size_t const nbSeq)57f7cd7fe5SConrad Meyer static unsigned ZSTD_useLowProbCount(size_t const nbSeq)
58f7cd7fe5SConrad Meyer {
59f7cd7fe5SConrad Meyer     /* Heuristic: This should cover most blocks <= 16K and
60f7cd7fe5SConrad Meyer      * start to fade out after 16K to about 32K depending on
61f7cd7fe5SConrad Meyer      * comprssibility.
62f7cd7fe5SConrad Meyer      */
63f7cd7fe5SConrad Meyer     return nbSeq >= 2048;
64f7cd7fe5SConrad Meyer }
65f7cd7fe5SConrad Meyer 
66f7cd7fe5SConrad Meyer /**
674d3f1eafSConrad Meyer  * Returns the cost in bytes of encoding the normalized count header.
684d3f1eafSConrad Meyer  * Returns an error if any of the helper functions return an error.
694d3f1eafSConrad Meyer  */
ZSTD_NCountCost(unsigned const * count,unsigned const max,size_t const nbSeq,unsigned const FSELog)704d3f1eafSConrad Meyer static size_t ZSTD_NCountCost(unsigned const* count, unsigned const max,
714d3f1eafSConrad Meyer                               size_t const nbSeq, unsigned const FSELog)
724d3f1eafSConrad Meyer {
734d3f1eafSConrad Meyer     BYTE wksp[FSE_NCOUNTBOUND];
744d3f1eafSConrad Meyer     S16 norm[MaxSeq + 1];
754d3f1eafSConrad Meyer     const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max);
76f7cd7fe5SConrad Meyer     FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq, max, ZSTD_useLowProbCount(nbSeq)), "");
774d3f1eafSConrad Meyer     return FSE_writeNCount(wksp, sizeof(wksp), norm, max, tableLog);
784d3f1eafSConrad Meyer }
794d3f1eafSConrad Meyer 
804d3f1eafSConrad Meyer /**
814d3f1eafSConrad Meyer  * Returns the cost in bits of encoding the distribution described by count
824d3f1eafSConrad Meyer  * using the entropy bound.
834d3f1eafSConrad Meyer  */
ZSTD_entropyCost(unsigned const * count,unsigned const max,size_t const total)844d3f1eafSConrad Meyer static size_t ZSTD_entropyCost(unsigned const* count, unsigned const max, size_t const total)
854d3f1eafSConrad Meyer {
864d3f1eafSConrad Meyer     unsigned cost = 0;
874d3f1eafSConrad Meyer     unsigned s;
88*5ff13fbcSAllan Jude 
89*5ff13fbcSAllan Jude     assert(total > 0);
904d3f1eafSConrad Meyer     for (s = 0; s <= max; ++s) {
914d3f1eafSConrad Meyer         unsigned norm = (unsigned)((256 * count[s]) / total);
924d3f1eafSConrad Meyer         if (count[s] != 0 && norm == 0)
934d3f1eafSConrad Meyer             norm = 1;
944d3f1eafSConrad Meyer         assert(count[s] < total);
954d3f1eafSConrad Meyer         cost += count[s] * kInverseProbabilityLog256[norm];
964d3f1eafSConrad Meyer     }
974d3f1eafSConrad Meyer     return cost >> 8;
984d3f1eafSConrad Meyer }
994d3f1eafSConrad Meyer 
1004d3f1eafSConrad Meyer /**
1014d3f1eafSConrad Meyer  * Returns the cost in bits of encoding the distribution in count using ctable.
1024d3f1eafSConrad Meyer  * Returns an error if ctable cannot represent all the symbols in count.
1034d3f1eafSConrad Meyer  */
ZSTD_fseBitCost(FSE_CTable const * ctable,unsigned const * count,unsigned const max)10437f1f268SConrad Meyer size_t ZSTD_fseBitCost(
1054d3f1eafSConrad Meyer     FSE_CTable const* ctable,
1064d3f1eafSConrad Meyer     unsigned const* count,
1074d3f1eafSConrad Meyer     unsigned const max)
1084d3f1eafSConrad Meyer {
1094d3f1eafSConrad Meyer     unsigned const kAccuracyLog = 8;
1104d3f1eafSConrad Meyer     size_t cost = 0;
1114d3f1eafSConrad Meyer     unsigned s;
1124d3f1eafSConrad Meyer     FSE_CState_t cstate;
1134d3f1eafSConrad Meyer     FSE_initCState(&cstate, ctable);
11437f1f268SConrad Meyer     if (ZSTD_getFSEMaxSymbolValue(ctable) < max) {
11537f1f268SConrad Meyer         DEBUGLOG(5, "Repeat FSE_CTable has maxSymbolValue %u < %u",
1164d3f1eafSConrad Meyer                     ZSTD_getFSEMaxSymbolValue(ctable), max);
11737f1f268SConrad Meyer         return ERROR(GENERIC);
11837f1f268SConrad Meyer     }
1194d3f1eafSConrad Meyer     for (s = 0; s <= max; ++s) {
1204d3f1eafSConrad Meyer         unsigned const tableLog = cstate.stateLog;
1214d3f1eafSConrad Meyer         unsigned const badCost = (tableLog + 1) << kAccuracyLog;
1224d3f1eafSConrad Meyer         unsigned const bitCost = FSE_bitCost(cstate.symbolTT, tableLog, s, kAccuracyLog);
1234d3f1eafSConrad Meyer         if (count[s] == 0)
1244d3f1eafSConrad Meyer             continue;
12537f1f268SConrad Meyer         if (bitCost >= badCost) {
12637f1f268SConrad Meyer             DEBUGLOG(5, "Repeat FSE_CTable has Prob[%u] == 0", s);
12737f1f268SConrad Meyer             return ERROR(GENERIC);
12837f1f268SConrad Meyer         }
12937f1f268SConrad Meyer         cost += (size_t)count[s] * bitCost;
1304d3f1eafSConrad Meyer     }
1314d3f1eafSConrad Meyer     return cost >> kAccuracyLog;
1324d3f1eafSConrad Meyer }
1334d3f1eafSConrad Meyer 
1344d3f1eafSConrad Meyer /**
1354d3f1eafSConrad Meyer  * Returns the cost in bits of encoding the distribution in count using the
1364d3f1eafSConrad Meyer  * table described by norm. The max symbol support by norm is assumed >= max.
1374d3f1eafSConrad Meyer  * norm must be valid for every symbol with non-zero probability in count.
1384d3f1eafSConrad Meyer  */
ZSTD_crossEntropyCost(short const * norm,unsigned accuracyLog,unsigned const * count,unsigned const max)13937f1f268SConrad Meyer size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog,
1404d3f1eafSConrad Meyer                              unsigned const* count, unsigned const max)
1414d3f1eafSConrad Meyer {
1424d3f1eafSConrad Meyer     unsigned const shift = 8 - accuracyLog;
1434d3f1eafSConrad Meyer     size_t cost = 0;
1444d3f1eafSConrad Meyer     unsigned s;
1454d3f1eafSConrad Meyer     assert(accuracyLog <= 8);
1464d3f1eafSConrad Meyer     for (s = 0; s <= max; ++s) {
14737f1f268SConrad Meyer         unsigned const normAcc = (norm[s] != -1) ? (unsigned)norm[s] : 1;
1484d3f1eafSConrad Meyer         unsigned const norm256 = normAcc << shift;
1494d3f1eafSConrad Meyer         assert(norm256 > 0);
1504d3f1eafSConrad Meyer         assert(norm256 < 256);
1514d3f1eafSConrad Meyer         cost += count[s] * kInverseProbabilityLog256[norm256];
1524d3f1eafSConrad Meyer     }
1534d3f1eafSConrad Meyer     return cost >> 8;
1544d3f1eafSConrad Meyer }
1554d3f1eafSConrad Meyer 
1564d3f1eafSConrad Meyer symbolEncodingType_e
ZSTD_selectEncodingType(FSE_repeat * repeatMode,unsigned const * count,unsigned const max,size_t const mostFrequent,size_t nbSeq,unsigned const FSELog,FSE_CTable const * prevCTable,short const * defaultNorm,U32 defaultNormLog,ZSTD_defaultPolicy_e const isDefaultAllowed,ZSTD_strategy const strategy)1574d3f1eafSConrad Meyer ZSTD_selectEncodingType(
1584d3f1eafSConrad Meyer         FSE_repeat* repeatMode, unsigned const* count, unsigned const max,
1594d3f1eafSConrad Meyer         size_t const mostFrequent, size_t nbSeq, unsigned const FSELog,
1604d3f1eafSConrad Meyer         FSE_CTable const* prevCTable,
1614d3f1eafSConrad Meyer         short const* defaultNorm, U32 defaultNormLog,
1624d3f1eafSConrad Meyer         ZSTD_defaultPolicy_e const isDefaultAllowed,
1634d3f1eafSConrad Meyer         ZSTD_strategy const strategy)
1644d3f1eafSConrad Meyer {
1654d3f1eafSConrad Meyer     ZSTD_STATIC_ASSERT(ZSTD_defaultDisallowed == 0 && ZSTD_defaultAllowed != 0);
1664d3f1eafSConrad Meyer     if (mostFrequent == nbSeq) {
1674d3f1eafSConrad Meyer         *repeatMode = FSE_repeat_none;
1684d3f1eafSConrad Meyer         if (isDefaultAllowed && nbSeq <= 2) {
1694d3f1eafSConrad Meyer             /* Prefer set_basic over set_rle when there are 2 or less symbols,
1704d3f1eafSConrad Meyer              * since RLE uses 1 byte, but set_basic uses 5-6 bits per symbol.
1714d3f1eafSConrad Meyer              * If basic encoding isn't possible, always choose RLE.
1724d3f1eafSConrad Meyer              */
1734d3f1eafSConrad Meyer             DEBUGLOG(5, "Selected set_basic");
1744d3f1eafSConrad Meyer             return set_basic;
1754d3f1eafSConrad Meyer         }
1764d3f1eafSConrad Meyer         DEBUGLOG(5, "Selected set_rle");
1774d3f1eafSConrad Meyer         return set_rle;
1784d3f1eafSConrad Meyer     }
1794d3f1eafSConrad Meyer     if (strategy < ZSTD_lazy) {
1804d3f1eafSConrad Meyer         if (isDefaultAllowed) {
1814d3f1eafSConrad Meyer             size_t const staticFse_nbSeq_max = 1000;
1824d3f1eafSConrad Meyer             size_t const mult = 10 - strategy;
1834d3f1eafSConrad Meyer             size_t const baseLog = 3;
1844d3f1eafSConrad Meyer             size_t const dynamicFse_nbSeq_min = (((size_t)1 << defaultNormLog) * mult) >> baseLog;  /* 28-36 for offset, 56-72 for lengths */
1854d3f1eafSConrad Meyer             assert(defaultNormLog >= 5 && defaultNormLog <= 6);  /* xx_DEFAULTNORMLOG */
1864d3f1eafSConrad Meyer             assert(mult <= 9 && mult >= 7);
1874d3f1eafSConrad Meyer             if ( (*repeatMode == FSE_repeat_valid)
1884d3f1eafSConrad Meyer               && (nbSeq < staticFse_nbSeq_max) ) {
1894d3f1eafSConrad Meyer                 DEBUGLOG(5, "Selected set_repeat");
1904d3f1eafSConrad Meyer                 return set_repeat;
1914d3f1eafSConrad Meyer             }
1924d3f1eafSConrad Meyer             if ( (nbSeq < dynamicFse_nbSeq_min)
1934d3f1eafSConrad Meyer               || (mostFrequent < (nbSeq >> (defaultNormLog-1))) ) {
1944d3f1eafSConrad Meyer                 DEBUGLOG(5, "Selected set_basic");
1954d3f1eafSConrad Meyer                 /* The format allows default tables to be repeated, but it isn't useful.
1964d3f1eafSConrad Meyer                  * When using simple heuristics to select encoding type, we don't want
1974d3f1eafSConrad Meyer                  * to confuse these tables with dictionaries. When running more careful
1984d3f1eafSConrad Meyer                  * analysis, we don't need to waste time checking both repeating tables
1994d3f1eafSConrad Meyer                  * and default tables.
2004d3f1eafSConrad Meyer                  */
2014d3f1eafSConrad Meyer                 *repeatMode = FSE_repeat_none;
2024d3f1eafSConrad Meyer                 return set_basic;
2034d3f1eafSConrad Meyer             }
2044d3f1eafSConrad Meyer         }
2054d3f1eafSConrad Meyer     } else {
2064d3f1eafSConrad Meyer         size_t const basicCost = isDefaultAllowed ? ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, count, max) : ERROR(GENERIC);
2074d3f1eafSConrad Meyer         size_t const repeatCost = *repeatMode != FSE_repeat_none ? ZSTD_fseBitCost(prevCTable, count, max) : ERROR(GENERIC);
2084d3f1eafSConrad Meyer         size_t const NCountCost = ZSTD_NCountCost(count, max, nbSeq, FSELog);
2094d3f1eafSConrad Meyer         size_t const compressedCost = (NCountCost << 3) + ZSTD_entropyCost(count, max, nbSeq);
2104d3f1eafSConrad Meyer 
2114d3f1eafSConrad Meyer         if (isDefaultAllowed) {
2124d3f1eafSConrad Meyer             assert(!ZSTD_isError(basicCost));
2134d3f1eafSConrad Meyer             assert(!(*repeatMode == FSE_repeat_valid && ZSTD_isError(repeatCost)));
2144d3f1eafSConrad Meyer         }
2154d3f1eafSConrad Meyer         assert(!ZSTD_isError(NCountCost));
2164d3f1eafSConrad Meyer         assert(compressedCost < ERROR(maxCode));
2174d3f1eafSConrad Meyer         DEBUGLOG(5, "Estimated bit costs: basic=%u\trepeat=%u\tcompressed=%u",
2184d3f1eafSConrad Meyer                     (unsigned)basicCost, (unsigned)repeatCost, (unsigned)compressedCost);
2194d3f1eafSConrad Meyer         if (basicCost <= repeatCost && basicCost <= compressedCost) {
2204d3f1eafSConrad Meyer             DEBUGLOG(5, "Selected set_basic");
2214d3f1eafSConrad Meyer             assert(isDefaultAllowed);
2224d3f1eafSConrad Meyer             *repeatMode = FSE_repeat_none;
2234d3f1eafSConrad Meyer             return set_basic;
2244d3f1eafSConrad Meyer         }
2254d3f1eafSConrad Meyer         if (repeatCost <= compressedCost) {
2264d3f1eafSConrad Meyer             DEBUGLOG(5, "Selected set_repeat");
2274d3f1eafSConrad Meyer             assert(!ZSTD_isError(repeatCost));
2284d3f1eafSConrad Meyer             return set_repeat;
2294d3f1eafSConrad Meyer         }
2304d3f1eafSConrad Meyer         assert(compressedCost < basicCost && compressedCost < repeatCost);
2314d3f1eafSConrad Meyer     }
2324d3f1eafSConrad Meyer     DEBUGLOG(5, "Selected set_compressed");
2334d3f1eafSConrad Meyer     *repeatMode = FSE_repeat_check;
2344d3f1eafSConrad Meyer     return set_compressed;
2354d3f1eafSConrad Meyer }
2364d3f1eafSConrad Meyer 
237*5ff13fbcSAllan Jude typedef struct {
238*5ff13fbcSAllan Jude     S16 norm[MaxSeq + 1];
239*5ff13fbcSAllan Jude     U32 wksp[FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(MaxSeq, MaxFSELog)];
240*5ff13fbcSAllan Jude } ZSTD_BuildCTableWksp;
241*5ff13fbcSAllan Jude 
2424d3f1eafSConrad Meyer size_t
ZSTD_buildCTable(void * dst,size_t dstCapacity,FSE_CTable * nextCTable,U32 FSELog,symbolEncodingType_e type,unsigned * count,U32 max,const BYTE * codeTable,size_t nbSeq,const S16 * defaultNorm,U32 defaultNormLog,U32 defaultMax,const FSE_CTable * prevCTable,size_t prevCTableSize,void * entropyWorkspace,size_t entropyWorkspaceSize)2434d3f1eafSConrad Meyer ZSTD_buildCTable(void* dst, size_t dstCapacity,
2444d3f1eafSConrad Meyer                 FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type,
2454d3f1eafSConrad Meyer                 unsigned* count, U32 max,
2464d3f1eafSConrad Meyer                 const BYTE* codeTable, size_t nbSeq,
2474d3f1eafSConrad Meyer                 const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax,
2484d3f1eafSConrad Meyer                 const FSE_CTable* prevCTable, size_t prevCTableSize,
2499cbefe25SConrad Meyer                 void* entropyWorkspace, size_t entropyWorkspaceSize)
2504d3f1eafSConrad Meyer {
2514d3f1eafSConrad Meyer     BYTE* op = (BYTE*)dst;
2524d3f1eafSConrad Meyer     const BYTE* const oend = op + dstCapacity;
2534d3f1eafSConrad Meyer     DEBUGLOG(6, "ZSTD_buildCTable (dstCapacity=%u)", (unsigned)dstCapacity);
2544d3f1eafSConrad Meyer 
2554d3f1eafSConrad Meyer     switch (type) {
2564d3f1eafSConrad Meyer     case set_rle:
25737f1f268SConrad Meyer         FORWARD_IF_ERROR(FSE_buildCTable_rle(nextCTable, (BYTE)max), "");
25837f1f268SConrad Meyer         RETURN_ERROR_IF(dstCapacity==0, dstSize_tooSmall, "not enough space");
2594d3f1eafSConrad Meyer         *op = codeTable[0];
2604d3f1eafSConrad Meyer         return 1;
2614d3f1eafSConrad Meyer     case set_repeat:
262f7cd7fe5SConrad Meyer         ZSTD_memcpy(nextCTable, prevCTable, prevCTableSize);
2634d3f1eafSConrad Meyer         return 0;
2644d3f1eafSConrad Meyer     case set_basic:
26537f1f268SConrad Meyer         FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, entropyWorkspace, entropyWorkspaceSize), "");  /* note : could be pre-calculated */
2664d3f1eafSConrad Meyer         return 0;
2674d3f1eafSConrad Meyer     case set_compressed: {
268*5ff13fbcSAllan Jude         ZSTD_BuildCTableWksp* wksp = (ZSTD_BuildCTableWksp*)entropyWorkspace;
2694d3f1eafSConrad Meyer         size_t nbSeq_1 = nbSeq;
2704d3f1eafSConrad Meyer         const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max);
2714d3f1eafSConrad Meyer         if (count[codeTable[nbSeq-1]] > 1) {
2724d3f1eafSConrad Meyer             count[codeTable[nbSeq-1]]--;
2734d3f1eafSConrad Meyer             nbSeq_1--;
2744d3f1eafSConrad Meyer         }
2754d3f1eafSConrad Meyer         assert(nbSeq_1 > 1);
276*5ff13fbcSAllan Jude         assert(entropyWorkspaceSize >= sizeof(ZSTD_BuildCTableWksp));
277*5ff13fbcSAllan Jude         (void)entropyWorkspaceSize;
278*5ff13fbcSAllan Jude         FORWARD_IF_ERROR(FSE_normalizeCount(wksp->norm, tableLog, count, nbSeq_1, max, ZSTD_useLowProbCount(nbSeq_1)), "FSE_normalizeCount failed");
279*5ff13fbcSAllan Jude         assert(oend >= op);
280*5ff13fbcSAllan Jude         {   size_t const NCountSize = FSE_writeNCount(op, (size_t)(oend - op), wksp->norm, max, tableLog);   /* overflow protected */
28137f1f268SConrad Meyer             FORWARD_IF_ERROR(NCountSize, "FSE_writeNCount failed");
282*5ff13fbcSAllan Jude             FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, wksp->norm, max, tableLog, wksp->wksp, sizeof(wksp->wksp)), "FSE_buildCTable_wksp failed");
2834d3f1eafSConrad Meyer             return NCountSize;
2844d3f1eafSConrad Meyer         }
2854d3f1eafSConrad Meyer     }
28637f1f268SConrad Meyer     default: assert(0); RETURN_ERROR(GENERIC, "impossible to reach");
2874d3f1eafSConrad Meyer     }
2884d3f1eafSConrad Meyer }
2894d3f1eafSConrad Meyer 
2904d3f1eafSConrad Meyer FORCE_INLINE_TEMPLATE size_t
ZSTD_encodeSequences_body(void * dst,size_t dstCapacity,FSE_CTable const * CTable_MatchLength,BYTE const * mlCodeTable,FSE_CTable const * CTable_OffsetBits,BYTE const * ofCodeTable,FSE_CTable const * CTable_LitLength,BYTE const * llCodeTable,seqDef const * sequences,size_t nbSeq,int longOffsets)2914d3f1eafSConrad Meyer ZSTD_encodeSequences_body(
2924d3f1eafSConrad Meyer             void* dst, size_t dstCapacity,
2934d3f1eafSConrad Meyer             FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
2944d3f1eafSConrad Meyer             FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
2954d3f1eafSConrad Meyer             FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
2964d3f1eafSConrad Meyer             seqDef const* sequences, size_t nbSeq, int longOffsets)
2974d3f1eafSConrad Meyer {
2984d3f1eafSConrad Meyer     BIT_CStream_t blockStream;
2994d3f1eafSConrad Meyer     FSE_CState_t  stateMatchLength;
3004d3f1eafSConrad Meyer     FSE_CState_t  stateOffsetBits;
3014d3f1eafSConrad Meyer     FSE_CState_t  stateLitLength;
3024d3f1eafSConrad Meyer 
3034d3f1eafSConrad Meyer     RETURN_ERROR_IF(
3044d3f1eafSConrad Meyer         ERR_isError(BIT_initCStream(&blockStream, dst, dstCapacity)),
3054d3f1eafSConrad Meyer         dstSize_tooSmall, "not enough space remaining");
3064d3f1eafSConrad Meyer     DEBUGLOG(6, "available space for bitstream : %i  (dstCapacity=%u)",
3074d3f1eafSConrad Meyer                 (int)(blockStream.endPtr - blockStream.startPtr),
3084d3f1eafSConrad Meyer                 (unsigned)dstCapacity);
3094d3f1eafSConrad Meyer 
3104d3f1eafSConrad Meyer     /* first symbols */
3114d3f1eafSConrad Meyer     FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]);
3124d3f1eafSConrad Meyer     FSE_initCState2(&stateOffsetBits,  CTable_OffsetBits,  ofCodeTable[nbSeq-1]);
3134d3f1eafSConrad Meyer     FSE_initCState2(&stateLitLength,   CTable_LitLength,   llCodeTable[nbSeq-1]);
3144d3f1eafSConrad Meyer     BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]);
3154d3f1eafSConrad Meyer     if (MEM_32bits()) BIT_flushBits(&blockStream);
316*5ff13fbcSAllan Jude     BIT_addBits(&blockStream, sequences[nbSeq-1].mlBase, ML_bits[mlCodeTable[nbSeq-1]]);
3174d3f1eafSConrad Meyer     if (MEM_32bits()) BIT_flushBits(&blockStream);
3184d3f1eafSConrad Meyer     if (longOffsets) {
3194d3f1eafSConrad Meyer         U32 const ofBits = ofCodeTable[nbSeq-1];
32037f1f268SConrad Meyer         unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
3214d3f1eafSConrad Meyer         if (extraBits) {
322*5ff13fbcSAllan Jude             BIT_addBits(&blockStream, sequences[nbSeq-1].offBase, extraBits);
3234d3f1eafSConrad Meyer             BIT_flushBits(&blockStream);
3244d3f1eafSConrad Meyer         }
325*5ff13fbcSAllan Jude         BIT_addBits(&blockStream, sequences[nbSeq-1].offBase >> extraBits,
3264d3f1eafSConrad Meyer                     ofBits - extraBits);
3274d3f1eafSConrad Meyer     } else {
328*5ff13fbcSAllan Jude         BIT_addBits(&blockStream, sequences[nbSeq-1].offBase, ofCodeTable[nbSeq-1]);
3294d3f1eafSConrad Meyer     }
3304d3f1eafSConrad Meyer     BIT_flushBits(&blockStream);
3314d3f1eafSConrad Meyer 
3324d3f1eafSConrad Meyer     {   size_t n;
3334d3f1eafSConrad Meyer         for (n=nbSeq-2 ; n<nbSeq ; n--) {      /* intentional underflow */
3344d3f1eafSConrad Meyer             BYTE const llCode = llCodeTable[n];
3354d3f1eafSConrad Meyer             BYTE const ofCode = ofCodeTable[n];
3364d3f1eafSConrad Meyer             BYTE const mlCode = mlCodeTable[n];
3374d3f1eafSConrad Meyer             U32  const llBits = LL_bits[llCode];
3384d3f1eafSConrad Meyer             U32  const ofBits = ofCode;
3394d3f1eafSConrad Meyer             U32  const mlBits = ML_bits[mlCode];
3404d3f1eafSConrad Meyer             DEBUGLOG(6, "encoding: litlen:%2u - matchlen:%2u - offCode:%7u",
3414d3f1eafSConrad Meyer                         (unsigned)sequences[n].litLength,
342*5ff13fbcSAllan Jude                         (unsigned)sequences[n].mlBase + MINMATCH,
343*5ff13fbcSAllan Jude                         (unsigned)sequences[n].offBase);
3444d3f1eafSConrad Meyer                                                                             /* 32b*/  /* 64b*/
3454d3f1eafSConrad Meyer                                                                             /* (7)*/  /* (7)*/
3464d3f1eafSConrad Meyer             FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode);       /* 15 */  /* 15 */
3474d3f1eafSConrad Meyer             FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode);      /* 24 */  /* 24 */
3484d3f1eafSConrad Meyer             if (MEM_32bits()) BIT_flushBits(&blockStream);                  /* (7)*/
3494d3f1eafSConrad Meyer             FSE_encodeSymbol(&blockStream, &stateLitLength, llCode);        /* 16 */  /* 33 */
3504d3f1eafSConrad Meyer             if (MEM_32bits() || (ofBits+mlBits+llBits >= 64-7-(LLFSELog+MLFSELog+OffFSELog)))
3514d3f1eafSConrad Meyer                 BIT_flushBits(&blockStream);                                /* (7)*/
3524d3f1eafSConrad Meyer             BIT_addBits(&blockStream, sequences[n].litLength, llBits);
3534d3f1eafSConrad Meyer             if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream);
354*5ff13fbcSAllan Jude             BIT_addBits(&blockStream, sequences[n].mlBase, mlBits);
3554d3f1eafSConrad Meyer             if (MEM_32bits() || (ofBits+mlBits+llBits > 56)) BIT_flushBits(&blockStream);
3564d3f1eafSConrad Meyer             if (longOffsets) {
35737f1f268SConrad Meyer                 unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
3584d3f1eafSConrad Meyer                 if (extraBits) {
359*5ff13fbcSAllan Jude                     BIT_addBits(&blockStream, sequences[n].offBase, extraBits);
3604d3f1eafSConrad Meyer                     BIT_flushBits(&blockStream);                            /* (7)*/
3614d3f1eafSConrad Meyer                 }
362*5ff13fbcSAllan Jude                 BIT_addBits(&blockStream, sequences[n].offBase >> extraBits,
3634d3f1eafSConrad Meyer                             ofBits - extraBits);                            /* 31 */
3644d3f1eafSConrad Meyer             } else {
365*5ff13fbcSAllan Jude                 BIT_addBits(&blockStream, sequences[n].offBase, ofBits);     /* 31 */
3664d3f1eafSConrad Meyer             }
3674d3f1eafSConrad Meyer             BIT_flushBits(&blockStream);                                    /* (7)*/
3684d3f1eafSConrad Meyer             DEBUGLOG(7, "remaining space : %i", (int)(blockStream.endPtr - blockStream.ptr));
3694d3f1eafSConrad Meyer     }   }
3704d3f1eafSConrad Meyer 
3714d3f1eafSConrad Meyer     DEBUGLOG(6, "ZSTD_encodeSequences: flushing ML state with %u bits", stateMatchLength.stateLog);
3724d3f1eafSConrad Meyer     FSE_flushCState(&blockStream, &stateMatchLength);
3734d3f1eafSConrad Meyer     DEBUGLOG(6, "ZSTD_encodeSequences: flushing Off state with %u bits", stateOffsetBits.stateLog);
3744d3f1eafSConrad Meyer     FSE_flushCState(&blockStream, &stateOffsetBits);
3754d3f1eafSConrad Meyer     DEBUGLOG(6, "ZSTD_encodeSequences: flushing LL state with %u bits", stateLitLength.stateLog);
3764d3f1eafSConrad Meyer     FSE_flushCState(&blockStream, &stateLitLength);
3774d3f1eafSConrad Meyer 
3784d3f1eafSConrad Meyer     {   size_t const streamSize = BIT_closeCStream(&blockStream);
3794d3f1eafSConrad Meyer         RETURN_ERROR_IF(streamSize==0, dstSize_tooSmall, "not enough space");
3804d3f1eafSConrad Meyer         return streamSize;
3814d3f1eafSConrad Meyer     }
3824d3f1eafSConrad Meyer }
3834d3f1eafSConrad Meyer 
3844d3f1eafSConrad Meyer static size_t
ZSTD_encodeSequences_default(void * dst,size_t dstCapacity,FSE_CTable const * CTable_MatchLength,BYTE const * mlCodeTable,FSE_CTable const * CTable_OffsetBits,BYTE const * ofCodeTable,FSE_CTable const * CTable_LitLength,BYTE const * llCodeTable,seqDef const * sequences,size_t nbSeq,int longOffsets)3854d3f1eafSConrad Meyer ZSTD_encodeSequences_default(
3864d3f1eafSConrad Meyer             void* dst, size_t dstCapacity,
3874d3f1eafSConrad Meyer             FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
3884d3f1eafSConrad Meyer             FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
3894d3f1eafSConrad Meyer             FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
3904d3f1eafSConrad Meyer             seqDef const* sequences, size_t nbSeq, int longOffsets)
3914d3f1eafSConrad Meyer {
3924d3f1eafSConrad Meyer     return ZSTD_encodeSequences_body(dst, dstCapacity,
3934d3f1eafSConrad Meyer                                     CTable_MatchLength, mlCodeTable,
3944d3f1eafSConrad Meyer                                     CTable_OffsetBits, ofCodeTable,
3954d3f1eafSConrad Meyer                                     CTable_LitLength, llCodeTable,
3964d3f1eafSConrad Meyer                                     sequences, nbSeq, longOffsets);
3974d3f1eafSConrad Meyer }
3984d3f1eafSConrad Meyer 
3994d3f1eafSConrad Meyer 
4004d3f1eafSConrad Meyer #if DYNAMIC_BMI2
4014d3f1eafSConrad Meyer 
402*5ff13fbcSAllan Jude static BMI2_TARGET_ATTRIBUTE size_t
ZSTD_encodeSequences_bmi2(void * dst,size_t dstCapacity,FSE_CTable const * CTable_MatchLength,BYTE const * mlCodeTable,FSE_CTable const * CTable_OffsetBits,BYTE const * ofCodeTable,FSE_CTable const * CTable_LitLength,BYTE const * llCodeTable,seqDef const * sequences,size_t nbSeq,int longOffsets)4034d3f1eafSConrad Meyer ZSTD_encodeSequences_bmi2(
4044d3f1eafSConrad Meyer             void* dst, size_t dstCapacity,
4054d3f1eafSConrad Meyer             FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
4064d3f1eafSConrad Meyer             FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
4074d3f1eafSConrad Meyer             FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
4084d3f1eafSConrad Meyer             seqDef const* sequences, size_t nbSeq, int longOffsets)
4094d3f1eafSConrad Meyer {
4104d3f1eafSConrad Meyer     return ZSTD_encodeSequences_body(dst, dstCapacity,
4114d3f1eafSConrad Meyer                                     CTable_MatchLength, mlCodeTable,
4124d3f1eafSConrad Meyer                                     CTable_OffsetBits, ofCodeTable,
4134d3f1eafSConrad Meyer                                     CTable_LitLength, llCodeTable,
4144d3f1eafSConrad Meyer                                     sequences, nbSeq, longOffsets);
4154d3f1eafSConrad Meyer }
4164d3f1eafSConrad Meyer 
4174d3f1eafSConrad Meyer #endif
4184d3f1eafSConrad Meyer 
ZSTD_encodeSequences(void * dst,size_t dstCapacity,FSE_CTable const * CTable_MatchLength,BYTE const * mlCodeTable,FSE_CTable const * CTable_OffsetBits,BYTE const * ofCodeTable,FSE_CTable const * CTable_LitLength,BYTE const * llCodeTable,seqDef const * sequences,size_t nbSeq,int longOffsets,int bmi2)4194d3f1eafSConrad Meyer size_t ZSTD_encodeSequences(
4204d3f1eafSConrad Meyer             void* dst, size_t dstCapacity,
4214d3f1eafSConrad Meyer             FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
4224d3f1eafSConrad Meyer             FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
4234d3f1eafSConrad Meyer             FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
4244d3f1eafSConrad Meyer             seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2)
4254d3f1eafSConrad Meyer {
4264d3f1eafSConrad Meyer     DEBUGLOG(5, "ZSTD_encodeSequences: dstCapacity = %u", (unsigned)dstCapacity);
4274d3f1eafSConrad Meyer #if DYNAMIC_BMI2
4284d3f1eafSConrad Meyer     if (bmi2) {
4294d3f1eafSConrad Meyer         return ZSTD_encodeSequences_bmi2(dst, dstCapacity,
4304d3f1eafSConrad Meyer                                          CTable_MatchLength, mlCodeTable,
4314d3f1eafSConrad Meyer                                          CTable_OffsetBits, ofCodeTable,
4324d3f1eafSConrad Meyer                                          CTable_LitLength, llCodeTable,
4334d3f1eafSConrad Meyer                                          sequences, nbSeq, longOffsets);
4344d3f1eafSConrad Meyer     }
4354d3f1eafSConrad Meyer #endif
4364d3f1eafSConrad Meyer     (void)bmi2;
4374d3f1eafSConrad Meyer     return ZSTD_encodeSequences_default(dst, dstCapacity,
4384d3f1eafSConrad Meyer                                         CTable_MatchLength, mlCodeTable,
4394d3f1eafSConrad Meyer                                         CTable_OffsetBits, ofCodeTable,
4404d3f1eafSConrad Meyer                                         CTable_LitLength, llCodeTable,
4414d3f1eafSConrad Meyer                                         sequences, nbSeq, longOffsets);
4424d3f1eafSConrad Meyer }
443