xref: /netbsd-src/external/bsd/zstd/dist/lib/legacy/zstd_v01.c (revision 3117ece4fc4a4ca4489ba793710b60b0d26bab6c)
1*3117ece4Schristos /*
2*3117ece4Schristos  * Copyright (c) Yann Collet, Meta Platforms, Inc. and affiliates.
3*3117ece4Schristos  * All rights reserved.
4*3117ece4Schristos  *
5*3117ece4Schristos  * This source code is licensed under both the BSD-style license (found in the
6*3117ece4Schristos  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7*3117ece4Schristos  * in the COPYING file in the root directory of this source tree).
8*3117ece4Schristos  * You may select, at your option, one of the above-listed licenses.
9*3117ece4Schristos  */
10*3117ece4Schristos 
11*3117ece4Schristos 
12*3117ece4Schristos /******************************************
13*3117ece4Schristos *  Includes
14*3117ece4Schristos ******************************************/
15*3117ece4Schristos #include <stddef.h>    /* size_t, ptrdiff_t */
16*3117ece4Schristos #include "zstd_v01.h"
17*3117ece4Schristos #include "../common/compiler.h"
18*3117ece4Schristos #include "../common/error_private.h"
19*3117ece4Schristos 
20*3117ece4Schristos 
21*3117ece4Schristos /******************************************
22*3117ece4Schristos *  Static allocation
23*3117ece4Schristos ******************************************/
24*3117ece4Schristos /* You can statically allocate FSE CTable/DTable as a table of unsigned using below macro */
25*3117ece4Schristos #define FSE_DTABLE_SIZE_U32(maxTableLog)                   (1 + (1<<maxTableLog))
26*3117ece4Schristos 
27*3117ece4Schristos /* You can statically allocate Huff0 DTable as a table of unsigned short using below macro */
28*3117ece4Schristos #define HUF_DTABLE_SIZE_U16(maxTableLog)   (1 + (1<<maxTableLog))
29*3117ece4Schristos #define HUF_CREATE_STATIC_DTABLE(DTable, maxTableLog) \
30*3117ece4Schristos         unsigned short DTable[HUF_DTABLE_SIZE_U16(maxTableLog)] = { maxTableLog }
31*3117ece4Schristos 
32*3117ece4Schristos 
33*3117ece4Schristos /******************************************
34*3117ece4Schristos *  Error Management
35*3117ece4Schristos ******************************************/
36*3117ece4Schristos #define FSE_LIST_ERRORS(ITEM) \
37*3117ece4Schristos         ITEM(FSE_OK_NoError) ITEM(FSE_ERROR_GENERIC) \
38*3117ece4Schristos         ITEM(FSE_ERROR_tableLog_tooLarge) ITEM(FSE_ERROR_maxSymbolValue_tooLarge) ITEM(FSE_ERROR_maxSymbolValue_tooSmall) \
39*3117ece4Schristos         ITEM(FSE_ERROR_dstSize_tooSmall) ITEM(FSE_ERROR_srcSize_wrong)\
40*3117ece4Schristos         ITEM(FSE_ERROR_corruptionDetected) \
41*3117ece4Schristos         ITEM(FSE_ERROR_maxCode)
42*3117ece4Schristos 
43*3117ece4Schristos #define FSE_GENERATE_ENUM(ENUM) ENUM,
44*3117ece4Schristos typedef enum { FSE_LIST_ERRORS(FSE_GENERATE_ENUM) } FSE_errorCodes;  /* enum is exposed, to detect & handle specific errors; compare function result to -enum value */
45*3117ece4Schristos 
46*3117ece4Schristos 
47*3117ece4Schristos /******************************************
48*3117ece4Schristos *  FSE symbol compression API
49*3117ece4Schristos ******************************************/
50*3117ece4Schristos /*
51*3117ece4Schristos    This API consists of small unitary functions, which highly benefit from being inlined.
52*3117ece4Schristos    You will want to enable link-time-optimization to ensure these functions are properly inlined in your binary.
53*3117ece4Schristos    Visual seems to do it automatically.
54*3117ece4Schristos    For gcc or clang, you'll need to add -flto flag at compilation and linking stages.
55*3117ece4Schristos    If none of these solutions is applicable, include "fse.c" directly.
56*3117ece4Schristos */
57*3117ece4Schristos 
58*3117ece4Schristos typedef unsigned FSE_CTable;   /* don't allocate that. It's just a way to be more restrictive than void* */
59*3117ece4Schristos typedef unsigned FSE_DTable;   /* don't allocate that. It's just a way to be more restrictive than void* */
60*3117ece4Schristos 
61*3117ece4Schristos typedef struct
62*3117ece4Schristos {
63*3117ece4Schristos     size_t bitContainer;
64*3117ece4Schristos     int    bitPos;
65*3117ece4Schristos     char*  startPtr;
66*3117ece4Schristos     char*  ptr;
67*3117ece4Schristos     char*  endPtr;
68*3117ece4Schristos } FSE_CStream_t;
69*3117ece4Schristos 
70*3117ece4Schristos typedef struct
71*3117ece4Schristos {
72*3117ece4Schristos     ptrdiff_t   value;
73*3117ece4Schristos     const void* stateTable;
74*3117ece4Schristos     const void* symbolTT;
75*3117ece4Schristos     unsigned    stateLog;
76*3117ece4Schristos } FSE_CState_t;
77*3117ece4Schristos 
78*3117ece4Schristos typedef struct
79*3117ece4Schristos {
80*3117ece4Schristos     size_t   bitContainer;
81*3117ece4Schristos     unsigned bitsConsumed;
82*3117ece4Schristos     const char* ptr;
83*3117ece4Schristos     const char* start;
84*3117ece4Schristos } FSE_DStream_t;
85*3117ece4Schristos 
86*3117ece4Schristos typedef struct
87*3117ece4Schristos {
88*3117ece4Schristos     size_t      state;
89*3117ece4Schristos     const void* table;   /* precise table may vary, depending on U16 */
90*3117ece4Schristos } FSE_DState_t;
91*3117ece4Schristos 
92*3117ece4Schristos typedef enum { FSE_DStream_unfinished = 0,
93*3117ece4Schristos                FSE_DStream_endOfBuffer = 1,
94*3117ece4Schristos                FSE_DStream_completed = 2,
95*3117ece4Schristos                FSE_DStream_tooFar = 3 } FSE_DStream_status;  /* result of FSE_reloadDStream() */
96*3117ece4Schristos                /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... ?! */
97*3117ece4Schristos 
98*3117ece4Schristos 
99*3117ece4Schristos /****************************************************************
100*3117ece4Schristos *  Tuning parameters
101*3117ece4Schristos ****************************************************************/
102*3117ece4Schristos /* MEMORY_USAGE :
103*3117ece4Schristos *  Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
104*3117ece4Schristos *  Increasing memory usage improves compression ratio
105*3117ece4Schristos *  Reduced memory usage can improve speed, due to cache effect
106*3117ece4Schristos *  Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
107*3117ece4Schristos #define FSE_MAX_MEMORY_USAGE 14
108*3117ece4Schristos #define FSE_DEFAULT_MEMORY_USAGE 13
109*3117ece4Schristos 
110*3117ece4Schristos /* FSE_MAX_SYMBOL_VALUE :
111*3117ece4Schristos *  Maximum symbol value authorized.
112*3117ece4Schristos *  Required for proper stack allocation */
113*3117ece4Schristos #define FSE_MAX_SYMBOL_VALUE 255
114*3117ece4Schristos 
115*3117ece4Schristos 
116*3117ece4Schristos /****************************************************************
117*3117ece4Schristos *  template functions type & suffix
118*3117ece4Schristos ****************************************************************/
119*3117ece4Schristos #define FSE_FUNCTION_TYPE BYTE
120*3117ece4Schristos #define FSE_FUNCTION_EXTENSION
121*3117ece4Schristos 
122*3117ece4Schristos 
123*3117ece4Schristos /****************************************************************
124*3117ece4Schristos *  Byte symbol type
125*3117ece4Schristos ****************************************************************/
126*3117ece4Schristos typedef struct
127*3117ece4Schristos {
128*3117ece4Schristos     unsigned short newState;
129*3117ece4Schristos     unsigned char  symbol;
130*3117ece4Schristos     unsigned char  nbBits;
131*3117ece4Schristos } FSE_decode_t;   /* size == U32 */
132*3117ece4Schristos 
133*3117ece4Schristos 
134*3117ece4Schristos 
135*3117ece4Schristos /****************************************************************
136*3117ece4Schristos *  Compiler specifics
137*3117ece4Schristos ****************************************************************/
138*3117ece4Schristos #ifdef _MSC_VER    /* Visual Studio */
139*3117ece4Schristos #  define FORCE_INLINE static __forceinline
140*3117ece4Schristos #  include <intrin.h>                    /* For Visual 2005 */
141*3117ece4Schristos #  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
142*3117ece4Schristos #  pragma warning(disable : 4214)        /* disable: C4214: non-int bitfields */
143*3117ece4Schristos #else
144*3117ece4Schristos #  define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
145*3117ece4Schristos #  if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
146*3117ece4Schristos #    ifdef __GNUC__
147*3117ece4Schristos #      define FORCE_INLINE static inline __attribute__((always_inline))
148*3117ece4Schristos #    else
149*3117ece4Schristos #      define FORCE_INLINE static inline
150*3117ece4Schristos #    endif
151*3117ece4Schristos #  else
152*3117ece4Schristos #    define FORCE_INLINE static
153*3117ece4Schristos #  endif /* __STDC_VERSION__ */
154*3117ece4Schristos #endif
155*3117ece4Schristos 
156*3117ece4Schristos 
157*3117ece4Schristos /****************************************************************
158*3117ece4Schristos *  Includes
159*3117ece4Schristos ****************************************************************/
160*3117ece4Schristos #include <stdlib.h>     /* malloc, free, qsort */
161*3117ece4Schristos #include <string.h>     /* memcpy, memset */
162*3117ece4Schristos #include <stdio.h>      /* printf (debug) */
163*3117ece4Schristos 
164*3117ece4Schristos 
165*3117ece4Schristos #ifndef MEM_ACCESS_MODULE
166*3117ece4Schristos #define MEM_ACCESS_MODULE
167*3117ece4Schristos /****************************************************************
168*3117ece4Schristos *  Basic Types
169*3117ece4Schristos *****************************************************************/
170*3117ece4Schristos #if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
171*3117ece4Schristos # include <stdint.h>
172*3117ece4Schristos typedef  uint8_t BYTE;
173*3117ece4Schristos typedef uint16_t U16;
174*3117ece4Schristos typedef  int16_t S16;
175*3117ece4Schristos typedef uint32_t U32;
176*3117ece4Schristos typedef  int32_t S32;
177*3117ece4Schristos typedef uint64_t U64;
178*3117ece4Schristos typedef  int64_t S64;
179*3117ece4Schristos #else
180*3117ece4Schristos typedef unsigned char       BYTE;
181*3117ece4Schristos typedef unsigned short      U16;
182*3117ece4Schristos typedef   signed short      S16;
183*3117ece4Schristos typedef unsigned int        U32;
184*3117ece4Schristos typedef   signed int        S32;
185*3117ece4Schristos typedef unsigned long long  U64;
186*3117ece4Schristos typedef   signed long long  S64;
187*3117ece4Schristos #endif
188*3117ece4Schristos 
189*3117ece4Schristos #endif   /* MEM_ACCESS_MODULE */
190*3117ece4Schristos 
191*3117ece4Schristos /****************************************************************
192*3117ece4Schristos *  Memory I/O
193*3117ece4Schristos *****************************************************************/
194*3117ece4Schristos 
195*3117ece4Schristos static unsigned FSE_32bits(void)
196*3117ece4Schristos {
197*3117ece4Schristos     return sizeof(void*)==4;
198*3117ece4Schristos }
199*3117ece4Schristos 
200*3117ece4Schristos static unsigned FSE_isLittleEndian(void)
201*3117ece4Schristos {
202*3117ece4Schristos     const union { U32 i; BYTE c[4]; } one = { 1 };   /* don't use static : performance detrimental  */
203*3117ece4Schristos     return one.c[0];
204*3117ece4Schristos }
205*3117ece4Schristos 
206*3117ece4Schristos static U16 FSE_read16(const void* memPtr)
207*3117ece4Schristos {
208*3117ece4Schristos     U16 val; memcpy(&val, memPtr, sizeof(val)); return val;
209*3117ece4Schristos }
210*3117ece4Schristos 
211*3117ece4Schristos static U32 FSE_read32(const void* memPtr)
212*3117ece4Schristos {
213*3117ece4Schristos     U32 val; memcpy(&val, memPtr, sizeof(val)); return val;
214*3117ece4Schristos }
215*3117ece4Schristos 
216*3117ece4Schristos static U64 FSE_read64(const void* memPtr)
217*3117ece4Schristos {
218*3117ece4Schristos     U64 val; memcpy(&val, memPtr, sizeof(val)); return val;
219*3117ece4Schristos }
220*3117ece4Schristos 
221*3117ece4Schristos static U16 FSE_readLE16(const void* memPtr)
222*3117ece4Schristos {
223*3117ece4Schristos     if (FSE_isLittleEndian())
224*3117ece4Schristos         return FSE_read16(memPtr);
225*3117ece4Schristos     else
226*3117ece4Schristos     {
227*3117ece4Schristos         const BYTE* p = (const BYTE*)memPtr;
228*3117ece4Schristos         return (U16)(p[0] + (p[1]<<8));
229*3117ece4Schristos     }
230*3117ece4Schristos }
231*3117ece4Schristos 
232*3117ece4Schristos static U32 FSE_readLE32(const void* memPtr)
233*3117ece4Schristos {
234*3117ece4Schristos     if (FSE_isLittleEndian())
235*3117ece4Schristos         return FSE_read32(memPtr);
236*3117ece4Schristos     else
237*3117ece4Schristos     {
238*3117ece4Schristos         const BYTE* p = (const BYTE*)memPtr;
239*3117ece4Schristos         return (U32)((U32)p[0] + ((U32)p[1]<<8) + ((U32)p[2]<<16) + ((U32)p[3]<<24));
240*3117ece4Schristos     }
241*3117ece4Schristos }
242*3117ece4Schristos 
243*3117ece4Schristos 
244*3117ece4Schristos static U64 FSE_readLE64(const void* memPtr)
245*3117ece4Schristos {
246*3117ece4Schristos     if (FSE_isLittleEndian())
247*3117ece4Schristos         return FSE_read64(memPtr);
248*3117ece4Schristos     else
249*3117ece4Schristos     {
250*3117ece4Schristos         const BYTE* p = (const BYTE*)memPtr;
251*3117ece4Schristos         return (U64)((U64)p[0] + ((U64)p[1]<<8) + ((U64)p[2]<<16) + ((U64)p[3]<<24)
252*3117ece4Schristos                      + ((U64)p[4]<<32) + ((U64)p[5]<<40) + ((U64)p[6]<<48) + ((U64)p[7]<<56));
253*3117ece4Schristos     }
254*3117ece4Schristos }
255*3117ece4Schristos 
256*3117ece4Schristos static size_t FSE_readLEST(const void* memPtr)
257*3117ece4Schristos {
258*3117ece4Schristos     if (FSE_32bits())
259*3117ece4Schristos         return (size_t)FSE_readLE32(memPtr);
260*3117ece4Schristos     else
261*3117ece4Schristos         return (size_t)FSE_readLE64(memPtr);
262*3117ece4Schristos }
263*3117ece4Schristos 
264*3117ece4Schristos 
265*3117ece4Schristos 
266*3117ece4Schristos /****************************************************************
267*3117ece4Schristos *  Constants
268*3117ece4Schristos *****************************************************************/
269*3117ece4Schristos #define FSE_MAX_TABLELOG  (FSE_MAX_MEMORY_USAGE-2)
270*3117ece4Schristos #define FSE_MAX_TABLESIZE (1U<<FSE_MAX_TABLELOG)
271*3117ece4Schristos #define FSE_MAXTABLESIZE_MASK (FSE_MAX_TABLESIZE-1)
272*3117ece4Schristos #define FSE_DEFAULT_TABLELOG (FSE_DEFAULT_MEMORY_USAGE-2)
273*3117ece4Schristos #define FSE_MIN_TABLELOG 5
274*3117ece4Schristos 
275*3117ece4Schristos #define FSE_TABLELOG_ABSOLUTE_MAX 15
276*3117ece4Schristos #if FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX
277*3117ece4Schristos #error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported"
278*3117ece4Schristos #endif
279*3117ece4Schristos 
280*3117ece4Schristos 
281*3117ece4Schristos /****************************************************************
282*3117ece4Schristos *  Error Management
283*3117ece4Schristos ****************************************************************/
284*3117ece4Schristos #define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
285*3117ece4Schristos 
286*3117ece4Schristos 
287*3117ece4Schristos /****************************************************************
288*3117ece4Schristos *  Complex types
289*3117ece4Schristos ****************************************************************/
290*3117ece4Schristos typedef struct
291*3117ece4Schristos {
292*3117ece4Schristos     int deltaFindState;
293*3117ece4Schristos     U32 deltaNbBits;
294*3117ece4Schristos } FSE_symbolCompressionTransform; /* total 8 bytes */
295*3117ece4Schristos 
296*3117ece4Schristos typedef U32 DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
297*3117ece4Schristos 
298*3117ece4Schristos /****************************************************************
299*3117ece4Schristos *  Internal functions
300*3117ece4Schristos ****************************************************************/
301*3117ece4Schristos FORCE_INLINE unsigned FSE_highbit32 (U32 val)
302*3117ece4Schristos {
303*3117ece4Schristos #   if defined(_MSC_VER)   /* Visual */
304*3117ece4Schristos     unsigned long r;
305*3117ece4Schristos     return _BitScanReverse(&r, val) ? (unsigned)r : 0;
306*3117ece4Schristos #   elif defined(__GNUC__) && (GCC_VERSION >= 304)   /* GCC Intrinsic */
307*3117ece4Schristos     return __builtin_clz (val) ^ 31;
308*3117ece4Schristos #   else   /* Software version */
309*3117ece4Schristos     static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
310*3117ece4Schristos     U32 v = val;
311*3117ece4Schristos     unsigned r;
312*3117ece4Schristos     v |= v >> 1;
313*3117ece4Schristos     v |= v >> 2;
314*3117ece4Schristos     v |= v >> 4;
315*3117ece4Schristos     v |= v >> 8;
316*3117ece4Schristos     v |= v >> 16;
317*3117ece4Schristos     r = DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];
318*3117ece4Schristos     return r;
319*3117ece4Schristos #   endif
320*3117ece4Schristos }
321*3117ece4Schristos 
322*3117ece4Schristos 
323*3117ece4Schristos /****************************************************************
324*3117ece4Schristos *  Templates
325*3117ece4Schristos ****************************************************************/
326*3117ece4Schristos /*
327*3117ece4Schristos   designed to be included
328*3117ece4Schristos   for type-specific functions (template emulation in C)
329*3117ece4Schristos   Objective is to write these functions only once, for improved maintenance
330*3117ece4Schristos */
331*3117ece4Schristos 
332*3117ece4Schristos /* safety checks */
333*3117ece4Schristos #ifndef FSE_FUNCTION_EXTENSION
334*3117ece4Schristos #  error "FSE_FUNCTION_EXTENSION must be defined"
335*3117ece4Schristos #endif
336*3117ece4Schristos #ifndef FSE_FUNCTION_TYPE
337*3117ece4Schristos #  error "FSE_FUNCTION_TYPE must be defined"
338*3117ece4Schristos #endif
339*3117ece4Schristos 
340*3117ece4Schristos /* Function names */
341*3117ece4Schristos #define FSE_CAT(X,Y) X##Y
342*3117ece4Schristos #define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
343*3117ece4Schristos #define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
344*3117ece4Schristos 
345*3117ece4Schristos 
346*3117ece4Schristos 
347*3117ece4Schristos static U32 FSE_tableStep(U32 tableSize) { return (tableSize>>1) + (tableSize>>3) + 3; }
348*3117ece4Schristos 
349*3117ece4Schristos #define FSE_DECODE_TYPE FSE_decode_t
350*3117ece4Schristos 
351*3117ece4Schristos 
352*3117ece4Schristos typedef struct {
353*3117ece4Schristos     U16 tableLog;
354*3117ece4Schristos     U16 fastMode;
355*3117ece4Schristos } FSE_DTableHeader;   /* sizeof U32 */
356*3117ece4Schristos 
357*3117ece4Schristos static size_t FSE_buildDTable
358*3117ece4Schristos (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
359*3117ece4Schristos {
360*3117ece4Schristos     void* ptr = dt;
361*3117ece4Schristos     FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
362*3117ece4Schristos     FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*)(ptr) + 1;   /* because dt is unsigned, 32-bits aligned on 32-bits */
363*3117ece4Schristos     const U32 tableSize = 1 << tableLog;
364*3117ece4Schristos     const U32 tableMask = tableSize-1;
365*3117ece4Schristos     const U32 step = FSE_tableStep(tableSize);
366*3117ece4Schristos     U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1];
367*3117ece4Schristos     U32 position = 0;
368*3117ece4Schristos     U32 highThreshold = tableSize-1;
369*3117ece4Schristos     const S16 largeLimit= (S16)(1 << (tableLog-1));
370*3117ece4Schristos     U32 noLarge = 1;
371*3117ece4Schristos     U32 s;
372*3117ece4Schristos 
373*3117ece4Schristos     /* Sanity Checks */
374*3117ece4Schristos     if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return (size_t)-FSE_ERROR_maxSymbolValue_tooLarge;
375*3117ece4Schristos     if (tableLog > FSE_MAX_TABLELOG) return (size_t)-FSE_ERROR_tableLog_tooLarge;
376*3117ece4Schristos 
377*3117ece4Schristos     /* Init, lay down lowprob symbols */
378*3117ece4Schristos     DTableH[0].tableLog = (U16)tableLog;
379*3117ece4Schristos     for (s=0; s<=maxSymbolValue; s++)
380*3117ece4Schristos     {
381*3117ece4Schristos         if (normalizedCounter[s]==-1)
382*3117ece4Schristos         {
383*3117ece4Schristos             tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s;
384*3117ece4Schristos             symbolNext[s] = 1;
385*3117ece4Schristos         }
386*3117ece4Schristos         else
387*3117ece4Schristos         {
388*3117ece4Schristos             if (normalizedCounter[s] >= largeLimit) noLarge=0;
389*3117ece4Schristos             symbolNext[s] = normalizedCounter[s];
390*3117ece4Schristos         }
391*3117ece4Schristos     }
392*3117ece4Schristos 
393*3117ece4Schristos     /* Spread symbols */
394*3117ece4Schristos     for (s=0; s<=maxSymbolValue; s++)
395*3117ece4Schristos     {
396*3117ece4Schristos         int i;
397*3117ece4Schristos         for (i=0; i<normalizedCounter[s]; i++)
398*3117ece4Schristos         {
399*3117ece4Schristos             tableDecode[position].symbol = (FSE_FUNCTION_TYPE)s;
400*3117ece4Schristos             position = (position + step) & tableMask;
401*3117ece4Schristos             while (position > highThreshold) position = (position + step) & tableMask;   /* lowprob area */
402*3117ece4Schristos         }
403*3117ece4Schristos     }
404*3117ece4Schristos 
405*3117ece4Schristos     if (position!=0) return (size_t)-FSE_ERROR_GENERIC;   /* position must reach all cells once, otherwise normalizedCounter is incorrect */
406*3117ece4Schristos 
407*3117ece4Schristos     /* Build Decoding table */
408*3117ece4Schristos     {
409*3117ece4Schristos         U32 i;
410*3117ece4Schristos         for (i=0; i<tableSize; i++)
411*3117ece4Schristos         {
412*3117ece4Schristos             FSE_FUNCTION_TYPE symbol = (FSE_FUNCTION_TYPE)(tableDecode[i].symbol);
413*3117ece4Schristos             U16 nextState = symbolNext[symbol]++;
414*3117ece4Schristos             tableDecode[i].nbBits = (BYTE) (tableLog - FSE_highbit32 ((U32)nextState) );
415*3117ece4Schristos             tableDecode[i].newState = (U16) ( (nextState << tableDecode[i].nbBits) - tableSize);
416*3117ece4Schristos         }
417*3117ece4Schristos     }
418*3117ece4Schristos 
419*3117ece4Schristos     DTableH->fastMode = (U16)noLarge;
420*3117ece4Schristos     return 0;
421*3117ece4Schristos }
422*3117ece4Schristos 
423*3117ece4Schristos 
424*3117ece4Schristos /******************************************
425*3117ece4Schristos *  FSE byte symbol
426*3117ece4Schristos ******************************************/
427*3117ece4Schristos #ifndef FSE_COMMONDEFS_ONLY
428*3117ece4Schristos 
429*3117ece4Schristos static unsigned FSE_isError(size_t code) { return (code > (size_t)(-FSE_ERROR_maxCode)); }
430*3117ece4Schristos 
431*3117ece4Schristos static short FSE_abs(short a)
432*3117ece4Schristos {
433*3117ece4Schristos     return a<0? -a : a;
434*3117ece4Schristos }
435*3117ece4Schristos 
436*3117ece4Schristos 
437*3117ece4Schristos /****************************************************************
438*3117ece4Schristos *  Header bitstream management
439*3117ece4Schristos ****************************************************************/
440*3117ece4Schristos static size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
441*3117ece4Schristos                  const void* headerBuffer, size_t hbSize)
442*3117ece4Schristos {
443*3117ece4Schristos     const BYTE* const istart = (const BYTE*) headerBuffer;
444*3117ece4Schristos     const BYTE* const iend = istart + hbSize;
445*3117ece4Schristos     const BYTE* ip = istart;
446*3117ece4Schristos     int nbBits;
447*3117ece4Schristos     int remaining;
448*3117ece4Schristos     int threshold;
449*3117ece4Schristos     U32 bitStream;
450*3117ece4Schristos     int bitCount;
451*3117ece4Schristos     unsigned charnum = 0;
452*3117ece4Schristos     int previous0 = 0;
453*3117ece4Schristos 
454*3117ece4Schristos     if (hbSize < 4) return (size_t)-FSE_ERROR_srcSize_wrong;
455*3117ece4Schristos     bitStream = FSE_readLE32(ip);
456*3117ece4Schristos     nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG;   /* extract tableLog */
457*3117ece4Schristos     if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return (size_t)-FSE_ERROR_tableLog_tooLarge;
458*3117ece4Schristos     bitStream >>= 4;
459*3117ece4Schristos     bitCount = 4;
460*3117ece4Schristos     *tableLogPtr = nbBits;
461*3117ece4Schristos     remaining = (1<<nbBits)+1;
462*3117ece4Schristos     threshold = 1<<nbBits;
463*3117ece4Schristos     nbBits++;
464*3117ece4Schristos 
465*3117ece4Schristos     while ((remaining>1) && (charnum<=*maxSVPtr))
466*3117ece4Schristos     {
467*3117ece4Schristos         if (previous0)
468*3117ece4Schristos         {
469*3117ece4Schristos             unsigned n0 = charnum;
470*3117ece4Schristos             while ((bitStream & 0xFFFF) == 0xFFFF)
471*3117ece4Schristos             {
472*3117ece4Schristos                 n0+=24;
473*3117ece4Schristos                 if (ip < iend-5)
474*3117ece4Schristos                 {
475*3117ece4Schristos                     ip+=2;
476*3117ece4Schristos                     bitStream = FSE_readLE32(ip) >> bitCount;
477*3117ece4Schristos                 }
478*3117ece4Schristos                 else
479*3117ece4Schristos                 {
480*3117ece4Schristos                     bitStream >>= 16;
481*3117ece4Schristos                     bitCount+=16;
482*3117ece4Schristos                 }
483*3117ece4Schristos             }
484*3117ece4Schristos             while ((bitStream & 3) == 3)
485*3117ece4Schristos             {
486*3117ece4Schristos                 n0+=3;
487*3117ece4Schristos                 bitStream>>=2;
488*3117ece4Schristos                 bitCount+=2;
489*3117ece4Schristos             }
490*3117ece4Schristos             n0 += bitStream & 3;
491*3117ece4Schristos             bitCount += 2;
492*3117ece4Schristos             if (n0 > *maxSVPtr) return (size_t)-FSE_ERROR_maxSymbolValue_tooSmall;
493*3117ece4Schristos             while (charnum < n0) normalizedCounter[charnum++] = 0;
494*3117ece4Schristos             if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4))
495*3117ece4Schristos             {
496*3117ece4Schristos                 ip += bitCount>>3;
497*3117ece4Schristos                 bitCount &= 7;
498*3117ece4Schristos                 bitStream = FSE_readLE32(ip) >> bitCount;
499*3117ece4Schristos             }
500*3117ece4Schristos             else
501*3117ece4Schristos                 bitStream >>= 2;
502*3117ece4Schristos         }
503*3117ece4Schristos         {
504*3117ece4Schristos             const short max = (short)((2*threshold-1)-remaining);
505*3117ece4Schristos             short count;
506*3117ece4Schristos 
507*3117ece4Schristos             if ((bitStream & (threshold-1)) < (U32)max)
508*3117ece4Schristos             {
509*3117ece4Schristos                 count = (short)(bitStream & (threshold-1));
510*3117ece4Schristos                 bitCount   += nbBits-1;
511*3117ece4Schristos             }
512*3117ece4Schristos             else
513*3117ece4Schristos             {
514*3117ece4Schristos                 count = (short)(bitStream & (2*threshold-1));
515*3117ece4Schristos                 if (count >= threshold) count -= max;
516*3117ece4Schristos                 bitCount   += nbBits;
517*3117ece4Schristos             }
518*3117ece4Schristos 
519*3117ece4Schristos             count--;   /* extra accuracy */
520*3117ece4Schristos             remaining -= FSE_abs(count);
521*3117ece4Schristos             normalizedCounter[charnum++] = count;
522*3117ece4Schristos             previous0 = !count;
523*3117ece4Schristos             while (remaining < threshold)
524*3117ece4Schristos             {
525*3117ece4Schristos                 nbBits--;
526*3117ece4Schristos                 threshold >>= 1;
527*3117ece4Schristos             }
528*3117ece4Schristos 
529*3117ece4Schristos             {
530*3117ece4Schristos                 if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4))
531*3117ece4Schristos                 {
532*3117ece4Schristos                     ip += bitCount>>3;
533*3117ece4Schristos                     bitCount &= 7;
534*3117ece4Schristos                 }
535*3117ece4Schristos                 else
536*3117ece4Schristos                 {
537*3117ece4Schristos                     bitCount -= (int)(8 * (iend - 4 - ip));
538*3117ece4Schristos                     ip = iend - 4;
539*3117ece4Schristos                 }
540*3117ece4Schristos                 bitStream = FSE_readLE32(ip) >> (bitCount & 31);
541*3117ece4Schristos             }
542*3117ece4Schristos         }
543*3117ece4Schristos     }
544*3117ece4Schristos     if (remaining != 1) return (size_t)-FSE_ERROR_GENERIC;
545*3117ece4Schristos     *maxSVPtr = charnum-1;
546*3117ece4Schristos 
547*3117ece4Schristos     ip += (bitCount+7)>>3;
548*3117ece4Schristos     if ((size_t)(ip-istart) > hbSize) return (size_t)-FSE_ERROR_srcSize_wrong;
549*3117ece4Schristos     return ip-istart;
550*3117ece4Schristos }
551*3117ece4Schristos 
552*3117ece4Schristos 
553*3117ece4Schristos /*********************************************************
554*3117ece4Schristos *  Decompression (Byte symbols)
555*3117ece4Schristos *********************************************************/
556*3117ece4Schristos static size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue)
557*3117ece4Schristos {
558*3117ece4Schristos     void* ptr = dt;
559*3117ece4Schristos     FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
560*3117ece4Schristos     FSE_decode_t* const cell = (FSE_decode_t*)(ptr) + 1;   /* because dt is unsigned */
561*3117ece4Schristos 
562*3117ece4Schristos     DTableH->tableLog = 0;
563*3117ece4Schristos     DTableH->fastMode = 0;
564*3117ece4Schristos 
565*3117ece4Schristos     cell->newState = 0;
566*3117ece4Schristos     cell->symbol = symbolValue;
567*3117ece4Schristos     cell->nbBits = 0;
568*3117ece4Schristos 
569*3117ece4Schristos     return 0;
570*3117ece4Schristos }
571*3117ece4Schristos 
572*3117ece4Schristos 
573*3117ece4Schristos static size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits)
574*3117ece4Schristos {
575*3117ece4Schristos     void* ptr = dt;
576*3117ece4Schristos     FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
577*3117ece4Schristos     FSE_decode_t* const dinfo = (FSE_decode_t*)(ptr) + 1;   /* because dt is unsigned */
578*3117ece4Schristos     const unsigned tableSize = 1 << nbBits;
579*3117ece4Schristos     const unsigned tableMask = tableSize - 1;
580*3117ece4Schristos     const unsigned maxSymbolValue = tableMask;
581*3117ece4Schristos     unsigned s;
582*3117ece4Schristos 
583*3117ece4Schristos     /* Sanity checks */
584*3117ece4Schristos     if (nbBits < 1) return (size_t)-FSE_ERROR_GENERIC;             /* min size */
585*3117ece4Schristos 
586*3117ece4Schristos     /* Build Decoding Table */
587*3117ece4Schristos     DTableH->tableLog = (U16)nbBits;
588*3117ece4Schristos     DTableH->fastMode = 1;
589*3117ece4Schristos     for (s=0; s<=maxSymbolValue; s++)
590*3117ece4Schristos     {
591*3117ece4Schristos         dinfo[s].newState = 0;
592*3117ece4Schristos         dinfo[s].symbol = (BYTE)s;
593*3117ece4Schristos         dinfo[s].nbBits = (BYTE)nbBits;
594*3117ece4Schristos     }
595*3117ece4Schristos 
596*3117ece4Schristos     return 0;
597*3117ece4Schristos }
598*3117ece4Schristos 
599*3117ece4Schristos 
600*3117ece4Schristos /* FSE_initDStream
601*3117ece4Schristos  * Initialize a FSE_DStream_t.
602*3117ece4Schristos  * srcBuffer must point at the beginning of an FSE block.
603*3117ece4Schristos  * The function result is the size of the FSE_block (== srcSize).
604*3117ece4Schristos  * If srcSize is too small, the function will return an errorCode;
605*3117ece4Schristos  */
606*3117ece4Schristos static size_t FSE_initDStream(FSE_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
607*3117ece4Schristos {
608*3117ece4Schristos     if (srcSize < 1) return (size_t)-FSE_ERROR_srcSize_wrong;
609*3117ece4Schristos 
610*3117ece4Schristos     if (srcSize >=  sizeof(size_t))
611*3117ece4Schristos     {
612*3117ece4Schristos         U32 contain32;
613*3117ece4Schristos         bitD->start = (const char*)srcBuffer;
614*3117ece4Schristos         bitD->ptr   = (const char*)srcBuffer + srcSize - sizeof(size_t);
615*3117ece4Schristos         bitD->bitContainer = FSE_readLEST(bitD->ptr);
616*3117ece4Schristos         contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
617*3117ece4Schristos         if (contain32 == 0) return (size_t)-FSE_ERROR_GENERIC;   /* stop bit not present */
618*3117ece4Schristos         bitD->bitsConsumed = 8 - FSE_highbit32(contain32);
619*3117ece4Schristos     }
620*3117ece4Schristos     else
621*3117ece4Schristos     {
622*3117ece4Schristos         U32 contain32;
623*3117ece4Schristos         bitD->start = (const char*)srcBuffer;
624*3117ece4Schristos         bitD->ptr   = bitD->start;
625*3117ece4Schristos         bitD->bitContainer = *(const BYTE*)(bitD->start);
626*3117ece4Schristos         switch(srcSize)
627*3117ece4Schristos         {
628*3117ece4Schristos             case 7: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[6]) << (sizeof(size_t)*8 - 16);
629*3117ece4Schristos                     /* fallthrough */
630*3117ece4Schristos             case 6: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[5]) << (sizeof(size_t)*8 - 24);
631*3117ece4Schristos                     /* fallthrough */
632*3117ece4Schristos             case 5: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[4]) << (sizeof(size_t)*8 - 32);
633*3117ece4Schristos                     /* fallthrough */
634*3117ece4Schristos             case 4: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[3]) << 24;
635*3117ece4Schristos                     /* fallthrough */
636*3117ece4Schristos             case 3: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[2]) << 16;
637*3117ece4Schristos                     /* fallthrough */
638*3117ece4Schristos             case 2: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[1]) <<  8;
639*3117ece4Schristos                     /* fallthrough */
640*3117ece4Schristos             default:;
641*3117ece4Schristos         }
642*3117ece4Schristos         contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
643*3117ece4Schristos         if (contain32 == 0) return (size_t)-FSE_ERROR_GENERIC;   /* stop bit not present */
644*3117ece4Schristos         bitD->bitsConsumed = 8 - FSE_highbit32(contain32);
645*3117ece4Schristos         bitD->bitsConsumed += (U32)(sizeof(size_t) - srcSize)*8;
646*3117ece4Schristos     }
647*3117ece4Schristos 
648*3117ece4Schristos     return srcSize;
649*3117ece4Schristos }
650*3117ece4Schristos 
651*3117ece4Schristos 
652*3117ece4Schristos /*!FSE_lookBits
653*3117ece4Schristos  * Provides next n bits from the bitContainer.
654*3117ece4Schristos  * bitContainer is not modified (bits are still present for next read/look)
655*3117ece4Schristos  * On 32-bits, maxNbBits==25
656*3117ece4Schristos  * On 64-bits, maxNbBits==57
657*3117ece4Schristos  * return : value extracted.
658*3117ece4Schristos  */
659*3117ece4Schristos static size_t FSE_lookBits(FSE_DStream_t* bitD, U32 nbBits)
660*3117ece4Schristos {
661*3117ece4Schristos     const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1;
662*3117ece4Schristos     return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask-nbBits) & bitMask);
663*3117ece4Schristos }
664*3117ece4Schristos 
665*3117ece4Schristos static size_t FSE_lookBitsFast(FSE_DStream_t* bitD, U32 nbBits)   /* only if nbBits >= 1 !! */
666*3117ece4Schristos {
667*3117ece4Schristos     const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1;
668*3117ece4Schristos     return (bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> (((bitMask+1)-nbBits) & bitMask);
669*3117ece4Schristos }
670*3117ece4Schristos 
671*3117ece4Schristos static void FSE_skipBits(FSE_DStream_t* bitD, U32 nbBits)
672*3117ece4Schristos {
673*3117ece4Schristos     bitD->bitsConsumed += nbBits;
674*3117ece4Schristos }
675*3117ece4Schristos 
676*3117ece4Schristos 
677*3117ece4Schristos /*!FSE_readBits
678*3117ece4Schristos  * Read next n bits from the bitContainer.
679*3117ece4Schristos  * On 32-bits, don't read more than maxNbBits==25
680*3117ece4Schristos  * On 64-bits, don't read more than maxNbBits==57
681*3117ece4Schristos  * Use the fast variant *only* if n >= 1.
682*3117ece4Schristos  * return : value extracted.
683*3117ece4Schristos  */
684*3117ece4Schristos static size_t FSE_readBits(FSE_DStream_t* bitD, U32 nbBits)
685*3117ece4Schristos {
686*3117ece4Schristos     size_t value = FSE_lookBits(bitD, nbBits);
687*3117ece4Schristos     FSE_skipBits(bitD, nbBits);
688*3117ece4Schristos     return value;
689*3117ece4Schristos }
690*3117ece4Schristos 
691*3117ece4Schristos static size_t FSE_readBitsFast(FSE_DStream_t* bitD, U32 nbBits)   /* only if nbBits >= 1 !! */
692*3117ece4Schristos {
693*3117ece4Schristos     size_t value = FSE_lookBitsFast(bitD, nbBits);
694*3117ece4Schristos     FSE_skipBits(bitD, nbBits);
695*3117ece4Schristos     return value;
696*3117ece4Schristos }
697*3117ece4Schristos 
698*3117ece4Schristos static unsigned FSE_reloadDStream(FSE_DStream_t* bitD)
699*3117ece4Schristos {
700*3117ece4Schristos     if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8))  /* should never happen */
701*3117ece4Schristos         return FSE_DStream_tooFar;
702*3117ece4Schristos 
703*3117ece4Schristos     if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer))
704*3117ece4Schristos     {
705*3117ece4Schristos         bitD->ptr -= bitD->bitsConsumed >> 3;
706*3117ece4Schristos         bitD->bitsConsumed &= 7;
707*3117ece4Schristos         bitD->bitContainer = FSE_readLEST(bitD->ptr);
708*3117ece4Schristos         return FSE_DStream_unfinished;
709*3117ece4Schristos     }
710*3117ece4Schristos     if (bitD->ptr == bitD->start)
711*3117ece4Schristos     {
712*3117ece4Schristos         if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return FSE_DStream_endOfBuffer;
713*3117ece4Schristos         return FSE_DStream_completed;
714*3117ece4Schristos     }
715*3117ece4Schristos     {
716*3117ece4Schristos         U32 nbBytes = bitD->bitsConsumed >> 3;
717*3117ece4Schristos         U32 result = FSE_DStream_unfinished;
718*3117ece4Schristos         if (bitD->ptr - nbBytes < bitD->start)
719*3117ece4Schristos         {
720*3117ece4Schristos             nbBytes = (U32)(bitD->ptr - bitD->start);  /* ptr > start */
721*3117ece4Schristos             result = FSE_DStream_endOfBuffer;
722*3117ece4Schristos         }
723*3117ece4Schristos         bitD->ptr -= nbBytes;
724*3117ece4Schristos         bitD->bitsConsumed -= nbBytes*8;
725*3117ece4Schristos         bitD->bitContainer = FSE_readLEST(bitD->ptr);   /* reminder : srcSize > sizeof(bitD) */
726*3117ece4Schristos         return result;
727*3117ece4Schristos     }
728*3117ece4Schristos }
729*3117ece4Schristos 
730*3117ece4Schristos 
731*3117ece4Schristos static void FSE_initDState(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD, const FSE_DTable* dt)
732*3117ece4Schristos {
733*3117ece4Schristos     const void* ptr = dt;
734*3117ece4Schristos     const FSE_DTableHeader* const DTableH = (const FSE_DTableHeader*)ptr;
735*3117ece4Schristos     DStatePtr->state = FSE_readBits(bitD, DTableH->tableLog);
736*3117ece4Schristos     FSE_reloadDStream(bitD);
737*3117ece4Schristos     DStatePtr->table = dt + 1;
738*3117ece4Schristos }
739*3117ece4Schristos 
740*3117ece4Schristos static BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD)
741*3117ece4Schristos {
742*3117ece4Schristos     const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
743*3117ece4Schristos     const U32  nbBits = DInfo.nbBits;
744*3117ece4Schristos     BYTE symbol = DInfo.symbol;
745*3117ece4Schristos     size_t lowBits = FSE_readBits(bitD, nbBits);
746*3117ece4Schristos 
747*3117ece4Schristos     DStatePtr->state = DInfo.newState + lowBits;
748*3117ece4Schristos     return symbol;
749*3117ece4Schristos }
750*3117ece4Schristos 
751*3117ece4Schristos static BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD)
752*3117ece4Schristos {
753*3117ece4Schristos     const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
754*3117ece4Schristos     const U32 nbBits = DInfo.nbBits;
755*3117ece4Schristos     BYTE symbol = DInfo.symbol;
756*3117ece4Schristos     size_t lowBits = FSE_readBitsFast(bitD, nbBits);
757*3117ece4Schristos 
758*3117ece4Schristos     DStatePtr->state = DInfo.newState + lowBits;
759*3117ece4Schristos     return symbol;
760*3117ece4Schristos }
761*3117ece4Schristos 
762*3117ece4Schristos /* FSE_endOfDStream
763*3117ece4Schristos    Tells if bitD has reached end of bitStream or not */
764*3117ece4Schristos 
765*3117ece4Schristos static unsigned FSE_endOfDStream(const FSE_DStream_t* bitD)
766*3117ece4Schristos {
767*3117ece4Schristos     return ((bitD->ptr == bitD->start) && (bitD->bitsConsumed == sizeof(bitD->bitContainer)*8));
768*3117ece4Schristos }
769*3117ece4Schristos 
770*3117ece4Schristos static unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
771*3117ece4Schristos {
772*3117ece4Schristos     return DStatePtr->state == 0;
773*3117ece4Schristos }
774*3117ece4Schristos 
775*3117ece4Schristos 
776*3117ece4Schristos FORCE_INLINE size_t FSE_decompress_usingDTable_generic(
777*3117ece4Schristos           void* dst, size_t maxDstSize,
778*3117ece4Schristos     const void* cSrc, size_t cSrcSize,
779*3117ece4Schristos     const FSE_DTable* dt, const unsigned fast)
780*3117ece4Schristos {
781*3117ece4Schristos     BYTE* const ostart = (BYTE*) dst;
782*3117ece4Schristos     BYTE* op = ostart;
783*3117ece4Schristos     BYTE* const omax = op + maxDstSize;
784*3117ece4Schristos     BYTE* const olimit = omax-3;
785*3117ece4Schristos 
786*3117ece4Schristos     FSE_DStream_t bitD;
787*3117ece4Schristos     FSE_DState_t state1;
788*3117ece4Schristos     FSE_DState_t state2;
789*3117ece4Schristos     size_t errorCode;
790*3117ece4Schristos 
791*3117ece4Schristos     /* Init */
792*3117ece4Schristos     errorCode = FSE_initDStream(&bitD, cSrc, cSrcSize);   /* replaced last arg by maxCompressed Size */
793*3117ece4Schristos     if (FSE_isError(errorCode)) return errorCode;
794*3117ece4Schristos 
795*3117ece4Schristos     FSE_initDState(&state1, &bitD, dt);
796*3117ece4Schristos     FSE_initDState(&state2, &bitD, dt);
797*3117ece4Schristos 
798*3117ece4Schristos #define FSE_GETSYMBOL(statePtr) fast ? FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD)
799*3117ece4Schristos 
800*3117ece4Schristos     /* 4 symbols per loop */
801*3117ece4Schristos     for ( ; (FSE_reloadDStream(&bitD)==FSE_DStream_unfinished) && (op<olimit) ; op+=4)
802*3117ece4Schristos     {
803*3117ece4Schristos         op[0] = FSE_GETSYMBOL(&state1);
804*3117ece4Schristos 
805*3117ece4Schristos         if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
806*3117ece4Schristos             FSE_reloadDStream(&bitD);
807*3117ece4Schristos 
808*3117ece4Schristos         op[1] = FSE_GETSYMBOL(&state2);
809*3117ece4Schristos 
810*3117ece4Schristos         if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
811*3117ece4Schristos             { if (FSE_reloadDStream(&bitD) > FSE_DStream_unfinished) { op+=2; break; } }
812*3117ece4Schristos 
813*3117ece4Schristos         op[2] = FSE_GETSYMBOL(&state1);
814*3117ece4Schristos 
815*3117ece4Schristos         if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
816*3117ece4Schristos             FSE_reloadDStream(&bitD);
817*3117ece4Schristos 
818*3117ece4Schristos         op[3] = FSE_GETSYMBOL(&state2);
819*3117ece4Schristos     }
820*3117ece4Schristos 
821*3117ece4Schristos     /* tail */
822*3117ece4Schristos     /* note : FSE_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly FSE_DStream_completed */
823*3117ece4Schristos     while (1)
824*3117ece4Schristos     {
825*3117ece4Schristos         if ( (FSE_reloadDStream(&bitD)>FSE_DStream_completed) || (op==omax) || (FSE_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state1))) )
826*3117ece4Schristos             break;
827*3117ece4Schristos 
828*3117ece4Schristos         *op++ = FSE_GETSYMBOL(&state1);
829*3117ece4Schristos 
830*3117ece4Schristos         if ( (FSE_reloadDStream(&bitD)>FSE_DStream_completed) || (op==omax) || (FSE_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state2))) )
831*3117ece4Schristos             break;
832*3117ece4Schristos 
833*3117ece4Schristos         *op++ = FSE_GETSYMBOL(&state2);
834*3117ece4Schristos     }
835*3117ece4Schristos 
836*3117ece4Schristos     /* end ? */
837*3117ece4Schristos     if (FSE_endOfDStream(&bitD) && FSE_endOfDState(&state1) && FSE_endOfDState(&state2))
838*3117ece4Schristos         return op-ostart;
839*3117ece4Schristos 
840*3117ece4Schristos     if (op==omax) return (size_t)-FSE_ERROR_dstSize_tooSmall;   /* dst buffer is full, but cSrc unfinished */
841*3117ece4Schristos 
842*3117ece4Schristos     return (size_t)-FSE_ERROR_corruptionDetected;
843*3117ece4Schristos }
844*3117ece4Schristos 
845*3117ece4Schristos 
846*3117ece4Schristos static size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
847*3117ece4Schristos                             const void* cSrc, size_t cSrcSize,
848*3117ece4Schristos                             const FSE_DTable* dt)
849*3117ece4Schristos {
850*3117ece4Schristos     FSE_DTableHeader DTableH;
851*3117ece4Schristos     memcpy(&DTableH, dt, sizeof(DTableH));   /* memcpy() into local variable, to avoid strict aliasing warning */
852*3117ece4Schristos 
853*3117ece4Schristos     /* select fast mode (static) */
854*3117ece4Schristos     if (DTableH.fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
855*3117ece4Schristos     return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0);
856*3117ece4Schristos }
857*3117ece4Schristos 
858*3117ece4Schristos 
859*3117ece4Schristos static size_t FSE_decompress(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize)
860*3117ece4Schristos {
861*3117ece4Schristos     const BYTE* const istart = (const BYTE*)cSrc;
862*3117ece4Schristos     const BYTE* ip = istart;
863*3117ece4Schristos     short counting[FSE_MAX_SYMBOL_VALUE+1];
864*3117ece4Schristos     DTable_max_t dt;   /* Static analyzer seems unable to understand this table will be properly initialized later */
865*3117ece4Schristos     unsigned tableLog;
866*3117ece4Schristos     unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
867*3117ece4Schristos     size_t errorCode;
868*3117ece4Schristos 
869*3117ece4Schristos     if (cSrcSize<2) return (size_t)-FSE_ERROR_srcSize_wrong;   /* too small input size */
870*3117ece4Schristos 
871*3117ece4Schristos     /* normal FSE decoding mode */
872*3117ece4Schristos     errorCode = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize);
873*3117ece4Schristos     if (FSE_isError(errorCode)) return errorCode;
874*3117ece4Schristos     if (errorCode >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong;   /* too small input size */
875*3117ece4Schristos     ip += errorCode;
876*3117ece4Schristos     cSrcSize -= errorCode;
877*3117ece4Schristos 
878*3117ece4Schristos     errorCode = FSE_buildDTable (dt, counting, maxSymbolValue, tableLog);
879*3117ece4Schristos     if (FSE_isError(errorCode)) return errorCode;
880*3117ece4Schristos 
881*3117ece4Schristos     /* always return, even if it is an error code */
882*3117ece4Schristos     return FSE_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, dt);
883*3117ece4Schristos }
884*3117ece4Schristos 
885*3117ece4Schristos 
886*3117ece4Schristos 
887*3117ece4Schristos /* *******************************************************
888*3117ece4Schristos *  Huff0 : Huffman block compression
889*3117ece4Schristos *********************************************************/
890*3117ece4Schristos #define HUF_MAX_SYMBOL_VALUE 255
891*3117ece4Schristos #define HUF_DEFAULT_TABLELOG  12       /* used by default, when not specified */
892*3117ece4Schristos #define HUF_MAX_TABLELOG  12           /* max possible tableLog; for allocation purpose; can be modified */
893*3117ece4Schristos #define HUF_ABSOLUTEMAX_TABLELOG  16   /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
894*3117ece4Schristos #if (HUF_MAX_TABLELOG > HUF_ABSOLUTEMAX_TABLELOG)
895*3117ece4Schristos #  error "HUF_MAX_TABLELOG is too large !"
896*3117ece4Schristos #endif
897*3117ece4Schristos 
898*3117ece4Schristos typedef struct HUF_CElt_s {
899*3117ece4Schristos   U16  val;
900*3117ece4Schristos   BYTE nbBits;
901*3117ece4Schristos } HUF_CElt ;
902*3117ece4Schristos 
903*3117ece4Schristos typedef struct nodeElt_s {
904*3117ece4Schristos     U32 count;
905*3117ece4Schristos     U16 parent;
906*3117ece4Schristos     BYTE byte;
907*3117ece4Schristos     BYTE nbBits;
908*3117ece4Schristos } nodeElt;
909*3117ece4Schristos 
910*3117ece4Schristos 
911*3117ece4Schristos /* *******************************************************
912*3117ece4Schristos *  Huff0 : Huffman block decompression
913*3117ece4Schristos *********************************************************/
914*3117ece4Schristos typedef struct {
915*3117ece4Schristos     BYTE byte;
916*3117ece4Schristos     BYTE nbBits;
917*3117ece4Schristos } HUF_DElt;
918*3117ece4Schristos 
919*3117ece4Schristos static size_t HUF_readDTable (U16* DTable, const void* src, size_t srcSize)
920*3117ece4Schristos {
921*3117ece4Schristos     BYTE huffWeight[HUF_MAX_SYMBOL_VALUE + 1];
922*3117ece4Schristos     U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1];  /* large enough for values from 0 to 16 */
923*3117ece4Schristos     U32 weightTotal;
924*3117ece4Schristos     U32 maxBits;
925*3117ece4Schristos     const BYTE* ip = (const BYTE*) src;
926*3117ece4Schristos     size_t iSize;
927*3117ece4Schristos     size_t oSize;
928*3117ece4Schristos     U32 n;
929*3117ece4Schristos     U32 nextRankStart;
930*3117ece4Schristos     void* ptr = DTable+1;
931*3117ece4Schristos     HUF_DElt* const dt = (HUF_DElt*)ptr;
932*3117ece4Schristos 
933*3117ece4Schristos     if (!srcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
934*3117ece4Schristos     iSize = ip[0];
935*3117ece4Schristos 
936*3117ece4Schristos     FSE_STATIC_ASSERT(sizeof(HUF_DElt) == sizeof(U16));   /* if compilation fails here, assertion is false */
937*3117ece4Schristos     //memset(huffWeight, 0, sizeof(huffWeight));   /* should not be necessary, but some analyzer complain ... */
938*3117ece4Schristos     if (iSize >= 128)  /* special header */
939*3117ece4Schristos     {
940*3117ece4Schristos         if (iSize >= (242))   /* RLE */
941*3117ece4Schristos         {
942*3117ece4Schristos             static int l[14] = { 1, 2, 3, 4, 7, 8, 15, 16, 31, 32, 63, 64, 127, 128 };
943*3117ece4Schristos             oSize = l[iSize-242];
944*3117ece4Schristos             memset(huffWeight, 1, sizeof(huffWeight));
945*3117ece4Schristos             iSize = 0;
946*3117ece4Schristos         }
947*3117ece4Schristos         else   /* Incompressible */
948*3117ece4Schristos         {
949*3117ece4Schristos             oSize = iSize - 127;
950*3117ece4Schristos             iSize = ((oSize+1)/2);
951*3117ece4Schristos             if (iSize+1 > srcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
952*3117ece4Schristos             ip += 1;
953*3117ece4Schristos             for (n=0; n<oSize; n+=2)
954*3117ece4Schristos             {
955*3117ece4Schristos                 huffWeight[n]   = ip[n/2] >> 4;
956*3117ece4Schristos                 huffWeight[n+1] = ip[n/2] & 15;
957*3117ece4Schristos             }
958*3117ece4Schristos         }
959*3117ece4Schristos     }
960*3117ece4Schristos     else  /* header compressed with FSE (normal case) */
961*3117ece4Schristos     {
962*3117ece4Schristos         if (iSize+1 > srcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
963*3117ece4Schristos         oSize = FSE_decompress(huffWeight, HUF_MAX_SYMBOL_VALUE, ip+1, iSize);   /* max 255 values decoded, last one is implied */
964*3117ece4Schristos         if (FSE_isError(oSize)) return oSize;
965*3117ece4Schristos     }
966*3117ece4Schristos 
967*3117ece4Schristos     /* collect weight stats */
968*3117ece4Schristos     memset(rankVal, 0, sizeof(rankVal));
969*3117ece4Schristos     weightTotal = 0;
970*3117ece4Schristos     for (n=0; n<oSize; n++)
971*3117ece4Schristos     {
972*3117ece4Schristos         if (huffWeight[n] >= HUF_ABSOLUTEMAX_TABLELOG) return (size_t)-FSE_ERROR_corruptionDetected;
973*3117ece4Schristos         rankVal[huffWeight[n]]++;
974*3117ece4Schristos         weightTotal += (1 << huffWeight[n]) >> 1;
975*3117ece4Schristos     }
976*3117ece4Schristos     if (weightTotal == 0) return (size_t)-FSE_ERROR_corruptionDetected;
977*3117ece4Schristos 
978*3117ece4Schristos     /* get last non-null symbol weight (implied, total must be 2^n) */
979*3117ece4Schristos     maxBits = FSE_highbit32(weightTotal) + 1;
980*3117ece4Schristos     if (maxBits > DTable[0]) return (size_t)-FSE_ERROR_tableLog_tooLarge;   /* DTable is too small */
981*3117ece4Schristos     DTable[0] = (U16)maxBits;
982*3117ece4Schristos     {
983*3117ece4Schristos         U32 total = 1 << maxBits;
984*3117ece4Schristos         U32 rest = total - weightTotal;
985*3117ece4Schristos         U32 verif = 1 << FSE_highbit32(rest);
986*3117ece4Schristos         U32 lastWeight = FSE_highbit32(rest) + 1;
987*3117ece4Schristos         if (verif != rest) return (size_t)-FSE_ERROR_corruptionDetected;    /* last value must be a clean power of 2 */
988*3117ece4Schristos         huffWeight[oSize] = (BYTE)lastWeight;
989*3117ece4Schristos         rankVal[lastWeight]++;
990*3117ece4Schristos     }
991*3117ece4Schristos 
992*3117ece4Schristos     /* check tree construction validity */
993*3117ece4Schristos     if ((rankVal[1] < 2) || (rankVal[1] & 1)) return (size_t)-FSE_ERROR_corruptionDetected;   /* by construction : at least 2 elts of rank 1, must be even */
994*3117ece4Schristos 
995*3117ece4Schristos     /* Prepare ranks */
996*3117ece4Schristos     nextRankStart = 0;
997*3117ece4Schristos     for (n=1; n<=maxBits; n++)
998*3117ece4Schristos     {
999*3117ece4Schristos         U32 current = nextRankStart;
1000*3117ece4Schristos         nextRankStart += (rankVal[n] << (n-1));
1001*3117ece4Schristos         rankVal[n] = current;
1002*3117ece4Schristos     }
1003*3117ece4Schristos 
1004*3117ece4Schristos     /* fill DTable */
1005*3117ece4Schristos     for (n=0; n<=oSize; n++)
1006*3117ece4Schristos     {
1007*3117ece4Schristos         const U32 w = huffWeight[n];
1008*3117ece4Schristos         const U32 length = (1 << w) >> 1;
1009*3117ece4Schristos         U32 i;
1010*3117ece4Schristos         HUF_DElt D;
1011*3117ece4Schristos         D.byte = (BYTE)n; D.nbBits = (BYTE)(maxBits + 1 - w);
1012*3117ece4Schristos         for (i = rankVal[w]; i < rankVal[w] + length; i++)
1013*3117ece4Schristos             dt[i] = D;
1014*3117ece4Schristos         rankVal[w] += length;
1015*3117ece4Schristos     }
1016*3117ece4Schristos 
1017*3117ece4Schristos     return iSize+1;
1018*3117ece4Schristos }
1019*3117ece4Schristos 
1020*3117ece4Schristos 
1021*3117ece4Schristos static BYTE HUF_decodeSymbol(FSE_DStream_t* Dstream, const HUF_DElt* dt, const U32 dtLog)
1022*3117ece4Schristos {
1023*3117ece4Schristos         const size_t val = FSE_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
1024*3117ece4Schristos         const BYTE c = dt[val].byte;
1025*3117ece4Schristos         FSE_skipBits(Dstream, dt[val].nbBits);
1026*3117ece4Schristos         return c;
1027*3117ece4Schristos }
1028*3117ece4Schristos 
1029*3117ece4Schristos static size_t HUF_decompress_usingDTable(   /* -3% slower when non static */
1030*3117ece4Schristos           void* dst, size_t maxDstSize,
1031*3117ece4Schristos     const void* cSrc, size_t cSrcSize,
1032*3117ece4Schristos     const U16* DTable)
1033*3117ece4Schristos {
1034*3117ece4Schristos     if (cSrcSize < 6) return (size_t)-FSE_ERROR_srcSize_wrong;
1035*3117ece4Schristos     {
1036*3117ece4Schristos         BYTE* const ostart = (BYTE*) dst;
1037*3117ece4Schristos         BYTE* op = ostart;
1038*3117ece4Schristos         BYTE* const omax = op + maxDstSize;
1039*3117ece4Schristos         BYTE* const olimit = maxDstSize < 15 ? op : omax-15;
1040*3117ece4Schristos 
1041*3117ece4Schristos         const void* ptr = DTable;
1042*3117ece4Schristos         const HUF_DElt* const dt = (const HUF_DElt*)(ptr)+1;
1043*3117ece4Schristos         const U32 dtLog = DTable[0];
1044*3117ece4Schristos         size_t errorCode;
1045*3117ece4Schristos         U32 reloadStatus;
1046*3117ece4Schristos 
1047*3117ece4Schristos         /* Init */
1048*3117ece4Schristos 
1049*3117ece4Schristos         const U16* jumpTable = (const U16*)cSrc;
1050*3117ece4Schristos         const size_t length1 = FSE_readLE16(jumpTable);
1051*3117ece4Schristos         const size_t length2 = FSE_readLE16(jumpTable+1);
1052*3117ece4Schristos         const size_t length3 = FSE_readLE16(jumpTable+2);
1053*3117ece4Schristos         const size_t length4 = cSrcSize - 6 - length1 - length2 - length3;   /* check coherency !! */
1054*3117ece4Schristos         const char* const start1 = (const char*)(cSrc) + 6;
1055*3117ece4Schristos         const char* const start2 = start1 + length1;
1056*3117ece4Schristos         const char* const start3 = start2 + length2;
1057*3117ece4Schristos         const char* const start4 = start3 + length3;
1058*3117ece4Schristos         FSE_DStream_t bitD1, bitD2, bitD3, bitD4;
1059*3117ece4Schristos 
1060*3117ece4Schristos         if (length1+length2+length3+6 >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
1061*3117ece4Schristos 
1062*3117ece4Schristos         errorCode = FSE_initDStream(&bitD1, start1, length1);
1063*3117ece4Schristos         if (FSE_isError(errorCode)) return errorCode;
1064*3117ece4Schristos         errorCode = FSE_initDStream(&bitD2, start2, length2);
1065*3117ece4Schristos         if (FSE_isError(errorCode)) return errorCode;
1066*3117ece4Schristos         errorCode = FSE_initDStream(&bitD3, start3, length3);
1067*3117ece4Schristos         if (FSE_isError(errorCode)) return errorCode;
1068*3117ece4Schristos         errorCode = FSE_initDStream(&bitD4, start4, length4);
1069*3117ece4Schristos         if (FSE_isError(errorCode)) return errorCode;
1070*3117ece4Schristos 
1071*3117ece4Schristos         reloadStatus=FSE_reloadDStream(&bitD2);
1072*3117ece4Schristos 
1073*3117ece4Schristos         /* 16 symbols per loop */
1074*3117ece4Schristos         for ( ; (reloadStatus<FSE_DStream_completed) && (op<olimit);  /* D2-3-4 are supposed to be synchronized and finish together */
1075*3117ece4Schristos             op+=16, reloadStatus = FSE_reloadDStream(&bitD2) | FSE_reloadDStream(&bitD3) | FSE_reloadDStream(&bitD4), FSE_reloadDStream(&bitD1))
1076*3117ece4Schristos         {
1077*3117ece4Schristos     #define HUF_DECODE_SYMBOL_0(n, Dstream) \
1078*3117ece4Schristos             op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog);
1079*3117ece4Schristos 
1080*3117ece4Schristos     #define HUF_DECODE_SYMBOL_1(n, Dstream) \
1081*3117ece4Schristos             op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \
1082*3117ece4Schristos             if (FSE_32bits() && (HUF_MAX_TABLELOG>12)) FSE_reloadDStream(&Dstream)
1083*3117ece4Schristos 
1084*3117ece4Schristos     #define HUF_DECODE_SYMBOL_2(n, Dstream) \
1085*3117ece4Schristos             op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \
1086*3117ece4Schristos             if (FSE_32bits()) FSE_reloadDStream(&Dstream)
1087*3117ece4Schristos 
1088*3117ece4Schristos             HUF_DECODE_SYMBOL_1( 0, bitD1);
1089*3117ece4Schristos             HUF_DECODE_SYMBOL_1( 1, bitD2);
1090*3117ece4Schristos             HUF_DECODE_SYMBOL_1( 2, bitD3);
1091*3117ece4Schristos             HUF_DECODE_SYMBOL_1( 3, bitD4);
1092*3117ece4Schristos             HUF_DECODE_SYMBOL_2( 4, bitD1);
1093*3117ece4Schristos             HUF_DECODE_SYMBOL_2( 5, bitD2);
1094*3117ece4Schristos             HUF_DECODE_SYMBOL_2( 6, bitD3);
1095*3117ece4Schristos             HUF_DECODE_SYMBOL_2( 7, bitD4);
1096*3117ece4Schristos             HUF_DECODE_SYMBOL_1( 8, bitD1);
1097*3117ece4Schristos             HUF_DECODE_SYMBOL_1( 9, bitD2);
1098*3117ece4Schristos             HUF_DECODE_SYMBOL_1(10, bitD3);
1099*3117ece4Schristos             HUF_DECODE_SYMBOL_1(11, bitD4);
1100*3117ece4Schristos             HUF_DECODE_SYMBOL_0(12, bitD1);
1101*3117ece4Schristos             HUF_DECODE_SYMBOL_0(13, bitD2);
1102*3117ece4Schristos             HUF_DECODE_SYMBOL_0(14, bitD3);
1103*3117ece4Schristos             HUF_DECODE_SYMBOL_0(15, bitD4);
1104*3117ece4Schristos         }
1105*3117ece4Schristos 
1106*3117ece4Schristos         if (reloadStatus!=FSE_DStream_completed)   /* not complete : some bitStream might be FSE_DStream_unfinished */
1107*3117ece4Schristos             return (size_t)-FSE_ERROR_corruptionDetected;
1108*3117ece4Schristos 
1109*3117ece4Schristos         /* tail */
1110*3117ece4Schristos         {
1111*3117ece4Schristos             /* bitTail = bitD1; */   /* *much* slower : -20% !??! */
1112*3117ece4Schristos             FSE_DStream_t bitTail;
1113*3117ece4Schristos             bitTail.ptr = bitD1.ptr;
1114*3117ece4Schristos             bitTail.bitsConsumed = bitD1.bitsConsumed;
1115*3117ece4Schristos             bitTail.bitContainer = bitD1.bitContainer;   /* required in case of FSE_DStream_endOfBuffer */
1116*3117ece4Schristos             bitTail.start = start1;
1117*3117ece4Schristos             for ( ; (FSE_reloadDStream(&bitTail) < FSE_DStream_completed) && (op<omax) ; op++)
1118*3117ece4Schristos             {
1119*3117ece4Schristos                 HUF_DECODE_SYMBOL_0(0, bitTail);
1120*3117ece4Schristos             }
1121*3117ece4Schristos 
1122*3117ece4Schristos             if (FSE_endOfDStream(&bitTail))
1123*3117ece4Schristos                 return op-ostart;
1124*3117ece4Schristos         }
1125*3117ece4Schristos 
1126*3117ece4Schristos         if (op==omax) return (size_t)-FSE_ERROR_dstSize_tooSmall;   /* dst buffer is full, but cSrc unfinished */
1127*3117ece4Schristos 
1128*3117ece4Schristos         return (size_t)-FSE_ERROR_corruptionDetected;
1129*3117ece4Schristos     }
1130*3117ece4Schristos }
1131*3117ece4Schristos 
1132*3117ece4Schristos 
1133*3117ece4Schristos static size_t HUF_decompress (void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize)
1134*3117ece4Schristos {
1135*3117ece4Schristos     HUF_CREATE_STATIC_DTABLE(DTable, HUF_MAX_TABLELOG);
1136*3117ece4Schristos     const BYTE* ip = (const BYTE*) cSrc;
1137*3117ece4Schristos     size_t errorCode;
1138*3117ece4Schristos 
1139*3117ece4Schristos     errorCode = HUF_readDTable (DTable, cSrc, cSrcSize);
1140*3117ece4Schristos     if (FSE_isError(errorCode)) return errorCode;
1141*3117ece4Schristos     if (errorCode >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
1142*3117ece4Schristos     ip += errorCode;
1143*3117ece4Schristos     cSrcSize -= errorCode;
1144*3117ece4Schristos 
1145*3117ece4Schristos     return HUF_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, DTable);
1146*3117ece4Schristos }
1147*3117ece4Schristos 
1148*3117ece4Schristos 
1149*3117ece4Schristos #endif   /* FSE_COMMONDEFS_ONLY */
1150*3117ece4Schristos 
1151*3117ece4Schristos /*
1152*3117ece4Schristos     zstd - standard compression library
1153*3117ece4Schristos     Copyright (C) 2014-2015, Yann Collet.
1154*3117ece4Schristos 
1155*3117ece4Schristos     BSD 2-Clause License (https://opensource.org/licenses/bsd-license.php)
1156*3117ece4Schristos 
1157*3117ece4Schristos     Redistribution and use in source and binary forms, with or without
1158*3117ece4Schristos     modification, are permitted provided that the following conditions are
1159*3117ece4Schristos     met:
1160*3117ece4Schristos     * Redistributions of source code must retain the above copyright
1161*3117ece4Schristos     notice, this list of conditions and the following disclaimer.
1162*3117ece4Schristos     * Redistributions in binary form must reproduce the above
1163*3117ece4Schristos     copyright notice, this list of conditions and the following disclaimer
1164*3117ece4Schristos     in the documentation and/or other materials provided with the
1165*3117ece4Schristos     distribution.
1166*3117ece4Schristos     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
1167*3117ece4Schristos     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
1168*3117ece4Schristos     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
1169*3117ece4Schristos     A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
1170*3117ece4Schristos     OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
1171*3117ece4Schristos     SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
1172*3117ece4Schristos     LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
1173*3117ece4Schristos     DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
1174*3117ece4Schristos     THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
1175*3117ece4Schristos     (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
1176*3117ece4Schristos     OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1177*3117ece4Schristos 
1178*3117ece4Schristos     You can contact the author at :
1179*3117ece4Schristos     - zstd source repository : https://github.com/Cyan4973/zstd
1180*3117ece4Schristos     - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c
1181*3117ece4Schristos */
1182*3117ece4Schristos 
1183*3117ece4Schristos /****************************************************************
1184*3117ece4Schristos *  Tuning parameters
1185*3117ece4Schristos *****************************************************************/
1186*3117ece4Schristos /* MEMORY_USAGE :
1187*3117ece4Schristos *  Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
1188*3117ece4Schristos *  Increasing memory usage improves compression ratio
1189*3117ece4Schristos *  Reduced memory usage can improve speed, due to cache effect */
1190*3117ece4Schristos #define ZSTD_MEMORY_USAGE 17
1191*3117ece4Schristos 
1192*3117ece4Schristos 
1193*3117ece4Schristos /**************************************
1194*3117ece4Schristos    CPU Feature Detection
1195*3117ece4Schristos **************************************/
1196*3117ece4Schristos /*
1197*3117ece4Schristos  * Automated efficient unaligned memory access detection
1198*3117ece4Schristos  * Based on known hardware architectures
1199*3117ece4Schristos  * This list will be updated thanks to feedbacks
1200*3117ece4Schristos  */
1201*3117ece4Schristos #if defined(CPU_HAS_EFFICIENT_UNALIGNED_MEMORY_ACCESS) \
1202*3117ece4Schristos     || defined(__ARM_FEATURE_UNALIGNED) \
1203*3117ece4Schristos     || defined(__i386__) || defined(__x86_64__) \
1204*3117ece4Schristos     || defined(_M_IX86) || defined(_M_X64) \
1205*3117ece4Schristos     || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_8__) \
1206*3117ece4Schristos     || (defined(_M_ARM) && (_M_ARM >= 7))
1207*3117ece4Schristos #  define ZSTD_UNALIGNED_ACCESS 1
1208*3117ece4Schristos #else
1209*3117ece4Schristos #  define ZSTD_UNALIGNED_ACCESS 0
1210*3117ece4Schristos #endif
1211*3117ece4Schristos 
1212*3117ece4Schristos 
1213*3117ece4Schristos /********************************************************
1214*3117ece4Schristos *  Includes
1215*3117ece4Schristos *********************************************************/
1216*3117ece4Schristos #include <stdlib.h>      /* calloc */
1217*3117ece4Schristos #include <string.h>      /* memcpy, memmove */
1218*3117ece4Schristos #include <stdio.h>       /* debug : printf */
1219*3117ece4Schristos 
1220*3117ece4Schristos 
1221*3117ece4Schristos /********************************************************
1222*3117ece4Schristos *  Compiler specifics
1223*3117ece4Schristos *********************************************************/
1224*3117ece4Schristos #ifdef __AVX2__
1225*3117ece4Schristos #  include <immintrin.h>   /* AVX2 intrinsics */
1226*3117ece4Schristos #endif
1227*3117ece4Schristos 
1228*3117ece4Schristos #ifdef _MSC_VER    /* Visual Studio */
1229*3117ece4Schristos #  include <intrin.h>                    /* For Visual 2005 */
1230*3117ece4Schristos #  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
1231*3117ece4Schristos #  pragma warning(disable : 4324)        /* disable: C4324: padded structure */
1232*3117ece4Schristos #endif
1233*3117ece4Schristos 
1234*3117ece4Schristos 
1235*3117ece4Schristos #ifndef MEM_ACCESS_MODULE
1236*3117ece4Schristos #define MEM_ACCESS_MODULE
1237*3117ece4Schristos /********************************************************
1238*3117ece4Schristos *  Basic Types
1239*3117ece4Schristos *********************************************************/
1240*3117ece4Schristos #if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
1241*3117ece4Schristos # if defined(_AIX)
1242*3117ece4Schristos #  include <inttypes.h>
1243*3117ece4Schristos # else
1244*3117ece4Schristos #  include <stdint.h> /* intptr_t */
1245*3117ece4Schristos # endif
1246*3117ece4Schristos typedef  uint8_t BYTE;
1247*3117ece4Schristos typedef uint16_t U16;
1248*3117ece4Schristos typedef  int16_t S16;
1249*3117ece4Schristos typedef uint32_t U32;
1250*3117ece4Schristos typedef  int32_t S32;
1251*3117ece4Schristos typedef uint64_t U64;
1252*3117ece4Schristos #else
1253*3117ece4Schristos typedef unsigned char       BYTE;
1254*3117ece4Schristos typedef unsigned short      U16;
1255*3117ece4Schristos typedef   signed short      S16;
1256*3117ece4Schristos typedef unsigned int        U32;
1257*3117ece4Schristos typedef   signed int        S32;
1258*3117ece4Schristos typedef unsigned long long  U64;
1259*3117ece4Schristos #endif
1260*3117ece4Schristos 
1261*3117ece4Schristos #endif   /* MEM_ACCESS_MODULE */
1262*3117ece4Schristos 
1263*3117ece4Schristos 
1264*3117ece4Schristos /********************************************************
1265*3117ece4Schristos *  Constants
1266*3117ece4Schristos *********************************************************/
1267*3117ece4Schristos static const U32 ZSTD_magicNumber = 0xFD2FB51E;   /* 3rd version : seqNb header */
1268*3117ece4Schristos 
1269*3117ece4Schristos #define HASH_LOG (ZSTD_MEMORY_USAGE - 2)
1270*3117ece4Schristos #define HASH_TABLESIZE (1 << HASH_LOG)
1271*3117ece4Schristos #define HASH_MASK (HASH_TABLESIZE - 1)
1272*3117ece4Schristos 
1273*3117ece4Schristos #define KNUTH 2654435761
1274*3117ece4Schristos 
1275*3117ece4Schristos #define BIT7 128
1276*3117ece4Schristos #define BIT6  64
1277*3117ece4Schristos #define BIT5  32
1278*3117ece4Schristos #define BIT4  16
1279*3117ece4Schristos 
1280*3117ece4Schristos #define KB *(1 <<10)
1281*3117ece4Schristos #define MB *(1 <<20)
1282*3117ece4Schristos #define GB *(1U<<30)
1283*3117ece4Schristos 
1284*3117ece4Schristos #define BLOCKSIZE (128 KB)                 /* define, for static allocation */
1285*3117ece4Schristos 
1286*3117ece4Schristos #define WORKPLACESIZE (BLOCKSIZE*3)
1287*3117ece4Schristos #define MINMATCH 4
1288*3117ece4Schristos #define MLbits   7
1289*3117ece4Schristos #define LLbits   6
1290*3117ece4Schristos #define Offbits  5
1291*3117ece4Schristos #define MaxML  ((1<<MLbits )-1)
1292*3117ece4Schristos #define MaxLL  ((1<<LLbits )-1)
1293*3117ece4Schristos #define MaxOff ((1<<Offbits)-1)
1294*3117ece4Schristos #define LitFSELog  11
1295*3117ece4Schristos #define MLFSELog   10
1296*3117ece4Schristos #define LLFSELog   10
1297*3117ece4Schristos #define OffFSELog   9
1298*3117ece4Schristos #define MAX(a,b) ((a)<(b)?(b):(a))
1299*3117ece4Schristos #define MaxSeq MAX(MaxLL, MaxML)
1300*3117ece4Schristos 
1301*3117ece4Schristos #define LITERAL_NOENTROPY 63
1302*3117ece4Schristos #define COMMAND_NOENTROPY 7   /* to remove */
1303*3117ece4Schristos 
1304*3117ece4Schristos #define ZSTD_CONTENTSIZE_ERROR   (0ULL - 2)
1305*3117ece4Schristos 
1306*3117ece4Schristos static const size_t ZSTD_blockHeaderSize = 3;
1307*3117ece4Schristos static const size_t ZSTD_frameHeaderSize = 4;
1308*3117ece4Schristos 
1309*3117ece4Schristos 
1310*3117ece4Schristos /********************************************************
1311*3117ece4Schristos *  Memory operations
1312*3117ece4Schristos *********************************************************/
1313*3117ece4Schristos static unsigned ZSTD_32bits(void) { return sizeof(void*)==4; }
1314*3117ece4Schristos 
1315*3117ece4Schristos static unsigned ZSTD_isLittleEndian(void)
1316*3117ece4Schristos {
1317*3117ece4Schristos     const union { U32 i; BYTE c[4]; } one = { 1 };   /* don't use static : performance detrimental  */
1318*3117ece4Schristos     return one.c[0];
1319*3117ece4Schristos }
1320*3117ece4Schristos 
1321*3117ece4Schristos static U16    ZSTD_read16(const void* p) { U16 r; memcpy(&r, p, sizeof(r)); return r; }
1322*3117ece4Schristos 
1323*3117ece4Schristos static void   ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
1324*3117ece4Schristos 
1325*3117ece4Schristos static void   ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
1326*3117ece4Schristos 
1327*3117ece4Schristos #define COPY8(d,s)    { ZSTD_copy8(d,s); d+=8; s+=8; }
1328*3117ece4Schristos 
1329*3117ece4Schristos static void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length)
1330*3117ece4Schristos {
1331*3117ece4Schristos     const BYTE* ip = (const BYTE*)src;
1332*3117ece4Schristos     BYTE* op = (BYTE*)dst;
1333*3117ece4Schristos     BYTE* const oend = op + length;
1334*3117ece4Schristos     while (op < oend) COPY8(op, ip);
1335*3117ece4Schristos }
1336*3117ece4Schristos 
1337*3117ece4Schristos static U16 ZSTD_readLE16(const void* memPtr)
1338*3117ece4Schristos {
1339*3117ece4Schristos     if (ZSTD_isLittleEndian()) return ZSTD_read16(memPtr);
1340*3117ece4Schristos     else
1341*3117ece4Schristos     {
1342*3117ece4Schristos         const BYTE* p = (const BYTE*)memPtr;
1343*3117ece4Schristos         return (U16)((U16)p[0] + ((U16)p[1]<<8));
1344*3117ece4Schristos     }
1345*3117ece4Schristos }
1346*3117ece4Schristos 
1347*3117ece4Schristos static U32 ZSTD_readLE24(const void* memPtr)
1348*3117ece4Schristos {
1349*3117ece4Schristos     return ZSTD_readLE16(memPtr) + (((const BYTE*)memPtr)[2] << 16);
1350*3117ece4Schristos }
1351*3117ece4Schristos 
1352*3117ece4Schristos static U32 ZSTD_readBE32(const void* memPtr)
1353*3117ece4Schristos {
1354*3117ece4Schristos     const BYTE* p = (const BYTE*)memPtr;
1355*3117ece4Schristos     return (U32)(((U32)p[0]<<24) + ((U32)p[1]<<16) + ((U32)p[2]<<8) + ((U32)p[3]<<0));
1356*3117ece4Schristos }
1357*3117ece4Schristos 
1358*3117ece4Schristos 
1359*3117ece4Schristos /**************************************
1360*3117ece4Schristos *  Local structures
1361*3117ece4Schristos ***************************************/
1362*3117ece4Schristos typedef struct ZSTD_Cctx_s ZSTD_Cctx;
1363*3117ece4Schristos 
1364*3117ece4Schristos typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t;
1365*3117ece4Schristos 
1366*3117ece4Schristos typedef struct
1367*3117ece4Schristos {
1368*3117ece4Schristos     blockType_t blockType;
1369*3117ece4Schristos     U32 origSize;
1370*3117ece4Schristos } blockProperties_t;
1371*3117ece4Schristos 
1372*3117ece4Schristos typedef struct {
1373*3117ece4Schristos     void* buffer;
1374*3117ece4Schristos     U32*  offsetStart;
1375*3117ece4Schristos     U32*  offset;
1376*3117ece4Schristos     BYTE* offCodeStart;
1377*3117ece4Schristos     BYTE* offCode;
1378*3117ece4Schristos     BYTE* litStart;
1379*3117ece4Schristos     BYTE* lit;
1380*3117ece4Schristos     BYTE* litLengthStart;
1381*3117ece4Schristos     BYTE* litLength;
1382*3117ece4Schristos     BYTE* matchLengthStart;
1383*3117ece4Schristos     BYTE* matchLength;
1384*3117ece4Schristos     BYTE* dumpsStart;
1385*3117ece4Schristos     BYTE* dumps;
1386*3117ece4Schristos } seqStore_t;
1387*3117ece4Schristos 
1388*3117ece4Schristos 
1389*3117ece4Schristos typedef struct ZSTD_Cctx_s
1390*3117ece4Schristos {
1391*3117ece4Schristos     const BYTE* base;
1392*3117ece4Schristos     U32 current;
1393*3117ece4Schristos     U32 nextUpdate;
1394*3117ece4Schristos     seqStore_t seqStore;
1395*3117ece4Schristos #ifdef __AVX2__
1396*3117ece4Schristos     __m256i hashTable[HASH_TABLESIZE>>3];
1397*3117ece4Schristos #else
1398*3117ece4Schristos     U32 hashTable[HASH_TABLESIZE];
1399*3117ece4Schristos #endif
1400*3117ece4Schristos     BYTE buffer[WORKPLACESIZE];
1401*3117ece4Schristos } cctxi_t;
1402*3117ece4Schristos 
1403*3117ece4Schristos 
1404*3117ece4Schristos 
1405*3117ece4Schristos 
1406*3117ece4Schristos /**************************************
1407*3117ece4Schristos *  Error Management
1408*3117ece4Schristos **************************************/
1409*3117ece4Schristos /* published entry point */
1410*3117ece4Schristos unsigned ZSTDv01_isError(size_t code) { return ERR_isError(code); }
1411*3117ece4Schristos 
1412*3117ece4Schristos 
1413*3117ece4Schristos /**************************************
1414*3117ece4Schristos *  Tool functions
1415*3117ece4Schristos **************************************/
1416*3117ece4Schristos #define ZSTD_VERSION_MAJOR    0    /* for breaking interface changes  */
1417*3117ece4Schristos #define ZSTD_VERSION_MINOR    1    /* for new (non-breaking) interface capabilities */
1418*3117ece4Schristos #define ZSTD_VERSION_RELEASE  3    /* for tweaks, bug-fixes, or development */
1419*3117ece4Schristos #define ZSTD_VERSION_NUMBER  (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
1420*3117ece4Schristos 
1421*3117ece4Schristos /**************************************************************
1422*3117ece4Schristos *   Decompression code
1423*3117ece4Schristos **************************************************************/
1424*3117ece4Schristos 
1425*3117ece4Schristos static size_t ZSTDv01_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
1426*3117ece4Schristos {
1427*3117ece4Schristos     const BYTE* const in = (const BYTE* const)src;
1428*3117ece4Schristos     BYTE headerFlags;
1429*3117ece4Schristos     U32 cSize;
1430*3117ece4Schristos 
1431*3117ece4Schristos     if (srcSize < 3) return ERROR(srcSize_wrong);
1432*3117ece4Schristos 
1433*3117ece4Schristos     headerFlags = *in;
1434*3117ece4Schristos     cSize = in[2] + (in[1]<<8) + ((in[0] & 7)<<16);
1435*3117ece4Schristos 
1436*3117ece4Schristos     bpPtr->blockType = (blockType_t)(headerFlags >> 6);
1437*3117ece4Schristos     bpPtr->origSize = (bpPtr->blockType == bt_rle) ? cSize : 0;
1438*3117ece4Schristos 
1439*3117ece4Schristos     if (bpPtr->blockType == bt_end) return 0;
1440*3117ece4Schristos     if (bpPtr->blockType == bt_rle) return 1;
1441*3117ece4Schristos     return cSize;
1442*3117ece4Schristos }
1443*3117ece4Schristos 
1444*3117ece4Schristos 
1445*3117ece4Schristos static size_t ZSTD_copyUncompressedBlock(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
1446*3117ece4Schristos {
1447*3117ece4Schristos     if (srcSize > maxDstSize) return ERROR(dstSize_tooSmall);
1448*3117ece4Schristos     if (srcSize > 0) {
1449*3117ece4Schristos         memcpy(dst, src, srcSize);
1450*3117ece4Schristos     }
1451*3117ece4Schristos     return srcSize;
1452*3117ece4Schristos }
1453*3117ece4Schristos 
1454*3117ece4Schristos 
1455*3117ece4Schristos static size_t ZSTD_decompressLiterals(void* ctx,
1456*3117ece4Schristos                                       void* dst, size_t maxDstSize,
1457*3117ece4Schristos                                 const void* src, size_t srcSize)
1458*3117ece4Schristos {
1459*3117ece4Schristos     BYTE* op = (BYTE*)dst;
1460*3117ece4Schristos     BYTE* const oend = op + maxDstSize;
1461*3117ece4Schristos     const BYTE* ip = (const BYTE*)src;
1462*3117ece4Schristos     size_t errorCode;
1463*3117ece4Schristos     size_t litSize;
1464*3117ece4Schristos 
1465*3117ece4Schristos     /* check : minimum 2, for litSize, +1, for content */
1466*3117ece4Schristos     if (srcSize <= 3) return ERROR(corruption_detected);
1467*3117ece4Schristos 
1468*3117ece4Schristos     litSize = ip[1] + (ip[0]<<8);
1469*3117ece4Schristos     litSize += ((ip[-3] >> 3) & 7) << 16;   /* mmmmh.... */
1470*3117ece4Schristos     op = oend - litSize;
1471*3117ece4Schristos 
1472*3117ece4Schristos     (void)ctx;
1473*3117ece4Schristos     if (litSize > maxDstSize) return ERROR(dstSize_tooSmall);
1474*3117ece4Schristos     errorCode = HUF_decompress(op, litSize, ip+2, srcSize-2);
1475*3117ece4Schristos     if (FSE_isError(errorCode)) return ERROR(GENERIC);
1476*3117ece4Schristos     return litSize;
1477*3117ece4Schristos }
1478*3117ece4Schristos 
1479*3117ece4Schristos 
1480*3117ece4Schristos static size_t ZSTDv01_decodeLiteralsBlock(void* ctx,
1481*3117ece4Schristos                                 void* dst, size_t maxDstSize,
1482*3117ece4Schristos                           const BYTE** litStart, size_t* litSize,
1483*3117ece4Schristos                           const void* src, size_t srcSize)
1484*3117ece4Schristos {
1485*3117ece4Schristos     const BYTE* const istart = (const BYTE* const)src;
1486*3117ece4Schristos     const BYTE* ip = istart;
1487*3117ece4Schristos     BYTE* const ostart = (BYTE* const)dst;
1488*3117ece4Schristos     BYTE* const oend = ostart + maxDstSize;
1489*3117ece4Schristos     blockProperties_t litbp;
1490*3117ece4Schristos 
1491*3117ece4Schristos     size_t litcSize = ZSTDv01_getcBlockSize(src, srcSize, &litbp);
1492*3117ece4Schristos     if (ZSTDv01_isError(litcSize)) return litcSize;
1493*3117ece4Schristos     if (litcSize > srcSize - ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
1494*3117ece4Schristos     ip += ZSTD_blockHeaderSize;
1495*3117ece4Schristos 
1496*3117ece4Schristos     switch(litbp.blockType)
1497*3117ece4Schristos     {
1498*3117ece4Schristos     case bt_raw:
1499*3117ece4Schristos         *litStart = ip;
1500*3117ece4Schristos         ip += litcSize;
1501*3117ece4Schristos         *litSize = litcSize;
1502*3117ece4Schristos         break;
1503*3117ece4Schristos     case bt_rle:
1504*3117ece4Schristos         {
1505*3117ece4Schristos             size_t rleSize = litbp.origSize;
1506*3117ece4Schristos             if (rleSize>maxDstSize) return ERROR(dstSize_tooSmall);
1507*3117ece4Schristos             if (!srcSize) return ERROR(srcSize_wrong);
1508*3117ece4Schristos             if (rleSize > 0) {
1509*3117ece4Schristos                 memset(oend - rleSize, *ip, rleSize);
1510*3117ece4Schristos             }
1511*3117ece4Schristos             *litStart = oend - rleSize;
1512*3117ece4Schristos             *litSize = rleSize;
1513*3117ece4Schristos             ip++;
1514*3117ece4Schristos             break;
1515*3117ece4Schristos         }
1516*3117ece4Schristos     case bt_compressed:
1517*3117ece4Schristos         {
1518*3117ece4Schristos             size_t decodedLitSize = ZSTD_decompressLiterals(ctx, dst, maxDstSize, ip, litcSize);
1519*3117ece4Schristos             if (ZSTDv01_isError(decodedLitSize)) return decodedLitSize;
1520*3117ece4Schristos             *litStart = oend - decodedLitSize;
1521*3117ece4Schristos             *litSize = decodedLitSize;
1522*3117ece4Schristos             ip += litcSize;
1523*3117ece4Schristos             break;
1524*3117ece4Schristos         }
1525*3117ece4Schristos     case bt_end:
1526*3117ece4Schristos     default:
1527*3117ece4Schristos         return ERROR(GENERIC);
1528*3117ece4Schristos     }
1529*3117ece4Schristos 
1530*3117ece4Schristos     return ip-istart;
1531*3117ece4Schristos }
1532*3117ece4Schristos 
1533*3117ece4Schristos 
1534*3117ece4Schristos static size_t ZSTDv01_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLengthPtr,
1535*3117ece4Schristos                          FSE_DTable* DTableLL, FSE_DTable* DTableML, FSE_DTable* DTableOffb,
1536*3117ece4Schristos                          const void* src, size_t srcSize)
1537*3117ece4Schristos {
1538*3117ece4Schristos     const BYTE* const istart = (const BYTE* const)src;
1539*3117ece4Schristos     const BYTE* ip = istart;
1540*3117ece4Schristos     const BYTE* const iend = istart + srcSize;
1541*3117ece4Schristos     U32 LLtype, Offtype, MLtype;
1542*3117ece4Schristos     U32 LLlog, Offlog, MLlog;
1543*3117ece4Schristos     size_t dumpsLength;
1544*3117ece4Schristos 
1545*3117ece4Schristos     /* check */
1546*3117ece4Schristos     if (srcSize < 5) return ERROR(srcSize_wrong);
1547*3117ece4Schristos 
1548*3117ece4Schristos     /* SeqHead */
1549*3117ece4Schristos     *nbSeq = ZSTD_readLE16(ip); ip+=2;
1550*3117ece4Schristos     LLtype  = *ip >> 6;
1551*3117ece4Schristos     Offtype = (*ip >> 4) & 3;
1552*3117ece4Schristos     MLtype  = (*ip >> 2) & 3;
1553*3117ece4Schristos     if (*ip & 2)
1554*3117ece4Schristos     {
1555*3117ece4Schristos         dumpsLength  = ip[2];
1556*3117ece4Schristos         dumpsLength += ip[1] << 8;
1557*3117ece4Schristos         ip += 3;
1558*3117ece4Schristos     }
1559*3117ece4Schristos     else
1560*3117ece4Schristos     {
1561*3117ece4Schristos         dumpsLength  = ip[1];
1562*3117ece4Schristos         dumpsLength += (ip[0] & 1) << 8;
1563*3117ece4Schristos         ip += 2;
1564*3117ece4Schristos     }
1565*3117ece4Schristos     *dumpsPtr = ip;
1566*3117ece4Schristos     ip += dumpsLength;
1567*3117ece4Schristos     *dumpsLengthPtr = dumpsLength;
1568*3117ece4Schristos 
1569*3117ece4Schristos     /* check */
1570*3117ece4Schristos     if (ip > iend-3) return ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */
1571*3117ece4Schristos 
1572*3117ece4Schristos     /* sequences */
1573*3117ece4Schristos     {
1574*3117ece4Schristos         S16 norm[MaxML+1];    /* assumption : MaxML >= MaxLL and MaxOff */
1575*3117ece4Schristos         size_t headerSize;
1576*3117ece4Schristos 
1577*3117ece4Schristos         /* Build DTables */
1578*3117ece4Schristos         switch(LLtype)
1579*3117ece4Schristos         {
1580*3117ece4Schristos         case bt_rle :
1581*3117ece4Schristos             LLlog = 0;
1582*3117ece4Schristos             FSE_buildDTable_rle(DTableLL, *ip++); break;
1583*3117ece4Schristos         case bt_raw :
1584*3117ece4Schristos             LLlog = LLbits;
1585*3117ece4Schristos             FSE_buildDTable_raw(DTableLL, LLbits); break;
1586*3117ece4Schristos         default :
1587*3117ece4Schristos             {   U32 max = MaxLL;
1588*3117ece4Schristos                 headerSize = FSE_readNCount(norm, &max, &LLlog, ip, iend-ip);
1589*3117ece4Schristos                 if (FSE_isError(headerSize)) return ERROR(GENERIC);
1590*3117ece4Schristos                 if (LLlog > LLFSELog) return ERROR(corruption_detected);
1591*3117ece4Schristos                 ip += headerSize;
1592*3117ece4Schristos                 FSE_buildDTable(DTableLL, norm, max, LLlog);
1593*3117ece4Schristos         }   }
1594*3117ece4Schristos 
1595*3117ece4Schristos         switch(Offtype)
1596*3117ece4Schristos         {
1597*3117ece4Schristos         case bt_rle :
1598*3117ece4Schristos             Offlog = 0;
1599*3117ece4Schristos             if (ip > iend-2) return ERROR(srcSize_wrong); /* min : "raw", hence no header, but at least xxLog bits */
1600*3117ece4Schristos             FSE_buildDTable_rle(DTableOffb, *ip++); break;
1601*3117ece4Schristos         case bt_raw :
1602*3117ece4Schristos             Offlog = Offbits;
1603*3117ece4Schristos             FSE_buildDTable_raw(DTableOffb, Offbits); break;
1604*3117ece4Schristos         default :
1605*3117ece4Schristos             {   U32 max = MaxOff;
1606*3117ece4Schristos                 headerSize = FSE_readNCount(norm, &max, &Offlog, ip, iend-ip);
1607*3117ece4Schristos                 if (FSE_isError(headerSize)) return ERROR(GENERIC);
1608*3117ece4Schristos                 if (Offlog > OffFSELog) return ERROR(corruption_detected);
1609*3117ece4Schristos                 ip += headerSize;
1610*3117ece4Schristos                 FSE_buildDTable(DTableOffb, norm, max, Offlog);
1611*3117ece4Schristos         }   }
1612*3117ece4Schristos 
1613*3117ece4Schristos         switch(MLtype)
1614*3117ece4Schristos         {
1615*3117ece4Schristos         case bt_rle :
1616*3117ece4Schristos             MLlog = 0;
1617*3117ece4Schristos             if (ip > iend-2) return ERROR(srcSize_wrong); /* min : "raw", hence no header, but at least xxLog bits */
1618*3117ece4Schristos             FSE_buildDTable_rle(DTableML, *ip++); break;
1619*3117ece4Schristos         case bt_raw :
1620*3117ece4Schristos             MLlog = MLbits;
1621*3117ece4Schristos             FSE_buildDTable_raw(DTableML, MLbits); break;
1622*3117ece4Schristos         default :
1623*3117ece4Schristos             {   U32 max = MaxML;
1624*3117ece4Schristos                 headerSize = FSE_readNCount(norm, &max, &MLlog, ip, iend-ip);
1625*3117ece4Schristos                 if (FSE_isError(headerSize)) return ERROR(GENERIC);
1626*3117ece4Schristos                 if (MLlog > MLFSELog) return ERROR(corruption_detected);
1627*3117ece4Schristos                 ip += headerSize;
1628*3117ece4Schristos                 FSE_buildDTable(DTableML, norm, max, MLlog);
1629*3117ece4Schristos     }   }   }
1630*3117ece4Schristos 
1631*3117ece4Schristos     return ip-istart;
1632*3117ece4Schristos }
1633*3117ece4Schristos 
1634*3117ece4Schristos 
1635*3117ece4Schristos typedef struct {
1636*3117ece4Schristos     size_t litLength;
1637*3117ece4Schristos     size_t offset;
1638*3117ece4Schristos     size_t matchLength;
1639*3117ece4Schristos } seq_t;
1640*3117ece4Schristos 
1641*3117ece4Schristos typedef struct {
1642*3117ece4Schristos     FSE_DStream_t DStream;
1643*3117ece4Schristos     FSE_DState_t stateLL;
1644*3117ece4Schristos     FSE_DState_t stateOffb;
1645*3117ece4Schristos     FSE_DState_t stateML;
1646*3117ece4Schristos     size_t prevOffset;
1647*3117ece4Schristos     const BYTE* dumps;
1648*3117ece4Schristos     const BYTE* dumpsEnd;
1649*3117ece4Schristos } seqState_t;
1650*3117ece4Schristos 
1651*3117ece4Schristos 
1652*3117ece4Schristos static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
1653*3117ece4Schristos {
1654*3117ece4Schristos     size_t litLength;
1655*3117ece4Schristos     size_t prevOffset;
1656*3117ece4Schristos     size_t offset;
1657*3117ece4Schristos     size_t matchLength;
1658*3117ece4Schristos     const BYTE* dumps = seqState->dumps;
1659*3117ece4Schristos     const BYTE* const de = seqState->dumpsEnd;
1660*3117ece4Schristos 
1661*3117ece4Schristos     /* Literal length */
1662*3117ece4Schristos     litLength = FSE_decodeSymbol(&(seqState->stateLL), &(seqState->DStream));
1663*3117ece4Schristos     prevOffset = litLength ? seq->offset : seqState->prevOffset;
1664*3117ece4Schristos     seqState->prevOffset = seq->offset;
1665*3117ece4Schristos     if (litLength == MaxLL)
1666*3117ece4Schristos     {
1667*3117ece4Schristos         const U32 add = dumps<de ? *dumps++ : 0;
1668*3117ece4Schristos         if (add < 255) litLength += add;
1669*3117ece4Schristos         else
1670*3117ece4Schristos         {
1671*3117ece4Schristos             if (dumps<=(de-3))
1672*3117ece4Schristos             {
1673*3117ece4Schristos                 litLength = ZSTD_readLE24(dumps);
1674*3117ece4Schristos                 dumps += 3;
1675*3117ece4Schristos             }
1676*3117ece4Schristos         }
1677*3117ece4Schristos     }
1678*3117ece4Schristos 
1679*3117ece4Schristos     /* Offset */
1680*3117ece4Schristos     {
1681*3117ece4Schristos         U32 offsetCode, nbBits;
1682*3117ece4Schristos         offsetCode = FSE_decodeSymbol(&(seqState->stateOffb), &(seqState->DStream));
1683*3117ece4Schristos         if (ZSTD_32bits()) FSE_reloadDStream(&(seqState->DStream));
1684*3117ece4Schristos         nbBits = offsetCode - 1;
1685*3117ece4Schristos         if (offsetCode==0) nbBits = 0;   /* cmove */
1686*3117ece4Schristos         offset = ((size_t)1 << (nbBits & ((sizeof(offset)*8)-1))) + FSE_readBits(&(seqState->DStream), nbBits);
1687*3117ece4Schristos         if (ZSTD_32bits()) FSE_reloadDStream(&(seqState->DStream));
1688*3117ece4Schristos         if (offsetCode==0) offset = prevOffset;
1689*3117ece4Schristos     }
1690*3117ece4Schristos 
1691*3117ece4Schristos     /* MatchLength */
1692*3117ece4Schristos     matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
1693*3117ece4Schristos     if (matchLength == MaxML)
1694*3117ece4Schristos     {
1695*3117ece4Schristos         const U32 add = dumps<de ? *dumps++ : 0;
1696*3117ece4Schristos         if (add < 255) matchLength += add;
1697*3117ece4Schristos         else
1698*3117ece4Schristos         {
1699*3117ece4Schristos             if (dumps<=(de-3))
1700*3117ece4Schristos             {
1701*3117ece4Schristos                 matchLength = ZSTD_readLE24(dumps);
1702*3117ece4Schristos                 dumps += 3;
1703*3117ece4Schristos             }
1704*3117ece4Schristos         }
1705*3117ece4Schristos     }
1706*3117ece4Schristos     matchLength += MINMATCH;
1707*3117ece4Schristos 
1708*3117ece4Schristos     /* save result */
1709*3117ece4Schristos     seq->litLength = litLength;
1710*3117ece4Schristos     seq->offset = offset;
1711*3117ece4Schristos     seq->matchLength = matchLength;
1712*3117ece4Schristos     seqState->dumps = dumps;
1713*3117ece4Schristos }
1714*3117ece4Schristos 
1715*3117ece4Schristos 
1716*3117ece4Schristos static size_t ZSTD_execSequence(BYTE* op,
1717*3117ece4Schristos                                 seq_t sequence,
1718*3117ece4Schristos                                 const BYTE** litPtr, const BYTE* const litLimit,
1719*3117ece4Schristos                                 BYTE* const base, BYTE* const oend)
1720*3117ece4Schristos {
1721*3117ece4Schristos     static const int dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4};   /* added */
1722*3117ece4Schristos     static const int dec64table[] = {8, 8, 8, 7, 8, 9,10,11};   /* subtracted */
1723*3117ece4Schristos     const BYTE* const ostart = op;
1724*3117ece4Schristos     BYTE* const oLitEnd = op + sequence.litLength;
1725*3117ece4Schristos     const size_t litLength = sequence.litLength;
1726*3117ece4Schristos     BYTE* const endMatch = op + litLength + sequence.matchLength;    /* risk : address space overflow (32-bits) */
1727*3117ece4Schristos     const BYTE* const litEnd = *litPtr + litLength;
1728*3117ece4Schristos 
1729*3117ece4Schristos     /* checks */
1730*3117ece4Schristos     size_t const seqLength = sequence.litLength + sequence.matchLength;
1731*3117ece4Schristos 
1732*3117ece4Schristos     if (seqLength > (size_t)(oend - op)) return ERROR(dstSize_tooSmall);
1733*3117ece4Schristos     if (sequence.litLength > (size_t)(litLimit - *litPtr)) return ERROR(corruption_detected);
1734*3117ece4Schristos     /* Now we know there are no overflow in literal nor match lengths, can use pointer checks */
1735*3117ece4Schristos     if (sequence.offset > (U32)(oLitEnd - base)) return ERROR(corruption_detected);
1736*3117ece4Schristos 
1737*3117ece4Schristos     if (endMatch > oend) return ERROR(dstSize_tooSmall);   /* overwrite beyond dst buffer */
1738*3117ece4Schristos     if (litEnd > litLimit) return ERROR(corruption_detected);   /* overRead beyond lit buffer */
1739*3117ece4Schristos     if (sequence.matchLength > (size_t)(*litPtr-op)) return ERROR(dstSize_tooSmall);  /* overwrite literal segment */
1740*3117ece4Schristos 
1741*3117ece4Schristos     /* copy Literals */
1742*3117ece4Schristos     ZSTD_memmove(op, *litPtr, sequence.litLength);   /* note : v0.1 seems to allow scenarios where output or input are close to end of buffer */
1743*3117ece4Schristos 
1744*3117ece4Schristos     op += litLength;
1745*3117ece4Schristos     *litPtr = litEnd;   /* update for next sequence */
1746*3117ece4Schristos 
1747*3117ece4Schristos     /* check : last match must be at a minimum distance of 8 from end of dest buffer */
1748*3117ece4Schristos     if (oend-op < 8) return ERROR(dstSize_tooSmall);
1749*3117ece4Schristos 
1750*3117ece4Schristos     /* copy Match */
1751*3117ece4Schristos     {
1752*3117ece4Schristos         const U32 overlapRisk = (((size_t)(litEnd - endMatch)) < 12);
1753*3117ece4Schristos         const BYTE* match = op - sequence.offset;            /* possible underflow at op - offset ? */
1754*3117ece4Schristos         size_t qutt = 12;
1755*3117ece4Schristos         U64 saved[2];
1756*3117ece4Schristos 
1757*3117ece4Schristos         /* check */
1758*3117ece4Schristos         if (match < base) return ERROR(corruption_detected);
1759*3117ece4Schristos         if (sequence.offset > (size_t)base) return ERROR(corruption_detected);
1760*3117ece4Schristos 
1761*3117ece4Schristos         /* save beginning of literal sequence, in case of write overlap */
1762*3117ece4Schristos         if (overlapRisk)
1763*3117ece4Schristos         {
1764*3117ece4Schristos             if ((endMatch + qutt) > oend) qutt = oend-endMatch;
1765*3117ece4Schristos             memcpy(saved, endMatch, qutt);
1766*3117ece4Schristos         }
1767*3117ece4Schristos 
1768*3117ece4Schristos         if (sequence.offset < 8)
1769*3117ece4Schristos         {
1770*3117ece4Schristos             const int dec64 = dec64table[sequence.offset];
1771*3117ece4Schristos             op[0] = match[0];
1772*3117ece4Schristos             op[1] = match[1];
1773*3117ece4Schristos             op[2] = match[2];
1774*3117ece4Schristos             op[3] = match[3];
1775*3117ece4Schristos             match += dec32table[sequence.offset];
1776*3117ece4Schristos             ZSTD_copy4(op+4, match);
1777*3117ece4Schristos             match -= dec64;
1778*3117ece4Schristos         } else { ZSTD_copy8(op, match); }
1779*3117ece4Schristos         op += 8; match += 8;
1780*3117ece4Schristos 
1781*3117ece4Schristos         if (endMatch > oend-(16-MINMATCH))
1782*3117ece4Schristos         {
1783*3117ece4Schristos             if (op < oend-8)
1784*3117ece4Schristos             {
1785*3117ece4Schristos                 ZSTD_wildcopy(op, match, (oend-8) - op);
1786*3117ece4Schristos                 match += (oend-8) - op;
1787*3117ece4Schristos                 op = oend-8;
1788*3117ece4Schristos             }
1789*3117ece4Schristos             while (op<endMatch) *op++ = *match++;
1790*3117ece4Schristos         }
1791*3117ece4Schristos         else
1792*3117ece4Schristos             ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8);   /* works even if matchLength < 8 */
1793*3117ece4Schristos 
1794*3117ece4Schristos         /* restore, in case of overlap */
1795*3117ece4Schristos         if (overlapRisk) memcpy(endMatch, saved, qutt);
1796*3117ece4Schristos     }
1797*3117ece4Schristos 
1798*3117ece4Schristos     return endMatch-ostart;
1799*3117ece4Schristos }
1800*3117ece4Schristos 
1801*3117ece4Schristos typedef struct ZSTDv01_Dctx_s
1802*3117ece4Schristos {
1803*3117ece4Schristos     U32 LLTable[FSE_DTABLE_SIZE_U32(LLFSELog)];
1804*3117ece4Schristos     U32 OffTable[FSE_DTABLE_SIZE_U32(OffFSELog)];
1805*3117ece4Schristos     U32 MLTable[FSE_DTABLE_SIZE_U32(MLFSELog)];
1806*3117ece4Schristos     void* previousDstEnd;
1807*3117ece4Schristos     void* base;
1808*3117ece4Schristos     size_t expected;
1809*3117ece4Schristos     blockType_t bType;
1810*3117ece4Schristos     U32 phase;
1811*3117ece4Schristos } dctx_t;
1812*3117ece4Schristos 
1813*3117ece4Schristos 
1814*3117ece4Schristos static size_t ZSTD_decompressSequences(
1815*3117ece4Schristos                                void* ctx,
1816*3117ece4Schristos                                void* dst, size_t maxDstSize,
1817*3117ece4Schristos                          const void* seqStart, size_t seqSize,
1818*3117ece4Schristos                          const BYTE* litStart, size_t litSize)
1819*3117ece4Schristos {
1820*3117ece4Schristos     dctx_t* dctx = (dctx_t*)ctx;
1821*3117ece4Schristos     const BYTE* ip = (const BYTE*)seqStart;
1822*3117ece4Schristos     const BYTE* const iend = ip + seqSize;
1823*3117ece4Schristos     BYTE* const ostart = (BYTE* const)dst;
1824*3117ece4Schristos     BYTE* op = ostart;
1825*3117ece4Schristos     BYTE* const oend = ostart + maxDstSize;
1826*3117ece4Schristos     size_t errorCode, dumpsLength;
1827*3117ece4Schristos     const BYTE* litPtr = litStart;
1828*3117ece4Schristos     const BYTE* const litEnd = litStart + litSize;
1829*3117ece4Schristos     int nbSeq;
1830*3117ece4Schristos     const BYTE* dumps;
1831*3117ece4Schristos     U32* DTableLL = dctx->LLTable;
1832*3117ece4Schristos     U32* DTableML = dctx->MLTable;
1833*3117ece4Schristos     U32* DTableOffb = dctx->OffTable;
1834*3117ece4Schristos     BYTE* const base = (BYTE*) (dctx->base);
1835*3117ece4Schristos 
1836*3117ece4Schristos     /* Build Decoding Tables */
1837*3117ece4Schristos     errorCode = ZSTDv01_decodeSeqHeaders(&nbSeq, &dumps, &dumpsLength,
1838*3117ece4Schristos                                       DTableLL, DTableML, DTableOffb,
1839*3117ece4Schristos                                       ip, iend-ip);
1840*3117ece4Schristos     if (ZSTDv01_isError(errorCode)) return errorCode;
1841*3117ece4Schristos     ip += errorCode;
1842*3117ece4Schristos 
1843*3117ece4Schristos     /* Regen sequences */
1844*3117ece4Schristos     {
1845*3117ece4Schristos         seq_t sequence;
1846*3117ece4Schristos         seqState_t seqState;
1847*3117ece4Schristos 
1848*3117ece4Schristos         memset(&sequence, 0, sizeof(sequence));
1849*3117ece4Schristos         seqState.dumps = dumps;
1850*3117ece4Schristos         seqState.dumpsEnd = dumps + dumpsLength;
1851*3117ece4Schristos         seqState.prevOffset = 1;
1852*3117ece4Schristos         errorCode = FSE_initDStream(&(seqState.DStream), ip, iend-ip);
1853*3117ece4Schristos         if (FSE_isError(errorCode)) return ERROR(corruption_detected);
1854*3117ece4Schristos         FSE_initDState(&(seqState.stateLL), &(seqState.DStream), DTableLL);
1855*3117ece4Schristos         FSE_initDState(&(seqState.stateOffb), &(seqState.DStream), DTableOffb);
1856*3117ece4Schristos         FSE_initDState(&(seqState.stateML), &(seqState.DStream), DTableML);
1857*3117ece4Schristos 
1858*3117ece4Schristos         for ( ; (FSE_reloadDStream(&(seqState.DStream)) <= FSE_DStream_completed) && (nbSeq>0) ; )
1859*3117ece4Schristos         {
1860*3117ece4Schristos             size_t oneSeqSize;
1861*3117ece4Schristos             nbSeq--;
1862*3117ece4Schristos             ZSTD_decodeSequence(&sequence, &seqState);
1863*3117ece4Schristos             oneSeqSize = ZSTD_execSequence(op, sequence, &litPtr, litEnd, base, oend);
1864*3117ece4Schristos             if (ZSTDv01_isError(oneSeqSize)) return oneSeqSize;
1865*3117ece4Schristos             op += oneSeqSize;
1866*3117ece4Schristos         }
1867*3117ece4Schristos 
1868*3117ece4Schristos         /* check if reached exact end */
1869*3117ece4Schristos         if ( !FSE_endOfDStream(&(seqState.DStream)) ) return ERROR(corruption_detected);   /* requested too much : data is corrupted */
1870*3117ece4Schristos         if (nbSeq<0) return ERROR(corruption_detected);   /* requested too many sequences : data is corrupted */
1871*3117ece4Schristos 
1872*3117ece4Schristos         /* last literal segment */
1873*3117ece4Schristos         {
1874*3117ece4Schristos             size_t lastLLSize = litEnd - litPtr;
1875*3117ece4Schristos             if (op+lastLLSize > oend) return ERROR(dstSize_tooSmall);
1876*3117ece4Schristos             if (lastLLSize > 0) {
1877*3117ece4Schristos                 if (op != litPtr) memmove(op, litPtr, lastLLSize);
1878*3117ece4Schristos                 op += lastLLSize;
1879*3117ece4Schristos             }
1880*3117ece4Schristos         }
1881*3117ece4Schristos     }
1882*3117ece4Schristos 
1883*3117ece4Schristos     return op-ostart;
1884*3117ece4Schristos }
1885*3117ece4Schristos 
1886*3117ece4Schristos 
1887*3117ece4Schristos static size_t ZSTD_decompressBlock(
1888*3117ece4Schristos                             void* ctx,
1889*3117ece4Schristos                             void* dst, size_t maxDstSize,
1890*3117ece4Schristos                       const void* src, size_t srcSize)
1891*3117ece4Schristos {
1892*3117ece4Schristos     /* blockType == blockCompressed, srcSize is trusted */
1893*3117ece4Schristos     const BYTE* ip = (const BYTE*)src;
1894*3117ece4Schristos     const BYTE* litPtr = NULL;
1895*3117ece4Schristos     size_t litSize = 0;
1896*3117ece4Schristos     size_t errorCode;
1897*3117ece4Schristos 
1898*3117ece4Schristos     /* Decode literals sub-block */
1899*3117ece4Schristos     errorCode = ZSTDv01_decodeLiteralsBlock(ctx, dst, maxDstSize, &litPtr, &litSize, src, srcSize);
1900*3117ece4Schristos     if (ZSTDv01_isError(errorCode)) return errorCode;
1901*3117ece4Schristos     ip += errorCode;
1902*3117ece4Schristos     srcSize -= errorCode;
1903*3117ece4Schristos 
1904*3117ece4Schristos     return ZSTD_decompressSequences(ctx, dst, maxDstSize, ip, srcSize, litPtr, litSize);
1905*3117ece4Schristos }
1906*3117ece4Schristos 
1907*3117ece4Schristos 
1908*3117ece4Schristos size_t ZSTDv01_decompressDCtx(void* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
1909*3117ece4Schristos {
1910*3117ece4Schristos     const BYTE* ip = (const BYTE*)src;
1911*3117ece4Schristos     const BYTE* iend = ip + srcSize;
1912*3117ece4Schristos     BYTE* const ostart = (BYTE* const)dst;
1913*3117ece4Schristos     BYTE* op = ostart;
1914*3117ece4Schristos     BYTE* const oend = ostart + maxDstSize;
1915*3117ece4Schristos     size_t remainingSize = srcSize;
1916*3117ece4Schristos     U32 magicNumber;
1917*3117ece4Schristos     size_t errorCode=0;
1918*3117ece4Schristos     blockProperties_t blockProperties;
1919*3117ece4Schristos 
1920*3117ece4Schristos     /* Frame Header */
1921*3117ece4Schristos     if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
1922*3117ece4Schristos     magicNumber = ZSTD_readBE32(src);
1923*3117ece4Schristos     if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown);
1924*3117ece4Schristos     ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize;
1925*3117ece4Schristos 
1926*3117ece4Schristos     /* Loop on each block */
1927*3117ece4Schristos     while (1)
1928*3117ece4Schristos     {
1929*3117ece4Schristos         size_t blockSize = ZSTDv01_getcBlockSize(ip, iend-ip, &blockProperties);
1930*3117ece4Schristos         if (ZSTDv01_isError(blockSize)) return blockSize;
1931*3117ece4Schristos 
1932*3117ece4Schristos         ip += ZSTD_blockHeaderSize;
1933*3117ece4Schristos         remainingSize -= ZSTD_blockHeaderSize;
1934*3117ece4Schristos         if (blockSize > remainingSize) return ERROR(srcSize_wrong);
1935*3117ece4Schristos 
1936*3117ece4Schristos         switch(blockProperties.blockType)
1937*3117ece4Schristos         {
1938*3117ece4Schristos         case bt_compressed:
1939*3117ece4Schristos             errorCode = ZSTD_decompressBlock(ctx, op, oend-op, ip, blockSize);
1940*3117ece4Schristos             break;
1941*3117ece4Schristos         case bt_raw :
1942*3117ece4Schristos             errorCode = ZSTD_copyUncompressedBlock(op, oend-op, ip, blockSize);
1943*3117ece4Schristos             break;
1944*3117ece4Schristos         case bt_rle :
1945*3117ece4Schristos             return ERROR(GENERIC);   /* not yet supported */
1946*3117ece4Schristos             break;
1947*3117ece4Schristos         case bt_end :
1948*3117ece4Schristos             /* end of frame */
1949*3117ece4Schristos             if (remainingSize) return ERROR(srcSize_wrong);
1950*3117ece4Schristos             break;
1951*3117ece4Schristos         default:
1952*3117ece4Schristos             return ERROR(GENERIC);
1953*3117ece4Schristos         }
1954*3117ece4Schristos         if (blockSize == 0) break;   /* bt_end */
1955*3117ece4Schristos 
1956*3117ece4Schristos         if (ZSTDv01_isError(errorCode)) return errorCode;
1957*3117ece4Schristos         op += errorCode;
1958*3117ece4Schristos         ip += blockSize;
1959*3117ece4Schristos         remainingSize -= blockSize;
1960*3117ece4Schristos     }
1961*3117ece4Schristos 
1962*3117ece4Schristos     return op-ostart;
1963*3117ece4Schristos }
1964*3117ece4Schristos 
1965*3117ece4Schristos size_t ZSTDv01_decompress(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
1966*3117ece4Schristos {
1967*3117ece4Schristos     dctx_t ctx;
1968*3117ece4Schristos     ctx.base = dst;
1969*3117ece4Schristos     return ZSTDv01_decompressDCtx(&ctx, dst, maxDstSize, src, srcSize);
1970*3117ece4Schristos }
1971*3117ece4Schristos 
1972*3117ece4Schristos /* ZSTD_errorFrameSizeInfoLegacy() :
1973*3117ece4Schristos    assumes `cSize` and `dBound` are _not_ NULL */
1974*3117ece4Schristos static void ZSTD_errorFrameSizeInfoLegacy(size_t* cSize, unsigned long long* dBound, size_t ret)
1975*3117ece4Schristos {
1976*3117ece4Schristos     *cSize = ret;
1977*3117ece4Schristos     *dBound = ZSTD_CONTENTSIZE_ERROR;
1978*3117ece4Schristos }
1979*3117ece4Schristos 
1980*3117ece4Schristos void ZSTDv01_findFrameSizeInfoLegacy(const void *src, size_t srcSize, size_t* cSize, unsigned long long* dBound)
1981*3117ece4Schristos {
1982*3117ece4Schristos     const BYTE* ip = (const BYTE*)src;
1983*3117ece4Schristos     size_t remainingSize = srcSize;
1984*3117ece4Schristos     size_t nbBlocks = 0;
1985*3117ece4Schristos     U32 magicNumber;
1986*3117ece4Schristos     blockProperties_t blockProperties;
1987*3117ece4Schristos 
1988*3117ece4Schristos     /* Frame Header */
1989*3117ece4Schristos     if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) {
1990*3117ece4Schristos         ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong));
1991*3117ece4Schristos         return;
1992*3117ece4Schristos     }
1993*3117ece4Schristos     magicNumber = ZSTD_readBE32(src);
1994*3117ece4Schristos     if (magicNumber != ZSTD_magicNumber) {
1995*3117ece4Schristos         ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(prefix_unknown));
1996*3117ece4Schristos         return;
1997*3117ece4Schristos     }
1998*3117ece4Schristos     ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize;
1999*3117ece4Schristos 
2000*3117ece4Schristos     /* Loop on each block */
2001*3117ece4Schristos     while (1)
2002*3117ece4Schristos     {
2003*3117ece4Schristos         size_t blockSize = ZSTDv01_getcBlockSize(ip, remainingSize, &blockProperties);
2004*3117ece4Schristos         if (ZSTDv01_isError(blockSize)) {
2005*3117ece4Schristos             ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, blockSize);
2006*3117ece4Schristos             return;
2007*3117ece4Schristos         }
2008*3117ece4Schristos 
2009*3117ece4Schristos         ip += ZSTD_blockHeaderSize;
2010*3117ece4Schristos         remainingSize -= ZSTD_blockHeaderSize;
2011*3117ece4Schristos         if (blockSize > remainingSize) {
2012*3117ece4Schristos             ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong));
2013*3117ece4Schristos             return;
2014*3117ece4Schristos         }
2015*3117ece4Schristos 
2016*3117ece4Schristos         if (blockSize == 0) break;   /* bt_end */
2017*3117ece4Schristos 
2018*3117ece4Schristos         ip += blockSize;
2019*3117ece4Schristos         remainingSize -= blockSize;
2020*3117ece4Schristos         nbBlocks++;
2021*3117ece4Schristos     }
2022*3117ece4Schristos 
2023*3117ece4Schristos     *cSize = ip - (const BYTE*)src;
2024*3117ece4Schristos     *dBound = nbBlocks * BLOCKSIZE;
2025*3117ece4Schristos }
2026*3117ece4Schristos 
2027*3117ece4Schristos /*******************************
2028*3117ece4Schristos *  Streaming Decompression API
2029*3117ece4Schristos *******************************/
2030*3117ece4Schristos 
2031*3117ece4Schristos size_t ZSTDv01_resetDCtx(ZSTDv01_Dctx* dctx)
2032*3117ece4Schristos {
2033*3117ece4Schristos     dctx->expected = ZSTD_frameHeaderSize;
2034*3117ece4Schristos     dctx->phase = 0;
2035*3117ece4Schristos     dctx->previousDstEnd = NULL;
2036*3117ece4Schristos     dctx->base = NULL;
2037*3117ece4Schristos     return 0;
2038*3117ece4Schristos }
2039*3117ece4Schristos 
2040*3117ece4Schristos ZSTDv01_Dctx* ZSTDv01_createDCtx(void)
2041*3117ece4Schristos {
2042*3117ece4Schristos     ZSTDv01_Dctx* dctx = (ZSTDv01_Dctx*)malloc(sizeof(ZSTDv01_Dctx));
2043*3117ece4Schristos     if (dctx==NULL) return NULL;
2044*3117ece4Schristos     ZSTDv01_resetDCtx(dctx);
2045*3117ece4Schristos     return dctx;
2046*3117ece4Schristos }
2047*3117ece4Schristos 
2048*3117ece4Schristos size_t ZSTDv01_freeDCtx(ZSTDv01_Dctx* dctx)
2049*3117ece4Schristos {
2050*3117ece4Schristos     free(dctx);
2051*3117ece4Schristos     return 0;
2052*3117ece4Schristos }
2053*3117ece4Schristos 
2054*3117ece4Schristos size_t ZSTDv01_nextSrcSizeToDecompress(ZSTDv01_Dctx* dctx)
2055*3117ece4Schristos {
2056*3117ece4Schristos     return ((dctx_t*)dctx)->expected;
2057*3117ece4Schristos }
2058*3117ece4Schristos 
2059*3117ece4Schristos size_t ZSTDv01_decompressContinue(ZSTDv01_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
2060*3117ece4Schristos {
2061*3117ece4Schristos     dctx_t* ctx = (dctx_t*)dctx;
2062*3117ece4Schristos 
2063*3117ece4Schristos     /* Sanity check */
2064*3117ece4Schristos     if (srcSize != ctx->expected) return ERROR(srcSize_wrong);
2065*3117ece4Schristos     if (dst != ctx->previousDstEnd)  /* not contiguous */
2066*3117ece4Schristos         ctx->base = dst;
2067*3117ece4Schristos 
2068*3117ece4Schristos     /* Decompress : frame header */
2069*3117ece4Schristos     if (ctx->phase == 0)
2070*3117ece4Schristos     {
2071*3117ece4Schristos         /* Check frame magic header */
2072*3117ece4Schristos         U32 magicNumber = ZSTD_readBE32(src);
2073*3117ece4Schristos         if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown);
2074*3117ece4Schristos         ctx->phase = 1;
2075*3117ece4Schristos         ctx->expected = ZSTD_blockHeaderSize;
2076*3117ece4Schristos         return 0;
2077*3117ece4Schristos     }
2078*3117ece4Schristos 
2079*3117ece4Schristos     /* Decompress : block header */
2080*3117ece4Schristos     if (ctx->phase == 1)
2081*3117ece4Schristos     {
2082*3117ece4Schristos         blockProperties_t bp;
2083*3117ece4Schristos         size_t blockSize = ZSTDv01_getcBlockSize(src, ZSTD_blockHeaderSize, &bp);
2084*3117ece4Schristos         if (ZSTDv01_isError(blockSize)) return blockSize;
2085*3117ece4Schristos         if (bp.blockType == bt_end)
2086*3117ece4Schristos         {
2087*3117ece4Schristos             ctx->expected = 0;
2088*3117ece4Schristos             ctx->phase = 0;
2089*3117ece4Schristos         }
2090*3117ece4Schristos         else
2091*3117ece4Schristos         {
2092*3117ece4Schristos             ctx->expected = blockSize;
2093*3117ece4Schristos             ctx->bType = bp.blockType;
2094*3117ece4Schristos             ctx->phase = 2;
2095*3117ece4Schristos         }
2096*3117ece4Schristos 
2097*3117ece4Schristos         return 0;
2098*3117ece4Schristos     }
2099*3117ece4Schristos 
2100*3117ece4Schristos     /* Decompress : block content */
2101*3117ece4Schristos     {
2102*3117ece4Schristos         size_t rSize;
2103*3117ece4Schristos         switch(ctx->bType)
2104*3117ece4Schristos         {
2105*3117ece4Schristos         case bt_compressed:
2106*3117ece4Schristos             rSize = ZSTD_decompressBlock(ctx, dst, maxDstSize, src, srcSize);
2107*3117ece4Schristos             break;
2108*3117ece4Schristos         case bt_raw :
2109*3117ece4Schristos             rSize = ZSTD_copyUncompressedBlock(dst, maxDstSize, src, srcSize);
2110*3117ece4Schristos             break;
2111*3117ece4Schristos         case bt_rle :
2112*3117ece4Schristos             return ERROR(GENERIC);   /* not yet handled */
2113*3117ece4Schristos             break;
2114*3117ece4Schristos         case bt_end :   /* should never happen (filtered at phase 1) */
2115*3117ece4Schristos             rSize = 0;
2116*3117ece4Schristos             break;
2117*3117ece4Schristos         default:
2118*3117ece4Schristos             return ERROR(GENERIC);
2119*3117ece4Schristos         }
2120*3117ece4Schristos         ctx->phase = 1;
2121*3117ece4Schristos         ctx->expected = ZSTD_blockHeaderSize;
2122*3117ece4Schristos         if (ZSTDv01_isError(rSize)) return rSize;
2123*3117ece4Schristos         ctx->previousDstEnd = (void*)( ((char*)dst) + rSize);
2124*3117ece4Schristos         return rSize;
2125*3117ece4Schristos     }
2126*3117ece4Schristos 
2127*3117ece4Schristos }
2128