xref: /freebsd-src/sys/contrib/openzfs/module/zfs/lz4.c (revision e92ffd9b626833ebdbf2742c8ffddc6cd94b963e)
1eda14cbcSMatt Macy /*
2*e92ffd9bSMartin Matuska    LZ4 - Fast LZ compression algorithm
3*e92ffd9bSMartin Matuska    Copyright (C) 2011-present, Yann Collet.
4*e92ffd9bSMartin Matuska 
5*e92ffd9bSMartin Matuska    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6*e92ffd9bSMartin Matuska 
7*e92ffd9bSMartin Matuska    Redistribution and use in source and binary forms, with or without
8*e92ffd9bSMartin Matuska    modification, are permitted provided that the following conditions are
9*e92ffd9bSMartin Matuska    met:
10*e92ffd9bSMartin Matuska 
11*e92ffd9bSMartin Matuska        * Redistributions of source code must retain the above copyright
12*e92ffd9bSMartin Matuska    notice, this list of conditions and the following disclaimer.
13*e92ffd9bSMartin Matuska        * Redistributions in binary form must reproduce the above
14*e92ffd9bSMartin Matuska    copyright notice, this list of conditions and the following disclaimer
15*e92ffd9bSMartin Matuska    in the documentation and/or other materials provided with the
16*e92ffd9bSMartin Matuska    distribution.
17*e92ffd9bSMartin Matuska 
18*e92ffd9bSMartin Matuska    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19*e92ffd9bSMartin Matuska    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20*e92ffd9bSMartin Matuska    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21*e92ffd9bSMartin Matuska    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22*e92ffd9bSMartin Matuska    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23*e92ffd9bSMartin Matuska    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24*e92ffd9bSMartin Matuska    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25*e92ffd9bSMartin Matuska    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26*e92ffd9bSMartin Matuska    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27*e92ffd9bSMartin Matuska    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28*e92ffd9bSMartin Matuska    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29*e92ffd9bSMartin Matuska 
30*e92ffd9bSMartin Matuska    You can contact the author at :
31*e92ffd9bSMartin Matuska     - LZ4 homepage : http://www.lz4.org
32*e92ffd9bSMartin Matuska     - LZ4 source repository : https://github.com/lz4/lz4
33*e92ffd9bSMartin Matuska */
34*e92ffd9bSMartin Matuska 
35*e92ffd9bSMartin Matuska /*
36*e92ffd9bSMartin Matuska  * This file contains unmodified code from lz4 1.9.3's decompressor, plus
37*e92ffd9bSMartin Matuska  * associated macros and constants.
38eda14cbcSMatt Macy  *
39*e92ffd9bSMartin Matuska  * It also contains a couple of defines from the old lz4.c to make things
40*e92ffd9bSMartin Matuska  * fit together smoothly.
41eda14cbcSMatt Macy  *
42eda14cbcSMatt Macy  */
43eda14cbcSMatt Macy 
44eda14cbcSMatt Macy #include <sys/zfs_context.h>
45eda14cbcSMatt Macy 
/*
 * Forward declaration of a decompression routine; the definition is not
 * visible in this part of the file.
 * NOTE(review): judging by the parameter names, isize is the size of the
 * compressed input and maxOutputSize the capacity of dest -- confirm
 * against the definition.
 */
int LZ4_uncompress_unknownOutputSize(const char *source, char *dest,
    int isize, int maxOutputSize);
48eda14cbcSMatt Macy 
49eda14cbcSMatt Macy /*
50eda14cbcSMatt Macy  * Tuning parameters
51eda14cbcSMatt Macy  */
52eda14cbcSMatt Macy 
/*
 * COMPRESSIONLEVEL: Increasing this value improves compression ratio.
 *	Lowering this value reduces memory usage. Reduced memory usage
 *	typically improves speed, due to cache effects (e.g. L1 32KB for Intel,
 *	L1 64KB for AMD). Memory usage formula: N -> 2^(N+2) bytes
 *	(examples: 12 -> 16KB; 17 -> 512KB).
 */
#define	COMPRESSIONLEVEL 12
61eda14cbcSMatt Macy 
/*
 * NOTCOMPRESSIBLE_CONFIRMATION: Decreasing this value makes the
 *	algorithm give up sooner on data segments it considers
 *	"incompressible". This may decrease compression ratio dramatically,
 *	but will be faster on incompressible data. Increasing this value
 *	makes the algorithm search longer before declaring a segment
 *	"incompressible". This could improve compression a bit, but will be
 *	slower on incompressible data. The default value (6) is recommended.
 */
#define	NOTCOMPRESSIBLE_CONFIRMATION 6
72eda14cbcSMatt Macy 
/*
 * Little Endian or Big Endian?
 * Note: override the #define below if you know your architecture's
 * endianness. LZ4_BIG_ENDIAN ends up defined to 1 when _ZFS_BIG_ENDIAN is
 * defined, and undefined otherwise.
 */
#if defined(_ZFS_BIG_ENDIAN)
#define	LZ4_BIG_ENDIAN 1
#else
/*
 * Little Endian assumed. PDP Endian and other very rare endian formats
 * are unsupported.
 */
#undef LZ4_BIG_ENDIAN
#endif
86eda14cbcSMatt Macy 
87*e92ffd9bSMartin Matuska /*-************************************
88*e92ffd9bSMartin Matuska *  CPU Feature Detection
89*e92ffd9bSMartin Matuska **************************************/
/* LZ4_FORCE_MEMORY_ACCESS
 * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
 * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
 * The switch below allows selecting a different access method for improved performance.
 * Method 0 (default) : use `memcpy()`. Safe and portable.
 * Method 1 : `__packed` statement. It depends on a compiler extension (i.e., not portable).
 *            This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
 * Method 2 : direct access. This method is portable but violates the C standard.
 *            It can generate buggy code on targets whose assembly generation depends on alignment.
 *            But in some circumstances, it's the only known way to get the most performance (e.g. GCC + ARMv6).
 * See https://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
 * Prefer these methods in priority order (0 > 1 > 2)
 */
#ifndef LZ4_FORCE_MEMORY_ACCESS   /* can be defined externally */
/* __GNUC__ together with one of the ARMv6 architecture macros: method 2 (direct access). */
#  if defined(__GNUC__) && \
  ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) \
  || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
#    define LZ4_FORCE_MEMORY_ACCESS 2
/* Intel compiler outside Windows, or any compiler defining __GNUC__: method 1 (packed access). */
#  elif (defined(__INTEL_COMPILER) && !defined(_WIN32)) || defined(__GNUC__)
#    define LZ4_FORCE_MEMORY_ACCESS 1
/* Otherwise the macro stays undefined, which selects the memcpy()-based accessors further down. */
#  endif
#endif
112eda14cbcSMatt Macy 
/*
 * LZ4_FORCE_SW_BITCOUNT
 * Define this parameter if your target system or compiler does not support hardware bit count
 */
/*
 * Illumos : we can't use GCC's __builtin_ctz family of builtins in the
 * kernel
 * Linux : we can use GCC's __builtin_ctz family of builtins in the
 * kernel
 */
/* Drop any earlier definition, then force the software path only when __sunos__ is defined. */
#undef	LZ4_FORCE_SW_BITCOUNT
#if defined(__sunos__)
#define	LZ4_FORCE_SW_BITCOUNT
#endif
127eda14cbcSMatt Macy 
/*
 * Compiler Options
 */
/*
 * Disable restrict: the keyword is defined away, so every later use of
 * `restrict` in this file expands to nothing.
 */
#define	restrict
133eda14cbcSMatt Macy 
/*
 * Linux : GCC_VERSION is defined as of 3.9-rc1, so undefine it.
 * torvalds/linux@3f3f8d2f48acfd8ed3b8e6b7377935da57b27b16
 */
#ifdef GCC_VERSION
#undef GCC_VERSION
#endif

/* Local definition: major * 100 + minor (for example 409 for GCC 4.9). */
#define	GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
143eda14cbcSMatt Macy 
/*
 * LZ4_FORCE_INLINE: storage-class / inlining prefix placed in front of helper
 * definitions such as LZ4_wildCopy8(). It can be defined externally;
 * otherwise it is chosen as follows:
 *  - _MSC_VER defined            : static __forceinline
 *  - C++ or C99, __GNUC__ defined: static inline __attribute__((always_inline))
 *  - C++ or C99, other compilers : static inline
 *  - anything else               : plain static
 */
#ifndef LZ4_FORCE_INLINE
#  ifdef _MSC_VER    /* Visual Studio */
#    define LZ4_FORCE_INLINE static __forceinline
#  else
#    if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
#      ifdef __GNUC__
#        define LZ4_FORCE_INLINE static inline __attribute__((always_inline))
#      else
#        define LZ4_FORCE_INLINE static inline
#      endif
#    else
#      define LZ4_FORCE_INLINE static
#    endif /* __STDC_VERSION__ */
#  endif  /* _MSC_VER */
#endif /* LZ4_FORCE_INLINE */
159*e92ffd9bSMartin Matuska 
/* LZ4_FORCE_O2 and LZ4_FORCE_INLINE
 * gcc on ppc64le generates an unrolled SIMDized loop for LZ4_wildCopy8,
 * together with a simple 8-byte copy loop as a fall-back path.
 * However, this optimization hurts the decompression speed by >30%,
 * because the execution does not go to the optimized loop
 * for typical compressible data, and all of the preamble checks
 * before going to the fall-back path become useless overhead.
 * This optimization happens only with the -O3 flag, and -O2 generates
 * a simple 8-byte copy loop.
 * With gcc on ppc64le, all of the LZ4_decompress_* and LZ4_wildCopy8
 * functions are annotated with __attribute__((optimize("O2"))),
 * and also LZ4_wildCopy8 is forcibly inlined, so that the O2 attribute
 * of LZ4_wildCopy8 does not affect the compression speed.
 */
/* Taken only for GCC (not clang) on little-endian 64-bit PowerPC; LZ4_FORCE_INLINE is redefined there. */
#if defined(__PPC64__) && defined(__LITTLE_ENDIAN__) && defined(__GNUC__) && !defined(__clang__)
#  define LZ4_FORCE_O2  __attribute__((optimize("O2")))
#  undef LZ4_FORCE_INLINE
#  define LZ4_FORCE_INLINE  static __inline __attribute__((optimize("O2"),always_inline))
#else
/* Everywhere else LZ4_FORCE_O2 expands to nothing and LZ4_FORCE_INLINE is left as defined above. */
#  define LZ4_FORCE_O2
#endif
181*e92ffd9bSMartin Matuska 
/*
 * Branch-prediction hints. expect() maps to __builtin_expect() on GCC >= 3,
 * Intel compiler >= 800 and clang, and to the bare expression otherwise.
 * Each of the three macros is defined here only if it is not defined already.
 */
#ifndef expect
#if (defined(__GNUC__) && (__GNUC__ >= 3)) || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) || defined(__clang__)
#  define expect(expr,value)    (__builtin_expect ((expr),(value)) )
#else
#  define expect(expr,value)    (expr)
#endif
#endif

/* likely(expr): expr is normalized to 0/1 and hinted as usually true. */
#ifndef likely
#define	likely(expr)	expect((expr) != 0, 1)
#endif

/* unlikely(expr): expr is normalized to 0/1 and hinted as usually false. */
#ifndef unlikely
#define	unlikely(expr)	expect((expr) != 0, 0)
#endif
197eda14cbcSMatt Macy 
/* Outside the kernel, pull in the C library declarations used by the macros below. */
#ifndef _KERNEL
#include <stdlib.h>   /* malloc, calloc, free */
#include <string.h>   /* memset, memcpy */
#endif
/*
 * Allocation wrappers, mapped straight onto the C library names.
 * NOTE(review): under _KERNEL the two headers above are not included, so
 * these macros only work there if the names come from another header or
 * the macros go unused -- confirm.
 */
#define ALLOC(s)          malloc(s)
#define ALLOC_AND_ZERO(s) calloc(1,s)
#define FREEMEM(p)        free(p)

/* MEM_INIT(p,v,s): memset() wrapper, fills s bytes at p with the value v. */
#define MEM_INIT(p,v,s)   memset((p),(v),(s))
207eda14cbcSMatt Macy 
208eda14cbcSMatt Macy 
209*e92ffd9bSMartin Matuska /*-************************************
210*e92ffd9bSMartin Matuska *  Common Constants
211*e92ffd9bSMartin Matuska **************************************/
/* NOTE(review): presumably the minimum match length of the LZ4 format -- confirm. */
#define MINMATCH 4

/* Same value as the 8-byte copy step used inside LZ4_wildCopy8(). */
#define WILDCOPYLENGTH 8
#define LASTLITERALS   5   /* see ../doc/lz4_Block_format.md#parsing-restrictions */
#define MFLIMIT       12   /* see ../doc/lz4_Block_format.md#parsing-restrictions */
#define MATCH_SAFEGUARD_DISTANCE  ((2*WILDCOPYLENGTH) - MINMATCH)   /* ensure it's possible to write 2 x wildcopyLength without overflowing output buffer */
#define FASTLOOP_SAFE_DISTANCE 64

/* Postfix size multipliers: `64 KB` expands to `64 *(1 <<10)`. */
#define KB *(1 <<10)
#define MB *(1 <<20)
#define GB *(1U<<30)

#ifndef LZ4_DISTANCE_MAX   /* history window size; can be user-defined at compile time */
#  define LZ4_DISTANCE_MAX 65535   /* set to maximum value by default */
#endif

/* Compile-time guard: an LZ4_DISTANCE_MAX above 65535 stops the build. */
#define LZ4_DISTANCE_ABSOLUTE_MAX 65535
#if (LZ4_DISTANCE_MAX > LZ4_DISTANCE_ABSOLUTE_MAX)   /* max supported by LZ4 format */
#  error "LZ4_DISTANCE_MAX is too big : must be <= 65535"
#endif

/* With ML_BITS == 4 these evaluate to ML_MASK == 15, RUN_BITS == 4 and RUN_MASK == 15. */
#define ML_BITS  4
#define ML_MASK  ((1U<<ML_BITS)-1)
#define RUN_BITS (8-ML_BITS)
#define RUN_MASK ((1U<<RUN_BITS)-1)

#define DEBUGLOG(l, ...) {}    /* disabled: every use expands to an empty block */

/* Map assert() onto ASSERT() unless an assert macro already exists. */
#ifndef assert
#define assert ASSERT
#endif
243*e92ffd9bSMartin Matuska 
244*e92ffd9bSMartin Matuska /*-************************************
245*e92ffd9bSMartin Matuska *  Types
246*e92ffd9bSMartin Matuska **************************************/
/* Userland only: <limits.h> provides UINT_MAX for the pre-C99 check below. */
#ifndef _KERNEL
#include <limits.h>
#endif
/* C++ or C99 and later: build the short LZ4 type names on the fixed-width integer types. */
#if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
#ifndef _KERNEL
#include <stdint.h>
#endif
  typedef  uint8_t BYTE;
  typedef uint16_t U16;
  typedef uint32_t U32;
  typedef  int32_t S32;
  typedef uint64_t U64;
  typedef uintptr_t uptrval;
#else
/* Older C: fall back to the basic types, after checking that UINT_MAX is 2^32 - 1. */
# if UINT_MAX != 4294967295UL
#   error "LZ4 code (when not C++ or C99) assumes that sizeof(int) == 4"
# endif
  typedef unsigned char       BYTE;
  typedef unsigned short      U16;
  typedef unsigned int        U32;
  typedef   signed int        S32;
  typedef unsigned long long  U64;
  typedef size_t              uptrval;   /* generally true, except OpenVMS-64 */
#endif

/* reg_t is U64 whenever __x86_64__ is defined, and size_t on every other target. */
#if defined(__x86_64__)
  typedef U64    reg_t;   /* 64-bits in x32 mode */
#else
  typedef size_t reg_t;   /* 32-bits in x32 mode */
#endif
277*e92ffd9bSMartin Matuska 
/* Output-limit directive; the enumerators are numbered 0, 1, 2 in declaration order. */
typedef enum {
    notLimited,      /* 0 */
    limitedOutput,   /* 1 */
    fillOutput       /* 2 */
} limitedOutput_directive;
283*e92ffd9bSMartin Matuska 
284*e92ffd9bSMartin Matuska 
285*e92ffd9bSMartin Matuska /*-************************************
286*e92ffd9bSMartin Matuska *  Reading and writing into memory
287*e92ffd9bSMartin Matuska **************************************/
288*e92ffd9bSMartin Matuska 
/**
 * LZ4 relies on memcpy with a constant size being inlined. In freestanding
 * environments, the compiler can't assume the implementation of memcpy() is
 * standard compliant, so it can't apply its specialized memcpy() inlining
 * logic. When possible, use __builtin_memcpy() to tell the compiler to analyze
 * memcpy() as if it were standard compliant, so it can inline it in freestanding
 * environments. This is needed when decompressing the Linux Kernel, for example.
 */
/* Compilers reporting __GNUC__ >= 4 get the builtin ... */
#if defined(__GNUC__) && (__GNUC__ >= 4)
#define LZ4_memcpy(dst, src, size) __builtin_memcpy(dst, src, size)
#else
/* ... everything else falls back to the plain library call. */
#define LZ4_memcpy(dst, src, size) memcpy(dst, src, size)
#endif
302eda14cbcSMatt Macy 
LZ4_isLittleEndian(void)303*e92ffd9bSMartin Matuska static unsigned LZ4_isLittleEndian(void)
304eda14cbcSMatt Macy {
305*e92ffd9bSMartin Matuska     const union { U32 u; BYTE c[4]; } one = { 1 };   /* don't use static : performance detrimental */
306*e92ffd9bSMartin Matuska     return one.c[0];
307eda14cbcSMatt Macy }
308eda14cbcSMatt Macy 
309eda14cbcSMatt Macy 
310*e92ffd9bSMartin Matuska #if defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==2)
311*e92ffd9bSMartin Matuska /* lie to the compiler about data alignment; use with caution */
312*e92ffd9bSMartin Matuska 
LZ4_read16(const void * memPtr)313*e92ffd9bSMartin Matuska static U16 LZ4_read16(const void* memPtr) { return *(const U16*) memPtr; }
314*e92ffd9bSMartin Matuska 
LZ4_write16(void * memPtr,U16 value)315*e92ffd9bSMartin Matuska static void LZ4_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; }
LZ4_write32(void * memPtr,U32 value)316*e92ffd9bSMartin Matuska static void LZ4_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; }
317*e92ffd9bSMartin Matuska 
318*e92ffd9bSMartin Matuska #elif defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==1)
319*e92ffd9bSMartin Matuska 
320*e92ffd9bSMartin Matuska /* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
321*e92ffd9bSMartin Matuska /* currently only defined for gcc and icc */
322*e92ffd9bSMartin Matuska typedef union { U16 u16; U32 u32; reg_t uArch; } __attribute__((packed)) unalign;
323*e92ffd9bSMartin Matuska 
LZ4_read16(const void * ptr)324*e92ffd9bSMartin Matuska static U16 LZ4_read16(const void* ptr) { return ((const unalign*)ptr)->u16; }
325*e92ffd9bSMartin Matuska 
LZ4_write32(void * memPtr,U32 value)326*e92ffd9bSMartin Matuska static void LZ4_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = value; }
327*e92ffd9bSMartin Matuska 
328*e92ffd9bSMartin Matuska #else  /* safe and portable access using memcpy() */
329*e92ffd9bSMartin Matuska 
LZ4_read16(const void * memPtr)330*e92ffd9bSMartin Matuska static U16 LZ4_read16(const void* memPtr)
331eda14cbcSMatt Macy {
332*e92ffd9bSMartin Matuska     U16 val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val;
333eda14cbcSMatt Macy }
334eda14cbcSMatt Macy 
LZ4_write32(void * memPtr,U32 value)335*e92ffd9bSMartin Matuska static void LZ4_write32(void* memPtr, U32 value)
336eda14cbcSMatt Macy {
337*e92ffd9bSMartin Matuska     LZ4_memcpy(memPtr, &value, sizeof(value));
338eda14cbcSMatt Macy }
339eda14cbcSMatt Macy 
340*e92ffd9bSMartin Matuska #endif /* LZ4_FORCE_MEMORY_ACCESS */
341eda14cbcSMatt Macy 
LZ4_readLE16(const void * memPtr)342*e92ffd9bSMartin Matuska static U16 LZ4_readLE16(const void* memPtr)
343*e92ffd9bSMartin Matuska {
344*e92ffd9bSMartin Matuska     if (LZ4_isLittleEndian()) {
345*e92ffd9bSMartin Matuska         return LZ4_read16(memPtr);
346*e92ffd9bSMartin Matuska     } else {
347*e92ffd9bSMartin Matuska         const BYTE* p = (const BYTE*)memPtr;
348*e92ffd9bSMartin Matuska         return (U16)((U16)p[0] + (p[1]<<8));
349*e92ffd9bSMartin Matuska     }
350eda14cbcSMatt Macy }
351eda14cbcSMatt Macy 
352*e92ffd9bSMartin Matuska /* customized variant of memcpy, which can overwrite up to 8 bytes beyond dstEnd */
353*e92ffd9bSMartin Matuska LZ4_FORCE_INLINE
LZ4_wildCopy8(void * dstPtr,const void * srcPtr,void * dstEnd)354*e92ffd9bSMartin Matuska void LZ4_wildCopy8(void* dstPtr, const void* srcPtr, void* dstEnd)
355*e92ffd9bSMartin Matuska {
356*e92ffd9bSMartin Matuska     BYTE* d = (BYTE*)dstPtr;
357*e92ffd9bSMartin Matuska     const BYTE* s = (const BYTE*)srcPtr;
358*e92ffd9bSMartin Matuska     BYTE* const e = (BYTE*)dstEnd;
359eda14cbcSMatt Macy 
360*e92ffd9bSMartin Matuska     do { LZ4_memcpy(d,s,8); d+=8; s+=8; } while (d<e);
361eda14cbcSMatt Macy }
362*e92ffd9bSMartin Matuska 
/*
 * Source-pointer adjustment tables, both indexed by a match offset in the
 * range 0..7: inc32table[offset] is added to the source pointer and
 * dec64table[offset] is subtracted from it by the small-offset copy code.
 */
static const unsigned inc32table[8] = {
    0, 1, 2, 1,     /* offsets 0..3 */
    0, 4, 4, 4      /* offsets 4..7 */
};
static const int dec64table[8] = {
     0, 0, 0, -1,   /* offsets 0..3 */
    -4, 1, 2,  3    /* offsets 4..7 */
};
365*e92ffd9bSMartin Matuska 
366*e92ffd9bSMartin Matuska 
/*
 * LZ4_FAST_DEC_LOOP: compile-time switch for the code guarded by
 * `#if LZ4_FAST_DEC_LOOP`. It can be preset externally; otherwise it
 * defaults to 1 on x86 / x86-64, to 1 on aarch64 unless the compiler is
 * clang, and to 0 everywhere else.
 */
#ifndef LZ4_FAST_DEC_LOOP
#  if defined __i386__ || defined _M_IX86 || defined __x86_64__ || defined _M_X64
#    define LZ4_FAST_DEC_LOOP 1
#  elif defined(__aarch64__) && !defined(__clang__)
     /* On aarch64, we disable this optimization for clang because on certain
      * mobile chipsets, performance is reduced with clang. For information
      * refer to https://github.com/lz4/lz4/pull/707 */
#    define LZ4_FAST_DEC_LOOP 1
#  else
#    define LZ4_FAST_DEC_LOOP 0
#  endif
#endif
379eda14cbcSMatt Macy 
380*e92ffd9bSMartin Matuska #if LZ4_FAST_DEC_LOOP
381eda14cbcSMatt Macy 
382*e92ffd9bSMartin Matuska LZ4_FORCE_INLINE void
LZ4_memcpy_using_offset_base(BYTE * dstPtr,const BYTE * srcPtr,BYTE * dstEnd,const size_t offset)383*e92ffd9bSMartin Matuska LZ4_memcpy_using_offset_base(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset)
384*e92ffd9bSMartin Matuska {
385*e92ffd9bSMartin Matuska     assert(srcPtr + offset == dstPtr);
386*e92ffd9bSMartin Matuska     if (offset < 8) {
387*e92ffd9bSMartin Matuska         LZ4_write32(dstPtr, 0);   /* silence an msan warning when offset==0 */
388*e92ffd9bSMartin Matuska         dstPtr[0] = srcPtr[0];
389*e92ffd9bSMartin Matuska         dstPtr[1] = srcPtr[1];
390*e92ffd9bSMartin Matuska         dstPtr[2] = srcPtr[2];
391*e92ffd9bSMartin Matuska         dstPtr[3] = srcPtr[3];
392*e92ffd9bSMartin Matuska         srcPtr += inc32table[offset];
393*e92ffd9bSMartin Matuska         LZ4_memcpy(dstPtr+4, srcPtr, 4);
394*e92ffd9bSMartin Matuska         srcPtr -= dec64table[offset];
395*e92ffd9bSMartin Matuska         dstPtr += 8;
396*e92ffd9bSMartin Matuska     } else {
397*e92ffd9bSMartin Matuska         LZ4_memcpy(dstPtr, srcPtr, 8);
398*e92ffd9bSMartin Matuska         dstPtr += 8;
399*e92ffd9bSMartin Matuska         srcPtr += 8;
400*e92ffd9bSMartin Matuska     }
401*e92ffd9bSMartin Matuska 
402*e92ffd9bSMartin Matuska     LZ4_wildCopy8(dstPtr, srcPtr, dstEnd);
403*e92ffd9bSMartin Matuska }
404*e92ffd9bSMartin Matuska 
405*e92ffd9bSMartin Matuska /* customized variant of memcpy, which can overwrite up to 32 bytes beyond dstEnd
406*e92ffd9bSMartin Matuska  * this version copies two times 16 bytes (instead of one time 32 bytes)
407*e92ffd9bSMartin Matuska  * because it must be compatible with offsets >= 16. */
408*e92ffd9bSMartin Matuska LZ4_FORCE_INLINE void
LZ4_wildCopy32(void * dstPtr,const void * srcPtr,void * dstEnd)409*e92ffd9bSMartin Matuska LZ4_wildCopy32(void* dstPtr, const void* srcPtr, void* dstEnd)
410*e92ffd9bSMartin Matuska {
411*e92ffd9bSMartin Matuska     BYTE* d = (BYTE*)dstPtr;
412*e92ffd9bSMartin Matuska     const BYTE* s = (const BYTE*)srcPtr;
413*e92ffd9bSMartin Matuska     BYTE* const e = (BYTE*)dstEnd;
414*e92ffd9bSMartin Matuska 
415*e92ffd9bSMartin Matuska     do { LZ4_memcpy(d,s,16); LZ4_memcpy(d+16,s+16,16); d+=32; s+=32; } while (d<e);
416*e92ffd9bSMartin Matuska }
417*e92ffd9bSMartin Matuska 
418*e92ffd9bSMartin Matuska /* LZ4_memcpy_using_offset()  presumes :
419*e92ffd9bSMartin Matuska  * - dstEnd >= dstPtr + MINMATCH
420*e92ffd9bSMartin Matuska  * - there is at least 8 bytes available to write after dstEnd */
421*e92ffd9bSMartin Matuska LZ4_FORCE_INLINE void
LZ4_memcpy_using_offset(BYTE * dstPtr,const BYTE * srcPtr,BYTE * dstEnd,const size_t offset)422*e92ffd9bSMartin Matuska LZ4_memcpy_using_offset(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset)
423*e92ffd9bSMartin Matuska {
424*e92ffd9bSMartin Matuska     BYTE v[8];
425*e92ffd9bSMartin Matuska 
426*e92ffd9bSMartin Matuska     assert(dstEnd >= dstPtr + MINMATCH);
427*e92ffd9bSMartin Matuska 
428*e92ffd9bSMartin Matuska     switch(offset) {
429*e92ffd9bSMartin Matuska     case 1:
430*e92ffd9bSMartin Matuska         MEM_INIT(v, *srcPtr, 8);
431eda14cbcSMatt Macy         break;
432*e92ffd9bSMartin Matuska     case 2:
433*e92ffd9bSMartin Matuska         LZ4_memcpy(v, srcPtr, 2);
434*e92ffd9bSMartin Matuska         LZ4_memcpy(&v[2], srcPtr, 2);
435*e92ffd9bSMartin Matuska         LZ4_memcpy(&v[4], v, 4);
436*e92ffd9bSMartin Matuska         break;
437*e92ffd9bSMartin Matuska     case 4:
438*e92ffd9bSMartin Matuska         LZ4_memcpy(v, srcPtr, 4);
439*e92ffd9bSMartin Matuska         LZ4_memcpy(&v[4], srcPtr, 4);
440*e92ffd9bSMartin Matuska         break;
441*e92ffd9bSMartin Matuska     default:
442*e92ffd9bSMartin Matuska         LZ4_memcpy_using_offset_base(dstPtr, srcPtr, dstEnd, offset);
443*e92ffd9bSMartin Matuska         return;
444eda14cbcSMatt Macy     }
445eda14cbcSMatt Macy 
446*e92ffd9bSMartin Matuska     LZ4_memcpy(dstPtr, v, 8);
447*e92ffd9bSMartin Matuska     dstPtr += 8;
448*e92ffd9bSMartin Matuska     while (dstPtr < dstEnd) {
449*e92ffd9bSMartin Matuska         LZ4_memcpy(dstPtr, v, 8);
450*e92ffd9bSMartin Matuska         dstPtr += 8;
451eda14cbcSMatt Macy     }
452eda14cbcSMatt Macy }
453eda14cbcSMatt Macy #endif
454eda14cbcSMatt Macy 
455eda14cbcSMatt Macy 
456*e92ffd9bSMartin Matuska /*-************************************
457*e92ffd9bSMartin Matuska *  Local Structures and types
458*e92ffd9bSMartin Matuska **************************************/
/* Table kinds; the enumerators take the values 0 through 3 in declaration order. */
typedef enum {
    clearedTable = 0,
    byPtr        = 1,
    byU32        = 2,
    byU16        = 3
} tableType_t;
460eda14cbcSMatt Macy 
461*e92ffd9bSMartin Matuska /**
462*e92ffd9bSMartin Matuska  * This enum distinguishes several different modes of accessing previous
463*e92ffd9bSMartin Matuska  * content in the stream.
464eda14cbcSMatt Macy  *
465*e92ffd9bSMartin Matuska  * - noDict        : There is no preceding content.
 * - withPrefix64k : Table entries up to ctx->dictSize before the current blob
 *                   being compressed are valid and refer to the preceding
468*e92ffd9bSMartin Matuska  *                   content (of length ctx->dictSize), which is available
469*e92ffd9bSMartin Matuska  *                   contiguously preceding in memory the content currently
470*e92ffd9bSMartin Matuska  *                   being compressed.
471*e92ffd9bSMartin Matuska  * - usingExtDict  : Like withPrefix64k, but the preceding content is somewhere
472*e92ffd9bSMartin Matuska  *                   else in memory, starting at ctx->dictionary with length
473*e92ffd9bSMartin Matuska  *                   ctx->dictSize.
474*e92ffd9bSMartin Matuska  * - usingDictCtx  : Like usingExtDict, but everything concerning the preceding
475*e92ffd9bSMartin Matuska  *                   content is in a separate context, pointed to by
476*e92ffd9bSMartin Matuska  *                   ctx->dictCtx. ctx->dictionary, ctx->dictSize, and table
477*e92ffd9bSMartin Matuska  *                   entries in the current context that refer to positions
478*e92ffd9bSMartin Matuska  *                   preceding the beginning of the current compression are
479*e92ffd9bSMartin Matuska  *                   ignored. Instead, ctx->dictCtx->dictionary and ctx->dictCtx
480*e92ffd9bSMartin Matuska  *                   ->dictSize describe the location and size of the preceding
481*e92ffd9bSMartin Matuska  *                   content, and matches are found by looking in the ctx
482*e92ffd9bSMartin Matuska  *                   ->dictCtx->hashTable.
483eda14cbcSMatt Macy  */
typedef enum {
    noDict        = 0,
    withPrefix64k = 1,
    usingExtDict  = 2,
    usingDictCtx  = 3
} dict_directive;
/* Two-state dictionary-issue flag: noDictIssue is 0, dictSmall is 1. */
typedef enum {
    noDictIssue = 0,
    dictSmall   = 1
} dictIssue_directive;
486eda14cbcSMatt Macy 
487*e92ffd9bSMartin Matuska /*-*******************************
488*e92ffd9bSMartin Matuska  *  Decompression functions
489*e92ffd9bSMartin Matuska  ********************************/
490eda14cbcSMatt Macy 
/* Selects which size bounds the decode: the output size (0) or the input size (1). */
typedef enum {
    endOnOutputSize = 0,
    endOnInputSize  = 1
} endCondition_directive;
/* Full-block decode (0) versus partial decode (1). */
typedef enum {
    decode_full_block = 0,
    partial_decode    = 1
} earlyEnd_directive;
493*e92ffd9bSMartin Matuska 
/* Status codes reported by read_variable_length(): 0 on success, negative on failure. */
typedef enum {
    ok            =  0,
    initial_error = -1,
    loop_error    = -2
} variable_length_error;
495*e92ffd9bSMartin Matuska 
496*e92ffd9bSMartin Matuska LZ4_FORCE_INLINE unsigned
read_variable_length(const BYTE ** ip,const BYTE * lencheck,int loop_check,int initial_check,variable_length_error * error)497*e92ffd9bSMartin Matuska read_variable_length(const BYTE**ip, const BYTE* lencheck,
498*e92ffd9bSMartin Matuska                      int loop_check, int initial_check,
499*e92ffd9bSMartin Matuska                      variable_length_error* error)
500eda14cbcSMatt Macy {
501*e92ffd9bSMartin Matuska     U32 length = 0;
502*e92ffd9bSMartin Matuska     U32 s;
503*e92ffd9bSMartin Matuska     if (initial_check && unlikely((*ip) >= lencheck)) {    /* overflow detection */
504*e92ffd9bSMartin Matuska         *error = initial_error;
505*e92ffd9bSMartin Matuska         return length;
506*e92ffd9bSMartin Matuska     }
507*e92ffd9bSMartin Matuska     do {
508*e92ffd9bSMartin Matuska         s = **ip;
509*e92ffd9bSMartin Matuska         (*ip)++;
510*e92ffd9bSMartin Matuska         length += s;
511*e92ffd9bSMartin Matuska         if (loop_check && unlikely((*ip) >= lencheck)) {    /* overflow detection */
512*e92ffd9bSMartin Matuska             *error = loop_error;
513*e92ffd9bSMartin Matuska             return length;
514*e92ffd9bSMartin Matuska         }
515*e92ffd9bSMartin Matuska     } while (s==255);
516eda14cbcSMatt Macy 
517*e92ffd9bSMartin Matuska     return length;
518*e92ffd9bSMartin Matuska }
519*e92ffd9bSMartin Matuska 
/*
 * LZ4_STATIC_ASSERT() simply forwards its condition to ASSERT().
 * NOTE(review): despite the name this is presumably evaluated like any
 * other ASSERT() rather than at compile time - confirm against the
 * ASSERT definition in use.
 */
#define	LZ4_STATIC_ASSERT(c)	ASSERT(c)
521*e92ffd9bSMartin Matuska 
522*e92ffd9bSMartin Matuska 
523*e92ffd9bSMartin Matuska /*! LZ4_decompress_generic() :
524*e92ffd9bSMartin Matuska  *  This generic decompression function covers all use cases.
525*e92ffd9bSMartin Matuska  *  It shall be instantiated several times, using different sets of directives.
526*e92ffd9bSMartin Matuska  *  Note that it is important for performance that this function really get inlined,
527*e92ffd9bSMartin Matuska  *  in order to remove useless branches during compilation optimization.
528*e92ffd9bSMartin Matuska  */
529*e92ffd9bSMartin Matuska LZ4_FORCE_INLINE int
LZ4_decompress_generic(const char * const src,char * const dst,int srcSize,int outputSize,endCondition_directive endOnInput,earlyEnd_directive partialDecoding,dict_directive dict,const BYTE * const lowPrefix,const BYTE * const dictStart,const size_t dictSize)530*e92ffd9bSMartin Matuska LZ4_decompress_generic(
531*e92ffd9bSMartin Matuska                  const char* const src,
532*e92ffd9bSMartin Matuska                  char* const dst,
533*e92ffd9bSMartin Matuska                  int srcSize,
534*e92ffd9bSMartin Matuska                  int outputSize,         /* If endOnInput==endOnInputSize, this value is `dstCapacity` */
535*e92ffd9bSMartin Matuska 
536*e92ffd9bSMartin Matuska                  endCondition_directive endOnInput,   /* endOnOutputSize, endOnInputSize */
537*e92ffd9bSMartin Matuska                  earlyEnd_directive partialDecoding,  /* full, partial */
538*e92ffd9bSMartin Matuska                  dict_directive dict,                 /* noDict, withPrefix64k, usingExtDict */
539*e92ffd9bSMartin Matuska                  const BYTE* const lowPrefix,  /* always <= dst, == dst when no prefix */
540*e92ffd9bSMartin Matuska                  const BYTE* const dictStart,  /* only if dict==usingExtDict */
541*e92ffd9bSMartin Matuska                  const size_t dictSize         /* note : = 0 if noDict */
542*e92ffd9bSMartin Matuska                  )
543*e92ffd9bSMartin Matuska {
544*e92ffd9bSMartin Matuska     if ((src == NULL) || (outputSize < 0)) { return -1; }
545*e92ffd9bSMartin Matuska 
546*e92ffd9bSMartin Matuska     {   const BYTE* ip = (const BYTE*) src;
547*e92ffd9bSMartin Matuska         const BYTE* const iend = ip + srcSize;
548*e92ffd9bSMartin Matuska 
549*e92ffd9bSMartin Matuska         BYTE* op = (BYTE*) dst;
550*e92ffd9bSMartin Matuska         BYTE* const oend = op + outputSize;
551eda14cbcSMatt Macy         BYTE* cpy;
552eda14cbcSMatt Macy 
553*e92ffd9bSMartin Matuska         const BYTE* const dictEnd = (dictStart == NULL) ? NULL : dictStart + dictSize;
554*e92ffd9bSMartin Matuska 
555*e92ffd9bSMartin Matuska         const int safeDecode = (endOnInput==endOnInputSize);
556*e92ffd9bSMartin Matuska         const int checkOffset = ((safeDecode) && (dictSize < (int)(64 KB)));
557*e92ffd9bSMartin Matuska 
558*e92ffd9bSMartin Matuska 
559*e92ffd9bSMartin Matuska         /* Set up the "end" pointers for the shortcut. */
560*e92ffd9bSMartin Matuska         const BYTE* const shortiend = iend - (endOnInput ? 14 : 8) /*maxLL*/ - 2 /*offset*/;
561*e92ffd9bSMartin Matuska         const BYTE* const shortoend = oend - (endOnInput ? 14 : 8) /*maxLL*/ - 18 /*maxML*/;
562*e92ffd9bSMartin Matuska 
563*e92ffd9bSMartin Matuska         const BYTE* match;
564*e92ffd9bSMartin Matuska         size_t offset;
565eda14cbcSMatt Macy         unsigned token;
566eda14cbcSMatt Macy         size_t length;
567eda14cbcSMatt Macy 
568*e92ffd9bSMartin Matuska 
569*e92ffd9bSMartin Matuska         DEBUGLOG(5, "LZ4_decompress_generic (srcSize:%i, dstSize:%i)", srcSize, outputSize);
570*e92ffd9bSMartin Matuska 
571*e92ffd9bSMartin Matuska         /* Special cases */
572*e92ffd9bSMartin Matuska         assert(lowPrefix <= op);
573*e92ffd9bSMartin Matuska         if ((endOnInput) && (unlikely(outputSize==0))) {
574*e92ffd9bSMartin Matuska             /* Empty output buffer */
575*e92ffd9bSMartin Matuska             if (partialDecoding) return 0;
576*e92ffd9bSMartin Matuska             return ((srcSize==1) && (*ip==0)) ? 0 : -1;
577*e92ffd9bSMartin Matuska         }
578*e92ffd9bSMartin Matuska         if ((!endOnInput) && (unlikely(outputSize==0))) { return (*ip==0 ? 1 : -1); }
579*e92ffd9bSMartin Matuska         if ((endOnInput) && unlikely(srcSize==0)) { return -1; }
580*e92ffd9bSMartin Matuska 
581*e92ffd9bSMartin Matuska 	/* Currently the fast loop shows a regression on qualcomm arm chips. */
582*e92ffd9bSMartin Matuska #if LZ4_FAST_DEC_LOOP
583*e92ffd9bSMartin Matuska         if ((oend - op) < FASTLOOP_SAFE_DISTANCE) {
584*e92ffd9bSMartin Matuska             DEBUGLOG(6, "skip fast decode loop");
585*e92ffd9bSMartin Matuska             goto safe_decode;
586*e92ffd9bSMartin Matuska         }
587*e92ffd9bSMartin Matuska 
588*e92ffd9bSMartin Matuska         /* Fast loop : decode sequences as long as output < iend-FASTLOOP_SAFE_DISTANCE */
589*e92ffd9bSMartin Matuska         while (1) {
590*e92ffd9bSMartin Matuska             /* Main fastloop assertion: We can always wildcopy FASTLOOP_SAFE_DISTANCE */
591*e92ffd9bSMartin Matuska             assert(oend - op >= FASTLOOP_SAFE_DISTANCE);
592*e92ffd9bSMartin Matuska             if (endOnInput) { assert(ip < iend); }
593eda14cbcSMatt Macy             token = *ip++;
594*e92ffd9bSMartin Matuska             length = token >> ML_BITS;  /* literal length */
595*e92ffd9bSMartin Matuska 
596*e92ffd9bSMartin Matuska             assert(!endOnInput || ip <= iend); /* ip < iend before the increment */
597*e92ffd9bSMartin Matuska 
598*e92ffd9bSMartin Matuska             /* decode literal length */
599*e92ffd9bSMartin Matuska             if (length == RUN_MASK) {
600*e92ffd9bSMartin Matuska                 variable_length_error error = ok;
601*e92ffd9bSMartin Matuska                 length += read_variable_length(&ip, iend-RUN_MASK, (int)endOnInput, (int)endOnInput, &error);
602*e92ffd9bSMartin Matuska                 if (error == initial_error) { goto _output_error; }
603*e92ffd9bSMartin Matuska                 if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; } /* overflow detection */
604*e92ffd9bSMartin Matuska                 if ((safeDecode) && unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; } /* overflow detection */
605*e92ffd9bSMartin Matuska 
606eda14cbcSMatt Macy                 /* copy literals */
607eda14cbcSMatt Macy                 cpy = op+length;
608*e92ffd9bSMartin Matuska                 LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH);
609*e92ffd9bSMartin Matuska                 if (endOnInput) {  /* LZ4_decompress_safe() */
610*e92ffd9bSMartin Matuska                     if ((cpy>oend-32) || (ip+length>iend-32)) { goto safe_literal_copy; }
611*e92ffd9bSMartin Matuska                     LZ4_wildCopy32(op, ip, cpy);
612*e92ffd9bSMartin Matuska                 } else {   /* LZ4_decompress_fast() */
613*e92ffd9bSMartin Matuska                     if (cpy>oend-8) { goto safe_literal_copy; }
614*e92ffd9bSMartin Matuska                     LZ4_wildCopy8(op, ip, cpy); /* LZ4_decompress_fast() cannot copy more than 8 bytes at a time :
615*e92ffd9bSMartin Matuska                                                  * it doesn't know input length, and only relies on end-of-block properties */
616eda14cbcSMatt Macy                 }
617*e92ffd9bSMartin Matuska                 ip += length; op = cpy;
618*e92ffd9bSMartin Matuska             } else {
619*e92ffd9bSMartin Matuska                 cpy = op+length;
620*e92ffd9bSMartin Matuska                 if (endOnInput) {  /* LZ4_decompress_safe() */
621*e92ffd9bSMartin Matuska                     DEBUGLOG(7, "copy %u bytes in a 16-bytes stripe", (unsigned)length);
622*e92ffd9bSMartin Matuska                     /* We don't need to check oend, since we check it once for each loop below */
623*e92ffd9bSMartin Matuska                     if (ip > iend-(16 + 1/*max lit + offset + nextToken*/)) { goto safe_literal_copy; }
624*e92ffd9bSMartin Matuska                     /* Literals can only be 14, but hope compilers optimize if we copy by a register size */
625*e92ffd9bSMartin Matuska                     LZ4_memcpy(op, ip, 16);
626*e92ffd9bSMartin Matuska                 } else {  /* LZ4_decompress_fast() */
627*e92ffd9bSMartin Matuska                     /* LZ4_decompress_fast() cannot copy more than 8 bytes at a time :
628*e92ffd9bSMartin Matuska                      * it doesn't know input length, and relies on end-of-block properties */
629*e92ffd9bSMartin Matuska                     LZ4_memcpy(op, ip, 8);
630*e92ffd9bSMartin Matuska                     if (length > 8) { LZ4_memcpy(op+8, ip+8, 8); }
631*e92ffd9bSMartin Matuska                 }
632*e92ffd9bSMartin Matuska                 ip += length; op = cpy;
633*e92ffd9bSMartin Matuska             }
634eda14cbcSMatt Macy 
635eda14cbcSMatt Macy             /* get offset */
636*e92ffd9bSMartin Matuska             offset = LZ4_readLE16(ip); ip+=2;
637*e92ffd9bSMartin Matuska             match = op - offset;
638*e92ffd9bSMartin Matuska             assert(match <= op);
639eda14cbcSMatt Macy 
640eda14cbcSMatt Macy             /* get matchlength */
641*e92ffd9bSMartin Matuska             length = token & ML_MASK;
642*e92ffd9bSMartin Matuska 
643*e92ffd9bSMartin Matuska             if (length == ML_MASK) {
644*e92ffd9bSMartin Matuska                 variable_length_error error = ok;
645*e92ffd9bSMartin Matuska                 if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) { goto _output_error; } /* Error : offset outside buffers */
646*e92ffd9bSMartin Matuska                 length += read_variable_length(&ip, iend - LASTLITERALS + 1, (int)endOnInput, 0, &error);
647*e92ffd9bSMartin Matuska                 if (error != ok) { goto _output_error; }
648*e92ffd9bSMartin Matuska                 if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)op)) { goto _output_error; } /* overflow detection */
649*e92ffd9bSMartin Matuska                 length += MINMATCH;
650*e92ffd9bSMartin Matuska                 if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) {
651*e92ffd9bSMartin Matuska                     goto safe_match_copy;
652*e92ffd9bSMartin Matuska                 }
653*e92ffd9bSMartin Matuska             } else {
654*e92ffd9bSMartin Matuska                 length += MINMATCH;
655*e92ffd9bSMartin Matuska                 if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) {
656*e92ffd9bSMartin Matuska                     goto safe_match_copy;
657*e92ffd9bSMartin Matuska                 }
658*e92ffd9bSMartin Matuska 
659*e92ffd9bSMartin Matuska                 /* Fastpath check: Avoids a branch in LZ4_wildCopy32 if true */
660*e92ffd9bSMartin Matuska                 if ((dict == withPrefix64k) || (match >= lowPrefix)) {
661*e92ffd9bSMartin Matuska                     if (offset >= 8) {
662*e92ffd9bSMartin Matuska                         assert(match >= lowPrefix);
663*e92ffd9bSMartin Matuska                         assert(match <= op);
664*e92ffd9bSMartin Matuska                         assert(op + 18 <= oend);
665*e92ffd9bSMartin Matuska 
666*e92ffd9bSMartin Matuska                         LZ4_memcpy(op, match, 8);
667*e92ffd9bSMartin Matuska                         LZ4_memcpy(op+8, match+8, 8);
668*e92ffd9bSMartin Matuska                         LZ4_memcpy(op+16, match+16, 2);
669*e92ffd9bSMartin Matuska                         op += length;
670eda14cbcSMatt Macy                         continue;
671*e92ffd9bSMartin Matuska             }   }   }
672*e92ffd9bSMartin Matuska 
673*e92ffd9bSMartin Matuska             if (checkOffset && (unlikely(match + dictSize < lowPrefix))) { goto _output_error; } /* Error : offset outside buffers */
674*e92ffd9bSMartin Matuska             /* match starting within external dictionary */
675*e92ffd9bSMartin Matuska             if ((dict==usingExtDict) && (match < lowPrefix)) {
676*e92ffd9bSMartin Matuska                 if (unlikely(op+length > oend-LASTLITERALS)) {
677*e92ffd9bSMartin Matuska                     if (partialDecoding) {
678*e92ffd9bSMartin Matuska                         DEBUGLOG(7, "partialDecoding: dictionary match, close to dstEnd");
679*e92ffd9bSMartin Matuska                         length = MIN(length, (size_t)(oend-op));
680*e92ffd9bSMartin Matuska                     } else {
681*e92ffd9bSMartin Matuska                         goto _output_error;  /* end-of-block condition violated */
682*e92ffd9bSMartin Matuska                 }   }
683*e92ffd9bSMartin Matuska 
684*e92ffd9bSMartin Matuska                 if (length <= (size_t)(lowPrefix-match)) {
685*e92ffd9bSMartin Matuska                     /* match fits entirely within external dictionary : just copy */
686*e92ffd9bSMartin Matuska                     memmove(op, dictEnd - (lowPrefix-match), length);
687*e92ffd9bSMartin Matuska                     op += length;
688*e92ffd9bSMartin Matuska                 } else {
689*e92ffd9bSMartin Matuska                     /* match stretches into both external dictionary and current block */
690*e92ffd9bSMartin Matuska                     size_t const copySize = (size_t)(lowPrefix - match);
691*e92ffd9bSMartin Matuska                     size_t const restSize = length - copySize;
692*e92ffd9bSMartin Matuska                     LZ4_memcpy(op, dictEnd - copySize, copySize);
693*e92ffd9bSMartin Matuska                     op += copySize;
694*e92ffd9bSMartin Matuska                     if (restSize > (size_t)(op - lowPrefix)) {  /* overlap copy */
695*e92ffd9bSMartin Matuska                         BYTE* const endOfMatch = op + restSize;
696*e92ffd9bSMartin Matuska                         const BYTE* copyFrom = lowPrefix;
697*e92ffd9bSMartin Matuska                         while (op < endOfMatch) { *op++ = *copyFrom++; }
698*e92ffd9bSMartin Matuska                     } else {
699*e92ffd9bSMartin Matuska                         LZ4_memcpy(op, lowPrefix, restSize);
700*e92ffd9bSMartin Matuska                         op += restSize;
701*e92ffd9bSMartin Matuska                 }   }
702*e92ffd9bSMartin Matuska                 continue;
703*e92ffd9bSMartin Matuska             }
704*e92ffd9bSMartin Matuska 
705*e92ffd9bSMartin Matuska             /* copy match within block */
706*e92ffd9bSMartin Matuska             cpy = op + length;
707*e92ffd9bSMartin Matuska 
708*e92ffd9bSMartin Matuska             assert((op <= oend) && (oend-op >= 32));
709*e92ffd9bSMartin Matuska             if (unlikely(offset<16)) {
710*e92ffd9bSMartin Matuska                 LZ4_memcpy_using_offset(op, match, cpy, offset);
711*e92ffd9bSMartin Matuska             } else {
712*e92ffd9bSMartin Matuska                 LZ4_wildCopy32(op, match, cpy);
713*e92ffd9bSMartin Matuska             }
714*e92ffd9bSMartin Matuska 
715*e92ffd9bSMartin Matuska             op = cpy;   /* wildcopy correction */
716*e92ffd9bSMartin Matuska         }
717*e92ffd9bSMartin Matuska     safe_decode:
718*e92ffd9bSMartin Matuska #endif
719*e92ffd9bSMartin Matuska 
720*e92ffd9bSMartin Matuska         /* Main Loop : decode remaining sequences where output < FASTLOOP_SAFE_DISTANCE */
721*e92ffd9bSMartin Matuska         while (1) {
722*e92ffd9bSMartin Matuska             token = *ip++;
723*e92ffd9bSMartin Matuska             length = token >> ML_BITS;  /* literal length */
724*e92ffd9bSMartin Matuska 
725*e92ffd9bSMartin Matuska             assert(!endOnInput || ip <= iend); /* ip < iend before the increment */
726*e92ffd9bSMartin Matuska 
727*e92ffd9bSMartin Matuska             /* A two-stage shortcut for the most common case:
728*e92ffd9bSMartin Matuska              * 1) If the literal length is 0..14, and there is enough space,
729*e92ffd9bSMartin Matuska              * enter the shortcut and copy 16 bytes on behalf of the literals
730*e92ffd9bSMartin Matuska              * (in the fast mode, only 8 bytes can be safely copied this way).
731*e92ffd9bSMartin Matuska              * 2) Further if the match length is 4..18, copy 18 bytes in a similar
732*e92ffd9bSMartin Matuska              * manner; but we ensure that there's enough space in the output for
733*e92ffd9bSMartin Matuska              * those 18 bytes earlier, upon entering the shortcut (in other words,
734*e92ffd9bSMartin Matuska              * there is a combined check for both stages).
735*e92ffd9bSMartin Matuska              */
736*e92ffd9bSMartin Matuska             if ( (endOnInput ? length != RUN_MASK : length <= 8)
737*e92ffd9bSMartin Matuska                 /* strictly "less than" on input, to re-enter the loop with at least one byte */
738*e92ffd9bSMartin Matuska               && likely((endOnInput ? ip < shortiend : 1) & (op <= shortoend)) ) {
739*e92ffd9bSMartin Matuska                 /* Copy the literals */
740*e92ffd9bSMartin Matuska                 LZ4_memcpy(op, ip, endOnInput ? 16 : 8);
741*e92ffd9bSMartin Matuska                 op += length; ip += length;
742*e92ffd9bSMartin Matuska 
743*e92ffd9bSMartin Matuska                 /* The second stage: prepare for match copying, decode full info.
744*e92ffd9bSMartin Matuska                  * If it doesn't work out, the info won't be wasted. */
745*e92ffd9bSMartin Matuska                 length = token & ML_MASK; /* match length */
746*e92ffd9bSMartin Matuska                 offset = LZ4_readLE16(ip); ip += 2;
747*e92ffd9bSMartin Matuska                 match = op - offset;
748*e92ffd9bSMartin Matuska                 assert(match <= op); /* check overflow */
749*e92ffd9bSMartin Matuska 
750*e92ffd9bSMartin Matuska                 /* Do not deal with overlapping matches. */
751*e92ffd9bSMartin Matuska                 if ( (length != ML_MASK)
752*e92ffd9bSMartin Matuska                   && (offset >= 8)
753*e92ffd9bSMartin Matuska                   && (dict==withPrefix64k || match >= lowPrefix) ) {
754*e92ffd9bSMartin Matuska                     /* Copy the match. */
755*e92ffd9bSMartin Matuska                     LZ4_memcpy(op + 0, match + 0, 8);
756*e92ffd9bSMartin Matuska                     LZ4_memcpy(op + 8, match + 8, 8);
757*e92ffd9bSMartin Matuska                     LZ4_memcpy(op +16, match +16, 2);
758*e92ffd9bSMartin Matuska                     op += length + MINMATCH;
759*e92ffd9bSMartin Matuska                     /* Both stages worked, load the next token. */
760*e92ffd9bSMartin Matuska                     continue;
761*e92ffd9bSMartin Matuska                 }
762*e92ffd9bSMartin Matuska 
763*e92ffd9bSMartin Matuska                 /* The second stage didn't work out, but the info is ready.
764*e92ffd9bSMartin Matuska                  * Propel it right to the point of match copying. */
765*e92ffd9bSMartin Matuska                 goto _copy_match;
766*e92ffd9bSMartin Matuska             }
767*e92ffd9bSMartin Matuska 
768*e92ffd9bSMartin Matuska             /* decode literal length */
769*e92ffd9bSMartin Matuska             if (length == RUN_MASK) {
770*e92ffd9bSMartin Matuska                 variable_length_error error = ok;
771*e92ffd9bSMartin Matuska                 length += read_variable_length(&ip, iend-RUN_MASK, (int)endOnInput, (int)endOnInput, &error);
772*e92ffd9bSMartin Matuska                 if (error == initial_error) { goto _output_error; }
773*e92ffd9bSMartin Matuska                 if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; } /* overflow detection */
774*e92ffd9bSMartin Matuska                 if ((safeDecode) && unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; } /* overflow detection */
775*e92ffd9bSMartin Matuska             }
776*e92ffd9bSMartin Matuska 
777*e92ffd9bSMartin Matuska             /* copy literals */
778*e92ffd9bSMartin Matuska             cpy = op+length;
779*e92ffd9bSMartin Matuska #if LZ4_FAST_DEC_LOOP
780*e92ffd9bSMartin Matuska         safe_literal_copy:
781*e92ffd9bSMartin Matuska #endif
782*e92ffd9bSMartin Matuska             LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH);
783*e92ffd9bSMartin Matuska             if ( ((endOnInput) && ((cpy>oend-MFLIMIT) || (ip+length>iend-(2+1+LASTLITERALS))) )
784*e92ffd9bSMartin Matuska               || ((!endOnInput) && (cpy>oend-WILDCOPYLENGTH)) )
785*e92ffd9bSMartin Matuska             {
786*e92ffd9bSMartin Matuska                 /* We've either hit the input parsing restriction or the output parsing restriction.
787*e92ffd9bSMartin Matuska                  * In the normal scenario, decoding a full block, it must be the last sequence,
788*e92ffd9bSMartin Matuska                  * otherwise it's an error (invalid input or dimensions).
789*e92ffd9bSMartin Matuska                  * In partialDecoding scenario, it's necessary to ensure there is no buffer overflow.
790*e92ffd9bSMartin Matuska                  */
791*e92ffd9bSMartin Matuska                 if (partialDecoding) {
792*e92ffd9bSMartin Matuska                     /* Since we are partial decoding we may be in this block because of the output parsing
793*e92ffd9bSMartin Matuska                      * restriction, which is not valid since the output buffer is allowed to be undersized.
794*e92ffd9bSMartin Matuska                      */
795*e92ffd9bSMartin Matuska                     assert(endOnInput);
796*e92ffd9bSMartin Matuska                     DEBUGLOG(7, "partialDecoding: copying literals, close to input or output end")
797*e92ffd9bSMartin Matuska                     DEBUGLOG(7, "partialDecoding: literal length = %u", (unsigned)length);
798*e92ffd9bSMartin Matuska                     DEBUGLOG(7, "partialDecoding: remaining space in dstBuffer : %i", (int)(oend - op));
799*e92ffd9bSMartin Matuska                     DEBUGLOG(7, "partialDecoding: remaining space in srcBuffer : %i", (int)(iend - ip));
800*e92ffd9bSMartin Matuska                     /* Finishing in the middle of a literals segment,
801*e92ffd9bSMartin Matuska                      * due to lack of input.
802*e92ffd9bSMartin Matuska                      */
803*e92ffd9bSMartin Matuska                     if (ip+length > iend) {
804*e92ffd9bSMartin Matuska                         length = (size_t)(iend-ip);
805*e92ffd9bSMartin Matuska                         cpy = op + length;
806*e92ffd9bSMartin Matuska                     }
807*e92ffd9bSMartin Matuska                     /* Finishing in the middle of a literals segment,
808*e92ffd9bSMartin Matuska                      * due to lack of output space.
809*e92ffd9bSMartin Matuska                      */
810*e92ffd9bSMartin Matuska                     if (cpy > oend) {
811*e92ffd9bSMartin Matuska                         cpy = oend;
812*e92ffd9bSMartin Matuska                         assert(op<=oend);
813*e92ffd9bSMartin Matuska                         length = (size_t)(oend-op);
814*e92ffd9bSMartin Matuska                     }
815*e92ffd9bSMartin Matuska                 } else {
816*e92ffd9bSMartin Matuska                     /* We must be on the last sequence because of the parsing limitations so check
817*e92ffd9bSMartin Matuska                      * that we exactly regenerate the original size (must be exact when !endOnInput).
818*e92ffd9bSMartin Matuska                      */
819*e92ffd9bSMartin Matuska                     if ((!endOnInput) && (cpy != oend)) { goto _output_error; }
820*e92ffd9bSMartin Matuska                      /* We must be on the last sequence (or invalid) because of the parsing limitations
821*e92ffd9bSMartin Matuska                       * so check that we exactly consume the input and don't overrun the output buffer.
822*e92ffd9bSMartin Matuska                       */
823*e92ffd9bSMartin Matuska                     if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) {
824*e92ffd9bSMartin Matuska                         DEBUGLOG(6, "should have been last run of literals")
825*e92ffd9bSMartin Matuska                         DEBUGLOG(6, "ip(%p) + length(%i) = %p != iend (%p)", ip, (int)length, ip+length, iend);
826*e92ffd9bSMartin Matuska                         DEBUGLOG(6, "or cpy(%p) > oend(%p)", cpy, oend);
827*e92ffd9bSMartin Matuska                         goto _output_error;
828*e92ffd9bSMartin Matuska                     }
829*e92ffd9bSMartin Matuska                 }
830*e92ffd9bSMartin Matuska                 memmove(op, ip, length);  /* supports overlapping memory regions; only matters for in-place decompression scenarios */
831*e92ffd9bSMartin Matuska                 ip += length;
832*e92ffd9bSMartin Matuska                 op += length;
833*e92ffd9bSMartin Matuska                 /* Necessarily EOF when !partialDecoding.
834*e92ffd9bSMartin Matuska                  * When partialDecoding, it is EOF if we've either
835*e92ffd9bSMartin Matuska                  * filled the output buffer or
836*e92ffd9bSMartin Matuska                  * can't proceed with reading an offset for following match.
837*e92ffd9bSMartin Matuska                  */
838*e92ffd9bSMartin Matuska                 if (!partialDecoding || (cpy == oend) || (ip >= (iend-2))) {
839eda14cbcSMatt Macy                     break;
840eda14cbcSMatt Macy                 }
841eda14cbcSMatt Macy             } else {
842*e92ffd9bSMartin Matuska                 LZ4_wildCopy8(op, ip, cpy);   /* may overwrite up to WILDCOPYLENGTH beyond cpy */
843*e92ffd9bSMartin Matuska                 ip += length; op = cpy;
844eda14cbcSMatt Macy             }
845*e92ffd9bSMartin Matuska 
846*e92ffd9bSMartin Matuska             /* get offset */
847*e92ffd9bSMartin Matuska             offset = LZ4_readLE16(ip); ip+=2;
848*e92ffd9bSMartin Matuska             match = op - offset;
849*e92ffd9bSMartin Matuska 
850*e92ffd9bSMartin Matuska             /* get matchlength */
851*e92ffd9bSMartin Matuska             length = token & ML_MASK;
852*e92ffd9bSMartin Matuska 
853*e92ffd9bSMartin Matuska     _copy_match:
854*e92ffd9bSMartin Matuska             if (length == ML_MASK) {
855*e92ffd9bSMartin Matuska               variable_length_error error = ok;
856*e92ffd9bSMartin Matuska               length += read_variable_length(&ip, iend - LASTLITERALS + 1, (int)endOnInput, 0, &error);
857*e92ffd9bSMartin Matuska               if (error != ok) goto _output_error;
858*e92ffd9bSMartin Matuska                 if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)op)) goto _output_error;   /* overflow detection */
859*e92ffd9bSMartin Matuska             }
860*e92ffd9bSMartin Matuska             length += MINMATCH;
861*e92ffd9bSMartin Matuska 
862*e92ffd9bSMartin Matuska #if LZ4_FAST_DEC_LOOP
863*e92ffd9bSMartin Matuska         safe_match_copy:
864eda14cbcSMatt Macy #endif
865*e92ffd9bSMartin Matuska             if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) goto _output_error;   /* Error : offset outside buffers */
866*e92ffd9bSMartin Matuska             /* match starting within external dictionary */
867*e92ffd9bSMartin Matuska             if ((dict==usingExtDict) && (match < lowPrefix)) {
868*e92ffd9bSMartin Matuska                 if (unlikely(op+length > oend-LASTLITERALS)) {
869*e92ffd9bSMartin Matuska                     if (partialDecoding) length = MIN(length, (size_t)(oend-op));
870*e92ffd9bSMartin Matuska                     else goto _output_error;   /* doesn't respect parsing restriction */
871*e92ffd9bSMartin Matuska                 }
872*e92ffd9bSMartin Matuska 
873*e92ffd9bSMartin Matuska                 if (length <= (size_t)(lowPrefix-match)) {
874*e92ffd9bSMartin Matuska                     /* match fits entirely within external dictionary : just copy */
875*e92ffd9bSMartin Matuska                     memmove(op, dictEnd - (lowPrefix-match), length);
876*e92ffd9bSMartin Matuska                     op += length;
877*e92ffd9bSMartin Matuska                 } else {
878*e92ffd9bSMartin Matuska                     /* match stretches into both external dictionary and current block */
879*e92ffd9bSMartin Matuska                     size_t const copySize = (size_t)(lowPrefix - match);
880*e92ffd9bSMartin Matuska                     size_t const restSize = length - copySize;
881*e92ffd9bSMartin Matuska                     LZ4_memcpy(op, dictEnd - copySize, copySize);
882*e92ffd9bSMartin Matuska                     op += copySize;
883*e92ffd9bSMartin Matuska                     if (restSize > (size_t)(op - lowPrefix)) {  /* overlap copy */
884*e92ffd9bSMartin Matuska                         BYTE* const endOfMatch = op + restSize;
885*e92ffd9bSMartin Matuska                         const BYTE* copyFrom = lowPrefix;
886*e92ffd9bSMartin Matuska                         while (op < endOfMatch) *op++ = *copyFrom++;
887*e92ffd9bSMartin Matuska                     } else {
888*e92ffd9bSMartin Matuska                         LZ4_memcpy(op, lowPrefix, restSize);
889*e92ffd9bSMartin Matuska                         op += restSize;
890*e92ffd9bSMartin Matuska                 }   }
891eda14cbcSMatt Macy                 continue;
892eda14cbcSMatt Macy             }
893*e92ffd9bSMartin Matuska             assert(match >= lowPrefix);
894*e92ffd9bSMartin Matuska 
895*e92ffd9bSMartin Matuska             /* copy match within block */
896*e92ffd9bSMartin Matuska             cpy = op + length;
897*e92ffd9bSMartin Matuska 
898*e92ffd9bSMartin Matuska             /* partialDecoding : may end anywhere within the block */
899*e92ffd9bSMartin Matuska             assert(op<=oend);
900*e92ffd9bSMartin Matuska             if (partialDecoding && (cpy > oend-MATCH_SAFEGUARD_DISTANCE)) {
901*e92ffd9bSMartin Matuska                 size_t const mlen = MIN(length, (size_t)(oend-op));
902*e92ffd9bSMartin Matuska                 const BYTE* const matchEnd = match + mlen;
903*e92ffd9bSMartin Matuska                 BYTE* const copyEnd = op + mlen;
904*e92ffd9bSMartin Matuska                 if (matchEnd > op) {   /* overlap copy */
905*e92ffd9bSMartin Matuska                     while (op < copyEnd) { *op++ = *match++; }
906*e92ffd9bSMartin Matuska                 } else {
907*e92ffd9bSMartin Matuska                     LZ4_memcpy(op, match, mlen);
908*e92ffd9bSMartin Matuska                 }
909*e92ffd9bSMartin Matuska                 op = copyEnd;
910*e92ffd9bSMartin Matuska                 if (op == oend) { break; }
911*e92ffd9bSMartin Matuska                 continue;
912*e92ffd9bSMartin Matuska             }
913*e92ffd9bSMartin Matuska 
914*e92ffd9bSMartin Matuska             if (unlikely(offset<8)) {
915*e92ffd9bSMartin Matuska                 LZ4_write32(op, 0);   /* silence msan warning when offset==0 */
916*e92ffd9bSMartin Matuska                 op[0] = match[0];
917*e92ffd9bSMartin Matuska                 op[1] = match[1];
918*e92ffd9bSMartin Matuska                 op[2] = match[2];
919*e92ffd9bSMartin Matuska                 op[3] = match[3];
920*e92ffd9bSMartin Matuska                 match += inc32table[offset];
921*e92ffd9bSMartin Matuska                 LZ4_memcpy(op+4, match, 4);
922*e92ffd9bSMartin Matuska                 match -= dec64table[offset];
923*e92ffd9bSMartin Matuska             } else {
924*e92ffd9bSMartin Matuska                 LZ4_memcpy(op, match, 8);
925*e92ffd9bSMartin Matuska                 match += 8;
926*e92ffd9bSMartin Matuska             }
927*e92ffd9bSMartin Matuska             op += 8;
928*e92ffd9bSMartin Matuska 
929*e92ffd9bSMartin Matuska             if (unlikely(cpy > oend-MATCH_SAFEGUARD_DISTANCE)) {
930*e92ffd9bSMartin Matuska                 BYTE* const oCopyLimit = oend - (WILDCOPYLENGTH-1);
931*e92ffd9bSMartin Matuska                 if (cpy > oend-LASTLITERALS) { goto _output_error; } /* Error : last LASTLITERALS bytes must be literals (uncompressed) */
932*e92ffd9bSMartin Matuska                 if (op < oCopyLimit) {
933*e92ffd9bSMartin Matuska                     LZ4_wildCopy8(op, match, oCopyLimit);
934*e92ffd9bSMartin Matuska                     match += oCopyLimit - op;
935*e92ffd9bSMartin Matuska                     op = oCopyLimit;
936*e92ffd9bSMartin Matuska                 }
937*e92ffd9bSMartin Matuska                 while (op < cpy) { *op++ = *match++; }
938*e92ffd9bSMartin Matuska             } else {
939*e92ffd9bSMartin Matuska                 LZ4_memcpy(op, match, 8);
940*e92ffd9bSMartin Matuska                 if (length > 16)  { LZ4_wildCopy8(op+8, match+8, cpy); }
941*e92ffd9bSMartin Matuska             }
942*e92ffd9bSMartin Matuska             op = cpy;   /* wildcopy correction */
943eda14cbcSMatt Macy         }
944eda14cbcSMatt Macy 
945eda14cbcSMatt Macy         /* end of decoding */
946*e92ffd9bSMartin Matuska         if (endOnInput) {
947*e92ffd9bSMartin Matuska             DEBUGLOG(5, "decoded %i bytes", (int) (((char*)op)-dst));
948*e92ffd9bSMartin Matuska            return (int) (((char*)op)-dst);     /* Nb of output bytes decoded */
949*e92ffd9bSMartin Matuska        } else {
950*e92ffd9bSMartin Matuska            return (int) (((const char*)ip)-src);   /* Nb of input bytes read */
951*e92ffd9bSMartin Matuska        }
952eda14cbcSMatt Macy 
953*e92ffd9bSMartin Matuska         /* Overflow error detected */
954eda14cbcSMatt Macy     _output_error:
955*e92ffd9bSMartin Matuska         return (int) (-(((const char*)ip)-src))-1;
956*e92ffd9bSMartin Matuska     }
957eda14cbcSMatt Macy }
958eda14cbcSMatt Macy 
959aebc9683SMateusz Guzik /*
960*e92ffd9bSMartin Matuska  * LZ4_uncompress_unknownOutputSize() :
961*e92ffd9bSMartin Matuska  * 	isize  : is the input size, therefore the compressed size
962*e92ffd9bSMartin Matuska  * 	maxOutputSize : is the size of the destination buffer (which must be
963*e92ffd9bSMartin Matuska  * 		already allocated)
964*e92ffd9bSMartin Matuska  * 	return : the number of bytes decoded in the destination buffer
965*e92ffd9bSMartin Matuska  * 		(necessarily <= maxOutputSize). If the source stream is
966*e92ffd9bSMartin Matuska  * 		malformed, the function will stop decoding and return a
967*e92ffd9bSMartin Matuska  * 		negative result, indicating the byte position of the faulty
968*e92ffd9bSMartin Matuska  * 		instruction. This function never writes beyond dest +
969*e92ffd9bSMartin Matuska  * 		maxOutputSize, and is therefore protected against malicious
970*e92ffd9bSMartin Matuska  * 		data packets.
971*e92ffd9bSMartin Matuska  * 	note   : Destination buffer must be already allocated.
972*e92ffd9bSMartin Matuska  *		This version is slightly slower than real_LZ4_uncompress()
973*e92ffd9bSMartin Matuska  *
974aebc9683SMateusz Guzik  */
975aebc9683SMateusz Guzik 
976*e92ffd9bSMartin Matuska /*
977*e92ffd9bSMartin Matuska  * Note: In upstream code, LZ4_uncompress_unknownOutputSize is now a legacy
978*e92ffd9bSMartin Matuska  *       wrapper for LZ4_decompress_safe which is a wrapper for
979*e92ffd9bSMartin Matuska  *	 LZ4_decompress_generic; this wrapper flattens that, rather than
980*e92ffd9bSMartin Matuska  *	 rewriting the callers.
981*e92ffd9bSMartin Matuska  */
LZ4_uncompress_unknownOutputSize(const char * source,char * dest,int compressedSize,int maxDecompressedSize)982*e92ffd9bSMartin Matuska int LZ4_uncompress_unknownOutputSize(const char* source, char* dest, int compressedSize, int maxDecompressedSize)
983eda14cbcSMatt Macy {
984*e92ffd9bSMartin Matuska     return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize,
985*e92ffd9bSMartin Matuska                                   endOnInputSize, decode_full_block, noDict,
986*e92ffd9bSMartin Matuska                                   (BYTE*)dest, NULL, 0);
987eda14cbcSMatt Macy }
988