/*
   LZ4 - Fast LZ compression algorithm
   Copyright (C) 2011-present, Yann Collet.

   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:

       * Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.
       * Redistributions in binary form must reproduce the above
   copyright notice, this list of conditions and the following disclaimer
   in the documentation and/or other materials provided with the
   distribution.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

   You can contact the author at :
    - LZ4 homepage : http://www.lz4.org
    - LZ4 source repository : https://github.com/lz4/lz4
*/

/*
 * This file contains unmodified code from lz4 1.9.3's decompressor, plus
 * associated macros and constants.
 *
 * It also contains a couple of defines from the old lz4.c to make things
 * fit together smoothly.
 *
 */

#include <sys/zfs_context.h>

/*
 * Prototype of the decompression entry point.
 *
 * NOTE(review): judging by the parameter names, `isize` is the number of
 * compressed bytes available at `source` and `maxOutputSize` is the capacity
 * of `dest` -- confirm against the definition.
 */
int LZ4_uncompress_unknownOutputSize(const char *source, char *dest,
    int isize, int maxOutputSize);

/*
 * Tuning parameters
 */

/*
 * COMPRESSIONLEVEL: Increasing this value improves compression ratio
 *	 Lowering this value reduces memory usage. Reduced memory usage
 *	typically improves speed, due to cache effect (ex: L1 32KB for Intel,
 *	L1 64KB for AMD). Memory usage formula : N->2^(N+2) Bytes
 *	(examples : 12 -> 16KB ; 17 -> 512KB)
 */
#define	COMPRESSIONLEVEL 12

/*
 * NOTCOMPRESSIBLE_CONFIRMATION: Decreasing this value will make the
 *	algorithm skip faster data segments considered "incompressible".
 *	This may decrease compression ratio dramatically, but will be
 *	faster on incompressible data. Increasing this value will make
 *	the algorithm search more before declaring a segment "incompressible".
 *	This could improve compression a bit, but will be slower on
 *	incompressible data. The default value (6) is recommended.
 */
#define	NOTCOMPRESSIBLE_CONFIRMATION 6

/*
 * Little Endian or Big Endian?
 * Note: overwrite the below #define if you know your architecture endianness.
 *
 * LZ4_BIG_ENDIAN is defined to 1 only when _ZFS_BIG_ENDIAN is defined;
 * otherwise it is explicitly left undefined.
 */
#if defined(_ZFS_BIG_ENDIAN)
#define	LZ4_BIG_ENDIAN 1
#else
/*
 * Little Endian assumed. PDP Endian and other very rare endian format
 * are unsupported.
 */
#undef LZ4_BIG_ENDIAN
#endif

/*-************************************
*  CPU Feature Detection
**************************************/
/* LZ4_FORCE_MEMORY_ACCESS
 * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
 * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
 * The switch below allows selecting a different access method for improved performance.
 * Method 0 (default) : use `memcpy()`. Safe and portable.
 * Method 1 : `__packed` statement. It depends on a compiler extension (i.e., not portable).
 *            This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
 * Method 2 : direct access. This method is portable but violates the C standard.
 *            It can generate buggy code on targets whose assembly generation depends on alignment.
 *            But in some circumstances, it's the only known way to get the most performance (i.e. GCC + ARMv6)
 * See https://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
 * Prefer these methods in priority order (0 > 1 > 2)
 */
#ifndef LZ4_FORCE_MEMORY_ACCESS   /* can be defined externally */
#  if defined(__GNUC__) && \
  ( defined(__ARM_ARCH_6__)  || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) \
  || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
#    define LZ4_FORCE_MEMORY_ACCESS 2
#  elif (defined(__INTEL_COMPILER) && !defined(_WIN32)) || defined(__GNUC__)
#    define LZ4_FORCE_MEMORY_ACCESS 1
#  endif
#endif

/*
 * LZ4_FORCE_SW_BITCOUNT
 * Define this parameter if your target system or compiler does not support hardware bit count
 */
/*
 * Illumos : we can't use GCC's __builtin_ctz family of builtins in the
 * kernel
 * Linux : we can use GCC's __builtin_ctz family of builtins in the
 * kernel
 *
 * Any earlier definition is dropped first, so the macro ends up defined
 * only when __sunos__ is defined.
 */
#undef	LZ4_FORCE_SW_BITCOUNT
#if defined(__sunos__)
#define	LZ4_FORCE_SW_BITCOUNT
#endif

/*
 * Compiler Options
 */
/* Disable restrict: the keyword expands to nothing below this point. */
#define	restrict

/*
 * Linux : GCC_VERSION is defined as of 3.9-rc1, so undefine it.
 * torvalds/linux@3f3f8d2f48acfd8ed3b8e6b7377935da57b27b16
 */
#ifdef GCC_VERSION
#undef GCC_VERSION
#endif

/* Major and minor compiler version folded into one number (major * 100 + minor). */
#define	GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)

/*
 * LZ4_FORCE_INLINE
 * Storage class and inlining qualifiers placed in front of the small helper
 * functions of this file. Can be defined externally.
 */
#ifndef LZ4_FORCE_INLINE
#  ifdef _MSC_VER    /* Visual Studio */
#    define LZ4_FORCE_INLINE static __forceinline
#  else
#    if defined (__cplusplus) || (defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L)   /* C99 */
#      ifdef __GNUC__
#        define LZ4_FORCE_INLINE static inline __attribute__((always_inline))
#      else
#        define LZ4_FORCE_INLINE static inline
#      endif
#    else
#      define LZ4_FORCE_INLINE static
#    endif /* __STDC_VERSION__ */
#  endif  /* _MSC_VER */
#endif /* LZ4_FORCE_INLINE */

/* LZ4_FORCE_O2 and LZ4_FORCE_INLINE
 * gcc on ppc64le generates an unrolled SIMDized loop for LZ4_wildCopy8,
 * together with a simple 8-byte copy loop as a fall-back path.
 * However, this optimization hurts the decompression speed by >30%,
 * because the execution does not go to the optimized loop
 * for typical compressible data, and all of the preamble checks
 * before going to the fall-back path become useless overhead.
 * This optimization happens only with the -O3 flag, and -O2 generates
 * a simple 8-byte copy loop.
 * With gcc on ppc64le, all of the LZ4_decompress_* and LZ4_wildCopy8
 * functions are annotated with __attribute__((optimize("O2"))),
 * and also LZ4_wildCopy8 is forcibly inlined, so that the O2 attribute
 * of LZ4_wildCopy8 does not affect the compression speed.
 */
#if defined(__PPC64__) && defined(__LITTLE_ENDIAN__) && defined(__GNUC__) && !defined(__clang__)
#  define LZ4_FORCE_O2  __attribute__((optimize("O2")))
#  undef LZ4_FORCE_INLINE
#  define LZ4_FORCE_INLINE  static __inline __attribute__((optimize("O2"),always_inline))
#else
#  define LZ4_FORCE_O2
#endif

/*
 * Branch prediction hints.
 * expect(expr, value) evaluates to expr; on GCC >= 3, ICC >= 800 and clang
 * it also tells the compiler that expr is expected to equal value.
 * likely()/unlikely() normalise their argument to 0 or 1 first.
 * Each macro is only defined when it does not already exist.
 */
#ifndef expect
#if (defined(__GNUC__) && (__GNUC__ >= 3)) || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) || defined(__clang__)
#  define expect(expr,value)    (__builtin_expect ((expr),(value)) )
#else
#  define expect(expr,value)    (expr)
#endif
#endif

#ifndef likely
#define	likely(expr)   expect((expr) != 0, 1)
#endif

#ifndef unlikely
#define	unlikely(expr) expect((expr) != 0, 0)
#endif

/*
 * Memory routines.
 * The libc headers are only pulled in outside the kernel (_KERNEL undefined);
 * the wrapper macros themselves are defined unconditionally.
 */
#ifndef _KERNEL
#include <stdlib.h>   /* malloc, calloc, free */
#include <string.h>   /* memset, memcpy */
#endif
#define ALLOC(s)          malloc(s)
#define ALLOC_AND_ZERO(s) calloc(1,s)
#define FREEMEM(p)        free(p)

#define MEM_INIT(p,v,s)   memset((p),(v),(s))


/*-************************************
*  Common Constants
**************************************/
#define MINMATCH 4

#define WILDCOPYLENGTH 8
#define LASTLITERALS   5   /* see ../doc/lz4_Block_format.md#parsing-restrictions */
#define MFLIMIT       12   /* see ../doc/lz4_Block_format.md#parsing-restrictions */
#define MATCH_SAFEGUARD_DISTANCE  ((2*WILDCOPYLENGTH) - MINMATCH)   /* ensure it's possible to write 2 x wildcopyLength without overflowing output buffer */
#define FASTLOOP_SAFE_DISTANCE 64

/* Size suffixes, written after a number: `64 KB` expands to `64 *(1 <<10)`. */
#define KB *(1 <<10)
#define MB *(1 <<20)
#define GB *(1U<<30)

#ifndef LZ4_DISTANCE_MAX   /* history window size; can be user-defined at compile time */
#  define LZ4_DISTANCE_MAX 65535   /* set to maximum value by default */
#endif

#define LZ4_DISTANCE_ABSOLUTE_MAX 65535
#if (LZ4_DISTANCE_MAX > LZ4_DISTANCE_ABSOLUTE_MAX)   /* max supported by LZ4 format */
#  error "LZ4_DISTANCE_MAX is too big : must be <= 65535"
#endif

/* Bit widths and masks: ML_BITS bits on one side, the remaining RUN_BITS of a byte on the other. */
#define ML_BITS  4
#define ML_MASK  ((1U<<ML_BITS)-1)
#define RUN_BITS (8-ML_BITS)
#define RUN_MASK ((1U<<RUN_BITS)-1)

/* Debug tracing is compiled out: the macro expands to an empty block and its arguments are discarded. */
#define DEBUGLOG(l, ...) {}    /* disabled */

/* Fall back to ASSERT when no assert() macro exists (ASSERT is presumably supplied by sys/zfs_context.h -- confirm). */
#ifndef assert
#define assert ASSERT
#endif

/*-************************************
*  Types
**************************************/
/*
 * Fixed-width aliases used throughout the decoder. With C++ or C99 and later
 * they map onto the <stdint.h> types; otherwise onto the basic integer types,
 * after checking that unsigned int is 32 bits wide.
 */
#ifndef _KERNEL
#include <limits.h>
#endif
#if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
#ifndef _KERNEL
#include <stdint.h>
#endif
  typedef  uint8_t BYTE;
  typedef uint16_t U16;
  typedef uint32_t U32;
  typedef  int32_t S32;
  typedef uint64_t U64;
  typedef uintptr_t uptrval;
#else
# if UINT_MAX != 4294967295UL
#   error "LZ4 code (when not C++ or C99) assumes that sizeof(int) == 4"
# endif
  typedef unsigned char       BYTE;
  typedef unsigned short      U16;
  typedef unsigned int        U32;
  typedef   signed int        S32;
  typedef unsigned long long  U64;
  typedef size_t              uptrval;   /* generally true, except OpenVMS-64 */
#endif

#if defined(__x86_64__)
  typedef U64    reg_t;   /* 64-bits in x32 mode */
#else
  typedef size_t reg_t;   /* 32-bits in x32 mode */
#endif

/* Output-limit modes; the numeric values are fixed explicitly. */
typedef enum {
    notLimited = 0,
    limitedOutput = 1,
    fillOutput = 2
} limitedOutput_directive;


/*-************************************
*  Reading and writing into memory
**************************************/

/**
 * LZ4 relies on memcpy with a constant size being inlined. In freestanding
 * environments, the compiler can't assume the implementation of memcpy() is
 * standard compliant, so it can't apply its specialized memcpy() inlining
 * logic. When possible, use __builtin_memcpy() to tell the compiler to analyze
 * memcpy() as if it were standard compliant, so it can inline it in freestanding
 * environments. This is needed when decompressing the Linux Kernel, for example.
 */
#if defined(__GNUC__) && (__GNUC__ >= 4)
#define LZ4_memcpy(dst, src, size) __builtin_memcpy(dst, src, size)
#else
#define LZ4_memcpy(dst, src, size) memcpy(dst, src, size)
#endif

LZ4_isLittleEndian(void)303*e92ffd9bSMartin Matuska static unsigned LZ4_isLittleEndian(void)
304eda14cbcSMatt Macy {
305*e92ffd9bSMartin Matuska const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */
306*e92ffd9bSMartin Matuska return one.c[0];
307eda14cbcSMatt Macy }
308eda14cbcSMatt Macy
309eda14cbcSMatt Macy
310*e92ffd9bSMartin Matuska #if defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==2)
311*e92ffd9bSMartin Matuska /* lie to the compiler about data alignment; use with caution */
312*e92ffd9bSMartin Matuska
LZ4_read16(const void * memPtr)313*e92ffd9bSMartin Matuska static U16 LZ4_read16(const void* memPtr) { return *(const U16*) memPtr; }
314*e92ffd9bSMartin Matuska
LZ4_write16(void * memPtr,U16 value)315*e92ffd9bSMartin Matuska static void LZ4_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; }
LZ4_write32(void * memPtr,U32 value)316*e92ffd9bSMartin Matuska static void LZ4_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; }
317*e92ffd9bSMartin Matuska
318*e92ffd9bSMartin Matuska #elif defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==1)
319*e92ffd9bSMartin Matuska
320*e92ffd9bSMartin Matuska /* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
321*e92ffd9bSMartin Matuska /* currently only defined for gcc and icc */
322*e92ffd9bSMartin Matuska typedef union { U16 u16; U32 u32; reg_t uArch; } __attribute__((packed)) unalign;
323*e92ffd9bSMartin Matuska
LZ4_read16(const void * ptr)324*e92ffd9bSMartin Matuska static U16 LZ4_read16(const void* ptr) { return ((const unalign*)ptr)->u16; }
325*e92ffd9bSMartin Matuska
LZ4_write32(void * memPtr,U32 value)326*e92ffd9bSMartin Matuska static void LZ4_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = value; }
327*e92ffd9bSMartin Matuska
328*e92ffd9bSMartin Matuska #else /* safe and portable access using memcpy() */
329*e92ffd9bSMartin Matuska
LZ4_read16(const void * memPtr)330*e92ffd9bSMartin Matuska static U16 LZ4_read16(const void* memPtr)
331eda14cbcSMatt Macy {
332*e92ffd9bSMartin Matuska U16 val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val;
333eda14cbcSMatt Macy }
334eda14cbcSMatt Macy
LZ4_write32(void * memPtr,U32 value)335*e92ffd9bSMartin Matuska static void LZ4_write32(void* memPtr, U32 value)
336eda14cbcSMatt Macy {
337*e92ffd9bSMartin Matuska LZ4_memcpy(memPtr, &value, sizeof(value));
338eda14cbcSMatt Macy }
339eda14cbcSMatt Macy
340*e92ffd9bSMartin Matuska #endif /* LZ4_FORCE_MEMORY_ACCESS */
341eda14cbcSMatt Macy
LZ4_readLE16(const void * memPtr)342*e92ffd9bSMartin Matuska static U16 LZ4_readLE16(const void* memPtr)
343*e92ffd9bSMartin Matuska {
344*e92ffd9bSMartin Matuska if (LZ4_isLittleEndian()) {
345*e92ffd9bSMartin Matuska return LZ4_read16(memPtr);
346*e92ffd9bSMartin Matuska } else {
347*e92ffd9bSMartin Matuska const BYTE* p = (const BYTE*)memPtr;
348*e92ffd9bSMartin Matuska return (U16)((U16)p[0] + (p[1]<<8));
349*e92ffd9bSMartin Matuska }
350eda14cbcSMatt Macy }
351eda14cbcSMatt Macy
352*e92ffd9bSMartin Matuska /* customized variant of memcpy, which can overwrite up to 8 bytes beyond dstEnd */
353*e92ffd9bSMartin Matuska LZ4_FORCE_INLINE
LZ4_wildCopy8(void * dstPtr,const void * srcPtr,void * dstEnd)354*e92ffd9bSMartin Matuska void LZ4_wildCopy8(void* dstPtr, const void* srcPtr, void* dstEnd)
355*e92ffd9bSMartin Matuska {
356*e92ffd9bSMartin Matuska BYTE* d = (BYTE*)dstPtr;
357*e92ffd9bSMartin Matuska const BYTE* s = (const BYTE*)srcPtr;
358*e92ffd9bSMartin Matuska BYTE* const e = (BYTE*)dstEnd;
359eda14cbcSMatt Macy
360*e92ffd9bSMartin Matuska do { LZ4_memcpy(d,s,8); d+=8; s+=8; } while (d<e);
361eda14cbcSMatt Macy }
362*e92ffd9bSMartin Matuska
/*
 * Source pointer adjustments, indexed by match offset (0..7), used by
 * LZ4_memcpy_using_offset_base() for offsets below 8: inc32table[offset] is
 * added before the second 4-byte copy and dec64table[offset] is subtracted
 * afterwards.
 */
static const unsigned inc32table[8] = {0, 1, 2,  1,  0,  4, 4, 4};
static const int      dec64table[8] = {0, 0, 0, -1, -4,  1, 2, 3};


/*
 * LZ4_FAST_DEC_LOOP
 * Enabled (1) by default on x86 / x86-64 and on aarch64 with non-clang
 * compilers, disabled (0) everywhere else. Can be defined externally.
 */
#ifndef LZ4_FAST_DEC_LOOP
#  if defined __i386__ || defined _M_IX86 || defined __x86_64__ || defined _M_X64
#    define LZ4_FAST_DEC_LOOP 1
#  elif defined(__aarch64__) && !defined(__clang__)
     /* On aarch64, we disable this optimization for clang because on certain
      * mobile chipsets, performance is reduced with clang. For information
      * refer to https://github.com/lz4/lz4/pull/707 */
#    define LZ4_FAST_DEC_LOOP 1
#  else
#    define LZ4_FAST_DEC_LOOP 0
#  endif
#endif

#if LZ4_FAST_DEC_LOOP

/*
 * Copy a match whose source lies `offset` bytes before the destination
 * (asserted: srcPtr + offset == dstPtr).
 * The first 8 bytes are handled specially: for offset < 8 they are written as
 * four single bytes plus one 4-byte copy, with the source pointer adjusted
 * through inc32table / dec64table; for offset >= 8 a plain 8-byte copy is
 * used. The remainder is delegated to LZ4_wildCopy8().
 */
LZ4_FORCE_INLINE void
LZ4_memcpy_using_offset_base(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset)
{
    assert(srcPtr + offset == dstPtr);
    if (offset < 8) {
        LZ4_write32(dstPtr, 0);   /* silence an msan warning when offset==0 */
        dstPtr[0] = srcPtr[0];
        dstPtr[1] = srcPtr[1];
        dstPtr[2] = srcPtr[2];
        dstPtr[3] = srcPtr[3];
        srcPtr += inc32table[offset];
        LZ4_memcpy(dstPtr+4, srcPtr, 4);
        srcPtr -= dec64table[offset];
        dstPtr += 8;
    } else {
        LZ4_memcpy(dstPtr, srcPtr, 8);
        dstPtr += 8;
        srcPtr += 8;
    }

    LZ4_wildCopy8(dstPtr, srcPtr, dstEnd);
}

/* customized variant of memcpy, which can overwrite up to 32 bytes beyond dstEnd
 * this version copies two times 16 bytes (instead of one time 32 bytes)
 * because it must be compatible with offsets >= 16. */
LZ4_FORCE_INLINE void
LZ4_wildCopy32(void* dstPtr, const void* srcPtr, void* dstEnd)
{
    BYTE* d = (BYTE*)dstPtr;
    const BYTE* s = (const BYTE*)srcPtr;
    BYTE* const e = (BYTE*)dstEnd;

    do { LZ4_memcpy(d,s,16); LZ4_memcpy(d+16,s+16,16); d+=32; s+=32; } while (d<e);
}

/* LZ4_memcpy_using_offset()  presumes :
 * - dstEnd >= dstPtr + MINMATCH
 * - there is at least 8 bytes available to write after dstEnd
 *
 * For offsets 1, 2 and 4 an 8-byte pattern is built once in a local buffer
 * and then stored repeatedly until dstPtr reaches dstEnd; every other offset
 * is forwarded to LZ4_memcpy_using_offset_base(). */
LZ4_FORCE_INLINE void
LZ4_memcpy_using_offset(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset)
{
    BYTE v[8];

    assert(dstEnd >= dstPtr + MINMATCH);

    switch(offset) {
    case 1:
        /* one byte repeated 8 times */
        MEM_INIT(v, *srcPtr, 8);
        break;
    case 2:
        /* 2-byte pattern repeated 4 times */
        LZ4_memcpy(v, srcPtr, 2);
        LZ4_memcpy(&v[2], srcPtr, 2);
        LZ4_memcpy(&v[4], v, 4);
        break;
    case 4:
        /* 4-byte pattern repeated twice */
        LZ4_memcpy(v, srcPtr, 4);
        LZ4_memcpy(&v[4], srcPtr, 4);
        break;
    default:
        LZ4_memcpy_using_offset_base(dstPtr, srcPtr, dstEnd, offset);
        return;
    }

    /* the first store is unconditional, further ones run until dstEnd is reached */
    LZ4_memcpy(dstPtr, v, 8);
    dstPtr += 8;
    while (dstPtr < dstEnd) {
        LZ4_memcpy(dstPtr, v, 8);
        dstPtr += 8;
    }
}
#endif


/*-************************************
*  Local Structures and types
**************************************/
/* Table kinds; values run from clearedTable = 0 upwards in declaration order. */
typedef enum { clearedTable = 0, byPtr, byU32, byU16 } tableType_t;

/**
 * This enum distinguishes several different modes of accessing previous
 * content in the stream.
 *
 * - noDict        : There is no preceding content.
 * - withPrefix64k : Table entries up to ctx->dictSize before the current
 *                   blob being compressed are valid and refer to the preceding
 *                   content (of length ctx->dictSize), which is available
 *                   contiguously preceding in memory the content currently
 *                   being compressed.
 * - usingExtDict  : Like withPrefix64k, but the preceding content is somewhere
 *                   else in memory, starting at ctx->dictionary with length
 *                   ctx->dictSize.
 * - usingDictCtx  : Like usingExtDict, but everything concerning the preceding
 *                   content is in a separate context, pointed to by
 *                   ctx->dictCtx. ctx->dictionary, ctx->dictSize, and table
 *                   entries in the current context that refer to positions
 *                   preceding the beginning of the current compression are
 *                   ignored. Instead, ctx->dictCtx->dictionary and ctx->dictCtx
 *                   ->dictSize describe the location and size of the preceding
 *                   content, and matches are found by looking in the ctx
 *                   ->dictCtx->hashTable.
 */
typedef enum { noDict = 0, withPrefix64k, usingExtDict, usingDictCtx } dict_directive;
typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive;

/*-*******************************
 *  Decompression functions
 ********************************/

/* Directive flags passed to the generic decoder. */
typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } endCondition_directive;
typedef enum { decode_full_block = 0, partial_decode = 1 } earlyEnd_directive;

/* Status reported by read_variable_length() through its `error` argument. */
typedef enum { loop_error = -2, initial_error = -1, ok = 0 } variable_length_error;

496*e92ffd9bSMartin Matuska LZ4_FORCE_INLINE unsigned
read_variable_length(const BYTE ** ip,const BYTE * lencheck,int loop_check,int initial_check,variable_length_error * error)497*e92ffd9bSMartin Matuska read_variable_length(const BYTE**ip, const BYTE* lencheck,
498*e92ffd9bSMartin Matuska int loop_check, int initial_check,
499*e92ffd9bSMartin Matuska variable_length_error* error)
500eda14cbcSMatt Macy {
501*e92ffd9bSMartin Matuska U32 length = 0;
502*e92ffd9bSMartin Matuska U32 s;
503*e92ffd9bSMartin Matuska if (initial_check && unlikely((*ip) >= lencheck)) { /* overflow detection */
504*e92ffd9bSMartin Matuska *error = initial_error;
505*e92ffd9bSMartin Matuska return length;
506*e92ffd9bSMartin Matuska }
507*e92ffd9bSMartin Matuska do {
508*e92ffd9bSMartin Matuska s = **ip;
509*e92ffd9bSMartin Matuska (*ip)++;
510*e92ffd9bSMartin Matuska length += s;
511*e92ffd9bSMartin Matuska if (loop_check && unlikely((*ip) >= lencheck)) { /* overflow detection */
512*e92ffd9bSMartin Matuska *error = loop_error;
513*e92ffd9bSMartin Matuska return length;
514*e92ffd9bSMartin Matuska }
515*e92ffd9bSMartin Matuska } while (s==255);
516eda14cbcSMatt Macy
517*e92ffd9bSMartin Matuska return length;
518*e92ffd9bSMartin Matuska }
519*e92ffd9bSMartin Matuska
/* LZ4_STATIC_ASSERT() simply forwards its condition to ASSERT(). */
#define LZ4_STATIC_ASSERT(c) ASSERT(c)


523*e92ffd9bSMartin Matuska /*! LZ4_decompress_generic() :
524*e92ffd9bSMartin Matuska * This generic decompression function covers all use cases.
525*e92ffd9bSMartin Matuska * It shall be instantiated several times, using different sets of directives.
526*e92ffd9bSMartin Matuska * Note that it is important for performance that this function really get inlined,
527*e92ffd9bSMartin Matuska * in order to remove useless branches during compilation optimization.
528*e92ffd9bSMartin Matuska */
529*e92ffd9bSMartin Matuska LZ4_FORCE_INLINE int
LZ4_decompress_generic(const char * const src,char * const dst,int srcSize,int outputSize,endCondition_directive endOnInput,earlyEnd_directive partialDecoding,dict_directive dict,const BYTE * const lowPrefix,const BYTE * const dictStart,const size_t dictSize)530*e92ffd9bSMartin Matuska LZ4_decompress_generic(
531*e92ffd9bSMartin Matuska const char* const src,
532*e92ffd9bSMartin Matuska char* const dst,
533*e92ffd9bSMartin Matuska int srcSize,
534*e92ffd9bSMartin Matuska int outputSize, /* If endOnInput==endOnInputSize, this value is `dstCapacity` */
535*e92ffd9bSMartin Matuska
536*e92ffd9bSMartin Matuska endCondition_directive endOnInput, /* endOnOutputSize, endOnInputSize */
537*e92ffd9bSMartin Matuska earlyEnd_directive partialDecoding, /* full, partial */
538*e92ffd9bSMartin Matuska dict_directive dict, /* noDict, withPrefix64k, usingExtDict */
539*e92ffd9bSMartin Matuska const BYTE* const lowPrefix, /* always <= dst, == dst when no prefix */
540*e92ffd9bSMartin Matuska const BYTE* const dictStart, /* only if dict==usingExtDict */
541*e92ffd9bSMartin Matuska const size_t dictSize /* note : = 0 if noDict */
542*e92ffd9bSMartin Matuska )
543*e92ffd9bSMartin Matuska {
544*e92ffd9bSMartin Matuska if ((src == NULL) || (outputSize < 0)) { return -1; }
545*e92ffd9bSMartin Matuska
546*e92ffd9bSMartin Matuska { const BYTE* ip = (const BYTE*) src;
547*e92ffd9bSMartin Matuska const BYTE* const iend = ip + srcSize;
548*e92ffd9bSMartin Matuska
549*e92ffd9bSMartin Matuska BYTE* op = (BYTE*) dst;
550*e92ffd9bSMartin Matuska BYTE* const oend = op + outputSize;
551eda14cbcSMatt Macy BYTE* cpy;
552eda14cbcSMatt Macy
553*e92ffd9bSMartin Matuska const BYTE* const dictEnd = (dictStart == NULL) ? NULL : dictStart + dictSize;
554*e92ffd9bSMartin Matuska
555*e92ffd9bSMartin Matuska const int safeDecode = (endOnInput==endOnInputSize);
556*e92ffd9bSMartin Matuska const int checkOffset = ((safeDecode) && (dictSize < (int)(64 KB)));
557*e92ffd9bSMartin Matuska
558*e92ffd9bSMartin Matuska
559*e92ffd9bSMartin Matuska /* Set up the "end" pointers for the shortcut. */
560*e92ffd9bSMartin Matuska const BYTE* const shortiend = iend - (endOnInput ? 14 : 8) /*maxLL*/ - 2 /*offset*/;
561*e92ffd9bSMartin Matuska const BYTE* const shortoend = oend - (endOnInput ? 14 : 8) /*maxLL*/ - 18 /*maxML*/;
562*e92ffd9bSMartin Matuska
563*e92ffd9bSMartin Matuska const BYTE* match;
564*e92ffd9bSMartin Matuska size_t offset;
565eda14cbcSMatt Macy unsigned token;
566eda14cbcSMatt Macy size_t length;
567eda14cbcSMatt Macy
568*e92ffd9bSMartin Matuska
569*e92ffd9bSMartin Matuska DEBUGLOG(5, "LZ4_decompress_generic (srcSize:%i, dstSize:%i)", srcSize, outputSize);
570*e92ffd9bSMartin Matuska
571*e92ffd9bSMartin Matuska /* Special cases */
572*e92ffd9bSMartin Matuska assert(lowPrefix <= op);
573*e92ffd9bSMartin Matuska if ((endOnInput) && (unlikely(outputSize==0))) {
574*e92ffd9bSMartin Matuska /* Empty output buffer */
575*e92ffd9bSMartin Matuska if (partialDecoding) return 0;
576*e92ffd9bSMartin Matuska return ((srcSize==1) && (*ip==0)) ? 0 : -1;
577*e92ffd9bSMartin Matuska }
578*e92ffd9bSMartin Matuska if ((!endOnInput) && (unlikely(outputSize==0))) { return (*ip==0 ? 1 : -1); }
579*e92ffd9bSMartin Matuska if ((endOnInput) && unlikely(srcSize==0)) { return -1; }
580*e92ffd9bSMartin Matuska
581*e92ffd9bSMartin Matuska /* Currently the fast loop shows a regression on qualcomm arm chips. */
582*e92ffd9bSMartin Matuska #if LZ4_FAST_DEC_LOOP
583*e92ffd9bSMartin Matuska if ((oend - op) < FASTLOOP_SAFE_DISTANCE) {
584*e92ffd9bSMartin Matuska DEBUGLOG(6, "skip fast decode loop");
585*e92ffd9bSMartin Matuska goto safe_decode;
586*e92ffd9bSMartin Matuska }
587*e92ffd9bSMartin Matuska
588*e92ffd9bSMartin Matuska /* Fast loop : decode sequences as long as output < iend-FASTLOOP_SAFE_DISTANCE */
589*e92ffd9bSMartin Matuska while (1) {
590*e92ffd9bSMartin Matuska /* Main fastloop assertion: We can always wildcopy FASTLOOP_SAFE_DISTANCE */
591*e92ffd9bSMartin Matuska assert(oend - op >= FASTLOOP_SAFE_DISTANCE);
592*e92ffd9bSMartin Matuska if (endOnInput) { assert(ip < iend); }
593eda14cbcSMatt Macy token = *ip++;
594*e92ffd9bSMartin Matuska length = token >> ML_BITS; /* literal length */
595*e92ffd9bSMartin Matuska
596*e92ffd9bSMartin Matuska assert(!endOnInput || ip <= iend); /* ip < iend before the increment */
597*e92ffd9bSMartin Matuska
598*e92ffd9bSMartin Matuska /* decode literal length */
599*e92ffd9bSMartin Matuska if (length == RUN_MASK) {
600*e92ffd9bSMartin Matuska variable_length_error error = ok;
601*e92ffd9bSMartin Matuska length += read_variable_length(&ip, iend-RUN_MASK, (int)endOnInput, (int)endOnInput, &error);
602*e92ffd9bSMartin Matuska if (error == initial_error) { goto _output_error; }
603*e92ffd9bSMartin Matuska if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; } /* overflow detection */
604*e92ffd9bSMartin Matuska if ((safeDecode) && unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; } /* overflow detection */
605*e92ffd9bSMartin Matuska
606eda14cbcSMatt Macy /* copy literals */
607eda14cbcSMatt Macy cpy = op+length;
608*e92ffd9bSMartin Matuska LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH);
609*e92ffd9bSMartin Matuska if (endOnInput) { /* LZ4_decompress_safe() */
610*e92ffd9bSMartin Matuska if ((cpy>oend-32) || (ip+length>iend-32)) { goto safe_literal_copy; }
611*e92ffd9bSMartin Matuska LZ4_wildCopy32(op, ip, cpy);
612*e92ffd9bSMartin Matuska } else { /* LZ4_decompress_fast() */
613*e92ffd9bSMartin Matuska if (cpy>oend-8) { goto safe_literal_copy; }
614*e92ffd9bSMartin Matuska LZ4_wildCopy8(op, ip, cpy); /* LZ4_decompress_fast() cannot copy more than 8 bytes at a time :
615*e92ffd9bSMartin Matuska * it doesn't know input length, and only relies on end-of-block properties */
616eda14cbcSMatt Macy }
617*e92ffd9bSMartin Matuska ip += length; op = cpy;
618*e92ffd9bSMartin Matuska } else {
619*e92ffd9bSMartin Matuska cpy = op+length;
620*e92ffd9bSMartin Matuska if (endOnInput) { /* LZ4_decompress_safe() */
621*e92ffd9bSMartin Matuska DEBUGLOG(7, "copy %u bytes in a 16-bytes stripe", (unsigned)length);
622*e92ffd9bSMartin Matuska /* We don't need to check oend, since we check it once for each loop below */
623*e92ffd9bSMartin Matuska if (ip > iend-(16 + 1/*max lit + offset + nextToken*/)) { goto safe_literal_copy; }
624*e92ffd9bSMartin Matuska /* Literals can only be 14, but hope compilers optimize if we copy by a register size */
625*e92ffd9bSMartin Matuska LZ4_memcpy(op, ip, 16);
626*e92ffd9bSMartin Matuska } else { /* LZ4_decompress_fast() */
627*e92ffd9bSMartin Matuska /* LZ4_decompress_fast() cannot copy more than 8 bytes at a time :
628*e92ffd9bSMartin Matuska * it doesn't know input length, and relies on end-of-block properties */
629*e92ffd9bSMartin Matuska LZ4_memcpy(op, ip, 8);
630*e92ffd9bSMartin Matuska if (length > 8) { LZ4_memcpy(op+8, ip+8, 8); }
631*e92ffd9bSMartin Matuska }
632*e92ffd9bSMartin Matuska ip += length; op = cpy;
633*e92ffd9bSMartin Matuska }
634eda14cbcSMatt Macy
635eda14cbcSMatt Macy /* get offset */
636*e92ffd9bSMartin Matuska offset = LZ4_readLE16(ip); ip+=2;
637*e92ffd9bSMartin Matuska match = op - offset;
638*e92ffd9bSMartin Matuska assert(match <= op);
639eda14cbcSMatt Macy
640eda14cbcSMatt Macy /* get matchlength */
641*e92ffd9bSMartin Matuska length = token & ML_MASK;
642*e92ffd9bSMartin Matuska
643*e92ffd9bSMartin Matuska if (length == ML_MASK) {
644*e92ffd9bSMartin Matuska variable_length_error error = ok;
645*e92ffd9bSMartin Matuska if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) { goto _output_error; } /* Error : offset outside buffers */
646*e92ffd9bSMartin Matuska length += read_variable_length(&ip, iend - LASTLITERALS + 1, (int)endOnInput, 0, &error);
647*e92ffd9bSMartin Matuska if (error != ok) { goto _output_error; }
648*e92ffd9bSMartin Matuska if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)op)) { goto _output_error; } /* overflow detection */
649*e92ffd9bSMartin Matuska length += MINMATCH;
650*e92ffd9bSMartin Matuska if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) {
651*e92ffd9bSMartin Matuska goto safe_match_copy;
652*e92ffd9bSMartin Matuska }
653*e92ffd9bSMartin Matuska } else {
654*e92ffd9bSMartin Matuska length += MINMATCH;
655*e92ffd9bSMartin Matuska if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) {
656*e92ffd9bSMartin Matuska goto safe_match_copy;
657*e92ffd9bSMartin Matuska }
658*e92ffd9bSMartin Matuska
659*e92ffd9bSMartin Matuska /* Fastpath check: Avoids a branch in LZ4_wildCopy32 if true */
660*e92ffd9bSMartin Matuska if ((dict == withPrefix64k) || (match >= lowPrefix)) {
661*e92ffd9bSMartin Matuska if (offset >= 8) {
662*e92ffd9bSMartin Matuska assert(match >= lowPrefix);
663*e92ffd9bSMartin Matuska assert(match <= op);
664*e92ffd9bSMartin Matuska assert(op + 18 <= oend);
665*e92ffd9bSMartin Matuska
666*e92ffd9bSMartin Matuska LZ4_memcpy(op, match, 8);
667*e92ffd9bSMartin Matuska LZ4_memcpy(op+8, match+8, 8);
668*e92ffd9bSMartin Matuska LZ4_memcpy(op+16, match+16, 2);
669*e92ffd9bSMartin Matuska op += length;
670eda14cbcSMatt Macy continue;
671*e92ffd9bSMartin Matuska } } }
672*e92ffd9bSMartin Matuska
673*e92ffd9bSMartin Matuska if (checkOffset && (unlikely(match + dictSize < lowPrefix))) { goto _output_error; } /* Error : offset outside buffers */
674*e92ffd9bSMartin Matuska /* match starting within external dictionary */
675*e92ffd9bSMartin Matuska if ((dict==usingExtDict) && (match < lowPrefix)) {
676*e92ffd9bSMartin Matuska if (unlikely(op+length > oend-LASTLITERALS)) {
677*e92ffd9bSMartin Matuska if (partialDecoding) {
678*e92ffd9bSMartin Matuska DEBUGLOG(7, "partialDecoding: dictionary match, close to dstEnd");
679*e92ffd9bSMartin Matuska length = MIN(length, (size_t)(oend-op));
680*e92ffd9bSMartin Matuska } else {
681*e92ffd9bSMartin Matuska goto _output_error; /* end-of-block condition violated */
682*e92ffd9bSMartin Matuska } }
683*e92ffd9bSMartin Matuska
684*e92ffd9bSMartin Matuska if (length <= (size_t)(lowPrefix-match)) {
685*e92ffd9bSMartin Matuska /* match fits entirely within external dictionary : just copy */
686*e92ffd9bSMartin Matuska memmove(op, dictEnd - (lowPrefix-match), length);
687*e92ffd9bSMartin Matuska op += length;
688*e92ffd9bSMartin Matuska } else {
689*e92ffd9bSMartin Matuska /* match stretches into both external dictionary and current block */
690*e92ffd9bSMartin Matuska size_t const copySize = (size_t)(lowPrefix - match);
691*e92ffd9bSMartin Matuska size_t const restSize = length - copySize;
692*e92ffd9bSMartin Matuska LZ4_memcpy(op, dictEnd - copySize, copySize);
693*e92ffd9bSMartin Matuska op += copySize;
694*e92ffd9bSMartin Matuska if (restSize > (size_t)(op - lowPrefix)) { /* overlap copy */
695*e92ffd9bSMartin Matuska BYTE* const endOfMatch = op + restSize;
696*e92ffd9bSMartin Matuska const BYTE* copyFrom = lowPrefix;
697*e92ffd9bSMartin Matuska while (op < endOfMatch) { *op++ = *copyFrom++; }
698*e92ffd9bSMartin Matuska } else {
699*e92ffd9bSMartin Matuska LZ4_memcpy(op, lowPrefix, restSize);
700*e92ffd9bSMartin Matuska op += restSize;
701*e92ffd9bSMartin Matuska } }
702*e92ffd9bSMartin Matuska continue;
703*e92ffd9bSMartin Matuska }
704*e92ffd9bSMartin Matuska
705*e92ffd9bSMartin Matuska /* copy match within block */
706*e92ffd9bSMartin Matuska cpy = op + length;
707*e92ffd9bSMartin Matuska
708*e92ffd9bSMartin Matuska assert((op <= oend) && (oend-op >= 32));
709*e92ffd9bSMartin Matuska if (unlikely(offset<16)) {
710*e92ffd9bSMartin Matuska LZ4_memcpy_using_offset(op, match, cpy, offset);
711*e92ffd9bSMartin Matuska } else {
712*e92ffd9bSMartin Matuska LZ4_wildCopy32(op, match, cpy);
713*e92ffd9bSMartin Matuska }
714*e92ffd9bSMartin Matuska
715*e92ffd9bSMartin Matuska op = cpy; /* wildcopy correction */
716*e92ffd9bSMartin Matuska }
717*e92ffd9bSMartin Matuska safe_decode:
718*e92ffd9bSMartin Matuska #endif
719*e92ffd9bSMartin Matuska
720*e92ffd9bSMartin Matuska /* Main Loop : decode remaining sequences where output < FASTLOOP_SAFE_DISTANCE */
721*e92ffd9bSMartin Matuska while (1) {
722*e92ffd9bSMartin Matuska token = *ip++;
723*e92ffd9bSMartin Matuska length = token >> ML_BITS; /* literal length */
724*e92ffd9bSMartin Matuska
725*e92ffd9bSMartin Matuska assert(!endOnInput || ip <= iend); /* ip < iend before the increment */
726*e92ffd9bSMartin Matuska
727*e92ffd9bSMartin Matuska /* A two-stage shortcut for the most common case:
728*e92ffd9bSMartin Matuska * 1) If the literal length is 0..14, and there is enough space,
729*e92ffd9bSMartin Matuska * enter the shortcut and copy 16 bytes on behalf of the literals
730*e92ffd9bSMartin Matuska * (in the fast mode, only 8 bytes can be safely copied this way).
731*e92ffd9bSMartin Matuska * 2) Further if the match length is 4..18, copy 18 bytes in a similar
732*e92ffd9bSMartin Matuska * manner; but we ensure that there's enough space in the output for
733*e92ffd9bSMartin Matuska * those 18 bytes earlier, upon entering the shortcut (in other words,
734*e92ffd9bSMartin Matuska * there is a combined check for both stages).
735*e92ffd9bSMartin Matuska */
736*e92ffd9bSMartin Matuska if ( (endOnInput ? length != RUN_MASK : length <= 8)
737*e92ffd9bSMartin Matuska /* strictly "less than" on input, to re-enter the loop with at least one byte */
738*e92ffd9bSMartin Matuska && likely((endOnInput ? ip < shortiend : 1) & (op <= shortoend)) ) {
739*e92ffd9bSMartin Matuska /* Copy the literals */
740*e92ffd9bSMartin Matuska LZ4_memcpy(op, ip, endOnInput ? 16 : 8);
741*e92ffd9bSMartin Matuska op += length; ip += length;
742*e92ffd9bSMartin Matuska
743*e92ffd9bSMartin Matuska /* The second stage: prepare for match copying, decode full info.
744*e92ffd9bSMartin Matuska * If it doesn't work out, the info won't be wasted. */
745*e92ffd9bSMartin Matuska length = token & ML_MASK; /* match length */
746*e92ffd9bSMartin Matuska offset = LZ4_readLE16(ip); ip += 2;
747*e92ffd9bSMartin Matuska match = op - offset;
748*e92ffd9bSMartin Matuska assert(match <= op); /* check overflow */
749*e92ffd9bSMartin Matuska
750*e92ffd9bSMartin Matuska /* Do not deal with overlapping matches. */
751*e92ffd9bSMartin Matuska if ( (length != ML_MASK)
752*e92ffd9bSMartin Matuska && (offset >= 8)
753*e92ffd9bSMartin Matuska && (dict==withPrefix64k || match >= lowPrefix) ) {
754*e92ffd9bSMartin Matuska /* Copy the match. */
755*e92ffd9bSMartin Matuska LZ4_memcpy(op + 0, match + 0, 8);
756*e92ffd9bSMartin Matuska LZ4_memcpy(op + 8, match + 8, 8);
757*e92ffd9bSMartin Matuska LZ4_memcpy(op +16, match +16, 2);
758*e92ffd9bSMartin Matuska op += length + MINMATCH;
759*e92ffd9bSMartin Matuska /* Both stages worked, load the next token. */
760*e92ffd9bSMartin Matuska continue;
761*e92ffd9bSMartin Matuska }
762*e92ffd9bSMartin Matuska
763*e92ffd9bSMartin Matuska /* The second stage didn't work out, but the info is ready.
764*e92ffd9bSMartin Matuska * Propel it right to the point of match copying. */
765*e92ffd9bSMartin Matuska goto _copy_match;
766*e92ffd9bSMartin Matuska }
767*e92ffd9bSMartin Matuska
768*e92ffd9bSMartin Matuska /* decode literal length */
769*e92ffd9bSMartin Matuska if (length == RUN_MASK) {
770*e92ffd9bSMartin Matuska variable_length_error error = ok;
771*e92ffd9bSMartin Matuska length += read_variable_length(&ip, iend-RUN_MASK, (int)endOnInput, (int)endOnInput, &error);
772*e92ffd9bSMartin Matuska if (error == initial_error) { goto _output_error; }
773*e92ffd9bSMartin Matuska if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; } /* overflow detection */
774*e92ffd9bSMartin Matuska if ((safeDecode) && unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; } /* overflow detection */
775*e92ffd9bSMartin Matuska }
776*e92ffd9bSMartin Matuska
777*e92ffd9bSMartin Matuska /* copy literals */
778*e92ffd9bSMartin Matuska cpy = op+length;
779*e92ffd9bSMartin Matuska #if LZ4_FAST_DEC_LOOP
780*e92ffd9bSMartin Matuska safe_literal_copy:
781*e92ffd9bSMartin Matuska #endif
782*e92ffd9bSMartin Matuska LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH);
783*e92ffd9bSMartin Matuska if ( ((endOnInput) && ((cpy>oend-MFLIMIT) || (ip+length>iend-(2+1+LASTLITERALS))) )
784*e92ffd9bSMartin Matuska || ((!endOnInput) && (cpy>oend-WILDCOPYLENGTH)) )
785*e92ffd9bSMartin Matuska {
786*e92ffd9bSMartin Matuska /* We've either hit the input parsing restriction or the output parsing restriction.
787*e92ffd9bSMartin Matuska * In the normal scenario, decoding a full block, it must be the last sequence,
788*e92ffd9bSMartin Matuska * otherwise it's an error (invalid input or dimensions).
789*e92ffd9bSMartin Matuska * In partialDecoding scenario, it's necessary to ensure there is no buffer overflow.
790*e92ffd9bSMartin Matuska */
791*e92ffd9bSMartin Matuska if (partialDecoding) {
792*e92ffd9bSMartin Matuska /* Since we are partial decoding we may be in this block because of the output parsing
793*e92ffd9bSMartin Matuska * restriction, which is not valid since the output buffer is allowed to be undersized.
794*e92ffd9bSMartin Matuska */
795*e92ffd9bSMartin Matuska assert(endOnInput);
796*e92ffd9bSMartin Matuska DEBUGLOG(7, "partialDecoding: copying literals, close to input or output end")
797*e92ffd9bSMartin Matuska DEBUGLOG(7, "partialDecoding: literal length = %u", (unsigned)length);
798*e92ffd9bSMartin Matuska DEBUGLOG(7, "partialDecoding: remaining space in dstBuffer : %i", (int)(oend - op));
799*e92ffd9bSMartin Matuska DEBUGLOG(7, "partialDecoding: remaining space in srcBuffer : %i", (int)(iend - ip));
800*e92ffd9bSMartin Matuska /* Finishing in the middle of a literals segment,
801*e92ffd9bSMartin Matuska * due to lack of input.
802*e92ffd9bSMartin Matuska */
803*e92ffd9bSMartin Matuska if (ip+length > iend) {
804*e92ffd9bSMartin Matuska length = (size_t)(iend-ip);
805*e92ffd9bSMartin Matuska cpy = op + length;
806*e92ffd9bSMartin Matuska }
807*e92ffd9bSMartin Matuska /* Finishing in the middle of a literals segment,
808*e92ffd9bSMartin Matuska * due to lack of output space.
809*e92ffd9bSMartin Matuska */
810*e92ffd9bSMartin Matuska if (cpy > oend) {
811*e92ffd9bSMartin Matuska cpy = oend;
812*e92ffd9bSMartin Matuska assert(op<=oend);
813*e92ffd9bSMartin Matuska length = (size_t)(oend-op);
814*e92ffd9bSMartin Matuska }
815*e92ffd9bSMartin Matuska } else {
816*e92ffd9bSMartin Matuska /* We must be on the last sequence because of the parsing limitations so check
817*e92ffd9bSMartin Matuska * that we exactly regenerate the original size (must be exact when !endOnInput).
818*e92ffd9bSMartin Matuska */
819*e92ffd9bSMartin Matuska if ((!endOnInput) && (cpy != oend)) { goto _output_error; }
820*e92ffd9bSMartin Matuska /* We must be on the last sequence (or invalid) because of the parsing limitations
821*e92ffd9bSMartin Matuska * so check that we exactly consume the input and don't overrun the output buffer.
822*e92ffd9bSMartin Matuska */
823*e92ffd9bSMartin Matuska if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) {
824*e92ffd9bSMartin Matuska DEBUGLOG(6, "should have been last run of literals")
825*e92ffd9bSMartin Matuska DEBUGLOG(6, "ip(%p) + length(%i) = %p != iend (%p)", ip, (int)length, ip+length, iend);
826*e92ffd9bSMartin Matuska DEBUGLOG(6, "or cpy(%p) > oend(%p)", cpy, oend);
827*e92ffd9bSMartin Matuska goto _output_error;
828*e92ffd9bSMartin Matuska }
829*e92ffd9bSMartin Matuska }
830*e92ffd9bSMartin Matuska memmove(op, ip, length); /* supports overlapping memory regions; only matters for in-place decompression scenarios */
831*e92ffd9bSMartin Matuska ip += length;
832*e92ffd9bSMartin Matuska op += length;
833*e92ffd9bSMartin Matuska /* Necessarily EOF when !partialDecoding.
834*e92ffd9bSMartin Matuska * When partialDecoding, it is EOF if we've either
835*e92ffd9bSMartin Matuska * filled the output buffer or
836*e92ffd9bSMartin Matuska * can't proceed with reading an offset for following match.
837*e92ffd9bSMartin Matuska */
838*e92ffd9bSMartin Matuska if (!partialDecoding || (cpy == oend) || (ip >= (iend-2))) {
839eda14cbcSMatt Macy break;
840eda14cbcSMatt Macy }
841eda14cbcSMatt Macy } else {
842*e92ffd9bSMartin Matuska LZ4_wildCopy8(op, ip, cpy); /* may overwrite up to WILDCOPYLENGTH beyond cpy */
843*e92ffd9bSMartin Matuska ip += length; op = cpy;
844eda14cbcSMatt Macy }
845*e92ffd9bSMartin Matuska
846*e92ffd9bSMartin Matuska /* get offset */
847*e92ffd9bSMartin Matuska offset = LZ4_readLE16(ip); ip+=2;
848*e92ffd9bSMartin Matuska match = op - offset;
849*e92ffd9bSMartin Matuska
850*e92ffd9bSMartin Matuska /* get matchlength */
851*e92ffd9bSMartin Matuska length = token & ML_MASK;
852*e92ffd9bSMartin Matuska
853*e92ffd9bSMartin Matuska _copy_match:
854*e92ffd9bSMartin Matuska if (length == ML_MASK) {
855*e92ffd9bSMartin Matuska variable_length_error error = ok;
856*e92ffd9bSMartin Matuska length += read_variable_length(&ip, iend - LASTLITERALS + 1, (int)endOnInput, 0, &error);
857*e92ffd9bSMartin Matuska if (error != ok) goto _output_error;
858*e92ffd9bSMartin Matuska if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)op)) goto _output_error; /* overflow detection */
859*e92ffd9bSMartin Matuska }
860*e92ffd9bSMartin Matuska length += MINMATCH;
861*e92ffd9bSMartin Matuska
862*e92ffd9bSMartin Matuska #if LZ4_FAST_DEC_LOOP
863*e92ffd9bSMartin Matuska safe_match_copy:
864eda14cbcSMatt Macy #endif
865*e92ffd9bSMartin Matuska if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) goto _output_error; /* Error : offset outside buffers */
866*e92ffd9bSMartin Matuska /* match starting within external dictionary */
867*e92ffd9bSMartin Matuska if ((dict==usingExtDict) && (match < lowPrefix)) {
868*e92ffd9bSMartin Matuska if (unlikely(op+length > oend-LASTLITERALS)) {
869*e92ffd9bSMartin Matuska if (partialDecoding) length = MIN(length, (size_t)(oend-op));
870*e92ffd9bSMartin Matuska else goto _output_error; /* doesn't respect parsing restriction */
871*e92ffd9bSMartin Matuska }
872*e92ffd9bSMartin Matuska
873*e92ffd9bSMartin Matuska if (length <= (size_t)(lowPrefix-match)) {
874*e92ffd9bSMartin Matuska /* match fits entirely within external dictionary : just copy */
875*e92ffd9bSMartin Matuska memmove(op, dictEnd - (lowPrefix-match), length);
876*e92ffd9bSMartin Matuska op += length;
877*e92ffd9bSMartin Matuska } else {
878*e92ffd9bSMartin Matuska /* match stretches into both external dictionary and current block */
879*e92ffd9bSMartin Matuska size_t const copySize = (size_t)(lowPrefix - match);
880*e92ffd9bSMartin Matuska size_t const restSize = length - copySize;
881*e92ffd9bSMartin Matuska LZ4_memcpy(op, dictEnd - copySize, copySize);
882*e92ffd9bSMartin Matuska op += copySize;
883*e92ffd9bSMartin Matuska if (restSize > (size_t)(op - lowPrefix)) { /* overlap copy */
884*e92ffd9bSMartin Matuska BYTE* const endOfMatch = op + restSize;
885*e92ffd9bSMartin Matuska const BYTE* copyFrom = lowPrefix;
886*e92ffd9bSMartin Matuska while (op < endOfMatch) *op++ = *copyFrom++;
887*e92ffd9bSMartin Matuska } else {
888*e92ffd9bSMartin Matuska LZ4_memcpy(op, lowPrefix, restSize);
889*e92ffd9bSMartin Matuska op += restSize;
890*e92ffd9bSMartin Matuska } }
891eda14cbcSMatt Macy continue;
892eda14cbcSMatt Macy }
893*e92ffd9bSMartin Matuska assert(match >= lowPrefix);
894*e92ffd9bSMartin Matuska
895*e92ffd9bSMartin Matuska /* copy match within block */
896*e92ffd9bSMartin Matuska cpy = op + length;
897*e92ffd9bSMartin Matuska
898*e92ffd9bSMartin Matuska /* partialDecoding : may end anywhere within the block */
899*e92ffd9bSMartin Matuska assert(op<=oend);
900*e92ffd9bSMartin Matuska if (partialDecoding && (cpy > oend-MATCH_SAFEGUARD_DISTANCE)) {
901*e92ffd9bSMartin Matuska size_t const mlen = MIN(length, (size_t)(oend-op));
902*e92ffd9bSMartin Matuska const BYTE* const matchEnd = match + mlen;
903*e92ffd9bSMartin Matuska BYTE* const copyEnd = op + mlen;
904*e92ffd9bSMartin Matuska if (matchEnd > op) { /* overlap copy */
905*e92ffd9bSMartin Matuska while (op < copyEnd) { *op++ = *match++; }
906*e92ffd9bSMartin Matuska } else {
907*e92ffd9bSMartin Matuska LZ4_memcpy(op, match, mlen);
908*e92ffd9bSMartin Matuska }
909*e92ffd9bSMartin Matuska op = copyEnd;
910*e92ffd9bSMartin Matuska if (op == oend) { break; }
911*e92ffd9bSMartin Matuska continue;
912*e92ffd9bSMartin Matuska }
913*e92ffd9bSMartin Matuska
914*e92ffd9bSMartin Matuska if (unlikely(offset<8)) {
915*e92ffd9bSMartin Matuska LZ4_write32(op, 0); /* silence msan warning when offset==0 */
916*e92ffd9bSMartin Matuska op[0] = match[0];
917*e92ffd9bSMartin Matuska op[1] = match[1];
918*e92ffd9bSMartin Matuska op[2] = match[2];
919*e92ffd9bSMartin Matuska op[3] = match[3];
920*e92ffd9bSMartin Matuska match += inc32table[offset];
921*e92ffd9bSMartin Matuska LZ4_memcpy(op+4, match, 4);
922*e92ffd9bSMartin Matuska match -= dec64table[offset];
923*e92ffd9bSMartin Matuska } else {
924*e92ffd9bSMartin Matuska LZ4_memcpy(op, match, 8);
925*e92ffd9bSMartin Matuska match += 8;
926*e92ffd9bSMartin Matuska }
927*e92ffd9bSMartin Matuska op += 8;
928*e92ffd9bSMartin Matuska
929*e92ffd9bSMartin Matuska if (unlikely(cpy > oend-MATCH_SAFEGUARD_DISTANCE)) {
930*e92ffd9bSMartin Matuska BYTE* const oCopyLimit = oend - (WILDCOPYLENGTH-1);
931*e92ffd9bSMartin Matuska if (cpy > oend-LASTLITERALS) { goto _output_error; } /* Error : last LASTLITERALS bytes must be literals (uncompressed) */
932*e92ffd9bSMartin Matuska if (op < oCopyLimit) {
933*e92ffd9bSMartin Matuska LZ4_wildCopy8(op, match, oCopyLimit);
934*e92ffd9bSMartin Matuska match += oCopyLimit - op;
935*e92ffd9bSMartin Matuska op = oCopyLimit;
936*e92ffd9bSMartin Matuska }
937*e92ffd9bSMartin Matuska while (op < cpy) { *op++ = *match++; }
938*e92ffd9bSMartin Matuska } else {
939*e92ffd9bSMartin Matuska LZ4_memcpy(op, match, 8);
940*e92ffd9bSMartin Matuska if (length > 16) { LZ4_wildCopy8(op+8, match+8, cpy); }
941*e92ffd9bSMartin Matuska }
942*e92ffd9bSMartin Matuska op = cpy; /* wildcopy correction */
943eda14cbcSMatt Macy }
944eda14cbcSMatt Macy
945eda14cbcSMatt Macy /* end of decoding */
946*e92ffd9bSMartin Matuska if (endOnInput) {
947*e92ffd9bSMartin Matuska DEBUGLOG(5, "decoded %i bytes", (int) (((char*)op)-dst));
948*e92ffd9bSMartin Matuska return (int) (((char*)op)-dst); /* Nb of output bytes decoded */
949*e92ffd9bSMartin Matuska } else {
950*e92ffd9bSMartin Matuska return (int) (((const char*)ip)-src); /* Nb of input bytes read */
951*e92ffd9bSMartin Matuska }
952eda14cbcSMatt Macy
953*e92ffd9bSMartin Matuska /* Overflow error detected */
954eda14cbcSMatt Macy _output_error:
955*e92ffd9bSMartin Matuska return (int) (-(((const char*)ip)-src))-1;
956*e92ffd9bSMartin Matuska }
957eda14cbcSMatt Macy }
958eda14cbcSMatt Macy
959aebc9683SMateusz Guzik /*
960*e92ffd9bSMartin Matuska * LZ4_uncompress_unknownOutputSize() :
961*e92ffd9bSMartin Matuska * isize : is the input size, therefore the compressed size
962*e92ffd9bSMartin Matuska * maxOutputSize : is the size of the destination buffer (which must be
963*e92ffd9bSMartin Matuska * already allocated)
964*e92ffd9bSMartin Matuska * return : the number of bytes decoded in the destination buffer
965*e92ffd9bSMartin Matuska * (necessarily <= maxOutputSize). If the source stream is
966*e92ffd9bSMartin Matuska * malformed, the function will stop decoding and return a
967*e92ffd9bSMartin Matuska * negative result, indicating the byte position of the faulty
968*e92ffd9bSMartin Matuska * instruction. This function never writes beyond dest +
969*e92ffd9bSMartin Matuska * maxOutputSize, and is therefore protected against malicious
970*e92ffd9bSMartin Matuska * data packets.
971*e92ffd9bSMartin Matuska * note : Destination buffer must be already allocated.
972*e92ffd9bSMartin Matuska * This version is slightly slower than real_LZ4_uncompress()
973*e92ffd9bSMartin Matuska *
974aebc9683SMateusz Guzik */
975aebc9683SMateusz Guzik
976*e92ffd9bSMartin Matuska /*
977*e92ffd9bSMartin Matuska * Note: In upstream code, LZ4_uncompress_unknownOutputSize is now a legacy
978*e92ffd9bSMartin Matuska * wrapper for LZ4_decompress_safe which is a wrapper for
979*e92ffd9bSMartin Matuska * LZ4_decompress_generic; this wrapper flattens that, rather than
980*e92ffd9bSMartin Matuska * rewriting the callers.
981*e92ffd9bSMartin Matuska */
LZ4_uncompress_unknownOutputSize(const char * source,char * dest,int compressedSize,int maxDecompressedSize)982*e92ffd9bSMartin Matuska int LZ4_uncompress_unknownOutputSize(const char* source, char* dest, int compressedSize, int maxDecompressedSize)
983eda14cbcSMatt Macy {
984*e92ffd9bSMartin Matuska return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize,
985*e92ffd9bSMartin Matuska endOnInputSize, decode_full_block, noDict,
986*e92ffd9bSMartin Matuska (BYTE*)dest, NULL, 0);
987eda14cbcSMatt Macy }
988