xref: /freebsd-src/sys/contrib/openzfs/module/zfs/lz4_zfs.c (revision e2df9bb44109577475aeb186e7186ac040f9bde1)
1e92ffd9bSMartin Matuska /*
2e92ffd9bSMartin Matuska  * LZ4 - Fast LZ compression algorithm
3e92ffd9bSMartin Matuska  * Header File
4e92ffd9bSMartin Matuska  * Copyright (C) 2011-2013, Yann Collet.
5e92ffd9bSMartin Matuska  * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6e92ffd9bSMartin Matuska  *
7e92ffd9bSMartin Matuska  * Redistribution and use in source and binary forms, with or without
8e92ffd9bSMartin Matuska  * modification, are permitted provided that the following conditions are
9e92ffd9bSMartin Matuska  * met:
10e92ffd9bSMartin Matuska  *
11e92ffd9bSMartin Matuska  *     * Redistributions of source code must retain the above copyright
12e92ffd9bSMartin Matuska  * notice, this list of conditions and the following disclaimer.
13e92ffd9bSMartin Matuska  *     * Redistributions in binary form must reproduce the above
14e92ffd9bSMartin Matuska  * copyright notice, this list of conditions and the following disclaimer
15e92ffd9bSMartin Matuska  * in the documentation and/or other materials provided with the
16e92ffd9bSMartin Matuska  * distribution.
17e92ffd9bSMartin Matuska  *
18e92ffd9bSMartin Matuska  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19e92ffd9bSMartin Matuska  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20e92ffd9bSMartin Matuska  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21e92ffd9bSMartin Matuska  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22e92ffd9bSMartin Matuska  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23e92ffd9bSMartin Matuska  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24e92ffd9bSMartin Matuska  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25e92ffd9bSMartin Matuska  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26e92ffd9bSMartin Matuska  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27e92ffd9bSMartin Matuska  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28e92ffd9bSMartin Matuska  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29e92ffd9bSMartin Matuska  *
30e92ffd9bSMartin Matuska  * You can contact the author at :
31e92ffd9bSMartin Matuska  * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
32e92ffd9bSMartin Matuska  * - LZ4 source repository : http://code.google.com/p/lz4/
33e92ffd9bSMartin Matuska  */
34e92ffd9bSMartin Matuska 
35e92ffd9bSMartin Matuska /*
36e92ffd9bSMartin Matuska  * N.B. - This file seems to be based on LZ4 r85, dated Dec 10, 2012
37e92ffd9bSMartin Matuska  */
38e92ffd9bSMartin Matuska 
39e92ffd9bSMartin Matuska #include <sys/zfs_context.h>
40e92ffd9bSMartin Matuska #include <sys/zio_compress.h>
41e92ffd9bSMartin Matuska 
42e92ffd9bSMartin Matuska static int real_LZ4_compress(const char *source, char *dest, int isize,
43e92ffd9bSMartin Matuska     int osize);
44e92ffd9bSMartin Matuska static int LZ4_compressCtx(void *ctx, const char *source, char *dest,
45e92ffd9bSMartin Matuska     int isize, int osize);
46e92ffd9bSMartin Matuska static int LZ4_compress64kCtx(void *ctx, const char *source, char *dest,
47e92ffd9bSMartin Matuska     int isize, int osize);
48e92ffd9bSMartin Matuska 
49e92ffd9bSMartin Matuska /* See lz4.c */
50e92ffd9bSMartin Matuska int LZ4_uncompress_unknownOutputSize(const char *source, char *dest,
51e92ffd9bSMartin Matuska     int isize, int maxOutputSize);
52e92ffd9bSMartin Matuska 
53e92ffd9bSMartin Matuska static void *lz4_alloc(int flags);
54e92ffd9bSMartin Matuska static void lz4_free(void *ctx);
55e92ffd9bSMartin Matuska 
56*e2df9bb4SMartin Matuska static size_t
57*e2df9bb4SMartin Matuska zfs_lz4_compress_buf(void *s_start, void *d_start, size_t s_len,
58e92ffd9bSMartin Matuska     size_t d_len, int n)
59e92ffd9bSMartin Matuska {
60e92ffd9bSMartin Matuska 	(void) n;
61e92ffd9bSMartin Matuska 	uint32_t bufsiz;
62e92ffd9bSMartin Matuska 	char *dest = d_start;
63e92ffd9bSMartin Matuska 
64e92ffd9bSMartin Matuska 	ASSERT(d_len >= sizeof (bufsiz));
65e92ffd9bSMartin Matuska 
66e92ffd9bSMartin Matuska 	bufsiz = real_LZ4_compress(s_start, &dest[sizeof (bufsiz)], s_len,
67e92ffd9bSMartin Matuska 	    d_len - sizeof (bufsiz));
68e92ffd9bSMartin Matuska 
69e92ffd9bSMartin Matuska 	/* Signal an error if the compression routine returned zero. */
70e92ffd9bSMartin Matuska 	if (bufsiz == 0)
71e92ffd9bSMartin Matuska 		return (s_len);
72e92ffd9bSMartin Matuska 
73e92ffd9bSMartin Matuska 	/*
74e92ffd9bSMartin Matuska 	 * The exact compressed size is needed by the decompression routine,
75e92ffd9bSMartin Matuska 	 * so it is stored at the start of the buffer. Note that this may be
76e92ffd9bSMartin Matuska 	 * less than the compressed block size, which is rounded up to a
77e92ffd9bSMartin Matuska 	 * multiple of 1<<ashift.
78e92ffd9bSMartin Matuska 	 */
79e92ffd9bSMartin Matuska 	*(uint32_t *)dest = BE_32(bufsiz);
80e92ffd9bSMartin Matuska 
81e92ffd9bSMartin Matuska 	return (bufsiz + sizeof (bufsiz));
82e92ffd9bSMartin Matuska }
83e92ffd9bSMartin Matuska 
84*e2df9bb4SMartin Matuska static int
85*e2df9bb4SMartin Matuska zfs_lz4_decompress_buf(void *s_start, void *d_start, size_t s_len,
86e92ffd9bSMartin Matuska     size_t d_len, int n)
87e92ffd9bSMartin Matuska {
88e92ffd9bSMartin Matuska 	(void) n;
89e92ffd9bSMartin Matuska 	const char *src = s_start;
90e92ffd9bSMartin Matuska 	uint32_t bufsiz = BE_IN32(src);
91e92ffd9bSMartin Matuska 
92e92ffd9bSMartin Matuska 	/* invalid compressed buffer size encoded at start */
93e92ffd9bSMartin Matuska 	if (bufsiz + sizeof (bufsiz) > s_len)
94e92ffd9bSMartin Matuska 		return (1);
95e92ffd9bSMartin Matuska 
96e92ffd9bSMartin Matuska 	/*
97e92ffd9bSMartin Matuska 	 * Returns 0 on success (decompression function returned non-negative)
98e92ffd9bSMartin Matuska 	 * and non-zero on failure (decompression function returned negative).
99e92ffd9bSMartin Matuska 	 */
100e92ffd9bSMartin Matuska 	return (LZ4_uncompress_unknownOutputSize(&src[sizeof (bufsiz)],
101e92ffd9bSMartin Matuska 	    d_start, bufsiz, d_len) < 0);
102e92ffd9bSMartin Matuska }
103e92ffd9bSMartin Matuska 
104*e2df9bb4SMartin Matuska ZFS_COMPRESS_WRAP_DECL(zfs_lz4_compress)
105*e2df9bb4SMartin Matuska ZFS_DECOMPRESS_WRAP_DECL(zfs_lz4_decompress)
106*e2df9bb4SMartin Matuska 
107e92ffd9bSMartin Matuska /*
108e92ffd9bSMartin Matuska  * LZ4 API Description:
109e92ffd9bSMartin Matuska  *
110e92ffd9bSMartin Matuska  * Simple Functions:
111e92ffd9bSMartin Matuska  * real_LZ4_compress() :
112e92ffd9bSMartin Matuska  * 	isize  : is the input size. Max supported value is ~1.9GB
113e92ffd9bSMartin Matuska  * 	return : the number of bytes written in buffer dest
114e92ffd9bSMartin Matuska  *		 or 0 if the compression fails (if LZ4_COMPRESSMIN is set).
115e92ffd9bSMartin Matuska  * 	note : destination buffer must be already allocated.
116e92ffd9bSMartin Matuska  * 		destination buffer must be sized to handle worst cases
117e92ffd9bSMartin Matuska  * 		situations (input data not compressible) worst case size
118e92ffd9bSMartin Matuska  * 		evaluation is provided by function LZ4_compressBound().
119e92ffd9bSMartin Matuska  *
120e92ffd9bSMartin Matuska  * real_LZ4_uncompress() :
121e92ffd9bSMartin Matuska  * 	osize  : is the output size, therefore the original size
122e92ffd9bSMartin Matuska  * 	return : the number of bytes read in the source buffer.
123e92ffd9bSMartin Matuska  * 		If the source stream is malformed, the function will stop
124e92ffd9bSMartin Matuska  * 		decoding and return a negative result, indicating the byte
125e92ffd9bSMartin Matuska  * 		position of the faulty instruction. This function never
126e92ffd9bSMartin Matuska  * 		writes beyond dest + osize, and is therefore protected
127e92ffd9bSMartin Matuska  * 		against malicious data packets.
128e92ffd9bSMartin Matuska  * 	note : destination buffer must be already allocated
129e92ffd9bSMartin Matuska  *	note : real_LZ4_uncompress() is not used in ZFS so its code
130e92ffd9bSMartin Matuska  *	       is not present here.
131e92ffd9bSMartin Matuska  *
132e92ffd9bSMartin Matuska  * Advanced Functions
133e92ffd9bSMartin Matuska  *
134e92ffd9bSMartin Matuska  * LZ4_compressBound() :
135e92ffd9bSMartin Matuska  * 	Provides the maximum size that LZ4 may output in a "worst case"
136e92ffd9bSMartin Matuska  * 	scenario (input data not compressible) primarily useful for memory
137e92ffd9bSMartin Matuska  * 	allocation of output buffer.
138e92ffd9bSMartin Matuska  *
139e92ffd9bSMartin Matuska  * 	isize  : is the input size. Max supported value is ~1.9GB
140e92ffd9bSMartin Matuska  * 	return : maximum output size in a "worst case" scenario
141e92ffd9bSMartin Matuska  * 	note : this function is limited by "int" range (2^31-1)
142e92ffd9bSMartin Matuska  *
143e92ffd9bSMartin Matuska  * LZ4_uncompress_unknownOutputSize() :
144e92ffd9bSMartin Matuska  * 	isize  : is the input size, therefore the compressed size
145e92ffd9bSMartin Matuska  * 	maxOutputSize : is the size of the destination buffer (which must be
146e92ffd9bSMartin Matuska  * 		already allocated)
147e92ffd9bSMartin Matuska  * 	return : the number of bytes decoded in the destination buffer
148e92ffd9bSMartin Matuska  * 		(necessarily <= maxOutputSize). If the source stream is
149e92ffd9bSMartin Matuska  * 		malformed, the function will stop decoding and return a
150e92ffd9bSMartin Matuska  * 		negative result, indicating the byte position of the faulty
151e92ffd9bSMartin Matuska  * 		instruction. This function never writes beyond dest +
152e92ffd9bSMartin Matuska  * 		maxOutputSize, and is therefore protected against malicious
153e92ffd9bSMartin Matuska  * 		data packets.
154e92ffd9bSMartin Matuska  * 	note   : Destination buffer must be already allocated.
155e92ffd9bSMartin Matuska  *		This version is slightly slower than real_LZ4_uncompress()
156e92ffd9bSMartin Matuska  *
157e92ffd9bSMartin Matuska  * LZ4_compressCtx() :
158e92ffd9bSMartin Matuska  * 	This function explicitly handles the CTX memory structure.
159e92ffd9bSMartin Matuska  *
160e92ffd9bSMartin Matuska  * 	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
161e92ffd9bSMartin Matuska  * 	by the caller (either on the stack or using kmem_cache_alloc). Passing
162e92ffd9bSMartin Matuska  * 	NULL isn't valid.
163e92ffd9bSMartin Matuska  *
164e92ffd9bSMartin Matuska  * LZ4_compress64kCtx() :
165e92ffd9bSMartin Matuska  * 	Same as LZ4_compressCtx(), but specific to small inputs (<64KB).
166e92ffd9bSMartin Matuska  * 	isize *Must* be <64KB, otherwise the output will be corrupted.
167e92ffd9bSMartin Matuska  *
168e92ffd9bSMartin Matuska  * 	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
169e92ffd9bSMartin Matuska  * 	by the caller (either on the stack or using kmem_cache_alloc). Passing
170e92ffd9bSMartin Matuska  * 	NULL isn't valid.
171e92ffd9bSMartin Matuska  */
172e92ffd9bSMartin Matuska 
173e92ffd9bSMartin Matuska /*
174e92ffd9bSMartin Matuska  * Tuning parameters
175e92ffd9bSMartin Matuska  */
176e92ffd9bSMartin Matuska 
177e92ffd9bSMartin Matuska /*
178e92ffd9bSMartin Matuska  * COMPRESSIONLEVEL: Increasing this value improves compression ratio
179e92ffd9bSMartin Matuska  *	 Lowering this value reduces memory usage. Reduced memory usage
180e92ffd9bSMartin Matuska  *	typically improves speed, due to cache effect (ex: L1 32KB for Intel,
181e92ffd9bSMartin Matuska  *	L1 64KB for AMD). Memory usage formula : N->2^(N+2) Bytes
182e92ffd9bSMartin Matuska  *	(examples : 12 -> 16KB ; 17 -> 512KB)
183e92ffd9bSMartin Matuska  */
184e92ffd9bSMartin Matuska #define	COMPRESSIONLEVEL 12
185e92ffd9bSMartin Matuska 
186e92ffd9bSMartin Matuska /*
187e92ffd9bSMartin Matuska  * NOTCOMPRESSIBLE_CONFIRMATION: Decreasing this value will make the
188e92ffd9bSMartin Matuska  *	algorithm skip faster data segments considered "incompressible".
189e92ffd9bSMartin Matuska  *	This may decrease compression ratio dramatically, but will be
190e92ffd9bSMartin Matuska  *	faster on incompressible data. Increasing this value will make
191e92ffd9bSMartin Matuska  *	the algorithm search more before declaring a segment "incompressible".
192e92ffd9bSMartin Matuska  *	This could improve compression a bit, but will be slower on
193e92ffd9bSMartin Matuska  *	incompressible data. The default value (6) is recommended.
194e92ffd9bSMartin Matuska  */
195e92ffd9bSMartin Matuska #define	NOTCOMPRESSIBLE_CONFIRMATION 6
196e92ffd9bSMartin Matuska 
197e92ffd9bSMartin Matuska /*
198e92ffd9bSMartin Matuska  * BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE: This will provide a boost to
199e92ffd9bSMartin Matuska  * performance for big endian cpu, but the resulting compressed stream
200e92ffd9bSMartin Matuska  * will be incompatible with little-endian CPU. You can set this option
201e92ffd9bSMartin Matuska  * to 1 in situations where data will stay within closed environment.
202e92ffd9bSMartin Matuska  * This option is useless on Little_Endian CPU (such as x86).
203e92ffd9bSMartin Matuska  */
204e92ffd9bSMartin Matuska /* #define	BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 */
205e92ffd9bSMartin Matuska 
206e92ffd9bSMartin Matuska /*
207e92ffd9bSMartin Matuska  * CPU Feature Detection
208e92ffd9bSMartin Matuska  */
209e92ffd9bSMartin Matuska 
210e92ffd9bSMartin Matuska /* 32 or 64 bits ? */
211e92ffd9bSMartin Matuska #if defined(_LP64)
212e92ffd9bSMartin Matuska #define	LZ4_ARCH64 1
213e92ffd9bSMartin Matuska #else
214e92ffd9bSMartin Matuska #define	LZ4_ARCH64 0
215e92ffd9bSMartin Matuska #endif
216e92ffd9bSMartin Matuska 
217e92ffd9bSMartin Matuska /*
218e92ffd9bSMartin Matuska  * Little Endian or Big Endian?
219e92ffd9bSMartin Matuska  * Note: overwrite the below #define if you know your architecture endianness.
220e92ffd9bSMartin Matuska  */
221e92ffd9bSMartin Matuska #if defined(_ZFS_BIG_ENDIAN)
222e92ffd9bSMartin Matuska #define	LZ4_BIG_ENDIAN 1
223e92ffd9bSMartin Matuska #else
224e92ffd9bSMartin Matuska /*
225e92ffd9bSMartin Matuska  * Little Endian assumed. PDP Endian and other very rare endian format
226e92ffd9bSMartin Matuska  * are unsupported.
227e92ffd9bSMartin Matuska  */
228e92ffd9bSMartin Matuska #undef LZ4_BIG_ENDIAN
229e92ffd9bSMartin Matuska #endif
230e92ffd9bSMartin Matuska 
231e92ffd9bSMartin Matuska /*
232e92ffd9bSMartin Matuska  * Unaligned memory access is automatically enabled for "common" CPU,
233e92ffd9bSMartin Matuska  * such as x86. For others CPU, the compiler will be more cautious, and
234e92ffd9bSMartin Matuska  * insert extra code to ensure aligned access is respected. If you know
235e92ffd9bSMartin Matuska  * your target CPU supports unaligned memory access, you may want to
236e92ffd9bSMartin Matuska  * force this option manually to improve performance
237e92ffd9bSMartin Matuska  */
238e92ffd9bSMartin Matuska #if defined(__ARM_FEATURE_UNALIGNED)
239e92ffd9bSMartin Matuska #define	LZ4_FORCE_UNALIGNED_ACCESS 1
240e92ffd9bSMartin Matuska #endif
241e92ffd9bSMartin Matuska 
242e92ffd9bSMartin Matuska /*
243e92ffd9bSMartin Matuska  * Illumos : we can't use GCC's __builtin_ctz family of builtins in the
244e92ffd9bSMartin Matuska  * kernel
245e92ffd9bSMartin Matuska  * Linux : we can use GCC's __builtin_ctz family of builtins in the
246e92ffd9bSMartin Matuska  * kernel
247e92ffd9bSMartin Matuska  */
248e92ffd9bSMartin Matuska #undef	LZ4_FORCE_SW_BITCOUNT
249e92ffd9bSMartin Matuska #if defined(__sparc)
250e92ffd9bSMartin Matuska #define	LZ4_FORCE_SW_BITCOUNT
251e92ffd9bSMartin Matuska #endif
252e92ffd9bSMartin Matuska 
253e92ffd9bSMartin Matuska /*
254e92ffd9bSMartin Matuska  * Compiler Options
255e92ffd9bSMartin Matuska  */
256e92ffd9bSMartin Matuska /* Disable restrict */
257e92ffd9bSMartin Matuska #define	restrict
258e92ffd9bSMartin Matuska 
259e92ffd9bSMartin Matuska /*
260e92ffd9bSMartin Matuska  * Linux : GCC_VERSION is defined as of 3.9-rc1, so undefine it.
261e92ffd9bSMartin Matuska  * torvalds/linux@3f3f8d2f48acfd8ed3b8e6b7377935da57b27b16
262e92ffd9bSMartin Matuska  */
263e92ffd9bSMartin Matuska #ifdef GCC_VERSION
264e92ffd9bSMartin Matuska #undef GCC_VERSION
265e92ffd9bSMartin Matuska #endif
266e92ffd9bSMartin Matuska 
267e92ffd9bSMartin Matuska #define	GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
268e92ffd9bSMartin Matuska 
269e92ffd9bSMartin Matuska #if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__)
270e92ffd9bSMartin Matuska #define	expect(expr, value)    (__builtin_expect((expr), (value)))
271e92ffd9bSMartin Matuska #else
272e92ffd9bSMartin Matuska #define	expect(expr, value)    (expr)
273e92ffd9bSMartin Matuska #endif
274e92ffd9bSMartin Matuska 
275e92ffd9bSMartin Matuska #ifndef likely
276e92ffd9bSMartin Matuska #define	likely(expr)	expect((expr) != 0, 1)
277e92ffd9bSMartin Matuska #endif
278e92ffd9bSMartin Matuska 
279e92ffd9bSMartin Matuska #ifndef unlikely
280e92ffd9bSMartin Matuska #define	unlikely(expr)	expect((expr) != 0, 0)
281e92ffd9bSMartin Matuska #endif
282e92ffd9bSMartin Matuska 
283e92ffd9bSMartin Matuska #define	lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | \
284e92ffd9bSMartin Matuska 	(((x) & 0xffu) << 8)))
285e92ffd9bSMartin Matuska 
286e92ffd9bSMartin Matuska /* Basic types */
287e92ffd9bSMartin Matuska #define	BYTE	uint8_t
288e92ffd9bSMartin Matuska #define	U16	uint16_t
289e92ffd9bSMartin Matuska #define	U32	uint32_t
290e92ffd9bSMartin Matuska #define	S32	int32_t
291e92ffd9bSMartin Matuska #define	U64	uint64_t
292e92ffd9bSMartin Matuska 
293e92ffd9bSMartin Matuska #ifndef LZ4_FORCE_UNALIGNED_ACCESS
294e92ffd9bSMartin Matuska #pragma pack(1)
295e92ffd9bSMartin Matuska #endif
296e92ffd9bSMartin Matuska 
297e92ffd9bSMartin Matuska typedef struct _U16_S {
298e92ffd9bSMartin Matuska 	U16 v;
299e92ffd9bSMartin Matuska } U16_S;
300e92ffd9bSMartin Matuska typedef struct _U32_S {
301e92ffd9bSMartin Matuska 	U32 v;
302e92ffd9bSMartin Matuska } U32_S;
303e92ffd9bSMartin Matuska typedef struct _U64_S {
304e92ffd9bSMartin Matuska 	U64 v;
305e92ffd9bSMartin Matuska } U64_S;
306e92ffd9bSMartin Matuska 
307e92ffd9bSMartin Matuska #ifndef LZ4_FORCE_UNALIGNED_ACCESS
308e92ffd9bSMartin Matuska #pragma pack()
309e92ffd9bSMartin Matuska #endif
310e92ffd9bSMartin Matuska 
311e92ffd9bSMartin Matuska #define	A64(x) (((U64_S *)(x))->v)
312e92ffd9bSMartin Matuska #define	A32(x) (((U32_S *)(x))->v)
313e92ffd9bSMartin Matuska #define	A16(x) (((U16_S *)(x))->v)
314e92ffd9bSMartin Matuska 
315e92ffd9bSMartin Matuska /*
316e92ffd9bSMartin Matuska  * Constants
317e92ffd9bSMartin Matuska  */
318e92ffd9bSMartin Matuska #define	MINMATCH 4
319e92ffd9bSMartin Matuska 
320e92ffd9bSMartin Matuska #define	HASH_LOG COMPRESSIONLEVEL
321e92ffd9bSMartin Matuska #define	HASHTABLESIZE (1 << HASH_LOG)
322e92ffd9bSMartin Matuska #define	HASH_MASK (HASHTABLESIZE - 1)
323e92ffd9bSMartin Matuska 
324e92ffd9bSMartin Matuska #define	SKIPSTRENGTH (NOTCOMPRESSIBLE_CONFIRMATION > 2 ? \
325e92ffd9bSMartin Matuska 	NOTCOMPRESSIBLE_CONFIRMATION : 2)
326e92ffd9bSMartin Matuska 
327e92ffd9bSMartin Matuska #define	COPYLENGTH 8
328e92ffd9bSMartin Matuska #define	LASTLITERALS 5
329e92ffd9bSMartin Matuska #define	MFLIMIT (COPYLENGTH + MINMATCH)
330e92ffd9bSMartin Matuska #define	MINLENGTH (MFLIMIT + 1)
331e92ffd9bSMartin Matuska 
332e92ffd9bSMartin Matuska #define	MAXD_LOG 16
333e92ffd9bSMartin Matuska #define	MAX_DISTANCE ((1 << MAXD_LOG) - 1)
334e92ffd9bSMartin Matuska 
335e92ffd9bSMartin Matuska #define	ML_BITS 4
336e92ffd9bSMartin Matuska #define	ML_MASK ((1U<<ML_BITS)-1)
337e92ffd9bSMartin Matuska #define	RUN_BITS (8-ML_BITS)
338e92ffd9bSMartin Matuska #define	RUN_MASK ((1U<<RUN_BITS)-1)
339e92ffd9bSMartin Matuska 
340e92ffd9bSMartin Matuska 
341e92ffd9bSMartin Matuska /*
342e92ffd9bSMartin Matuska  * Architecture-specific macros
343e92ffd9bSMartin Matuska  */
344e92ffd9bSMartin Matuska #if LZ4_ARCH64
345e92ffd9bSMartin Matuska #define	STEPSIZE 8
346e92ffd9bSMartin Matuska #define	UARCH U64
347e92ffd9bSMartin Matuska #define	AARCH A64
348e92ffd9bSMartin Matuska #define	LZ4_COPYSTEP(s, d)	A64(d) = A64(s); d += 8; s += 8;
349e92ffd9bSMartin Matuska #define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d)
350e92ffd9bSMartin Matuska #define	LZ4_SECURECOPY(s, d, e)	if (d < e) LZ4_WILDCOPY(s, d, e)
351e92ffd9bSMartin Matuska #define	HTYPE U32
352e92ffd9bSMartin Matuska #define	INITBASE(base)		const BYTE* const base = ip
353e92ffd9bSMartin Matuska #else /* !LZ4_ARCH64 */
354e92ffd9bSMartin Matuska #define	STEPSIZE 4
355e92ffd9bSMartin Matuska #define	UARCH U32
356e92ffd9bSMartin Matuska #define	AARCH A32
357e92ffd9bSMartin Matuska #define	LZ4_COPYSTEP(s, d)	A32(d) = A32(s); d += 4; s += 4;
358e92ffd9bSMartin Matuska #define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d); LZ4_COPYSTEP(s, d);
359e92ffd9bSMartin Matuska #define	LZ4_SECURECOPY		LZ4_WILDCOPY
360e92ffd9bSMartin Matuska #define	HTYPE const BYTE *
361e92ffd9bSMartin Matuska #define	INITBASE(base)		const int base = 0
362e92ffd9bSMartin Matuska #endif /* !LZ4_ARCH64 */
363e92ffd9bSMartin Matuska 
364e92ffd9bSMartin Matuska #if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE))
365e92ffd9bSMartin Matuska #define	LZ4_READ_LITTLEENDIAN_16(d, s, p) \
366e92ffd9bSMartin Matuska 	{ U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
367e92ffd9bSMartin Matuska #define	LZ4_WRITE_LITTLEENDIAN_16(p, i) \
368e92ffd9bSMartin Matuska 	{ U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p += 2; }
369e92ffd9bSMartin Matuska #else
370e92ffd9bSMartin Matuska #define	LZ4_READ_LITTLEENDIAN_16(d, s, p) { d = (s) - A16(p); }
371e92ffd9bSMartin Matuska #define	LZ4_WRITE_LITTLEENDIAN_16(p, v)  { A16(p) = v; p += 2; }
372e92ffd9bSMartin Matuska #endif
373e92ffd9bSMartin Matuska 
374e92ffd9bSMartin Matuska 
375e92ffd9bSMartin Matuska /* Local structures */
376e92ffd9bSMartin Matuska struct refTables {
377e92ffd9bSMartin Matuska 	HTYPE hashTable[HASHTABLESIZE];
378e92ffd9bSMartin Matuska };
379e92ffd9bSMartin Matuska 
380e92ffd9bSMartin Matuska 
381e92ffd9bSMartin Matuska /* Macros */
382e92ffd9bSMartin Matuska #define	LZ4_HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH * 8) - \
383e92ffd9bSMartin Matuska 	HASH_LOG))
384e92ffd9bSMartin Matuska #define	LZ4_HASH_VALUE(p) LZ4_HASH_FUNCTION(A32(p))
385e92ffd9bSMartin Matuska #define	LZ4_WILDCOPY(s, d, e) do { LZ4_COPYPACKET(s, d) } while (d < e);
386e92ffd9bSMartin Matuska #define	LZ4_BLINDCOPY(s, d, l) { BYTE* e = (d) + l; LZ4_WILDCOPY(s, d, e); \
387e92ffd9bSMartin Matuska 	d = e; }
388e92ffd9bSMartin Matuska 
389e92ffd9bSMartin Matuska 
390e92ffd9bSMartin Matuska /* Private functions */
391e92ffd9bSMartin Matuska #if LZ4_ARCH64
392e92ffd9bSMartin Matuska 
393e92ffd9bSMartin Matuska static inline int
394e92ffd9bSMartin Matuska LZ4_NbCommonBytes(register U64 val)
395e92ffd9bSMartin Matuska {
396e92ffd9bSMartin Matuska #if defined(LZ4_BIG_ENDIAN)
397e92ffd9bSMartin Matuska #if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \
398e92ffd9bSMartin Matuska 	!defined(LZ4_FORCE_SW_BITCOUNT)
399e92ffd9bSMartin Matuska 	return (__builtin_clzll(val) >> 3);
400e92ffd9bSMartin Matuska #else
401e92ffd9bSMartin Matuska 	int r;
402e92ffd9bSMartin Matuska 	if (!(val >> 32)) {
403e92ffd9bSMartin Matuska 		r = 4;
404e92ffd9bSMartin Matuska 	} else {
405e92ffd9bSMartin Matuska 		r = 0;
406e92ffd9bSMartin Matuska 		val >>= 32;
407e92ffd9bSMartin Matuska 	}
408e92ffd9bSMartin Matuska 	if (!(val >> 16)) {
409e92ffd9bSMartin Matuska 		r += 2;
410e92ffd9bSMartin Matuska 		val >>= 8;
411e92ffd9bSMartin Matuska 	} else {
412e92ffd9bSMartin Matuska 		val >>= 24;
413e92ffd9bSMartin Matuska 	}
414e92ffd9bSMartin Matuska 	r += (!val);
415e92ffd9bSMartin Matuska 	return (r);
416e92ffd9bSMartin Matuska #endif
417e92ffd9bSMartin Matuska #else
418e92ffd9bSMartin Matuska #if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \
419e92ffd9bSMartin Matuska 	!defined(LZ4_FORCE_SW_BITCOUNT)
420e92ffd9bSMartin Matuska 	return (__builtin_ctzll(val) >> 3);
421e92ffd9bSMartin Matuska #else
422e92ffd9bSMartin Matuska 	static const int DeBruijnBytePos[64] =
423e92ffd9bSMartin Matuska 	    { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5,
424e92ffd9bSMartin Matuska 		3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5,
425e92ffd9bSMartin Matuska 		5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4,
426e92ffd9bSMartin Matuska 		4, 5, 7, 2, 6, 5, 7, 6, 7, 7
427e92ffd9bSMartin Matuska 	};
428e92ffd9bSMartin Matuska 	return DeBruijnBytePos[((U64) ((val & -val) * 0x0218A392CDABBD3F)) >>
429e92ffd9bSMartin Matuska 	    58];
430e92ffd9bSMartin Matuska #endif
431e92ffd9bSMartin Matuska #endif
432e92ffd9bSMartin Matuska }
433e92ffd9bSMartin Matuska 
434e92ffd9bSMartin Matuska #else
435e92ffd9bSMartin Matuska 
436e92ffd9bSMartin Matuska static inline int
437e92ffd9bSMartin Matuska LZ4_NbCommonBytes(register U32 val)
438e92ffd9bSMartin Matuska {
439e92ffd9bSMartin Matuska #if defined(LZ4_BIG_ENDIAN)
440e92ffd9bSMartin Matuska #if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \
441e92ffd9bSMartin Matuska 	!defined(LZ4_FORCE_SW_BITCOUNT)
442e92ffd9bSMartin Matuska 	return (__builtin_clz(val) >> 3);
443e92ffd9bSMartin Matuska #else
444e92ffd9bSMartin Matuska 	int r;
445e92ffd9bSMartin Matuska 	if (!(val >> 16)) {
446e92ffd9bSMartin Matuska 		r = 2;
447e92ffd9bSMartin Matuska 		val >>= 8;
448e92ffd9bSMartin Matuska 	} else {
449e92ffd9bSMartin Matuska 		r = 0;
450e92ffd9bSMartin Matuska 		val >>= 24;
451e92ffd9bSMartin Matuska 	}
452e92ffd9bSMartin Matuska 	r += (!val);
453e92ffd9bSMartin Matuska 	return (r);
454e92ffd9bSMartin Matuska #endif
455e92ffd9bSMartin Matuska #else
456e92ffd9bSMartin Matuska #if defined(__GNUC__) && (GCC_VERSION >= 304) && \
457e92ffd9bSMartin Matuska 	!defined(LZ4_FORCE_SW_BITCOUNT)
458e92ffd9bSMartin Matuska 	return (__builtin_ctz(val) >> 3);
459e92ffd9bSMartin Matuska #else
460e92ffd9bSMartin Matuska 	static const int DeBruijnBytePos[32] = {
461e92ffd9bSMartin Matuska 		0, 0, 3, 0, 3, 1, 3, 0,
462e92ffd9bSMartin Matuska 		3, 2, 2, 1, 3, 2, 0, 1,
463e92ffd9bSMartin Matuska 		3, 3, 1, 2, 2, 2, 2, 0,
464e92ffd9bSMartin Matuska 		3, 1, 2, 0, 1, 0, 1, 1
465e92ffd9bSMartin Matuska 	};
466e92ffd9bSMartin Matuska 	return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >>
467e92ffd9bSMartin Matuska 	    27];
468e92ffd9bSMartin Matuska #endif
469e92ffd9bSMartin Matuska #endif
470e92ffd9bSMartin Matuska }
471e92ffd9bSMartin Matuska 
472e92ffd9bSMartin Matuska #endif
473e92ffd9bSMartin Matuska 
474e92ffd9bSMartin Matuska /* Compression functions */
475e92ffd9bSMartin Matuska 
476e92ffd9bSMartin Matuska static int
477e92ffd9bSMartin Matuska LZ4_compressCtx(void *ctx, const char *source, char *dest, int isize,
478e92ffd9bSMartin Matuska     int osize)
479e92ffd9bSMartin Matuska {
480e92ffd9bSMartin Matuska 	struct refTables *srt = (struct refTables *)ctx;
481e92ffd9bSMartin Matuska 	HTYPE *HashTable = (HTYPE *) (srt->hashTable);
482e92ffd9bSMartin Matuska 
483e92ffd9bSMartin Matuska 	const BYTE *ip = (BYTE *) source;
484e92ffd9bSMartin Matuska 	INITBASE(base);
485e92ffd9bSMartin Matuska 	const BYTE *anchor = ip;
486e92ffd9bSMartin Matuska 	const BYTE *const iend = ip + isize;
487e92ffd9bSMartin Matuska 	const BYTE *const oend = (BYTE *) dest + osize;
488e92ffd9bSMartin Matuska 	const BYTE *const mflimit = iend - MFLIMIT;
489e92ffd9bSMartin Matuska #define	matchlimit (iend - LASTLITERALS)
490e92ffd9bSMartin Matuska 
491e92ffd9bSMartin Matuska 	BYTE *op = (BYTE *) dest;
492e92ffd9bSMartin Matuska 
493e92ffd9bSMartin Matuska 	int len, length;
494e92ffd9bSMartin Matuska 	const int skipStrength = SKIPSTRENGTH;
495e92ffd9bSMartin Matuska 	U32 forwardH;
496e92ffd9bSMartin Matuska 
497e92ffd9bSMartin Matuska 
498e92ffd9bSMartin Matuska 	/* Init */
499e92ffd9bSMartin Matuska 	if (isize < MINLENGTH)
500e92ffd9bSMartin Matuska 		goto _last_literals;
501e92ffd9bSMartin Matuska 
502e92ffd9bSMartin Matuska 	/* First Byte */
503e92ffd9bSMartin Matuska 	HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
504e92ffd9bSMartin Matuska 	ip++;
505e92ffd9bSMartin Matuska 	forwardH = LZ4_HASH_VALUE(ip);
506e92ffd9bSMartin Matuska 
507e92ffd9bSMartin Matuska 	/* Main Loop */
508e92ffd9bSMartin Matuska 	for (;;) {
509e92ffd9bSMartin Matuska 		int findMatchAttempts = (1U << skipStrength) + 3;
510e92ffd9bSMartin Matuska 		const BYTE *forwardIp = ip;
511e92ffd9bSMartin Matuska 		const BYTE *ref;
512e92ffd9bSMartin Matuska 		BYTE *token;
513e92ffd9bSMartin Matuska 
514e92ffd9bSMartin Matuska 		/* Find a match */
515e92ffd9bSMartin Matuska 		do {
516e92ffd9bSMartin Matuska 			U32 h = forwardH;
517e92ffd9bSMartin Matuska 			int step = findMatchAttempts++ >> skipStrength;
518e92ffd9bSMartin Matuska 			ip = forwardIp;
519e92ffd9bSMartin Matuska 			forwardIp = ip + step;
520e92ffd9bSMartin Matuska 
521e92ffd9bSMartin Matuska 			if (unlikely(forwardIp > mflimit)) {
522e92ffd9bSMartin Matuska 				goto _last_literals;
523e92ffd9bSMartin Matuska 			}
524e92ffd9bSMartin Matuska 
525e92ffd9bSMartin Matuska 			forwardH = LZ4_HASH_VALUE(forwardIp);
526e92ffd9bSMartin Matuska 			ref = base + HashTable[h];
527e92ffd9bSMartin Matuska 			HashTable[h] = ip - base;
528e92ffd9bSMartin Matuska 
529e92ffd9bSMartin Matuska 		} while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));
530e92ffd9bSMartin Matuska 
531e92ffd9bSMartin Matuska 		/* Catch up */
532e92ffd9bSMartin Matuska 		while ((ip > anchor) && (ref > (BYTE *) source) &&
533e92ffd9bSMartin Matuska 		    unlikely(ip[-1] == ref[-1])) {
534e92ffd9bSMartin Matuska 			ip--;
535e92ffd9bSMartin Matuska 			ref--;
536e92ffd9bSMartin Matuska 		}
537e92ffd9bSMartin Matuska 
538e92ffd9bSMartin Matuska 		/* Encode Literal length */
539e92ffd9bSMartin Matuska 		length = ip - anchor;
540e92ffd9bSMartin Matuska 		token = op++;
541e92ffd9bSMartin Matuska 
542e92ffd9bSMartin Matuska 		/* Check output limit */
543e92ffd9bSMartin Matuska 		if (unlikely(op + length + (2 + 1 + LASTLITERALS) +
544e92ffd9bSMartin Matuska 		    (length >> 8) > oend))
545e92ffd9bSMartin Matuska 			return (0);
546e92ffd9bSMartin Matuska 
547e92ffd9bSMartin Matuska 		if (length >= (int)RUN_MASK) {
548e92ffd9bSMartin Matuska 			*token = (RUN_MASK << ML_BITS);
549e92ffd9bSMartin Matuska 			len = length - RUN_MASK;
550e92ffd9bSMartin Matuska 			for (; len > 254; len -= 255)
551e92ffd9bSMartin Matuska 				*op++ = 255;
552e92ffd9bSMartin Matuska 			*op++ = (BYTE)len;
553e92ffd9bSMartin Matuska 		} else
554e92ffd9bSMartin Matuska 			*token = (length << ML_BITS);
555e92ffd9bSMartin Matuska 
556e92ffd9bSMartin Matuska 		/* Copy Literals */
557e92ffd9bSMartin Matuska 		LZ4_BLINDCOPY(anchor, op, length);
558e92ffd9bSMartin Matuska 
559e92ffd9bSMartin Matuska 		_next_match:
560e92ffd9bSMartin Matuska 		/* Encode Offset */
561e92ffd9bSMartin Matuska 		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);
562e92ffd9bSMartin Matuska 
563e92ffd9bSMartin Matuska 		/* Start Counting */
564e92ffd9bSMartin Matuska 		ip += MINMATCH;
565e92ffd9bSMartin Matuska 		ref += MINMATCH;	/* MinMatch verified */
566e92ffd9bSMartin Matuska 		anchor = ip;
567e92ffd9bSMartin Matuska 		while (likely(ip < matchlimit - (STEPSIZE - 1))) {
568e92ffd9bSMartin Matuska 			UARCH diff = AARCH(ref) ^ AARCH(ip);
569e92ffd9bSMartin Matuska 			if (!diff) {
570e92ffd9bSMartin Matuska 				ip += STEPSIZE;
571e92ffd9bSMartin Matuska 				ref += STEPSIZE;
572e92ffd9bSMartin Matuska 				continue;
573e92ffd9bSMartin Matuska 			}
574e92ffd9bSMartin Matuska 			ip += LZ4_NbCommonBytes(diff);
575e92ffd9bSMartin Matuska 			goto _endCount;
576e92ffd9bSMartin Matuska 		}
577e92ffd9bSMartin Matuska #if LZ4_ARCH64
578e92ffd9bSMartin Matuska 		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
579e92ffd9bSMartin Matuska 			ip += 4;
580e92ffd9bSMartin Matuska 			ref += 4;
581e92ffd9bSMartin Matuska 		}
582e92ffd9bSMartin Matuska #endif
583e92ffd9bSMartin Matuska 		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
584e92ffd9bSMartin Matuska 			ip += 2;
585e92ffd9bSMartin Matuska 			ref += 2;
586e92ffd9bSMartin Matuska 		}
587e92ffd9bSMartin Matuska 		if ((ip < matchlimit) && (*ref == *ip))
588e92ffd9bSMartin Matuska 			ip++;
589e92ffd9bSMartin Matuska 		_endCount:
590e92ffd9bSMartin Matuska 
591e92ffd9bSMartin Matuska 		/* Encode MatchLength */
592e92ffd9bSMartin Matuska 		len = (ip - anchor);
593e92ffd9bSMartin Matuska 		/* Check output limit */
594e92ffd9bSMartin Matuska 		if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend))
595e92ffd9bSMartin Matuska 			return (0);
596e92ffd9bSMartin Matuska 		if (len >= (int)ML_MASK) {
597e92ffd9bSMartin Matuska 			*token += ML_MASK;
598e92ffd9bSMartin Matuska 			len -= ML_MASK;
599e92ffd9bSMartin Matuska 			for (; len > 509; len -= 510) {
600e92ffd9bSMartin Matuska 				*op++ = 255;
601e92ffd9bSMartin Matuska 				*op++ = 255;
602e92ffd9bSMartin Matuska 			}
603e92ffd9bSMartin Matuska 			if (len > 254) {
604e92ffd9bSMartin Matuska 				len -= 255;
605e92ffd9bSMartin Matuska 				*op++ = 255;
606e92ffd9bSMartin Matuska 			}
607e92ffd9bSMartin Matuska 			*op++ = (BYTE)len;
608e92ffd9bSMartin Matuska 		} else
609e92ffd9bSMartin Matuska 			*token += len;
610e92ffd9bSMartin Matuska 
611e92ffd9bSMartin Matuska 		/* Test end of chunk */
612e92ffd9bSMartin Matuska 		if (ip > mflimit) {
613e92ffd9bSMartin Matuska 			anchor = ip;
614e92ffd9bSMartin Matuska 			break;
615e92ffd9bSMartin Matuska 		}
616e92ffd9bSMartin Matuska 		/* Fill table */
617e92ffd9bSMartin Matuska 		HashTable[LZ4_HASH_VALUE(ip - 2)] = ip - 2 - base;
618e92ffd9bSMartin Matuska 
619e92ffd9bSMartin Matuska 		/* Test next position */
620e92ffd9bSMartin Matuska 		ref = base + HashTable[LZ4_HASH_VALUE(ip)];
621e92ffd9bSMartin Matuska 		HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
622e92ffd9bSMartin Matuska 		if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) {
623e92ffd9bSMartin Matuska 			token = op++;
624e92ffd9bSMartin Matuska 			*token = 0;
625e92ffd9bSMartin Matuska 			goto _next_match;
626e92ffd9bSMartin Matuska 		}
627e92ffd9bSMartin Matuska 		/* Prepare next loop */
628e92ffd9bSMartin Matuska 		anchor = ip++;
629e92ffd9bSMartin Matuska 		forwardH = LZ4_HASH_VALUE(ip);
630e92ffd9bSMartin Matuska 	}
631e92ffd9bSMartin Matuska 
632e92ffd9bSMartin Matuska 	_last_literals:
633e92ffd9bSMartin Matuska 	/* Encode Last Literals */
634e92ffd9bSMartin Matuska 	{
635e92ffd9bSMartin Matuska 		int lastRun = iend - anchor;
636e92ffd9bSMartin Matuska 		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
637e92ffd9bSMartin Matuska 		    oend)
638e92ffd9bSMartin Matuska 			return (0);
639e92ffd9bSMartin Matuska 		if (lastRun >= (int)RUN_MASK) {
640e92ffd9bSMartin Matuska 			*op++ = (RUN_MASK << ML_BITS);
641e92ffd9bSMartin Matuska 			lastRun -= RUN_MASK;
642e92ffd9bSMartin Matuska 			for (; lastRun > 254; lastRun -= 255) {
643e92ffd9bSMartin Matuska 				*op++ = 255;
644e92ffd9bSMartin Matuska 			}
645e92ffd9bSMartin Matuska 			*op++ = (BYTE)lastRun;
646e92ffd9bSMartin Matuska 		} else
647e92ffd9bSMartin Matuska 			*op++ = (lastRun << ML_BITS);
648e92ffd9bSMartin Matuska 		(void) memcpy(op, anchor, iend - anchor);
649e92ffd9bSMartin Matuska 		op += iend - anchor;
650e92ffd9bSMartin Matuska 	}
651e92ffd9bSMartin Matuska 
652e92ffd9bSMartin Matuska 	/* End */
653e92ffd9bSMartin Matuska 	return (int)(((char *)op) - dest);
654e92ffd9bSMartin Matuska }
655e92ffd9bSMartin Matuska 
656e92ffd9bSMartin Matuska 
657e92ffd9bSMartin Matuska 
658e92ffd9bSMartin Matuska /* Note : this function is valid only if isize < LZ4_64KLIMIT */
659e92ffd9bSMartin Matuska #define	LZ4_64KLIMIT ((1 << 16) + (MFLIMIT - 1))
660e92ffd9bSMartin Matuska #define	HASHLOG64K (HASH_LOG + 1)
661e92ffd9bSMartin Matuska #define	HASH64KTABLESIZE (1U << HASHLOG64K)
662e92ffd9bSMartin Matuska #define	LZ4_HASH64K_FUNCTION(i)	(((i) * 2654435761U) >> ((MINMATCH*8) - \
663e92ffd9bSMartin Matuska 	HASHLOG64K))
664e92ffd9bSMartin Matuska #define	LZ4_HASH64K_VALUE(p)	LZ4_HASH64K_FUNCTION(A32(p))
665e92ffd9bSMartin Matuska 
666e92ffd9bSMartin Matuska static int
667e92ffd9bSMartin Matuska LZ4_compress64kCtx(void *ctx, const char *source, char *dest, int isize,
668e92ffd9bSMartin Matuska     int osize)
669e92ffd9bSMartin Matuska {
670e92ffd9bSMartin Matuska 	struct refTables *srt = (struct refTables *)ctx;
671e92ffd9bSMartin Matuska 	U16 *HashTable = (U16 *) (srt->hashTable);
672e92ffd9bSMartin Matuska 
673e92ffd9bSMartin Matuska 	const BYTE *ip = (BYTE *) source;
674e92ffd9bSMartin Matuska 	const BYTE *anchor = ip;
675e92ffd9bSMartin Matuska 	const BYTE *const base = ip;
676e92ffd9bSMartin Matuska 	const BYTE *const iend = ip + isize;
677e92ffd9bSMartin Matuska 	const BYTE *const oend = (BYTE *) dest + osize;
678e92ffd9bSMartin Matuska 	const BYTE *const mflimit = iend - MFLIMIT;
679e92ffd9bSMartin Matuska #define	matchlimit (iend - LASTLITERALS)
680e92ffd9bSMartin Matuska 
681e92ffd9bSMartin Matuska 	BYTE *op = (BYTE *) dest;
682e92ffd9bSMartin Matuska 
683e92ffd9bSMartin Matuska 	int len, length;
684e92ffd9bSMartin Matuska 	const int skipStrength = SKIPSTRENGTH;
685e92ffd9bSMartin Matuska 	U32 forwardH;
686e92ffd9bSMartin Matuska 
687e92ffd9bSMartin Matuska 	/* Init */
688e92ffd9bSMartin Matuska 	if (isize < MINLENGTH)
689e92ffd9bSMartin Matuska 		goto _last_literals;
690e92ffd9bSMartin Matuska 
691e92ffd9bSMartin Matuska 	/* First Byte */
692e92ffd9bSMartin Matuska 	ip++;
693e92ffd9bSMartin Matuska 	forwardH = LZ4_HASH64K_VALUE(ip);
694e92ffd9bSMartin Matuska 
695e92ffd9bSMartin Matuska 	/* Main Loop */
696e92ffd9bSMartin Matuska 	for (;;) {
697e92ffd9bSMartin Matuska 		int findMatchAttempts = (1U << skipStrength) + 3;
698e92ffd9bSMartin Matuska 		const BYTE *forwardIp = ip;
699e92ffd9bSMartin Matuska 		const BYTE *ref;
700e92ffd9bSMartin Matuska 		BYTE *token;
701e92ffd9bSMartin Matuska 
702e92ffd9bSMartin Matuska 		/* Find a match */
703e92ffd9bSMartin Matuska 		do {
704e92ffd9bSMartin Matuska 			U32 h = forwardH;
705e92ffd9bSMartin Matuska 			int step = findMatchAttempts++ >> skipStrength;
706e92ffd9bSMartin Matuska 			ip = forwardIp;
707e92ffd9bSMartin Matuska 			forwardIp = ip + step;
708e92ffd9bSMartin Matuska 
709e92ffd9bSMartin Matuska 			if (forwardIp > mflimit) {
710e92ffd9bSMartin Matuska 				goto _last_literals;
711e92ffd9bSMartin Matuska 			}
712e92ffd9bSMartin Matuska 
713e92ffd9bSMartin Matuska 			forwardH = LZ4_HASH64K_VALUE(forwardIp);
714e92ffd9bSMartin Matuska 			ref = base + HashTable[h];
715e92ffd9bSMartin Matuska 			HashTable[h] = ip - base;
716e92ffd9bSMartin Matuska 
717e92ffd9bSMartin Matuska 		} while (A32(ref) != A32(ip));
718e92ffd9bSMartin Matuska 
719e92ffd9bSMartin Matuska 		/* Catch up */
720e92ffd9bSMartin Matuska 		while ((ip > anchor) && (ref > (BYTE *) source) &&
721e92ffd9bSMartin Matuska 		    (ip[-1] == ref[-1])) {
722e92ffd9bSMartin Matuska 			ip--;
723e92ffd9bSMartin Matuska 			ref--;
724e92ffd9bSMartin Matuska 		}
725e92ffd9bSMartin Matuska 
726e92ffd9bSMartin Matuska 		/* Encode Literal length */
727e92ffd9bSMartin Matuska 		length = ip - anchor;
728e92ffd9bSMartin Matuska 		token = op++;
729e92ffd9bSMartin Matuska 
730e92ffd9bSMartin Matuska 		/* Check output limit */
731e92ffd9bSMartin Matuska 		if (unlikely(op + length + (2 + 1 + LASTLITERALS) +
732e92ffd9bSMartin Matuska 		    (length >> 8) > oend))
733e92ffd9bSMartin Matuska 			return (0);
734e92ffd9bSMartin Matuska 
735e92ffd9bSMartin Matuska 		if (length >= (int)RUN_MASK) {
736e92ffd9bSMartin Matuska 			*token = (RUN_MASK << ML_BITS);
737e92ffd9bSMartin Matuska 			len = length - RUN_MASK;
738e92ffd9bSMartin Matuska 			for (; len > 254; len -= 255)
739e92ffd9bSMartin Matuska 				*op++ = 255;
740e92ffd9bSMartin Matuska 			*op++ = (BYTE)len;
741e92ffd9bSMartin Matuska 		} else
742e92ffd9bSMartin Matuska 			*token = (length << ML_BITS);
743e92ffd9bSMartin Matuska 
744e92ffd9bSMartin Matuska 		/* Copy Literals */
745e92ffd9bSMartin Matuska 		LZ4_BLINDCOPY(anchor, op, length);
746e92ffd9bSMartin Matuska 
747e92ffd9bSMartin Matuska 		_next_match:
748e92ffd9bSMartin Matuska 		/* Encode Offset */
749e92ffd9bSMartin Matuska 		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);
750e92ffd9bSMartin Matuska 
751e92ffd9bSMartin Matuska 		/* Start Counting */
752e92ffd9bSMartin Matuska 		ip += MINMATCH;
753e92ffd9bSMartin Matuska 		ref += MINMATCH;	/* MinMatch verified */
754e92ffd9bSMartin Matuska 		anchor = ip;
755e92ffd9bSMartin Matuska 		while (ip < matchlimit - (STEPSIZE - 1)) {
756e92ffd9bSMartin Matuska 			UARCH diff = AARCH(ref) ^ AARCH(ip);
757e92ffd9bSMartin Matuska 			if (!diff) {
758e92ffd9bSMartin Matuska 				ip += STEPSIZE;
759e92ffd9bSMartin Matuska 				ref += STEPSIZE;
760e92ffd9bSMartin Matuska 				continue;
761e92ffd9bSMartin Matuska 			}
762e92ffd9bSMartin Matuska 			ip += LZ4_NbCommonBytes(diff);
763e92ffd9bSMartin Matuska 			goto _endCount;
764e92ffd9bSMartin Matuska 		}
765e92ffd9bSMartin Matuska #if LZ4_ARCH64
766e92ffd9bSMartin Matuska 		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
767e92ffd9bSMartin Matuska 			ip += 4;
768e92ffd9bSMartin Matuska 			ref += 4;
769e92ffd9bSMartin Matuska 		}
770e92ffd9bSMartin Matuska #endif
771e92ffd9bSMartin Matuska 		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
772e92ffd9bSMartin Matuska 			ip += 2;
773e92ffd9bSMartin Matuska 			ref += 2;
774e92ffd9bSMartin Matuska 		}
775e92ffd9bSMartin Matuska 		if ((ip < matchlimit) && (*ref == *ip))
776e92ffd9bSMartin Matuska 			ip++;
777e92ffd9bSMartin Matuska 		_endCount:
778e92ffd9bSMartin Matuska 
779e92ffd9bSMartin Matuska 		/* Encode MatchLength */
780e92ffd9bSMartin Matuska 		len = (ip - anchor);
781e92ffd9bSMartin Matuska 		/* Check output limit */
782e92ffd9bSMartin Matuska 		if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend))
783e92ffd9bSMartin Matuska 			return (0);
784e92ffd9bSMartin Matuska 		if (len >= (int)ML_MASK) {
785e92ffd9bSMartin Matuska 			*token += ML_MASK;
786e92ffd9bSMartin Matuska 			len -= ML_MASK;
787e92ffd9bSMartin Matuska 			for (; len > 509; len -= 510) {
788e92ffd9bSMartin Matuska 				*op++ = 255;
789e92ffd9bSMartin Matuska 				*op++ = 255;
790e92ffd9bSMartin Matuska 			}
791e92ffd9bSMartin Matuska 			if (len > 254) {
792e92ffd9bSMartin Matuska 				len -= 255;
793e92ffd9bSMartin Matuska 				*op++ = 255;
794e92ffd9bSMartin Matuska 			}
795e92ffd9bSMartin Matuska 			*op++ = (BYTE)len;
796e92ffd9bSMartin Matuska 		} else
797e92ffd9bSMartin Matuska 			*token += len;
798e92ffd9bSMartin Matuska 
799e92ffd9bSMartin Matuska 		/* Test end of chunk */
800e92ffd9bSMartin Matuska 		if (ip > mflimit) {
801e92ffd9bSMartin Matuska 			anchor = ip;
802e92ffd9bSMartin Matuska 			break;
803e92ffd9bSMartin Matuska 		}
804e92ffd9bSMartin Matuska 		/* Fill table */
805e92ffd9bSMartin Matuska 		HashTable[LZ4_HASH64K_VALUE(ip - 2)] = ip - 2 - base;
806e92ffd9bSMartin Matuska 
807e92ffd9bSMartin Matuska 		/* Test next position */
808e92ffd9bSMartin Matuska 		ref = base + HashTable[LZ4_HASH64K_VALUE(ip)];
809e92ffd9bSMartin Matuska 		HashTable[LZ4_HASH64K_VALUE(ip)] = ip - base;
810e92ffd9bSMartin Matuska 		if (A32(ref) == A32(ip)) {
811e92ffd9bSMartin Matuska 			token = op++;
812e92ffd9bSMartin Matuska 			*token = 0;
813e92ffd9bSMartin Matuska 			goto _next_match;
814e92ffd9bSMartin Matuska 		}
815e92ffd9bSMartin Matuska 		/* Prepare next loop */
816e92ffd9bSMartin Matuska 		anchor = ip++;
817e92ffd9bSMartin Matuska 		forwardH = LZ4_HASH64K_VALUE(ip);
818e92ffd9bSMartin Matuska 	}
819e92ffd9bSMartin Matuska 
820e92ffd9bSMartin Matuska 	_last_literals:
821e92ffd9bSMartin Matuska 	/* Encode Last Literals */
822e92ffd9bSMartin Matuska 	{
823e92ffd9bSMartin Matuska 		int lastRun = iend - anchor;
824e92ffd9bSMartin Matuska 		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
825e92ffd9bSMartin Matuska 		    oend)
826e92ffd9bSMartin Matuska 			return (0);
827e92ffd9bSMartin Matuska 		if (lastRun >= (int)RUN_MASK) {
828e92ffd9bSMartin Matuska 			*op++ = (RUN_MASK << ML_BITS);
829e92ffd9bSMartin Matuska 			lastRun -= RUN_MASK;
830e92ffd9bSMartin Matuska 			for (; lastRun > 254; lastRun -= 255)
831e92ffd9bSMartin Matuska 				*op++ = 255;
832e92ffd9bSMartin Matuska 			*op++ = (BYTE)lastRun;
833e92ffd9bSMartin Matuska 		} else
834e92ffd9bSMartin Matuska 			*op++ = (lastRun << ML_BITS);
835e92ffd9bSMartin Matuska 		(void) memcpy(op, anchor, iend - anchor);
836e92ffd9bSMartin Matuska 		op += iend - anchor;
837e92ffd9bSMartin Matuska 	}
838e92ffd9bSMartin Matuska 
839e92ffd9bSMartin Matuska 	/* End */
840e92ffd9bSMartin Matuska 	return (int)(((char *)op) - dest);
841e92ffd9bSMartin Matuska }
842e92ffd9bSMartin Matuska 
843e92ffd9bSMartin Matuska static int
844e92ffd9bSMartin Matuska real_LZ4_compress(const char *source, char *dest, int isize, int osize)
845e92ffd9bSMartin Matuska {
846e92ffd9bSMartin Matuska 	void *ctx;
847e92ffd9bSMartin Matuska 	int result;
848e92ffd9bSMartin Matuska 
849e92ffd9bSMartin Matuska 	ctx = lz4_alloc(KM_SLEEP);
850e92ffd9bSMartin Matuska 
851e92ffd9bSMartin Matuska 	/*
852e92ffd9bSMartin Matuska 	 * out of kernel memory, gently fall through - this will disable
853e92ffd9bSMartin Matuska 	 * compression in zio_compress_data
854e92ffd9bSMartin Matuska 	 */
855e92ffd9bSMartin Matuska 	if (ctx == NULL)
856e92ffd9bSMartin Matuska 		return (0);
857e92ffd9bSMartin Matuska 
858e92ffd9bSMartin Matuska 	memset(ctx, 0, sizeof (struct refTables));
859e92ffd9bSMartin Matuska 
860e92ffd9bSMartin Matuska 	if (isize < LZ4_64KLIMIT)
861e92ffd9bSMartin Matuska 		result = LZ4_compress64kCtx(ctx, source, dest, isize, osize);
862e92ffd9bSMartin Matuska 	else
863e92ffd9bSMartin Matuska 		result = LZ4_compressCtx(ctx, source, dest, isize, osize);
864e92ffd9bSMartin Matuska 
865e92ffd9bSMartin Matuska 	lz4_free(ctx);
866e92ffd9bSMartin Matuska 	return (result);
867e92ffd9bSMartin Matuska }
868e92ffd9bSMartin Matuska 
869e92ffd9bSMartin Matuska #ifdef __FreeBSD__
870e92ffd9bSMartin Matuska /*
871e92ffd9bSMartin Matuska  * FreeBSD has 4, 8 and 16 KB malloc zones which can be used here.
872e92ffd9bSMartin Matuska  * Should struct refTables get resized this may need to be revisited, hence
873e92ffd9bSMartin Matuska  * compiler-time asserts.
874e92ffd9bSMartin Matuska  */
875e92ffd9bSMartin Matuska _Static_assert(sizeof(struct refTables) <= 16384,
876e92ffd9bSMartin Matuska     "refTables too big for malloc");
877e92ffd9bSMartin Matuska _Static_assert((sizeof(struct refTables) % 4096) == 0,
878e92ffd9bSMartin Matuska     "refTables not a multiple of page size");
879e92ffd9bSMartin Matuska #else
880e92ffd9bSMartin Matuska #define ZFS_LZ4_USE_CACHE
881e92ffd9bSMartin Matuska #endif
882e92ffd9bSMartin Matuska 
883e92ffd9bSMartin Matuska #ifdef ZFS_LZ4_USE_CACHE
884e92ffd9bSMartin Matuska static kmem_cache_t *lz4_cache;
885e92ffd9bSMartin Matuska #endif
886e92ffd9bSMartin Matuska 
887e92ffd9bSMartin Matuska #ifdef ZFS_LZ4_USE_CACHE
888e92ffd9bSMartin Matuska void
889e92ffd9bSMartin Matuska lz4_init(void)
890e92ffd9bSMartin Matuska {
891e92ffd9bSMartin Matuska 	lz4_cache = kmem_cache_create("lz4_cache",
892ce4dcb97SMartin Matuska 	    sizeof (struct refTables), 0, NULL, NULL, NULL, NULL, NULL,
893ce4dcb97SMartin Matuska 	    KMC_RECLAIMABLE);
894e92ffd9bSMartin Matuska }
895e92ffd9bSMartin Matuska 
896e92ffd9bSMartin Matuska void
897e92ffd9bSMartin Matuska lz4_fini(void)
898e92ffd9bSMartin Matuska {
899e92ffd9bSMartin Matuska 	if (lz4_cache) {
900e92ffd9bSMartin Matuska 		kmem_cache_destroy(lz4_cache);
901e92ffd9bSMartin Matuska 		lz4_cache = NULL;
902e92ffd9bSMartin Matuska 	}
903e92ffd9bSMartin Matuska }
904e92ffd9bSMartin Matuska 
905e92ffd9bSMartin Matuska static void *
906e92ffd9bSMartin Matuska lz4_alloc(int flags)
907e92ffd9bSMartin Matuska {
908e92ffd9bSMartin Matuska 	ASSERT(lz4_cache != NULL);
909e92ffd9bSMartin Matuska 	return (kmem_cache_alloc(lz4_cache, flags));
910e92ffd9bSMartin Matuska }
911e92ffd9bSMartin Matuska 
912e92ffd9bSMartin Matuska static void
913e92ffd9bSMartin Matuska lz4_free(void *ctx)
914e92ffd9bSMartin Matuska {
915e92ffd9bSMartin Matuska 	kmem_cache_free(lz4_cache, ctx);
916e92ffd9bSMartin Matuska }
917e92ffd9bSMartin Matuska #else
918e92ffd9bSMartin Matuska void
919e92ffd9bSMartin Matuska lz4_init(void)
920e92ffd9bSMartin Matuska {
921e92ffd9bSMartin Matuska }
922e92ffd9bSMartin Matuska 
923e92ffd9bSMartin Matuska void
924e92ffd9bSMartin Matuska lz4_fini(void)
925e92ffd9bSMartin Matuska {
926e92ffd9bSMartin Matuska }
927e92ffd9bSMartin Matuska 
928e92ffd9bSMartin Matuska static void *
929e92ffd9bSMartin Matuska lz4_alloc(int flags)
930e92ffd9bSMartin Matuska {
931e92ffd9bSMartin Matuska 	return (kmem_alloc(sizeof (struct refTables), flags));
932e92ffd9bSMartin Matuska }
933e92ffd9bSMartin Matuska 
934e92ffd9bSMartin Matuska static void
935e92ffd9bSMartin Matuska lz4_free(void *ctx)
936e92ffd9bSMartin Matuska {
937e92ffd9bSMartin Matuska 	kmem_free(ctx, sizeof (struct refTables));
938e92ffd9bSMartin Matuska }
939e92ffd9bSMartin Matuska #endif
940