1e92ffd9bSMartin Matuska /* 2e92ffd9bSMartin Matuska * LZ4 - Fast LZ compression algorithm 3e92ffd9bSMartin Matuska * Header File 4e92ffd9bSMartin Matuska * Copyright (C) 2011-2013, Yann Collet. 5e92ffd9bSMartin Matuska * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) 6e92ffd9bSMartin Matuska * 7e92ffd9bSMartin Matuska * Redistribution and use in source and binary forms, with or without 8e92ffd9bSMartin Matuska * modification, are permitted provided that the following conditions are 9e92ffd9bSMartin Matuska * met: 10e92ffd9bSMartin Matuska * 11e92ffd9bSMartin Matuska * * Redistributions of source code must retain the above copyright 12e92ffd9bSMartin Matuska * notice, this list of conditions and the following disclaimer. 13e92ffd9bSMartin Matuska * * Redistributions in binary form must reproduce the above 14e92ffd9bSMartin Matuska * copyright notice, this list of conditions and the following disclaimer 15e92ffd9bSMartin Matuska * in the documentation and/or other materials provided with the 16e92ffd9bSMartin Matuska * distribution. 17e92ffd9bSMartin Matuska * 18e92ffd9bSMartin Matuska * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19e92ffd9bSMartin Matuska * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20e92ffd9bSMartin Matuska * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21e92ffd9bSMartin Matuska * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22e92ffd9bSMartin Matuska * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23e92ffd9bSMartin Matuska * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24e92ffd9bSMartin Matuska * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25e92ffd9bSMartin Matuska * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26e92ffd9bSMartin Matuska * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27e92ffd9bSMartin Matuska * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28e92ffd9bSMartin Matuska * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29e92ffd9bSMartin Matuska * 30e92ffd9bSMartin Matuska * You can contact the author at : 31e92ffd9bSMartin Matuska * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html 32e92ffd9bSMartin Matuska * - LZ4 source repository : http://code.google.com/p/lz4/ 33e92ffd9bSMartin Matuska */ 34e92ffd9bSMartin Matuska 35e92ffd9bSMartin Matuska /* 36e92ffd9bSMartin Matuska * N.B. - This file seems to be based on LZ4 r85, dated Dec 10, 2012 37e92ffd9bSMartin Matuska */ 38e92ffd9bSMartin Matuska 39e92ffd9bSMartin Matuska #include <sys/zfs_context.h> 40e92ffd9bSMartin Matuska #include <sys/zio_compress.h> 41e92ffd9bSMartin Matuska 42e92ffd9bSMartin Matuska static int real_LZ4_compress(const char *source, char *dest, int isize, 43e92ffd9bSMartin Matuska int osize); 44e92ffd9bSMartin Matuska static int LZ4_compressCtx(void *ctx, const char *source, char *dest, 45e92ffd9bSMartin Matuska int isize, int osize); 46e92ffd9bSMartin Matuska static int LZ4_compress64kCtx(void *ctx, const char *source, char *dest, 47e92ffd9bSMartin Matuska int isize, int osize); 48e92ffd9bSMartin Matuska 49e92ffd9bSMartin Matuska /* See lz4.c */ 50e92ffd9bSMartin Matuska int LZ4_uncompress_unknownOutputSize(const char *source, char *dest, 51e92ffd9bSMartin Matuska int isize, int maxOutputSize); 52e92ffd9bSMartin Matuska 53e92ffd9bSMartin Matuska static void *lz4_alloc(int flags); 54e92ffd9bSMartin Matuska static void lz4_free(void *ctx); 55e92ffd9bSMartin Matuska 56*e2df9bb4SMartin Matuska static size_t 57*e2df9bb4SMartin Matuska zfs_lz4_compress_buf(void *s_start, void *d_start, size_t s_len, 58e92ffd9bSMartin Matuska size_t d_len, int n) 59e92ffd9bSMartin Matuska { 60e92ffd9bSMartin Matuska (void) n; 61e92ffd9bSMartin Matuska uint32_t bufsiz; 62e92ffd9bSMartin Matuska char *dest = d_start; 63e92ffd9bSMartin Matuska 64e92ffd9bSMartin Matuska ASSERT(d_len >= sizeof (bufsiz)); 65e92ffd9bSMartin Matuska 66e92ffd9bSMartin Matuska bufsiz = real_LZ4_compress(s_start, &dest[sizeof (bufsiz)], s_len, 67e92ffd9bSMartin Matuska d_len - sizeof (bufsiz)); 68e92ffd9bSMartin Matuska 69e92ffd9bSMartin Matuska /* Signal an error if the compression routine returned zero. */ 70e92ffd9bSMartin Matuska if (bufsiz == 0) 71e92ffd9bSMartin Matuska return (s_len); 72e92ffd9bSMartin Matuska 73e92ffd9bSMartin Matuska /* 74e92ffd9bSMartin Matuska * The exact compressed size is needed by the decompression routine, 75e92ffd9bSMartin Matuska * so it is stored at the start of the buffer. Note that this may be 76e92ffd9bSMartin Matuska * less than the compressed block size, which is rounded up to a 77e92ffd9bSMartin Matuska * multiple of 1<<ashift. 78e92ffd9bSMartin Matuska */ 79e92ffd9bSMartin Matuska *(uint32_t *)dest = BE_32(bufsiz); 80e92ffd9bSMartin Matuska 81e92ffd9bSMartin Matuska return (bufsiz + sizeof (bufsiz)); 82e92ffd9bSMartin Matuska } 83e92ffd9bSMartin Matuska 84*e2df9bb4SMartin Matuska static int 85*e2df9bb4SMartin Matuska zfs_lz4_decompress_buf(void *s_start, void *d_start, size_t s_len, 86e92ffd9bSMartin Matuska size_t d_len, int n) 87e92ffd9bSMartin Matuska { 88e92ffd9bSMartin Matuska (void) n; 89e92ffd9bSMartin Matuska const char *src = s_start; 90e92ffd9bSMartin Matuska uint32_t bufsiz = BE_IN32(src); 91e92ffd9bSMartin Matuska 92e92ffd9bSMartin Matuska /* invalid compressed buffer size encoded at start */ 93e92ffd9bSMartin Matuska if (bufsiz + sizeof (bufsiz) > s_len) 94e92ffd9bSMartin Matuska return (1); 95e92ffd9bSMartin Matuska 96e92ffd9bSMartin Matuska /* 97e92ffd9bSMartin Matuska * Returns 0 on success (decompression function returned non-negative) 98e92ffd9bSMartin Matuska * and non-zero on failure (decompression function returned negative). 99e92ffd9bSMartin Matuska */ 100e92ffd9bSMartin Matuska return (LZ4_uncompress_unknownOutputSize(&src[sizeof (bufsiz)], 101e92ffd9bSMartin Matuska d_start, bufsiz, d_len) < 0); 102e92ffd9bSMartin Matuska } 103e92ffd9bSMartin Matuska 104*e2df9bb4SMartin Matuska ZFS_COMPRESS_WRAP_DECL(zfs_lz4_compress) 105*e2df9bb4SMartin Matuska ZFS_DECOMPRESS_WRAP_DECL(zfs_lz4_decompress) 106*e2df9bb4SMartin Matuska 107e92ffd9bSMartin Matuska /* 108e92ffd9bSMartin Matuska * LZ4 API Description: 109e92ffd9bSMartin Matuska * 110e92ffd9bSMartin Matuska * Simple Functions: 111e92ffd9bSMartin Matuska * real_LZ4_compress() : 112e92ffd9bSMartin Matuska * isize : is the input size. Max supported value is ~1.9GB 113e92ffd9bSMartin Matuska * return : the number of bytes written in buffer dest 114e92ffd9bSMartin Matuska * or 0 if the compression fails (if LZ4_COMPRESSMIN is set). 115e92ffd9bSMartin Matuska * note : destination buffer must be already allocated. 116e92ffd9bSMartin Matuska * destination buffer must be sized to handle worst cases 117e92ffd9bSMartin Matuska * situations (input data not compressible) worst case size 118e92ffd9bSMartin Matuska * evaluation is provided by function LZ4_compressBound(). 119e92ffd9bSMartin Matuska * 120e92ffd9bSMartin Matuska * real_LZ4_uncompress() : 121e92ffd9bSMartin Matuska * osize : is the output size, therefore the original size 122e92ffd9bSMartin Matuska * return : the number of bytes read in the source buffer. 123e92ffd9bSMartin Matuska * If the source stream is malformed, the function will stop 124e92ffd9bSMartin Matuska * decoding and return a negative result, indicating the byte 125e92ffd9bSMartin Matuska * position of the faulty instruction. This function never 126e92ffd9bSMartin Matuska * writes beyond dest + osize, and is therefore protected 127e92ffd9bSMartin Matuska * against malicious data packets. 128e92ffd9bSMartin Matuska * note : destination buffer must be already allocated 129e92ffd9bSMartin Matuska * note : real_LZ4_uncompress() is not used in ZFS so its code 130e92ffd9bSMartin Matuska * is not present here. 131e92ffd9bSMartin Matuska * 132e92ffd9bSMartin Matuska * Advanced Functions 133e92ffd9bSMartin Matuska * 134e92ffd9bSMartin Matuska * LZ4_compressBound() : 135e92ffd9bSMartin Matuska * Provides the maximum size that LZ4 may output in a "worst case" 136e92ffd9bSMartin Matuska * scenario (input data not compressible) primarily useful for memory 137e92ffd9bSMartin Matuska * allocation of output buffer. 138e92ffd9bSMartin Matuska * 139e92ffd9bSMartin Matuska * isize : is the input size. Max supported value is ~1.9GB 140e92ffd9bSMartin Matuska * return : maximum output size in a "worst case" scenario 141e92ffd9bSMartin Matuska * note : this function is limited by "int" range (2^31-1) 142e92ffd9bSMartin Matuska * 143e92ffd9bSMartin Matuska * LZ4_uncompress_unknownOutputSize() : 144e92ffd9bSMartin Matuska * isize : is the input size, therefore the compressed size 145e92ffd9bSMartin Matuska * maxOutputSize : is the size of the destination buffer (which must be 146e92ffd9bSMartin Matuska * already allocated) 147e92ffd9bSMartin Matuska * return : the number of bytes decoded in the destination buffer 148e92ffd9bSMartin Matuska * (necessarily <= maxOutputSize). If the source stream is 149e92ffd9bSMartin Matuska * malformed, the function will stop decoding and return a 150e92ffd9bSMartin Matuska * negative result, indicating the byte position of the faulty 151e92ffd9bSMartin Matuska * instruction. This function never writes beyond dest + 152e92ffd9bSMartin Matuska * maxOutputSize, and is therefore protected against malicious 153e92ffd9bSMartin Matuska * data packets. 154e92ffd9bSMartin Matuska * note : Destination buffer must be already allocated. 155e92ffd9bSMartin Matuska * This version is slightly slower than real_LZ4_uncompress() 156e92ffd9bSMartin Matuska * 157e92ffd9bSMartin Matuska * LZ4_compressCtx() : 158e92ffd9bSMartin Matuska * This function explicitly handles the CTX memory structure. 159e92ffd9bSMartin Matuska * 160e92ffd9bSMartin Matuska * ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated 161e92ffd9bSMartin Matuska * by the caller (either on the stack or using kmem_cache_alloc). Passing 162e92ffd9bSMartin Matuska * NULL isn't valid. 163e92ffd9bSMartin Matuska * 164e92ffd9bSMartin Matuska * LZ4_compress64kCtx() : 165e92ffd9bSMartin Matuska * Same as LZ4_compressCtx(), but specific to small inputs (<64KB). 166e92ffd9bSMartin Matuska * isize *Must* be <64KB, otherwise the output will be corrupted. 167e92ffd9bSMartin Matuska * 168e92ffd9bSMartin Matuska * ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated 169e92ffd9bSMartin Matuska * by the caller (either on the stack or using kmem_cache_alloc). Passing 170e92ffd9bSMartin Matuska * NULL isn't valid. 171e92ffd9bSMartin Matuska */ 172e92ffd9bSMartin Matuska 173e92ffd9bSMartin Matuska /* 174e92ffd9bSMartin Matuska * Tuning parameters 175e92ffd9bSMartin Matuska */ 176e92ffd9bSMartin Matuska 177e92ffd9bSMartin Matuska /* 178e92ffd9bSMartin Matuska * COMPRESSIONLEVEL: Increasing this value improves compression ratio 179e92ffd9bSMartin Matuska * Lowering this value reduces memory usage. Reduced memory usage 180e92ffd9bSMartin Matuska * typically improves speed, due to cache effect (ex: L1 32KB for Intel, 181e92ffd9bSMartin Matuska * L1 64KB for AMD). Memory usage formula : N->2^(N+2) Bytes 182e92ffd9bSMartin Matuska * (examples : 12 -> 16KB ; 17 -> 512KB) 183e92ffd9bSMartin Matuska */ 184e92ffd9bSMartin Matuska #define COMPRESSIONLEVEL 12 185e92ffd9bSMartin Matuska 186e92ffd9bSMartin Matuska /* 187e92ffd9bSMartin Matuska * NOTCOMPRESSIBLE_CONFIRMATION: Decreasing this value will make the 188e92ffd9bSMartin Matuska * algorithm skip faster data segments considered "incompressible". 189e92ffd9bSMartin Matuska * This may decrease compression ratio dramatically, but will be 190e92ffd9bSMartin Matuska * faster on incompressible data. Increasing this value will make 191e92ffd9bSMartin Matuska * the algorithm search more before declaring a segment "incompressible". 192e92ffd9bSMartin Matuska * This could improve compression a bit, but will be slower on 193e92ffd9bSMartin Matuska * incompressible data. The default value (6) is recommended. 194e92ffd9bSMartin Matuska */ 195e92ffd9bSMartin Matuska #define NOTCOMPRESSIBLE_CONFIRMATION 6 196e92ffd9bSMartin Matuska 197e92ffd9bSMartin Matuska /* 198e92ffd9bSMartin Matuska * BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE: This will provide a boost to 199e92ffd9bSMartin Matuska * performance for big endian cpu, but the resulting compressed stream 200e92ffd9bSMartin Matuska * will be incompatible with little-endian CPU. You can set this option 201e92ffd9bSMartin Matuska * to 1 in situations where data will stay within closed environment. 202e92ffd9bSMartin Matuska * This option is useless on Little_Endian CPU (such as x86). 203e92ffd9bSMartin Matuska */ 204e92ffd9bSMartin Matuska /* #define BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 */ 205e92ffd9bSMartin Matuska 206e92ffd9bSMartin Matuska /* 207e92ffd9bSMartin Matuska * CPU Feature Detection 208e92ffd9bSMartin Matuska */ 209e92ffd9bSMartin Matuska 210e92ffd9bSMartin Matuska /* 32 or 64 bits ? */ 211e92ffd9bSMartin Matuska #if defined(_LP64) 212e92ffd9bSMartin Matuska #define LZ4_ARCH64 1 213e92ffd9bSMartin Matuska #else 214e92ffd9bSMartin Matuska #define LZ4_ARCH64 0 215e92ffd9bSMartin Matuska #endif 216e92ffd9bSMartin Matuska 217e92ffd9bSMartin Matuska /* 218e92ffd9bSMartin Matuska * Little Endian or Big Endian? 219e92ffd9bSMartin Matuska * Note: overwrite the below #define if you know your architecture endianness. 220e92ffd9bSMartin Matuska */ 221e92ffd9bSMartin Matuska #if defined(_ZFS_BIG_ENDIAN) 222e92ffd9bSMartin Matuska #define LZ4_BIG_ENDIAN 1 223e92ffd9bSMartin Matuska #else 224e92ffd9bSMartin Matuska /* 225e92ffd9bSMartin Matuska * Little Endian assumed. PDP Endian and other very rare endian format 226e92ffd9bSMartin Matuska * are unsupported. 227e92ffd9bSMartin Matuska */ 228e92ffd9bSMartin Matuska #undef LZ4_BIG_ENDIAN 229e92ffd9bSMartin Matuska #endif 230e92ffd9bSMartin Matuska 231e92ffd9bSMartin Matuska /* 232e92ffd9bSMartin Matuska * Unaligned memory access is automatically enabled for "common" CPU, 233e92ffd9bSMartin Matuska * such as x86. For others CPU, the compiler will be more cautious, and 234e92ffd9bSMartin Matuska * insert extra code to ensure aligned access is respected. If you know 235e92ffd9bSMartin Matuska * your target CPU supports unaligned memory access, you may want to 236e92ffd9bSMartin Matuska * force this option manually to improve performance 237e92ffd9bSMartin Matuska */ 238e92ffd9bSMartin Matuska #if defined(__ARM_FEATURE_UNALIGNED) 239e92ffd9bSMartin Matuska #define LZ4_FORCE_UNALIGNED_ACCESS 1 240e92ffd9bSMartin Matuska #endif 241e92ffd9bSMartin Matuska 242e92ffd9bSMartin Matuska /* 243e92ffd9bSMartin Matuska * Illumos : we can't use GCC's __builtin_ctz family of builtins in the 244e92ffd9bSMartin Matuska * kernel 245e92ffd9bSMartin Matuska * Linux : we can use GCC's __builtin_ctz family of builtins in the 246e92ffd9bSMartin Matuska * kernel 247e92ffd9bSMartin Matuska */ 248e92ffd9bSMartin Matuska #undef LZ4_FORCE_SW_BITCOUNT 249e92ffd9bSMartin Matuska #if defined(__sparc) 250e92ffd9bSMartin Matuska #define LZ4_FORCE_SW_BITCOUNT 251e92ffd9bSMartin Matuska #endif 252e92ffd9bSMartin Matuska 253e92ffd9bSMartin Matuska /* 254e92ffd9bSMartin Matuska * Compiler Options 255e92ffd9bSMartin Matuska */ 256e92ffd9bSMartin Matuska /* Disable restrict */ 257e92ffd9bSMartin Matuska #define restrict 258e92ffd9bSMartin Matuska 259e92ffd9bSMartin Matuska /* 260e92ffd9bSMartin Matuska * Linux : GCC_VERSION is defined as of 3.9-rc1, so undefine it. 261e92ffd9bSMartin Matuska * torvalds/linux@3f3f8d2f48acfd8ed3b8e6b7377935da57b27b16 262e92ffd9bSMartin Matuska */ 263e92ffd9bSMartin Matuska #ifdef GCC_VERSION 264e92ffd9bSMartin Matuska #undef GCC_VERSION 265e92ffd9bSMartin Matuska #endif 266e92ffd9bSMartin Matuska 267e92ffd9bSMartin Matuska #define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) 268e92ffd9bSMartin Matuska 269e92ffd9bSMartin Matuska #if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__) 270e92ffd9bSMartin Matuska #define expect(expr, value) (__builtin_expect((expr), (value))) 271e92ffd9bSMartin Matuska #else 272e92ffd9bSMartin Matuska #define expect(expr, value) (expr) 273e92ffd9bSMartin Matuska #endif 274e92ffd9bSMartin Matuska 275e92ffd9bSMartin Matuska #ifndef likely 276e92ffd9bSMartin Matuska #define likely(expr) expect((expr) != 0, 1) 277e92ffd9bSMartin Matuska #endif 278e92ffd9bSMartin Matuska 279e92ffd9bSMartin Matuska #ifndef unlikely 280e92ffd9bSMartin Matuska #define unlikely(expr) expect((expr) != 0, 0) 281e92ffd9bSMartin Matuska #endif 282e92ffd9bSMartin Matuska 283e92ffd9bSMartin Matuska #define lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | \ 284e92ffd9bSMartin Matuska (((x) & 0xffu) << 8))) 285e92ffd9bSMartin Matuska 286e92ffd9bSMartin Matuska /* Basic types */ 287e92ffd9bSMartin Matuska #define BYTE uint8_t 288e92ffd9bSMartin Matuska #define U16 uint16_t 289e92ffd9bSMartin Matuska #define U32 uint32_t 290e92ffd9bSMartin Matuska #define S32 int32_t 291e92ffd9bSMartin Matuska #define U64 uint64_t 292e92ffd9bSMartin Matuska 293e92ffd9bSMartin Matuska #ifndef LZ4_FORCE_UNALIGNED_ACCESS 294e92ffd9bSMartin Matuska #pragma pack(1) 295e92ffd9bSMartin Matuska #endif 296e92ffd9bSMartin Matuska 297e92ffd9bSMartin Matuska typedef struct _U16_S { 298e92ffd9bSMartin Matuska U16 v; 299e92ffd9bSMartin Matuska } U16_S; 300e92ffd9bSMartin Matuska typedef struct _U32_S { 301e92ffd9bSMartin Matuska U32 v; 302e92ffd9bSMartin Matuska } U32_S; 303e92ffd9bSMartin Matuska typedef struct _U64_S { 304e92ffd9bSMartin Matuska U64 v; 305e92ffd9bSMartin Matuska } U64_S; 306e92ffd9bSMartin Matuska 307e92ffd9bSMartin Matuska #ifndef LZ4_FORCE_UNALIGNED_ACCESS 308e92ffd9bSMartin Matuska #pragma pack() 309e92ffd9bSMartin Matuska #endif 310e92ffd9bSMartin Matuska 311e92ffd9bSMartin Matuska #define A64(x) (((U64_S *)(x))->v) 312e92ffd9bSMartin Matuska #define A32(x) (((U32_S *)(x))->v) 313e92ffd9bSMartin Matuska #define A16(x) (((U16_S *)(x))->v) 314e92ffd9bSMartin Matuska 315e92ffd9bSMartin Matuska /* 316e92ffd9bSMartin Matuska * Constants 317e92ffd9bSMartin Matuska */ 318e92ffd9bSMartin Matuska #define MINMATCH 4 319e92ffd9bSMartin Matuska 320e92ffd9bSMartin Matuska #define HASH_LOG COMPRESSIONLEVEL 321e92ffd9bSMartin Matuska #define HASHTABLESIZE (1 << HASH_LOG) 322e92ffd9bSMartin Matuska #define HASH_MASK (HASHTABLESIZE - 1) 323e92ffd9bSMartin Matuska 324e92ffd9bSMartin Matuska #define SKIPSTRENGTH (NOTCOMPRESSIBLE_CONFIRMATION > 2 ? \ 325e92ffd9bSMartin Matuska NOTCOMPRESSIBLE_CONFIRMATION : 2) 326e92ffd9bSMartin Matuska 327e92ffd9bSMartin Matuska #define COPYLENGTH 8 328e92ffd9bSMartin Matuska #define LASTLITERALS 5 329e92ffd9bSMartin Matuska #define MFLIMIT (COPYLENGTH + MINMATCH) 330e92ffd9bSMartin Matuska #define MINLENGTH (MFLIMIT + 1) 331e92ffd9bSMartin Matuska 332e92ffd9bSMartin Matuska #define MAXD_LOG 16 333e92ffd9bSMartin Matuska #define MAX_DISTANCE ((1 << MAXD_LOG) - 1) 334e92ffd9bSMartin Matuska 335e92ffd9bSMartin Matuska #define ML_BITS 4 336e92ffd9bSMartin Matuska #define ML_MASK ((1U<<ML_BITS)-1) 337e92ffd9bSMartin Matuska #define RUN_BITS (8-ML_BITS) 338e92ffd9bSMartin Matuska #define RUN_MASK ((1U<<RUN_BITS)-1) 339e92ffd9bSMartin Matuska 340e92ffd9bSMartin Matuska 341e92ffd9bSMartin Matuska /* 342e92ffd9bSMartin Matuska * Architecture-specific macros 343e92ffd9bSMartin Matuska */ 344e92ffd9bSMartin Matuska #if LZ4_ARCH64 345e92ffd9bSMartin Matuska #define STEPSIZE 8 346e92ffd9bSMartin Matuska #define UARCH U64 347e92ffd9bSMartin Matuska #define AARCH A64 348e92ffd9bSMartin Matuska #define LZ4_COPYSTEP(s, d) A64(d) = A64(s); d += 8; s += 8; 349e92ffd9bSMartin Matuska #define LZ4_COPYPACKET(s, d) LZ4_COPYSTEP(s, d) 350e92ffd9bSMartin Matuska #define LZ4_SECURECOPY(s, d, e) if (d < e) LZ4_WILDCOPY(s, d, e) 351e92ffd9bSMartin Matuska #define HTYPE U32 352e92ffd9bSMartin Matuska #define INITBASE(base) const BYTE* const base = ip 353e92ffd9bSMartin Matuska #else /* !LZ4_ARCH64 */ 354e92ffd9bSMartin Matuska #define STEPSIZE 4 355e92ffd9bSMartin Matuska #define UARCH U32 356e92ffd9bSMartin Matuska #define AARCH A32 357e92ffd9bSMartin Matuska #define LZ4_COPYSTEP(s, d) A32(d) = A32(s); d += 4; s += 4; 358e92ffd9bSMartin Matuska #define LZ4_COPYPACKET(s, d) LZ4_COPYSTEP(s, d); LZ4_COPYSTEP(s, d); 359e92ffd9bSMartin Matuska #define LZ4_SECURECOPY LZ4_WILDCOPY 360e92ffd9bSMartin Matuska #define HTYPE const BYTE * 361e92ffd9bSMartin Matuska #define INITBASE(base) const int base = 0 362e92ffd9bSMartin Matuska #endif /* !LZ4_ARCH64 */ 363e92ffd9bSMartin Matuska 364e92ffd9bSMartin Matuska #if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE)) 365e92ffd9bSMartin Matuska #define LZ4_READ_LITTLEENDIAN_16(d, s, p) \ 366e92ffd9bSMartin Matuska { U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; } 367e92ffd9bSMartin Matuska #define LZ4_WRITE_LITTLEENDIAN_16(p, i) \ 368e92ffd9bSMartin Matuska { U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p += 2; } 369e92ffd9bSMartin Matuska #else 370e92ffd9bSMartin Matuska #define LZ4_READ_LITTLEENDIAN_16(d, s, p) { d = (s) - A16(p); } 371e92ffd9bSMartin Matuska #define LZ4_WRITE_LITTLEENDIAN_16(p, v) { A16(p) = v; p += 2; } 372e92ffd9bSMartin Matuska #endif 373e92ffd9bSMartin Matuska 374e92ffd9bSMartin Matuska 375e92ffd9bSMartin Matuska /* Local structures */ 376e92ffd9bSMartin Matuska struct refTables { 377e92ffd9bSMartin Matuska HTYPE hashTable[HASHTABLESIZE]; 378e92ffd9bSMartin Matuska }; 379e92ffd9bSMartin Matuska 380e92ffd9bSMartin Matuska 381e92ffd9bSMartin Matuska /* Macros */ 382e92ffd9bSMartin Matuska #define LZ4_HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH * 8) - \ 383e92ffd9bSMartin Matuska HASH_LOG)) 384e92ffd9bSMartin Matuska #define LZ4_HASH_VALUE(p) LZ4_HASH_FUNCTION(A32(p)) 385e92ffd9bSMartin Matuska #define LZ4_WILDCOPY(s, d, e) do { LZ4_COPYPACKET(s, d) } while (d < e); 386e92ffd9bSMartin Matuska #define LZ4_BLINDCOPY(s, d, l) { BYTE* e = (d) + l; LZ4_WILDCOPY(s, d, e); \ 387e92ffd9bSMartin Matuska d = e; } 388e92ffd9bSMartin Matuska 389e92ffd9bSMartin Matuska 390e92ffd9bSMartin Matuska /* Private functions */ 391e92ffd9bSMartin Matuska #if LZ4_ARCH64 392e92ffd9bSMartin Matuska 393e92ffd9bSMartin Matuska static inline int 394e92ffd9bSMartin Matuska LZ4_NbCommonBytes(register U64 val) 395e92ffd9bSMartin Matuska { 396e92ffd9bSMartin Matuska #if defined(LZ4_BIG_ENDIAN) 397e92ffd9bSMartin Matuska #if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \ 398e92ffd9bSMartin Matuska !defined(LZ4_FORCE_SW_BITCOUNT) 399e92ffd9bSMartin Matuska return (__builtin_clzll(val) >> 3); 400e92ffd9bSMartin Matuska #else 401e92ffd9bSMartin Matuska int r; 402e92ffd9bSMartin Matuska if (!(val >> 32)) { 403e92ffd9bSMartin Matuska r = 4; 404e92ffd9bSMartin Matuska } else { 405e92ffd9bSMartin Matuska r = 0; 406e92ffd9bSMartin Matuska val >>= 32; 407e92ffd9bSMartin Matuska } 408e92ffd9bSMartin Matuska if (!(val >> 16)) { 409e92ffd9bSMartin Matuska r += 2; 410e92ffd9bSMartin Matuska val >>= 8; 411e92ffd9bSMartin Matuska } else { 412e92ffd9bSMartin Matuska val >>= 24; 413e92ffd9bSMartin Matuska } 414e92ffd9bSMartin Matuska r += (!val); 415e92ffd9bSMartin Matuska return (r); 416e92ffd9bSMartin Matuska #endif 417e92ffd9bSMartin Matuska #else 418e92ffd9bSMartin Matuska #if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \ 419e92ffd9bSMartin Matuska !defined(LZ4_FORCE_SW_BITCOUNT) 420e92ffd9bSMartin Matuska return (__builtin_ctzll(val) >> 3); 421e92ffd9bSMartin Matuska #else 422e92ffd9bSMartin Matuska static const int DeBruijnBytePos[64] = 423e92ffd9bSMartin Matuska { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 424e92ffd9bSMartin Matuska 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 425e92ffd9bSMartin Matuska 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 426e92ffd9bSMartin Matuska 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 427e92ffd9bSMartin Matuska }; 428e92ffd9bSMartin Matuska return DeBruijnBytePos[((U64) ((val & -val) * 0x0218A392CDABBD3F)) >> 429e92ffd9bSMartin Matuska 58]; 430e92ffd9bSMartin Matuska #endif 431e92ffd9bSMartin Matuska #endif 432e92ffd9bSMartin Matuska } 433e92ffd9bSMartin Matuska 434e92ffd9bSMartin Matuska #else 435e92ffd9bSMartin Matuska 436e92ffd9bSMartin Matuska static inline int 437e92ffd9bSMartin Matuska LZ4_NbCommonBytes(register U32 val) 438e92ffd9bSMartin Matuska { 439e92ffd9bSMartin Matuska #if defined(LZ4_BIG_ENDIAN) 440e92ffd9bSMartin Matuska #if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \ 441e92ffd9bSMartin Matuska !defined(LZ4_FORCE_SW_BITCOUNT) 442e92ffd9bSMartin Matuska return (__builtin_clz(val) >> 3); 443e92ffd9bSMartin Matuska #else 444e92ffd9bSMartin Matuska int r; 445e92ffd9bSMartin Matuska if (!(val >> 16)) { 446e92ffd9bSMartin Matuska r = 2; 447e92ffd9bSMartin Matuska val >>= 8; 448e92ffd9bSMartin Matuska } else { 449e92ffd9bSMartin Matuska r = 0; 450e92ffd9bSMartin Matuska val >>= 24; 451e92ffd9bSMartin Matuska } 452e92ffd9bSMartin Matuska r += (!val); 453e92ffd9bSMartin Matuska return (r); 454e92ffd9bSMartin Matuska #endif 455e92ffd9bSMartin Matuska #else 456e92ffd9bSMartin Matuska #if defined(__GNUC__) && (GCC_VERSION >= 304) && \ 457e92ffd9bSMartin Matuska !defined(LZ4_FORCE_SW_BITCOUNT) 458e92ffd9bSMartin Matuska return (__builtin_ctz(val) >> 3); 459e92ffd9bSMartin Matuska #else 460e92ffd9bSMartin Matuska static const int DeBruijnBytePos[32] = { 461e92ffd9bSMartin Matuska 0, 0, 3, 0, 3, 1, 3, 0, 462e92ffd9bSMartin Matuska 3, 2, 2, 1, 3, 2, 0, 1, 463e92ffd9bSMartin Matuska 3, 3, 1, 2, 2, 2, 2, 0, 464e92ffd9bSMartin Matuska 3, 1, 2, 0, 1, 0, 1, 1 465e92ffd9bSMartin Matuska }; 466e92ffd9bSMartin Matuska return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >> 467e92ffd9bSMartin Matuska 27]; 468e92ffd9bSMartin Matuska #endif 469e92ffd9bSMartin Matuska #endif 470e92ffd9bSMartin Matuska } 471e92ffd9bSMartin Matuska 472e92ffd9bSMartin Matuska #endif 473e92ffd9bSMartin Matuska 474e92ffd9bSMartin Matuska /* Compression functions */ 475e92ffd9bSMartin Matuska 476e92ffd9bSMartin Matuska static int 477e92ffd9bSMartin Matuska LZ4_compressCtx(void *ctx, const char *source, char *dest, int isize, 478e92ffd9bSMartin Matuska int osize) 479e92ffd9bSMartin Matuska { 480e92ffd9bSMartin Matuska struct refTables *srt = (struct refTables *)ctx; 481e92ffd9bSMartin Matuska HTYPE *HashTable = (HTYPE *) (srt->hashTable); 482e92ffd9bSMartin Matuska 483e92ffd9bSMartin Matuska const BYTE *ip = (BYTE *) source; 484e92ffd9bSMartin Matuska INITBASE(base); 485e92ffd9bSMartin Matuska const BYTE *anchor = ip; 486e92ffd9bSMartin Matuska const BYTE *const iend = ip + isize; 487e92ffd9bSMartin Matuska const BYTE *const oend = (BYTE *) dest + osize; 488e92ffd9bSMartin Matuska const BYTE *const mflimit = iend - MFLIMIT; 489e92ffd9bSMartin Matuska #define matchlimit (iend - LASTLITERALS) 490e92ffd9bSMartin Matuska 491e92ffd9bSMartin Matuska BYTE *op = (BYTE *) dest; 492e92ffd9bSMartin Matuska 493e92ffd9bSMartin Matuska int len, length; 494e92ffd9bSMartin Matuska const int skipStrength = SKIPSTRENGTH; 495e92ffd9bSMartin Matuska U32 forwardH; 496e92ffd9bSMartin Matuska 497e92ffd9bSMartin Matuska 498e92ffd9bSMartin Matuska /* Init */ 499e92ffd9bSMartin Matuska if (isize < MINLENGTH) 500e92ffd9bSMartin Matuska goto _last_literals; 501e92ffd9bSMartin Matuska 502e92ffd9bSMartin Matuska /* First Byte */ 503e92ffd9bSMartin Matuska HashTable[LZ4_HASH_VALUE(ip)] = ip - base; 504e92ffd9bSMartin Matuska ip++; 505e92ffd9bSMartin Matuska forwardH = LZ4_HASH_VALUE(ip); 506e92ffd9bSMartin Matuska 507e92ffd9bSMartin Matuska /* Main Loop */ 508e92ffd9bSMartin Matuska for (;;) { 509e92ffd9bSMartin Matuska int findMatchAttempts = (1U << skipStrength) + 3; 510e92ffd9bSMartin Matuska const BYTE *forwardIp = ip; 511e92ffd9bSMartin Matuska const BYTE *ref; 512e92ffd9bSMartin Matuska BYTE *token; 513e92ffd9bSMartin Matuska 514e92ffd9bSMartin Matuska /* Find a match */ 515e92ffd9bSMartin Matuska do { 516e92ffd9bSMartin Matuska U32 h = forwardH; 517e92ffd9bSMartin Matuska int step = findMatchAttempts++ >> skipStrength; 518e92ffd9bSMartin Matuska ip = forwardIp; 519e92ffd9bSMartin Matuska forwardIp = ip + step; 520e92ffd9bSMartin Matuska 521e92ffd9bSMartin Matuska if (unlikely(forwardIp > mflimit)) { 522e92ffd9bSMartin Matuska goto _last_literals; 523e92ffd9bSMartin Matuska } 524e92ffd9bSMartin Matuska 525e92ffd9bSMartin Matuska forwardH = LZ4_HASH_VALUE(forwardIp); 526e92ffd9bSMartin Matuska ref = base + HashTable[h]; 527e92ffd9bSMartin Matuska HashTable[h] = ip - base; 528e92ffd9bSMartin Matuska 529e92ffd9bSMartin Matuska } while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip))); 530e92ffd9bSMartin Matuska 531e92ffd9bSMartin Matuska /* Catch up */ 532e92ffd9bSMartin Matuska while ((ip > anchor) && (ref > (BYTE *) source) && 533e92ffd9bSMartin Matuska unlikely(ip[-1] == ref[-1])) { 534e92ffd9bSMartin Matuska ip--; 535e92ffd9bSMartin Matuska ref--; 536e92ffd9bSMartin Matuska } 537e92ffd9bSMartin Matuska 538e92ffd9bSMartin Matuska /* Encode Literal length */ 539e92ffd9bSMartin Matuska length = ip - anchor; 540e92ffd9bSMartin Matuska token = op++; 541e92ffd9bSMartin Matuska 542e92ffd9bSMartin Matuska /* Check output limit */ 543e92ffd9bSMartin Matuska if (unlikely(op + length + (2 + 1 + LASTLITERALS) + 544e92ffd9bSMartin Matuska (length >> 8) > oend)) 545e92ffd9bSMartin Matuska return (0); 546e92ffd9bSMartin Matuska 547e92ffd9bSMartin Matuska if (length >= (int)RUN_MASK) { 548e92ffd9bSMartin Matuska *token = (RUN_MASK << ML_BITS); 549e92ffd9bSMartin Matuska len = length - RUN_MASK; 550e92ffd9bSMartin Matuska for (; len > 254; len -= 255) 551e92ffd9bSMartin Matuska *op++ = 255; 552e92ffd9bSMartin Matuska *op++ = (BYTE)len; 553e92ffd9bSMartin Matuska } else 554e92ffd9bSMartin Matuska *token = (length << ML_BITS); 555e92ffd9bSMartin Matuska 556e92ffd9bSMartin Matuska /* Copy Literals */ 557e92ffd9bSMartin Matuska LZ4_BLINDCOPY(anchor, op, length); 558e92ffd9bSMartin Matuska 559e92ffd9bSMartin Matuska _next_match: 560e92ffd9bSMartin Matuska /* Encode Offset */ 561e92ffd9bSMartin Matuska LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref); 562e92ffd9bSMartin Matuska 563e92ffd9bSMartin Matuska /* Start Counting */ 564e92ffd9bSMartin Matuska ip += MINMATCH; 565e92ffd9bSMartin Matuska ref += MINMATCH; /* MinMatch verified */ 566e92ffd9bSMartin Matuska anchor = ip; 567e92ffd9bSMartin Matuska while (likely(ip < matchlimit - (STEPSIZE - 1))) { 568e92ffd9bSMartin Matuska UARCH diff = AARCH(ref) ^ AARCH(ip); 569e92ffd9bSMartin Matuska if (!diff) { 570e92ffd9bSMartin Matuska ip += STEPSIZE; 571e92ffd9bSMartin Matuska ref += STEPSIZE; 572e92ffd9bSMartin Matuska continue; 573e92ffd9bSMartin Matuska } 574e92ffd9bSMartin Matuska ip += LZ4_NbCommonBytes(diff); 575e92ffd9bSMartin Matuska goto _endCount; 576e92ffd9bSMartin Matuska } 577e92ffd9bSMartin Matuska #if LZ4_ARCH64 578e92ffd9bSMartin Matuska if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) { 579e92ffd9bSMartin Matuska ip += 4; 580e92ffd9bSMartin Matuska ref += 4; 581e92ffd9bSMartin Matuska } 582e92ffd9bSMartin Matuska #endif 583e92ffd9bSMartin Matuska if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) { 584e92ffd9bSMartin Matuska ip += 2; 585e92ffd9bSMartin Matuska ref += 2; 586e92ffd9bSMartin Matuska } 587e92ffd9bSMartin Matuska if ((ip < matchlimit) && (*ref == *ip)) 588e92ffd9bSMartin Matuska ip++; 589e92ffd9bSMartin Matuska _endCount: 590e92ffd9bSMartin Matuska 591e92ffd9bSMartin Matuska /* Encode MatchLength */ 592e92ffd9bSMartin Matuska len = (ip - anchor); 593e92ffd9bSMartin Matuska /* Check output limit */ 594e92ffd9bSMartin Matuska if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)) 595e92ffd9bSMartin Matuska return (0); 596e92ffd9bSMartin Matuska if (len >= (int)ML_MASK) { 597e92ffd9bSMartin Matuska *token += ML_MASK; 598e92ffd9bSMartin Matuska len -= ML_MASK; 599e92ffd9bSMartin Matuska for (; len > 509; len -= 510) { 600e92ffd9bSMartin Matuska *op++ = 255; 601e92ffd9bSMartin Matuska *op++ = 255; 602e92ffd9bSMartin Matuska } 603e92ffd9bSMartin Matuska if (len > 254) { 604e92ffd9bSMartin Matuska len -= 255; 605e92ffd9bSMartin Matuska *op++ = 255; 606e92ffd9bSMartin Matuska } 607e92ffd9bSMartin Matuska *op++ = (BYTE)len; 608e92ffd9bSMartin Matuska } else 609e92ffd9bSMartin Matuska *token += len; 610e92ffd9bSMartin Matuska 611e92ffd9bSMartin Matuska /* Test end of chunk */ 612e92ffd9bSMartin Matuska if (ip > mflimit) { 613e92ffd9bSMartin Matuska anchor = ip; 614e92ffd9bSMartin Matuska break; 615e92ffd9bSMartin Matuska } 616e92ffd9bSMartin Matuska /* Fill table */ 617e92ffd9bSMartin Matuska HashTable[LZ4_HASH_VALUE(ip - 2)] = ip - 2 - base; 618e92ffd9bSMartin Matuska 619e92ffd9bSMartin Matuska /* Test next position */ 620e92ffd9bSMartin Matuska ref = base + HashTable[LZ4_HASH_VALUE(ip)]; 621e92ffd9bSMartin Matuska HashTable[LZ4_HASH_VALUE(ip)] = ip - base; 622e92ffd9bSMartin Matuska if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) { 623e92ffd9bSMartin Matuska token = op++; 624e92ffd9bSMartin Matuska *token = 0; 625e92ffd9bSMartin Matuska goto _next_match; 626e92ffd9bSMartin Matuska } 627e92ffd9bSMartin Matuska /* Prepare next loop */ 628e92ffd9bSMartin Matuska anchor = ip++; 629e92ffd9bSMartin Matuska forwardH = LZ4_HASH_VALUE(ip); 630e92ffd9bSMartin Matuska } 631e92ffd9bSMartin Matuska 632e92ffd9bSMartin Matuska _last_literals: 633e92ffd9bSMartin Matuska /* Encode Last Literals */ 634e92ffd9bSMartin Matuska { 635e92ffd9bSMartin Matuska int lastRun = iend - anchor; 636e92ffd9bSMartin Matuska if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) > 637e92ffd9bSMartin Matuska oend) 638e92ffd9bSMartin Matuska return (0); 639e92ffd9bSMartin Matuska if (lastRun >= (int)RUN_MASK) { 640e92ffd9bSMartin Matuska *op++ = (RUN_MASK << ML_BITS); 641e92ffd9bSMartin Matuska lastRun -= RUN_MASK; 642e92ffd9bSMartin Matuska for (; lastRun > 254; lastRun -= 255) { 643e92ffd9bSMartin Matuska *op++ = 255; 644e92ffd9bSMartin Matuska } 645e92ffd9bSMartin Matuska *op++ = (BYTE)lastRun; 646e92ffd9bSMartin Matuska } else 647e92ffd9bSMartin Matuska *op++ = (lastRun << ML_BITS); 648e92ffd9bSMartin Matuska (void) memcpy(op, anchor, iend - anchor); 649e92ffd9bSMartin Matuska op += iend - anchor; 650e92ffd9bSMartin Matuska } 651e92ffd9bSMartin Matuska 652e92ffd9bSMartin Matuska /* End */ 653e92ffd9bSMartin Matuska return (int)(((char *)op) - dest); 654e92ffd9bSMartin Matuska } 655e92ffd9bSMartin Matuska 656e92ffd9bSMartin Matuska 657e92ffd9bSMartin Matuska 658e92ffd9bSMartin Matuska /* Note : this function is valid only if isize < LZ4_64KLIMIT */ 659e92ffd9bSMartin Matuska #define LZ4_64KLIMIT ((1 << 16) + (MFLIMIT - 1)) 660e92ffd9bSMartin Matuska #define HASHLOG64K (HASH_LOG + 1) 661e92ffd9bSMartin Matuska #define HASH64KTABLESIZE (1U << HASHLOG64K) 662e92ffd9bSMartin Matuska #define LZ4_HASH64K_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH*8) - \ 663e92ffd9bSMartin Matuska HASHLOG64K)) 664e92ffd9bSMartin Matuska #define LZ4_HASH64K_VALUE(p) LZ4_HASH64K_FUNCTION(A32(p)) 665e92ffd9bSMartin Matuska 666e92ffd9bSMartin Matuska static int 667e92ffd9bSMartin Matuska LZ4_compress64kCtx(void *ctx, const char *source, char *dest, int isize, 668e92ffd9bSMartin Matuska int osize) 669e92ffd9bSMartin Matuska { 670e92ffd9bSMartin Matuska struct refTables *srt = (struct refTables *)ctx; 671e92ffd9bSMartin Matuska U16 *HashTable = (U16 *) (srt->hashTable); 672e92ffd9bSMartin Matuska 673e92ffd9bSMartin Matuska const BYTE *ip = (BYTE *) source; 674e92ffd9bSMartin Matuska const BYTE *anchor = ip; 675e92ffd9bSMartin Matuska const BYTE *const base = ip; 676e92ffd9bSMartin Matuska const BYTE *const iend = ip + isize; 677e92ffd9bSMartin Matuska const BYTE *const oend = (BYTE *) dest + osize; 678e92ffd9bSMartin Matuska const BYTE *const mflimit = iend - MFLIMIT; 679e92ffd9bSMartin Matuska #define matchlimit (iend - LASTLITERALS) 680e92ffd9bSMartin Matuska 681e92ffd9bSMartin Matuska BYTE *op = (BYTE *) dest; 682e92ffd9bSMartin Matuska 683e92ffd9bSMartin Matuska int len, length; 684e92ffd9bSMartin Matuska const int skipStrength = SKIPSTRENGTH; 685e92ffd9bSMartin Matuska U32 forwardH; 686e92ffd9bSMartin Matuska 687e92ffd9bSMartin Matuska /* Init */ 688e92ffd9bSMartin Matuska if (isize < MINLENGTH) 689e92ffd9bSMartin Matuska goto _last_literals; 690e92ffd9bSMartin Matuska 691e92ffd9bSMartin Matuska /* First Byte */ 692e92ffd9bSMartin Matuska ip++; 693e92ffd9bSMartin Matuska forwardH = LZ4_HASH64K_VALUE(ip); 694e92ffd9bSMartin Matuska 695e92ffd9bSMartin Matuska /* Main Loop */ 696e92ffd9bSMartin Matuska for (;;) { 697e92ffd9bSMartin Matuska int findMatchAttempts = (1U << skipStrength) + 3; 698e92ffd9bSMartin Matuska const BYTE *forwardIp = ip; 699e92ffd9bSMartin Matuska const BYTE *ref; 700e92ffd9bSMartin Matuska BYTE *token; 701e92ffd9bSMartin Matuska 702e92ffd9bSMartin Matuska /* Find a match */ 703e92ffd9bSMartin Matuska do { 704e92ffd9bSMartin Matuska U32 h = forwardH; 705e92ffd9bSMartin Matuska int step = findMatchAttempts++ >> skipStrength; 706e92ffd9bSMartin Matuska ip = forwardIp; 707e92ffd9bSMartin Matuska forwardIp = ip + step; 708e92ffd9bSMartin Matuska 709e92ffd9bSMartin Matuska if (forwardIp > mflimit) { 710e92ffd9bSMartin Matuska goto _last_literals; 711e92ffd9bSMartin Matuska } 712e92ffd9bSMartin Matuska 713e92ffd9bSMartin Matuska forwardH = LZ4_HASH64K_VALUE(forwardIp); 714e92ffd9bSMartin Matuska ref = base + HashTable[h]; 715e92ffd9bSMartin Matuska HashTable[h] = ip - base; 716e92ffd9bSMartin Matuska 717e92ffd9bSMartin Matuska } while (A32(ref) != A32(ip)); 718e92ffd9bSMartin Matuska 719e92ffd9bSMartin Matuska /* Catch up */ 720e92ffd9bSMartin Matuska while ((ip > anchor) && (ref > (BYTE *) source) && 721e92ffd9bSMartin Matuska (ip[-1] == ref[-1])) { 722e92ffd9bSMartin Matuska ip--; 723e92ffd9bSMartin Matuska ref--; 724e92ffd9bSMartin Matuska } 725e92ffd9bSMartin Matuska 726e92ffd9bSMartin Matuska /* Encode Literal length */ 727e92ffd9bSMartin Matuska length = ip - anchor; 728e92ffd9bSMartin Matuska token = op++; 729e92ffd9bSMartin Matuska 730e92ffd9bSMartin Matuska /* Check output limit */ 731e92ffd9bSMartin Matuska if (unlikely(op + length + (2 + 1 + LASTLITERALS) + 732e92ffd9bSMartin Matuska (length >> 8) > oend)) 733e92ffd9bSMartin Matuska return (0); 734e92ffd9bSMartin Matuska 735e92ffd9bSMartin Matuska if (length >= (int)RUN_MASK) { 736e92ffd9bSMartin Matuska *token = (RUN_MASK << ML_BITS); 737e92ffd9bSMartin Matuska len = length - RUN_MASK; 738e92ffd9bSMartin Matuska for (; len > 254; len -= 255) 739e92ffd9bSMartin Matuska *op++ = 255; 740e92ffd9bSMartin Matuska *op++ = (BYTE)len; 741e92ffd9bSMartin Matuska } else 742e92ffd9bSMartin Matuska *token = (length << ML_BITS); 743e92ffd9bSMartin Matuska 744e92ffd9bSMartin Matuska /* Copy Literals */ 745e92ffd9bSMartin Matuska LZ4_BLINDCOPY(anchor, op, length); 746e92ffd9bSMartin Matuska 747e92ffd9bSMartin Matuska _next_match: 748e92ffd9bSMartin Matuska /* Encode Offset */ 749e92ffd9bSMartin Matuska LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref); 750e92ffd9bSMartin Matuska 751e92ffd9bSMartin Matuska /* Start Counting */ 752e92ffd9bSMartin Matuska ip += MINMATCH; 753e92ffd9bSMartin Matuska ref += MINMATCH; /* MinMatch verified */ 754e92ffd9bSMartin Matuska anchor = ip; 755e92ffd9bSMartin Matuska while (ip < matchlimit - (STEPSIZE - 1)) { 756e92ffd9bSMartin Matuska UARCH diff = AARCH(ref) ^ AARCH(ip); 757e92ffd9bSMartin Matuska if (!diff) { 758e92ffd9bSMartin Matuska ip += STEPSIZE; 759e92ffd9bSMartin Matuska ref += STEPSIZE; 760e92ffd9bSMartin Matuska continue; 761e92ffd9bSMartin Matuska } 762e92ffd9bSMartin Matuska ip += LZ4_NbCommonBytes(diff); 763e92ffd9bSMartin Matuska goto _endCount; 764e92ffd9bSMartin Matuska } 765e92ffd9bSMartin Matuska #if LZ4_ARCH64 766e92ffd9bSMartin Matuska if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) { 767e92ffd9bSMartin Matuska ip += 4; 768e92ffd9bSMartin Matuska ref += 4; 769e92ffd9bSMartin Matuska } 770e92ffd9bSMartin Matuska #endif 771e92ffd9bSMartin Matuska if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) { 772e92ffd9bSMartin Matuska ip += 2; 773e92ffd9bSMartin Matuska ref += 2; 774e92ffd9bSMartin Matuska } 775e92ffd9bSMartin Matuska if ((ip < matchlimit) && (*ref == *ip)) 776e92ffd9bSMartin Matuska ip++; 777e92ffd9bSMartin Matuska _endCount: 778e92ffd9bSMartin Matuska 779e92ffd9bSMartin Matuska /* Encode MatchLength */ 780e92ffd9bSMartin Matuska len = (ip - anchor); 781e92ffd9bSMartin Matuska /* Check output limit */ 782e92ffd9bSMartin Matuska if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)) 783e92ffd9bSMartin Matuska return (0); 784e92ffd9bSMartin Matuska if (len >= (int)ML_MASK) { 785e92ffd9bSMartin Matuska *token += ML_MASK; 786e92ffd9bSMartin Matuska len -= ML_MASK; 787e92ffd9bSMartin Matuska for (; len > 509; len -= 510) { 788e92ffd9bSMartin Matuska *op++ = 255; 789e92ffd9bSMartin Matuska *op++ = 255; 790e92ffd9bSMartin Matuska } 791e92ffd9bSMartin Matuska if (len > 254) { 792e92ffd9bSMartin Matuska len -= 255; 793e92ffd9bSMartin Matuska *op++ = 255; 794e92ffd9bSMartin Matuska } 795e92ffd9bSMartin Matuska *op++ = (BYTE)len; 796e92ffd9bSMartin Matuska } else 797e92ffd9bSMartin Matuska *token += len; 798e92ffd9bSMartin Matuska 799e92ffd9bSMartin Matuska /* Test end of chunk */ 800e92ffd9bSMartin Matuska if (ip > mflimit) { 801e92ffd9bSMartin Matuska anchor = ip; 802e92ffd9bSMartin Matuska break; 803e92ffd9bSMartin Matuska } 804e92ffd9bSMartin Matuska /* Fill table */ 805e92ffd9bSMartin Matuska HashTable[LZ4_HASH64K_VALUE(ip - 2)] = ip - 2 - base; 806e92ffd9bSMartin Matuska 807e92ffd9bSMartin Matuska /* Test next position */ 808e92ffd9bSMartin Matuska ref = base + HashTable[LZ4_HASH64K_VALUE(ip)]; 809e92ffd9bSMartin Matuska HashTable[LZ4_HASH64K_VALUE(ip)] = ip - base; 810e92ffd9bSMartin Matuska if (A32(ref) == A32(ip)) { 811e92ffd9bSMartin Matuska token = op++; 812e92ffd9bSMartin Matuska *token = 0; 813e92ffd9bSMartin Matuska goto _next_match; 814e92ffd9bSMartin Matuska } 815e92ffd9bSMartin Matuska /* Prepare next loop */ 816e92ffd9bSMartin Matuska anchor = ip++; 817e92ffd9bSMartin Matuska forwardH = LZ4_HASH64K_VALUE(ip); 818e92ffd9bSMartin Matuska } 819e92ffd9bSMartin Matuska 820e92ffd9bSMartin Matuska _last_literals: 821e92ffd9bSMartin Matuska /* Encode Last Literals */ 822e92ffd9bSMartin Matuska { 823e92ffd9bSMartin Matuska int lastRun = iend - anchor; 824e92ffd9bSMartin Matuska if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) > 825e92ffd9bSMartin Matuska oend) 826e92ffd9bSMartin Matuska return (0); 827e92ffd9bSMartin Matuska if (lastRun >= (int)RUN_MASK) { 828e92ffd9bSMartin Matuska *op++ = (RUN_MASK << ML_BITS); 829e92ffd9bSMartin Matuska lastRun -= RUN_MASK; 830e92ffd9bSMartin Matuska for (; lastRun > 254; lastRun -= 255) 831e92ffd9bSMartin Matuska *op++ = 255; 832e92ffd9bSMartin Matuska *op++ = (BYTE)lastRun; 833e92ffd9bSMartin Matuska } else 834e92ffd9bSMartin Matuska *op++ = (lastRun << ML_BITS); 835e92ffd9bSMartin Matuska (void) memcpy(op, anchor, iend - anchor); 836e92ffd9bSMartin Matuska op += iend - anchor; 837e92ffd9bSMartin Matuska } 838e92ffd9bSMartin Matuska 839e92ffd9bSMartin Matuska /* End */ 840e92ffd9bSMartin Matuska return (int)(((char *)op) - dest); 841e92ffd9bSMartin Matuska } 842e92ffd9bSMartin Matuska 843e92ffd9bSMartin Matuska static int 844e92ffd9bSMartin Matuska real_LZ4_compress(const char *source, char *dest, int isize, int osize) 845e92ffd9bSMartin Matuska { 846e92ffd9bSMartin Matuska void *ctx; 847e92ffd9bSMartin Matuska int result; 848e92ffd9bSMartin Matuska 849e92ffd9bSMartin Matuska ctx = lz4_alloc(KM_SLEEP); 850e92ffd9bSMartin Matuska 851e92ffd9bSMartin Matuska /* 852e92ffd9bSMartin Matuska * out of kernel memory, gently fall through - this will disable 853e92ffd9bSMartin Matuska * compression in zio_compress_data 854e92ffd9bSMartin Matuska */ 855e92ffd9bSMartin Matuska if (ctx == NULL) 856e92ffd9bSMartin Matuska return (0); 857e92ffd9bSMartin Matuska 858e92ffd9bSMartin Matuska memset(ctx, 0, sizeof (struct refTables)); 859e92ffd9bSMartin Matuska 860e92ffd9bSMartin Matuska if (isize < LZ4_64KLIMIT) 861e92ffd9bSMartin Matuska result = LZ4_compress64kCtx(ctx, source, dest, isize, osize); 862e92ffd9bSMartin Matuska else 863e92ffd9bSMartin Matuska result = LZ4_compressCtx(ctx, source, dest, isize, osize); 864e92ffd9bSMartin Matuska 865e92ffd9bSMartin Matuska lz4_free(ctx); 866e92ffd9bSMartin Matuska return (result); 867e92ffd9bSMartin Matuska } 868e92ffd9bSMartin Matuska 869e92ffd9bSMartin Matuska #ifdef __FreeBSD__ 870e92ffd9bSMartin Matuska /* 871e92ffd9bSMartin Matuska * FreeBSD has 4, 8 and 16 KB malloc zones which can be used here. 872e92ffd9bSMartin Matuska * Should struct refTables get resized this may need to be revisited, hence 873e92ffd9bSMartin Matuska * compiler-time asserts. 874e92ffd9bSMartin Matuska */ 875e92ffd9bSMartin Matuska _Static_assert(sizeof(struct refTables) <= 16384, 876e92ffd9bSMartin Matuska "refTables too big for malloc"); 877e92ffd9bSMartin Matuska _Static_assert((sizeof(struct refTables) % 4096) == 0, 878e92ffd9bSMartin Matuska "refTables not a multiple of page size"); 879e92ffd9bSMartin Matuska #else 880e92ffd9bSMartin Matuska #define ZFS_LZ4_USE_CACHE 881e92ffd9bSMartin Matuska #endif 882e92ffd9bSMartin Matuska 883e92ffd9bSMartin Matuska #ifdef ZFS_LZ4_USE_CACHE 884e92ffd9bSMartin Matuska static kmem_cache_t *lz4_cache; 885e92ffd9bSMartin Matuska #endif 886e92ffd9bSMartin Matuska 887e92ffd9bSMartin Matuska #ifdef ZFS_LZ4_USE_CACHE 888e92ffd9bSMartin Matuska void 889e92ffd9bSMartin Matuska lz4_init(void) 890e92ffd9bSMartin Matuska { 891e92ffd9bSMartin Matuska lz4_cache = kmem_cache_create("lz4_cache", 892ce4dcb97SMartin Matuska sizeof (struct refTables), 0, NULL, NULL, NULL, NULL, NULL, 893ce4dcb97SMartin Matuska KMC_RECLAIMABLE); 894e92ffd9bSMartin Matuska } 895e92ffd9bSMartin Matuska 896e92ffd9bSMartin Matuska void 897e92ffd9bSMartin Matuska lz4_fini(void) 898e92ffd9bSMartin Matuska { 899e92ffd9bSMartin Matuska if (lz4_cache) { 900e92ffd9bSMartin Matuska kmem_cache_destroy(lz4_cache); 901e92ffd9bSMartin Matuska lz4_cache = NULL; 902e92ffd9bSMartin Matuska } 903e92ffd9bSMartin Matuska } 904e92ffd9bSMartin Matuska 905e92ffd9bSMartin Matuska static void * 906e92ffd9bSMartin Matuska lz4_alloc(int flags) 907e92ffd9bSMartin Matuska { 908e92ffd9bSMartin Matuska ASSERT(lz4_cache != NULL); 909e92ffd9bSMartin Matuska return (kmem_cache_alloc(lz4_cache, flags)); 910e92ffd9bSMartin Matuska } 911e92ffd9bSMartin Matuska 912e92ffd9bSMartin Matuska static void 913e92ffd9bSMartin Matuska lz4_free(void *ctx) 914e92ffd9bSMartin Matuska { 915e92ffd9bSMartin Matuska kmem_cache_free(lz4_cache, ctx); 916e92ffd9bSMartin Matuska } 917e92ffd9bSMartin Matuska #else 918e92ffd9bSMartin Matuska void 919e92ffd9bSMartin Matuska lz4_init(void) 920e92ffd9bSMartin Matuska { 921e92ffd9bSMartin Matuska } 922e92ffd9bSMartin Matuska 923e92ffd9bSMartin Matuska void 924e92ffd9bSMartin Matuska lz4_fini(void) 925e92ffd9bSMartin Matuska { 926e92ffd9bSMartin Matuska } 927e92ffd9bSMartin Matuska 928e92ffd9bSMartin Matuska static void * 929e92ffd9bSMartin Matuska lz4_alloc(int flags) 930e92ffd9bSMartin Matuska { 931e92ffd9bSMartin Matuska return (kmem_alloc(sizeof (struct refTables), flags)); 932e92ffd9bSMartin Matuska } 933e92ffd9bSMartin Matuska 934e92ffd9bSMartin Matuska static void 935e92ffd9bSMartin Matuska lz4_free(void *ctx) 936e92ffd9bSMartin Matuska { 937e92ffd9bSMartin Matuska kmem_free(ctx, sizeof (struct refTables)); 938e92ffd9bSMartin Matuska } 939e92ffd9bSMartin Matuska #endif 940