/*
 * LZ4 - Fast LZ compression algorithm
 * Header File
 * Copyright (C) 2011-2013, Yann Collet.
 * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *     * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * You can contact the author at :
 * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
 * - LZ4 source repository : http://code.google.com/p/lz4/
 */

#include <sys/zfs_context.h>

static int real_LZ4_compress(const char *source, char *dest, int isize,
    int osize);
static int LZ4_compressBound(int isize);
static int LZ4_uncompress_unknownOutputSize(const char *source, char *dest,
    int isize, int maxOutputSize);
static int LZ4_compressCtx(void *ctx, const char *source, char *dest,
    int isize, int osize);
static int LZ4_compress64kCtx(void *ctx, const char *source, char *dest,
    int isize, int osize);

#ifdef __FreeBSD__
static kmem_cache_t *lz4_ctx_cache;
#endif

/*ARGSUSED*/
size_t
lz4_compress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n)
{
	uint32_t bufsiz;
	char *dest = d_start;

	ASSERT(d_len >= sizeof (bufsiz));

	bufsiz = real_LZ4_compress(s_start, &dest[sizeof (bufsiz)], s_len,
	    d_len - sizeof (bufsiz));

	/* Signal an error if the compression routine returned zero. */
	if (bufsiz == 0)
		return (s_len);

	/*
	 * Encode the compressed buffer size at the start. We'll need this in
	 * decompression to counter the effects of padding which might be
	 * added to the compressed buffer and which, if unhandled, would
	 * confuse the hell out of our decompression function.
	 */
	*(uint32_t *)dest = BE_32(bufsiz);

	return (bufsiz + sizeof (bufsiz));
}
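
/*
 * Layout of the buffer produced above (a sketch of what lz4_compress()
 * encodes; the total return value is bufsiz + sizeof (bufsiz)):
 *
 *	d_start: +---------------------+--------------------------+
 *	         | bufsiz (uint32, BE) | bufsiz bytes of LZ4 data |
 *	         +---------------------+--------------------------+
 */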

/*ARGSUSED*/
int
lz4_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n)
{
	const char *src = s_start;
	uint32_t bufsiz = BE_IN32(src);

	/* invalid compressed buffer size encoded at start */
	if (bufsiz + sizeof (bufsiz) > s_len)
		return (1);

	/*
	 * Returns 0 on success (decompression function returned non-negative)
	 * and non-zero on failure (decompression function returned negative).
	 */
	return (LZ4_uncompress_unknownOutputSize(&src[sizeof (bufsiz)],
	    d_start, bufsiz, d_len) < 0);
}

/*
 * LZ4 API Description:
 *
 * Simple Functions:
 * real_LZ4_compress() :
 *	isize  : is the input size. Max supported value is ~1.9GB
 *	return : the number of bytes written in buffer dest
 *	    or 0 if the compression fails (if LZ4_COMPRESSMIN is set).
 *	note : destination buffer must be already allocated.
 *	    destination buffer must be sized to handle worst-case
 *	    situations (input data not compressible). Worst-case size
 *	    evaluation is provided by function LZ4_compressBound().
 *
 * Advanced Functions
 *
 * LZ4_compressBound() :
 *	Provides the maximum size that LZ4 may output in a "worst case"
 *	scenario (input data not compressible), primarily useful for memory
 *	allocation of the output buffer.
 *
 *	isize  : is the input size. Max supported value is ~1.9GB
 *	return : maximum output size in a "worst case" scenario
 *	note : this function is limited by "int" range (2^31-1)
 *
 * LZ4_uncompress_unknownOutputSize() :
 *	isize  : is the input size, therefore the compressed size
 *	maxOutputSize : is the size of the destination buffer (which must be
 *	    already allocated)
 *	return : the number of bytes decoded in the destination buffer
 *	    (necessarily <= maxOutputSize). If the source stream is
 *	    malformed, the function will stop decoding and return a
 *	    negative result, indicating the byte position of the faulty
 *	    instruction. This function never writes beyond dest +
 *	    maxOutputSize, and is therefore protected against malicious
 *	    data packets.
 *	note : Destination buffer must be already allocated.
 *
 * LZ4_compressCtx() :
 *	This function explicitly handles the CTX memory structure.
 *
 *	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
 *	by the caller (either on the stack or using kmem_zalloc). Passing NULL
 *	isn't valid.
 *
 * LZ4_compress64kCtx() :
 *	Same as LZ4_compressCtx(), but specific to small inputs (<64KB).
 *	isize *Must* be <64KB, otherwise the output will be corrupted.
 *
 *	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
 *	by the caller (either on the stack or using kmem_zalloc). Passing NULL
 *	isn't valid.
 */
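
/*
 * Illustrative round-trip through the ZFS-facing wrappers above (a
 * sketch, not compiled; buffer sizes here are arbitrary -- in-tree,
 * zio_compress_data() sizes and supplies these buffers):
 *
 *	char cbuf[4096], dbuf[4096];
 *	size_t c_len;
 *
 *	c_len = lz4_compress(src, cbuf, s_len, sizeof (cbuf), 0);
 *	if (c_len >= s_len)
 *		-> treat the block as incompressible, keep src as-is;
 *	else if (lz4_decompress(cbuf, dbuf, c_len, sizeof (dbuf), 0) != 0)
 *		-> the stream was corrupt or truncated.
 */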

/*
 * Tuning parameters
 */

/*
 * COMPRESSIONLEVEL: Increasing this value improves compression ratio.
 * Lowering this value reduces memory usage. Reduced memory usage
 * typically improves speed, due to cache effects (ex: L1 32KB for Intel,
 * L1 64KB for AMD). Memory usage formula : N->2^(N+2) Bytes
 * (examples : 12 -> 16KB ; 17 -> 512KB)
 */
#define	COMPRESSIONLEVEL 12

/*
 * NOTCOMPRESSIBLE_CONFIRMATION: Decreasing this value will make the
 * algorithm skip faster over data segments considered "incompressible".
 * This may decrease compression ratio dramatically, but will be
 * faster on incompressible data. Increasing this value will make
 * the algorithm search more before declaring a segment "incompressible".
 * This could improve compression a bit, but will be slower on
 * incompressible data. The default value (6) is recommended.
 */
#define	NOTCOMPRESSIBLE_CONFIRMATION 6

/*
 * BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE: This will provide a boost to
 * performance for big-endian CPUs, but the resulting compressed stream
 * will be incompatible with little-endian CPUs. You can set this option
 * to 1 in situations where data will stay within a closed environment.
 * This option is useless on little-endian CPUs (such as x86).
 */
/* #define	BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 */

/*
 * CPU Feature Detection
 */

/* 32 or 64 bits ? */
#if (defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) || \
    defined(__amd64) || defined(__ppc64__) || defined(_WIN64) || \
    defined(__LP64__) || defined(_LP64))
#define	LZ4_ARCH64 1
#else
#define	LZ4_ARCH64 0
#endif

/*
 * Limits the amount of stack space that the algorithm may consume to hold
 * the compression lookup table. The value `9' here means we'll never use
 * more than 2k of stack (see above for a description of COMPRESSIONLEVEL).
 * If more memory is needed, it is allocated from the heap.
 */
/* FreeBSD: Use heap for all platforms for now */
#define	STACKLIMIT 0

/*
 * Little Endian or Big Endian?
 * Note: overwrite the below #define if you know your architecture endianness.
 */
#if BYTE_ORDER == BIG_ENDIAN
#define	LZ4_BIG_ENDIAN 1
#else
/*
 * Little Endian assumed. PDP Endian and other very rare endian formats
 * are unsupported.
 */
#endif

/*
 * Unaligned memory access is automatically enabled for "common" CPUs,
 * such as x86. For other CPUs, the compiler will be more cautious, and
 * insert extra code to ensure aligned access is respected. If you know
 * your target CPU supports unaligned memory access, you may want to
 * force this option manually to improve performance.
 */
#if defined(__ARM_FEATURE_UNALIGNED)
#define	LZ4_FORCE_UNALIGNED_ACCESS 1
#endif

/*
 * FreeBSD: can't use GCC's __builtin_ctz when targeting sparc64 because
 * gcc currently relies on libcompiler_rt.
 *
 * TODO: revisit this when the situation changes.
 */
#if defined(__sparc64__)
#define	LZ4_FORCE_SW_BITCOUNT
#endif

/*
 * Compiler Options
 */
#if __STDC_VERSION__ >= 199901L	/* C99 */
/* "restrict" is a known keyword */
#else
/* Disable restrict */
#define	restrict
#endif

#define	lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | \
	(((x) & 0xffu) << 8)))

#define	expect(expr, value) (__builtin_expect((expr), (value)))

#if defined(likely)
#undef likely
#endif
#if defined(unlikely)
#undef unlikely
#endif

#define	likely(expr)	expect((expr) != 0, 1)
#define	unlikely(expr)	expect((expr) != 0, 0)

/* Basic types */
#define	BYTE uint8_t
#define	U16 uint16_t
#define	U32 uint32_t
#define	S32 int32_t
#define	U64 uint64_t

#ifndef LZ4_FORCE_UNALIGNED_ACCESS
#pragma pack(1)
#endif

typedef struct _U16_S {
	U16 v;
} U16_S;
typedef struct _U32_S {
	U32 v;
} U32_S;
typedef struct _U64_S {
	U64 v;
} U64_S;

#ifndef LZ4_FORCE_UNALIGNED_ACCESS
#pragma pack()
#endif

#define	A64(x) (((U64_S *)(x))->v)
#define	A32(x) (((U32_S *)(x))->v)
#define	A16(x) (((U16_S *)(x))->v)

/*
 * Constants
 */
#define	MINMATCH 4

#define	HASH_LOG COMPRESSIONLEVEL
#define	HASHTABLESIZE (1 << HASH_LOG)
#define	HASH_MASK (HASHTABLESIZE - 1)

#define	SKIPSTRENGTH (NOTCOMPRESSIBLE_CONFIRMATION > 2 ? \
	NOTCOMPRESSIBLE_CONFIRMATION : 2)

/*
 * Defines if memory is allocated into the stack (local variable),
 * or into the heap (kmem_alloc()).
 */
#define	HEAPMODE (HASH_LOG > STACKLIMIT)
#define	COPYLENGTH 8
#define	LASTLITERALS 5
#define	MFLIMIT (COPYLENGTH + MINMATCH)
#define	MINLENGTH (MFLIMIT + 1)
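
/*
 * With COMPRESSIONLEVEL 12 and STACKLIMIT 0 as defined above, HEAPMODE
 * evaluates to 1 on every platform here, so the ~16KB struct refTables
 * always comes from the heap (kmem) rather than the kernel stack.
 */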

#define	MAXD_LOG 16
#define	MAX_DISTANCE ((1 << MAXD_LOG) - 1)

#define	ML_BITS 4
#define	ML_MASK ((1U<<ML_BITS)-1)
#define	RUN_BITS (8-ML_BITS)
#define	RUN_MASK ((1U<<RUN_BITS)-1)
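
/*
 * Worked example of the token layout these masks describe: each sequence
 * starts with one token byte whose high RUN_BITS bits hold the literal
 * run length and whose low ML_BITS bits hold the match length code.
 * E.g. token 0x52 means 5 literals followed by a match of
 * 0x2 + MINMATCH = 6 bytes. A field equal to its mask (RUN_MASK or
 * ML_MASK) signals that additional length bytes follow, each adding up
 * to 255, terminated by a byte < 255.
 */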


/*
 * Architecture-specific macros
 */
#if LZ4_ARCH64
#define	STEPSIZE 8
#define	UARCH U64
#define	AARCH A64
#define	LZ4_COPYSTEP(s, d)	A64(d) = A64(s); d += 8; s += 8;
#define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d)
#define	LZ4_SECURECOPY(s, d, e)	if (d < e) LZ4_WILDCOPY(s, d, e)
#define	HTYPE U32
#define	INITBASE(base)		const BYTE* const base = ip
#else /* !LZ4_ARCH64 */
#define	STEPSIZE 4
#define	UARCH U32
#define	AARCH A32
#define	LZ4_COPYSTEP(s, d)	A32(d) = A32(s); d += 4; s += 4;
#define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d); LZ4_COPYSTEP(s, d);
#define	LZ4_SECURECOPY		LZ4_WILDCOPY
#define	HTYPE const BYTE *
#define	INITBASE(base)		const int base = 0
#endif /* !LZ4_ARCH64 */

#if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE))
#define	LZ4_READ_LITTLEENDIAN_16(d, s, p) \
	{ U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
#define	LZ4_WRITE_LITTLEENDIAN_16(p, i) \
	{ U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p += 2; }
#else
#define	LZ4_READ_LITTLEENDIAN_16(d, s, p) { d = (s) - A16(p); }
#define	LZ4_WRITE_LITTLEENDIAN_16(p, v)	{ A16(p) = v; p += 2; }
#endif


/* Local structures */
struct refTables {
	HTYPE hashTable[HASHTABLESIZE];
};


/* Macros */
#define	LZ4_HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH * 8) - \
	HASH_LOG))
#define	LZ4_HASH_VALUE(p) LZ4_HASH_FUNCTION(A32(p))
#define	LZ4_WILDCOPY(s, d, e) do { LZ4_COPYPACKET(s, d) } while (d < e);
#define	LZ4_BLINDCOPY(s, d, l) { BYTE* e = (d) + l; LZ4_WILDCOPY(s, d, e); \
	d = e; }

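/*
 * LZ4_HASH_FUNCTION() above is Knuth-style multiplicative hashing: the
 * 32-bit load of the next MINMATCH input bytes is multiplied by
 * 2654435761 (a golden-ratio prime for 2^32) and the top HASH_LOG bits
 * are kept. Since MINMATCH == 4, the shift is 32 - HASH_LOG, i.e. for
 * the default HASH_LOG of 12:
 *
 *	U32 h = (A32(p) * 2654435761U) >> 20;	// 0 <= h < HASHTABLESIZE
 */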

/* Private functions */
#if LZ4_ARCH64

static inline int
LZ4_NbCommonBytes(register U64 val)
{
#if defined(LZ4_BIG_ENDIAN)
#if !defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_clzll(val) >> 3);
#else
	int r;
	if (!(val >> 32)) {
		r = 4;
	} else {
		r = 0;
		val >>= 32;
	}
	if (!(val >> 16)) {
		r += 2;
		val >>= 8;
	} else {
		val >>= 24;
	}
	r += (!val);
	return (r);
#endif
#else
#if !defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_ctzll(val) >> 3);
#else
	static const int DeBruijnBytePos[64] =
	    { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5,
		3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5,
		5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4,
		4, 5, 7, 2, 6, 5, 7, 6, 7, 7
	};
	return DeBruijnBytePos[((U64) ((val & -val) * 0x0218A392CDABBD3F)) >>
	    58];
#endif
#endif
}

#else

static inline int
LZ4_NbCommonBytes(register U32 val)
{
#if defined(LZ4_BIG_ENDIAN)
#if !defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_clz(val) >> 3);
#else
	int r;
	if (!(val >> 16)) {
		r = 2;
		val >>= 8;
	} else {
		r = 0;
		val >>= 24;
	}
	r += (!val);
	return (r);
#endif
#else
#if !defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_ctz(val) >> 3);
#else
	static const int DeBruijnBytePos[32] = {
		0, 0, 3, 0, 3, 1, 3, 0,
		3, 2, 2, 1, 3, 2, 0, 1,
		3, 3, 1, 2, 2, 2, 2, 0,
		3, 1, 2, 0, 1, 0, 1, 1
	};
	return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >>
	    27];
#endif
#endif
}

#endif

/* Public functions */

static int
LZ4_compressBound(int isize)
{
	return (isize + (isize / 255) + 16);
}
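
/*
 * Worked example: for a 4KB input the bound is
 * 4096 + (4096 / 255) + 16 = 4096 + 16 + 16 = 4128 bytes, i.e. less
 * than 1% of overhead even for wholly incompressible data.
 */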

/* Compression functions */

/*ARGSUSED*/
static int
LZ4_compressCtx(void *ctx, const char *source, char *dest, int isize,
    int osize)
{
#if HEAPMODE
	struct refTables *srt = (struct refTables *)ctx;
	HTYPE *HashTable = (HTYPE *) (srt->hashTable);
#else
	HTYPE HashTable[HASHTABLESIZE] = { 0 };
#endif

	const BYTE *ip = (BYTE *) source;
	INITBASE(base);
	const BYTE *anchor = ip;
	const BYTE *const iend = ip + isize;
	const BYTE *const oend = (BYTE *) dest + osize;
	const BYTE *const mflimit = iend - MFLIMIT;
#define	matchlimit (iend - LASTLITERALS)

	BYTE *op = (BYTE *) dest;

	int len, length;
	const int skipStrength = SKIPSTRENGTH;
	U32 forwardH;


	/* Init */
	if (isize < MINLENGTH)
		goto _last_literals;

	/* First Byte */
	HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
	ip++;
	forwardH = LZ4_HASH_VALUE(ip);

	/* Main Loop */
	for (;;) {
		int findMatchAttempts = (1U << skipStrength) + 3;
		const BYTE *forwardIp = ip;
		const BYTE *ref;
		BYTE *token;

		/* Find a match */
		do {
			U32 h = forwardH;
			int step = findMatchAttempts++ >> skipStrength;
			ip = forwardIp;
			forwardIp = ip + step;

			if unlikely(forwardIp > mflimit) {
				goto _last_literals;
			}

			forwardH = LZ4_HASH_VALUE(forwardIp);
			ref = base + HashTable[h];
			HashTable[h] = ip - base;

		} while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));

		/* Catch up */
		while ((ip > anchor) && (ref > (BYTE *) source) &&
		    unlikely(ip[-1] == ref[-1])) {
			ip--;
			ref--;
		}

		/* Encode Literal length */
		length = ip - anchor;
		token = op++;

		/* Check output limit */
		if unlikely(op + length + (2 + 1 + LASTLITERALS) +
		    (length >> 8) > oend)
			return (0);

		if (length >= (int)RUN_MASK) {
			*token = (RUN_MASK << ML_BITS);
			len = length - RUN_MASK;
			for (; len > 254; len -= 255)
				*op++ = 255;
			*op++ = (BYTE)len;
		} else
			*token = (length << ML_BITS);

		/* Copy Literals */
		LZ4_BLINDCOPY(anchor, op, length);

		_next_match:
		/* Encode Offset */
		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);

		/* Start Counting */
		ip += MINMATCH;
		ref += MINMATCH;	/* MinMatch verified */
		anchor = ip;
		while likely(ip < matchlimit - (STEPSIZE - 1)) {
			UARCH diff = AARCH(ref) ^ AARCH(ip);
			if (!diff) {
				ip += STEPSIZE;
				ref += STEPSIZE;
				continue;
			}
			ip += LZ4_NbCommonBytes(diff);
			goto _endCount;
		}
#if LZ4_ARCH64
		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
			ip += 4;
			ref += 4;
		}
#endif
		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
			ip += 2;
			ref += 2;
		}
		if ((ip < matchlimit) && (*ref == *ip))
			ip++;
		_endCount:

		/* Encode MatchLength */
		len = (ip - anchor);
		/* Check output limit */
		if unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)
			return (0);
		if (len >= (int)ML_MASK) {
			*token += ML_MASK;
			len -= ML_MASK;
			for (; len > 509; len -= 510) {
				*op++ = 255;
				*op++ = 255;
			}
			if (len > 254) {
				len -= 255;
				*op++ = 255;
			}
			*op++ = (BYTE)len;
		} else
			*token += len;

		/* Test end of chunk */
		if (ip > mflimit) {
			anchor = ip;
			break;
		}
		/* Fill table */
		HashTable[LZ4_HASH_VALUE(ip - 2)] = ip - 2 - base;

		/* Test next position */
		ref = base + HashTable[LZ4_HASH_VALUE(ip)];
		HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
		if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) {
			token = op++;
			*token = 0;
			goto _next_match;
		}
		/* Prepare next loop */
		anchor = ip++;
		forwardH = LZ4_HASH_VALUE(ip);
	}

	_last_literals:
	/* Encode Last Literals */
	{
		int lastRun = iend - anchor;
		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
		    oend)
			return (0);
		if (lastRun >= (int)RUN_MASK) {
			*op++ = (RUN_MASK << ML_BITS);
			lastRun -= RUN_MASK;
			for (; lastRun > 254; lastRun -= 255) {
				*op++ = 255;
			}
			*op++ = (BYTE)lastRun;
		} else
			*op++ = (lastRun << ML_BITS);
		(void) memcpy(op, anchor, iend - anchor);
		op += iend - anchor;
	}

	/* End */
	return (int)(((char *)op) - dest);
}


/* Note : this function is valid only if isize < LZ4_64KLIMIT */
#define	LZ4_64KLIMIT ((1 << 16) + (MFLIMIT - 1))
#define	HASHLOG64K (HASH_LOG + 1)
#define	HASH64KTABLESIZE (1U << HASHLOG64K)
#define	LZ4_HASH64K_FUNCTION(i)	(((i) * 2654435761U) >> ((MINMATCH*8) - \
	HASHLOG64K))
#define	LZ4_HASH64K_VALUE(p)	LZ4_HASH64K_FUNCTION(A32(p))
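
/*
 * LZ4_64KLIMIT works out to 65536 + 11 = 65547 bytes. Below this bound
 * every match offset fits in 16 bits, so the hash table can store U16
 * positions instead of U32 ones -- halving the entry size buys twice as
 * many entries (HASHLOG64K) in the same struct refTables.
 */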

/*ARGSUSED*/
static int
LZ4_compress64kCtx(void *ctx, const char *source, char *dest, int isize,
    int osize)
{
#if HEAPMODE
	struct refTables *srt = (struct refTables *)ctx;
	U16 *HashTable = (U16 *) (srt->hashTable);
#else
	U16 HashTable[HASH64KTABLESIZE] = { 0 };
#endif

	const BYTE *ip = (BYTE *) source;
	const BYTE *anchor = ip;
	const BYTE *const base = ip;
	const BYTE *const iend = ip + isize;
	const BYTE *const oend = (BYTE *) dest + osize;
	const BYTE *const mflimit = iend - MFLIMIT;
#define	matchlimit (iend - LASTLITERALS)

	BYTE *op = (BYTE *) dest;

	int len, length;
	const int skipStrength = SKIPSTRENGTH;
	U32 forwardH;

	/* Init */
	if (isize < MINLENGTH)
		goto _last_literals;

	/* First Byte */
	ip++;
	forwardH = LZ4_HASH64K_VALUE(ip);

	/* Main Loop */
	for (;;) {
		int findMatchAttempts = (1U << skipStrength) + 3;
		const BYTE *forwardIp = ip;
		const BYTE *ref;
		BYTE *token;

		/* Find a match */
		do {
			U32 h = forwardH;
			int step = findMatchAttempts++ >> skipStrength;
			ip = forwardIp;
			forwardIp = ip + step;

			if (forwardIp > mflimit) {
				goto _last_literals;
			}

			forwardH = LZ4_HASH64K_VALUE(forwardIp);
			ref = base + HashTable[h];
			HashTable[h] = ip - base;

		} while (A32(ref) != A32(ip));

		/* Catch up */
		while ((ip > anchor) && (ref > (BYTE *) source) &&
		    (ip[-1] == ref[-1])) {
			ip--;
			ref--;
		}

		/* Encode Literal length */
		length = ip - anchor;
		token = op++;

		/* Check output limit */
		if unlikely(op + length + (2 + 1 + LASTLITERALS) +
		    (length >> 8) > oend)
			return (0);

		if (length >= (int)RUN_MASK) {
			*token = (RUN_MASK << ML_BITS);
			len = length - RUN_MASK;
			for (; len > 254; len -= 255)
				*op++ = 255;
			*op++ = (BYTE)len;
		} else
			*token = (length << ML_BITS);

		/* Copy Literals */
		LZ4_BLINDCOPY(anchor, op, length);

		_next_match:
		/* Encode Offset */
		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);

		/* Start Counting */
		ip += MINMATCH;
		ref += MINMATCH;	/* MinMatch verified */
		anchor = ip;
		while (ip < matchlimit - (STEPSIZE - 1)) {
			UARCH diff = AARCH(ref) ^ AARCH(ip);
			if (!diff) {
				ip += STEPSIZE;
				ref += STEPSIZE;
				continue;
			}
			ip += LZ4_NbCommonBytes(diff);
			goto _endCount;
		}
#if LZ4_ARCH64
		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
			ip += 4;
			ref += 4;
		}
#endif
		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
			ip += 2;
			ref += 2;
		}
		if ((ip < matchlimit) && (*ref == *ip))
			ip++;
		_endCount:

		/* Encode MatchLength */
		len = (ip - anchor);
		/* Check output limit */
		if unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)
			return (0);
		if (len >= (int)ML_MASK) {
			*token += ML_MASK;
			len -= ML_MASK;
			for (; len > 509; len -= 510) {
				*op++ = 255;
				*op++ = 255;
			}
			if (len > 254) {
				len -= 255;
				*op++ = 255;
			}
			*op++ = (BYTE)len;
		} else
			*token += len;

		/* Test end of chunk */
		if (ip > mflimit) {
			anchor = ip;
			break;
		}
		/* Fill table */
		HashTable[LZ4_HASH64K_VALUE(ip - 2)] = ip - 2 - base;

		/* Test next position */
		ref = base + HashTable[LZ4_HASH64K_VALUE(ip)];
		HashTable[LZ4_HASH64K_VALUE(ip)] = ip - base;
		if (A32(ref) == A32(ip)) {
			token = op++;
			*token = 0;
			goto _next_match;
		}
		/* Prepare next loop */
		anchor = ip++;
		forwardH = LZ4_HASH64K_VALUE(ip);
	}

	_last_literals:
	/* Encode Last Literals */
	{
		int lastRun = iend - anchor;
		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
		    oend)
			return (0);
		if (lastRun >= (int)RUN_MASK) {
			*op++ = (RUN_MASK << ML_BITS);
			lastRun -= RUN_MASK;
			for (; lastRun > 254; lastRun -= 255)
				*op++ = 255;
			*op++ = (BYTE)lastRun;
		} else
			*op++ = (lastRun << ML_BITS);
		(void) memcpy(op, anchor, iend - anchor);
		op += iend - anchor;
	}

	/* End */
	return (int)(((char *)op) - dest);
}

static int
real_LZ4_compress(const char *source, char *dest, int isize, int osize)
{
#if HEAPMODE
#ifdef __FreeBSD__
	void *ctx = kmem_cache_alloc(lz4_ctx_cache, KM_NOSLEEP);
#else
	void *ctx = kmem_zalloc(sizeof (struct refTables), KM_NOSLEEP);
#endif
	int result;

	/*
	 * out of kernel memory, gently fall through - this will disable
	 * compression in zio_compress_data
	 */
	if (ctx == NULL)
		return (0);

	bzero(ctx, sizeof (struct refTables));
	if (isize < LZ4_64KLIMIT)
		result = LZ4_compress64kCtx(ctx, source, dest, isize, osize);
	else
		result = LZ4_compressCtx(ctx, source, dest, isize, osize);

#ifdef __FreeBSD__
	kmem_cache_free(lz4_ctx_cache, ctx);
#else
	kmem_free(ctx, sizeof (struct refTables));
#endif
	return (result);
#else
	if (isize < (int)LZ4_64KLIMIT)
		return (LZ4_compress64kCtx(NULL, source, dest, isize, osize));
	return (LZ4_compressCtx(NULL, source, dest, isize, osize));
#endif
}

/* Decompression functions */

/*
 * Note: The decoding function LZ4_uncompress_unknownOutputSize() is safe
 *	against "buffer overflow" attacks. It will never write nor
 *	read outside of the provided output buffer.
 *	LZ4_uncompress_unknownOutputSize() also ensures that it will never
 *	read outside of the input buffer. A corrupted input will produce
 *	an error result, a negative int, indicating the position of the
 *	error within the input stream.
 */
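
/*
 * A minimal sketch of the error handling this contract implies for a
 * caller (illustrative only; lz4_decompress() above is the real in-tree
 * consumer):
 *
 *	int n = LZ4_uncompress_unknownOutputSize(src, dst, c_len, d_len);
 *	if (n < 0)
 *		-> the input was corrupt and -n is the faulty input offset;
 *	else
 *		-> n decompressed bytes are now in dst.
 */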

static int
LZ4_uncompress_unknownOutputSize(const char *source, char *dest, int isize,
    int maxOutputSize)
{
	/* Local Variables */
	const BYTE *restrict ip = (const BYTE *) source;
	const BYTE *const iend = ip + isize;
	const BYTE *ref;

	BYTE *op = (BYTE *) dest;
	BYTE *const oend = op + maxOutputSize;
	BYTE *cpy;

	size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
#if LZ4_ARCH64
	size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3};
#endif
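
	/*
	 * The tables above handle matches whose offset (op - ref) is
	 * smaller than STEPSIZE, i.e. overlapping copies: after the
	 * first four bytes are copied one at a time, dec32table
	 * re-positions ref so a whole 32-bit word can be read safely,
	 * and dec64table supplies the extra 64-bit correction. See the
	 * "copy repeated sequence" block below.
	 */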

	/* Main Loop */
	while (ip < iend) {
		unsigned token;
		size_t length;

		/* get runlength */
		token = *ip++;
		if ((length = (token >> ML_BITS)) == RUN_MASK) {
			int s = 255;
			while ((ip < iend) && (s == 255)) {
				s = *ip++;
				length += s;
			}
		}
		/* copy literals */
		cpy = op + length;
		/* CORNER-CASE: cpy might overflow. */
		if (cpy < op)
			goto _output_error;	/* cpy overflowed, bail! */
		if ((cpy > oend - COPYLENGTH) ||
		    (ip + length > iend - COPYLENGTH)) {
			if (cpy > oend)
				/* Error: writes beyond output buffer */
				goto _output_error;
			if (ip + length != iend)
				/*
				 * Error: LZ4 format requires to consume all
				 * input at this stage
				 */
				goto _output_error;
			(void) memcpy(op, ip, length);
			op += length;
			/* Necessarily EOF, due to parsing restrictions */
			break;
		}
		LZ4_WILDCOPY(ip, op, cpy);
		ip -= (op - cpy);
		op = cpy;

		/* get offset */
		LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
		ip += 2;
		if (ref < (BYTE * const) dest)
			/*
			 * Error: offset creates reference outside of
			 * destination buffer
			 */
			goto _output_error;

		/* get matchlength */
		if ((length = (token & ML_MASK)) == ML_MASK) {
			while (ip < iend) {
				int s = *ip++;
				length += s;
				if (s == 255)
					continue;
				break;
			}
		}
		/* copy repeated sequence */
		if unlikely(op - ref < STEPSIZE) {
#if LZ4_ARCH64
			size_t dec64 = dec64table[op - ref];
#else
			const int dec64 = 0;
#endif
			op[0] = ref[0];
			op[1] = ref[1];
			op[2] = ref[2];
			op[3] = ref[3];
			op += 4;
			ref += 4;
			ref -= dec32table[op - ref];
			A32(op) = A32(ref);
			op += STEPSIZE - 4;
			ref -= dec64;
		} else {
			LZ4_COPYSTEP(ref, op);
		}
		cpy = op + length - (STEPSIZE - 4);
		if (cpy > oend - COPYLENGTH) {
			if (cpy > oend)
				/*
				 * Error: request to write outside of
				 * destination buffer
				 */
				goto _output_error;
			LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
			while (op < cpy)
				*op++ = *ref++;
			op = cpy;
			if (op == oend)
				/*
				 * Check EOF (should never happen, since
				 * last 5 bytes are supposed to be literals)
				 */
				goto _output_error;
			continue;
		}
		LZ4_SECURECOPY(ref, op, cpy);
		op = cpy;	/* correction */
	}

	/* end of decoding */
	return (int)(((char *)op) - dest);

	/* write overflow error detected */
	_output_error:
	return (int)(-(((char *)ip) - source));
}

#ifdef __FreeBSD__

extern void
lz4_init(void)
{

#if HEAPMODE
	lz4_ctx_cache = kmem_cache_create("lz4_ctx", sizeof (struct refTables),
	    0, NULL, NULL, NULL, NULL, NULL, 0);
#endif
}

extern void
lz4_fini(void)
{

#if HEAPMODE
	kmem_cache_destroy(lz4_ctx_cache);
#endif
}

#endif /* __FreeBSD__ */