1 /*
2 xxHash - Fast Hash algorithm
3 Copyright (C) 2012-2014, Yann Collet.
4 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
5
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are
8 met:
9
10 * Redistributions of source code must retain the above copyright
11 notice, this list of conditions and the following disclaimer.
12 * Redistributions in binary form must reproduce the above
13 copyright notice, this list of conditions and the following disclaimer
14 in the documentation and/or other materials provided with the
15 distribution.
16
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
29 You can contact the author at :
30 - xxHash source repository : http://code.google.com/p/xxhash/
31 */
32 #include "archive_platform.h"
33
34 #include <stdlib.h>
35 #include <string.h>
36
37 #include "archive_xxhash.h"
38
39 #ifdef HAVE_LIBLZ4
40
41 /***************************************
42 ** Tuning parameters
43 ****************************************/
44 /* Unaligned memory access is automatically enabled for "common" CPU, such as x86.
45 ** For others CPU, the compiler will be more cautious, and insert extra code to ensure aligned access is respected.
46 ** If you know your target CPU supports unaligned memory access, you want to force this option manually to improve performance.
47 ** You can also enable this parameter if you know your input data will always be aligned (boundaries of 4, for U32).
48 */
49 #if defined(__ARM_FEATURE_UNALIGNED) || defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
50 # define XXH_USE_UNALIGNED_ACCESS 1
51 #endif
52
53 /* XXH_ACCEPT_NULL_INPUT_POINTER :
54 ** If the input pointer is a null pointer, xxHash default behavior is to trigger a memory access error, since it is a bad pointer.
55 ** When this option is enabled, xxHash output for null input pointers will be the same as a null-length input.
56 ** This option has a very small performance cost (only measurable on small inputs).
57 ** By default, this option is disabled. To enable it, uncomment below define :
58 ** #define XXH_ACCEPT_NULL_INPUT_POINTER 1
59
60 ** XXH_FORCE_NATIVE_FORMAT :
61 ** By default, xxHash library provides endian-independent Hash values, based on little-endian convention.
62 ** Results are therefore identical for little-endian and big-endian CPU.
63 ** This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format.
64 ** Should endian-independence be of no importance for your application, you may set the #define below to 1.
65 ** It will improve speed for Big-endian CPU.
66 ** This option has no impact on Little_Endian CPU.
67 */
68 #define XXH_FORCE_NATIVE_FORMAT 0
69
70 /***************************************
71 ** Compiler Specific Options
72 ****************************************/
73 /* Disable some Visual warning messages */
74 #ifdef _MSC_VER /* Visual Studio */
75 # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
76 #endif
77
78 #ifdef _MSC_VER /* Visual Studio */
79 # define FORCE_INLINE __forceinline
80 #else
81 # ifdef __GNUC__
82 # define FORCE_INLINE inline __attribute__((always_inline))
83 # else
84 # define FORCE_INLINE inline
85 # endif
86 #endif
87
88 /***************************************
89 ** Includes & Memory related functions
90 ****************************************/
91 #define XXH_malloc malloc
92 #define XXH_free free
93 #define XXH_memcpy memcpy
94
95
96 static unsigned int XXH32 (const void*, unsigned int, unsigned int);
97 static void* XXH32_init (unsigned int);
98 static XXH_errorcode XXH32_update (void*, const void*, unsigned int);
99 static unsigned int XXH32_digest (void*);
100 /*static int XXH32_sizeofState(void);*/
101 static XXH_errorcode XXH32_resetState(void*, unsigned int);
102 #define XXH32_SIZEOFSTATE 48
103 typedef struct { long long ll[(XXH32_SIZEOFSTATE+(sizeof(long long)-1))/sizeof(long long)]; } XXH32_stateSpace_t;
104 static unsigned int XXH32_intermediateDigest (void*);
105
106 /***************************************
107 ** Basic Types
108 ****************************************/
109 #if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
110 # include <stdint.h>
111 typedef uint8_t BYTE;
112 typedef uint16_t U16;
113 typedef uint32_t U32;
114 typedef int32_t S32;
115 typedef uint64_t U64;
116 #else
117 typedef unsigned char BYTE;
118 typedef unsigned short U16;
119 typedef unsigned int U32;
120 typedef signed int S32;
121 typedef unsigned long long U64;
122 #endif
123
124 #if defined(__GNUC__) && !defined(XXH_USE_UNALIGNED_ACCESS)
125 # define _PACKED __attribute__ ((packed))
126 #else
127 # define _PACKED
128 #endif
129
130 #if !defined(XXH_USE_UNALIGNED_ACCESS) && !defined(__GNUC__)
131 # ifdef __IBMC__
132 # pragma pack(1)
133 # else
134 # pragma pack(push, 1)
135 # endif
136 #endif
137
138 typedef struct _U32_S { U32 v; } _PACKED U32_S;
139
140 #if !defined(XXH_USE_UNALIGNED_ACCESS) && !defined(__GNUC__)
141 # pragma pack(pop)
142 #endif
143
144
145 /****************************************
146 ** Compiler-specific Functions and Macros
147 *****************************************/
148 #define GCC_VERSION ((__GNUC__-0) * 100 + (__GNUC_MINOR__ - 0))
149
150 #if GCC_VERSION >= 409
151 __attribute__((__no_sanitize_undefined__))
152 #else
153 # if defined(__clang__)
154 __attribute__((no_sanitize("undefined")))
155 # endif
156 #endif
157 #if defined(_MSC_VER)
A32(const void * x)158 static __inline U32 A32(const void * x)
159 #else
160 static inline U32 A32(const void* x)
161 #endif
162 {
163 return (((const U32_S *)(x))->v);
164 }
165
166 /* Note : although _rotl exists for minGW (GCC under windows), performance seems poor */
167 #if defined(_MSC_VER)
168 # define XXH_rotl32(x,r) _rotl(x,r)
169 #else
170 # define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r)))
171 #endif
172
173 #if defined(_MSC_VER) /* Visual Studio */
174 # define XXH_swap32 _byteswap_ulong
175 #elif GCC_VERSION >= 403
176 # define XXH_swap32 __builtin_bswap32
177 #else
XXH_swap32(U32 x)178 static inline U32 XXH_swap32 (U32 x) {
179 return ((x << 24) & 0xff000000 ) |
180 ((x << 8) & 0x00ff0000 ) |
181 ((x >> 8) & 0x0000ff00 ) |
182 ((x >> 24) & 0x000000ff );}
183 #endif
184
185
186 /***************************************
187 ** Constants
188 ****************************************/
189 #define PRIME32_1 2654435761U
190 #define PRIME32_2 2246822519U
191 #define PRIME32_3 3266489917U
192 #define PRIME32_4 668265263U
193 #define PRIME32_5 374761393U
194
195
196 /***************************************
197 ** Architecture Macros
198 ****************************************/
199 typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess;
200 #ifndef XXH_CPU_LITTLE_ENDIAN /* It is possible to define XXH_CPU_LITTLE_ENDIAN externally, for example using a compiler switch */
201 static const int one = 1;
202 # define XXH_CPU_LITTLE_ENDIAN (*(const char*)(&one))
203 #endif
204
205
206 /***************************************
207 ** Macros
208 ****************************************/
209 #define XXH_STATIC_ASSERT(c) { enum { XXH_static_assert = 1/(!!(c)) }; } /* use only *after* variable declarations */
210
211
212 /*****************************
213 ** Memory reads
214 ******************************/
215 typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment;
216
217 static
XXH_readLE32_align(const U32 * ptr,XXH_endianess endian,XXH_alignment align)218 FORCE_INLINE U32 XXH_readLE32_align(const U32* ptr, XXH_endianess endian, XXH_alignment align)
219 {
220 if (align==XXH_unaligned)
221 return endian==XXH_littleEndian ? A32(ptr) : XXH_swap32(A32(ptr));
222 else
223 return endian==XXH_littleEndian ? *ptr : XXH_swap32(*ptr);
224 }
225
226 static
XXH_readLE32(const U32 * ptr,XXH_endianess endian)227 FORCE_INLINE U32 XXH_readLE32(const U32* ptr, XXH_endianess endian) { return XXH_readLE32_align(ptr, endian, XXH_unaligned); }
228
229
230 /*****************************
231 ** Simple Hash Functions
232 ******************************/
233 static
XXH32_endian_align(const void * input,unsigned int len,U32 seed,XXH_endianess endian,XXH_alignment align)234 FORCE_INLINE U32 XXH32_endian_align(const void* input, unsigned int len, U32 seed, XXH_endianess endian, XXH_alignment align)
235 {
236 const BYTE* p = (const BYTE*)input;
237 const BYTE* bEnd = p + len;
238 U32 h32;
239 #define XXH_get32bits(p) XXH_readLE32_align((const U32*)p, endian, align)
240
241 #ifdef XXH_ACCEPT_NULL_INPUT_POINTER
242 if (p==NULL) { len=0; bEnd=p=(const BYTE*)(size_t)16; }
243 #endif
244
245 if (len>=16)
246 {
247 const BYTE* const limit = bEnd - 16;
248 U32 v1 = seed + PRIME32_1 + PRIME32_2;
249 U32 v2 = seed + PRIME32_2;
250 U32 v3 = seed + 0;
251 U32 v4 = seed - PRIME32_1;
252
253 do
254 {
255 v1 += XXH_get32bits(p) * PRIME32_2; v1 = XXH_rotl32(v1, 13); v1 *= PRIME32_1; p+=4;
256 v2 += XXH_get32bits(p) * PRIME32_2; v2 = XXH_rotl32(v2, 13); v2 *= PRIME32_1; p+=4;
257 v3 += XXH_get32bits(p) * PRIME32_2; v3 = XXH_rotl32(v3, 13); v3 *= PRIME32_1; p+=4;
258 v4 += XXH_get32bits(p) * PRIME32_2; v4 = XXH_rotl32(v4, 13); v4 *= PRIME32_1; p+=4;
259 } while (p<=limit);
260
261 h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18);
262 }
263 else
264 {
265 h32 = seed + PRIME32_5;
266 }
267
268 h32 += (U32) len;
269
270 while (p<=bEnd-4)
271 {
272 h32 += XXH_get32bits(p) * PRIME32_3;
273 h32 = XXH_rotl32(h32, 17) * PRIME32_4 ;
274 p+=4;
275 }
276
277 while (p<bEnd)
278 {
279 h32 += (*p) * PRIME32_5;
280 h32 = XXH_rotl32(h32, 11) * PRIME32_1 ;
281 p++;
282 }
283
284 h32 ^= h32 >> 15;
285 h32 *= PRIME32_2;
286 h32 ^= h32 >> 13;
287 h32 *= PRIME32_3;
288 h32 ^= h32 >> 16;
289
290 return h32;
291 }
292
293
XXH32(const void * input,unsigned int len,U32 seed)294 U32 XXH32(const void* input, unsigned int len, U32 seed)
295 {
296 #if 0
297 // Simple version, good for code maintenance, but unfortunately slow for small inputs
298 void* state = XXH32_init(seed);
299 XXH32_update(state, input, len);
300 return XXH32_digest(state);
301 #else
302 XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
303
304 # if !defined(XXH_USE_UNALIGNED_ACCESS)
305 if ((((size_t)input) & 3) == 0) /* Input is aligned, let's leverage the speed advantage */
306 {
307 if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
308 return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned);
309 else
310 return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned);
311 }
312 # endif
313
314 if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
315 return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned);
316 else
317 return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned);
318 #endif
319 }
320
321 /*****************************
322 ** Advanced Hash Functions
323 ******************************/
324
325 struct XXH_state32_t
326 {
327 U64 total_len;
328 U32 seed;
329 U32 v1;
330 U32 v2;
331 U32 v3;
332 U32 v4;
333 int memsize;
334 char memory[16];
335 };
336
337 #if 0
338 static
339 int XXH32_sizeofState(void)
340 {
341 XXH_STATIC_ASSERT(XXH32_SIZEOFSTATE >= sizeof(struct XXH_state32_t)); /* A compilation error here means XXH32_SIZEOFSTATE is not large enough */
342 return sizeof(struct XXH_state32_t);
343 }
344 #endif
345
346 static
XXH32_resetState(void * state_in,U32 seed)347 XXH_errorcode XXH32_resetState(void* state_in, U32 seed)
348 {
349 struct XXH_state32_t * state = (struct XXH_state32_t *) state_in;
350 state->seed = seed;
351 state->v1 = seed + PRIME32_1 + PRIME32_2;
352 state->v2 = seed + PRIME32_2;
353 state->v3 = seed + 0;
354 state->v4 = seed - PRIME32_1;
355 state->total_len = 0;
356 state->memsize = 0;
357 return XXH_OK;
358 }
359
360 static
XXH32_init(U32 seed)361 void* XXH32_init (U32 seed)
362 {
363 void* state = XXH_malloc (sizeof(struct XXH_state32_t));
364 XXH32_resetState(state, seed);
365 return state;
366 }
367
368 static
XXH32_update_endian(void * state_in,const void * input,int len,XXH_endianess endian)369 FORCE_INLINE XXH_errorcode XXH32_update_endian (void* state_in, const void* input, int len, XXH_endianess endian)
370 {
371 struct XXH_state32_t * state = (struct XXH_state32_t *) state_in;
372 const BYTE* p = (const BYTE*)input;
373 const BYTE* const bEnd = p + len;
374
375 #ifdef XXH_ACCEPT_NULL_INPUT_POINTER
376 if (input==NULL) return XXH_ERROR;
377 #endif
378
379 state->total_len += len;
380
381 if (state->memsize + len < 16) /* fill in tmp buffer */
382 {
383 XXH_memcpy(state->memory + state->memsize, input, len);
384 state->memsize += len;
385 return XXH_OK;
386 }
387
388 if (state->memsize) /* some data left from previous update */
389 {
390 XXH_memcpy(state->memory + state->memsize, input, 16-state->memsize);
391 {
392 const U32* p32 = (const U32*)state->memory;
393 state->v1 += XXH_readLE32(p32, endian) * PRIME32_2; state->v1 = XXH_rotl32(state->v1, 13); state->v1 *= PRIME32_1; p32++;
394 state->v2 += XXH_readLE32(p32, endian) * PRIME32_2; state->v2 = XXH_rotl32(state->v2, 13); state->v2 *= PRIME32_1; p32++;
395 state->v3 += XXH_readLE32(p32, endian) * PRIME32_2; state->v3 = XXH_rotl32(state->v3, 13); state->v3 *= PRIME32_1; p32++;
396 state->v4 += XXH_readLE32(p32, endian) * PRIME32_2; state->v4 = XXH_rotl32(state->v4, 13); state->v4 *= PRIME32_1; p32++;
397 }
398 p += 16-state->memsize;
399 state->memsize = 0;
400 }
401
402 if (p <= bEnd-16)
403 {
404 const BYTE* const limit = bEnd - 16;
405 U32 v1 = state->v1;
406 U32 v2 = state->v2;
407 U32 v3 = state->v3;
408 U32 v4 = state->v4;
409
410 do
411 {
412 v1 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; v1 = XXH_rotl32(v1, 13); v1 *= PRIME32_1; p+=4;
413 v2 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; v2 = XXH_rotl32(v2, 13); v2 *= PRIME32_1; p+=4;
414 v3 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; v3 = XXH_rotl32(v3, 13); v3 *= PRIME32_1; p+=4;
415 v4 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; v4 = XXH_rotl32(v4, 13); v4 *= PRIME32_1; p+=4;
416 } while (p<=limit);
417
418 state->v1 = v1;
419 state->v2 = v2;
420 state->v3 = v3;
421 state->v4 = v4;
422 }
423
424 if (p < bEnd)
425 {
426 XXH_memcpy(state->memory, p, bEnd-p);
427 state->memsize = (int)(bEnd-p);
428 }
429
430 return XXH_OK;
431 }
432
433 static
XXH32_update(void * state_in,const void * input,unsigned int len)434 XXH_errorcode XXH32_update (void* state_in, const void* input, unsigned int len)
435 {
436 XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
437
438 if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
439 return XXH32_update_endian(state_in, input, len, XXH_littleEndian);
440 else
441 return XXH32_update_endian(state_in, input, len, XXH_bigEndian);
442 }
443
444
445
446 static
XXH32_intermediateDigest_endian(void * state_in,XXH_endianess endian)447 FORCE_INLINE U32 XXH32_intermediateDigest_endian (void* state_in, XXH_endianess endian)
448 {
449 struct XXH_state32_t * state = (struct XXH_state32_t *) state_in;
450 const BYTE * p = (const BYTE*)state->memory;
451 BYTE* bEnd = (BYTE*)state->memory + state->memsize;
452 U32 h32;
453
454 if (state->total_len >= 16)
455 {
456 h32 = XXH_rotl32(state->v1, 1) + XXH_rotl32(state->v2, 7) + XXH_rotl32(state->v3, 12) + XXH_rotl32(state->v4, 18);
457 }
458 else
459 {
460 h32 = state->seed + PRIME32_5;
461 }
462
463 h32 += (U32) state->total_len;
464
465 while (p<=bEnd-4)
466 {
467 h32 += XXH_readLE32((const U32*)p, endian) * PRIME32_3;
468 h32 = XXH_rotl32(h32, 17) * PRIME32_4;
469 p+=4;
470 }
471
472 while (p<bEnd)
473 {
474 h32 += (*p) * PRIME32_5;
475 h32 = XXH_rotl32(h32, 11) * PRIME32_1;
476 p++;
477 }
478
479 h32 ^= h32 >> 15;
480 h32 *= PRIME32_2;
481 h32 ^= h32 >> 13;
482 h32 *= PRIME32_3;
483 h32 ^= h32 >> 16;
484
485 return h32;
486 }
487
488 static
XXH32_intermediateDigest(void * state_in)489 U32 XXH32_intermediateDigest (void* state_in)
490 {
491 XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
492
493 if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
494 return XXH32_intermediateDigest_endian(state_in, XXH_littleEndian);
495 else
496 return XXH32_intermediateDigest_endian(state_in, XXH_bigEndian);
497 }
498
499 static
XXH32_digest(void * state_in)500 U32 XXH32_digest (void* state_in)
501 {
502 U32 h32 = XXH32_intermediateDigest(state_in);
503
504 XXH_free(state_in);
505
506 return h32;
507 }
508
509 const
510 struct archive_xxhash __archive_xxhash = {
511 XXH32,
512 XXH32_init,
513 XXH32_update,
514 XXH32_digest
515 };
516 #else
517
518 /*
519 * Define an empty version of the struct if we aren't using the LZ4 library.
520 */
521 const
522 struct archive_xxhash __archive_xxhash = {
523 NULL,
524 NULL,
525 NULL,
526 NULL
527 };
528
529 #endif /* HAVE_LIBLZ4 */
530