xref: /dflybsd-src/contrib/zstd/lib/common/xxhash.h (revision a28cd43d19e8b720a6c852a4bbc5ae147a26165a)
1*a28cd43dSSascha Wildner /*
2*a28cd43dSSascha Wildner  * xxHash - Extremely Fast Hash algorithm
3*a28cd43dSSascha Wildner  * Header File
4*a28cd43dSSascha Wildner  * Copyright (c) 2012-2020, Yann Collet, Facebook, Inc.
5*a28cd43dSSascha Wildner  *
6*a28cd43dSSascha Wildner  * You can contact the author at :
7*a28cd43dSSascha Wildner  * - xxHash source repository : https://github.com/Cyan4973/xxHash
8*a28cd43dSSascha Wildner  *
9*a28cd43dSSascha Wildner  * This source code is licensed under both the BSD-style license (found in the
10*a28cd43dSSascha Wildner  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
11*a28cd43dSSascha Wildner  * in the COPYING file in the root directory of this source tree).
12*a28cd43dSSascha Wildner  * You may select, at your option, one of the above-listed licenses.
13*a28cd43dSSascha Wildner */
14*a28cd43dSSascha Wildner 
15*a28cd43dSSascha Wildner /* Notice extracted from xxHash homepage :
16*a28cd43dSSascha Wildner 
17*a28cd43dSSascha Wildner xxHash is an extremely fast Hash algorithm, running at RAM speed limits.
18*a28cd43dSSascha Wildner It also successfully passes all tests from the SMHasher suite.
19*a28cd43dSSascha Wildner 
20*a28cd43dSSascha Wildner Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz)
21*a28cd43dSSascha Wildner 
22*a28cd43dSSascha Wildner Name            Speed       Q.Score   Author
23*a28cd43dSSascha Wildner xxHash          5.4 GB/s     10
24*a28cd43dSSascha Wildner CrapWow         3.2 GB/s      2       Andrew
25*a28cd43dSSascha Wildner MumurHash 3a    2.7 GB/s     10       Austin Appleby
26*a28cd43dSSascha Wildner SpookyHash      2.0 GB/s     10       Bob Jenkins
27*a28cd43dSSascha Wildner SBox            1.4 GB/s      9       Bret Mulvey
28*a28cd43dSSascha Wildner Lookup3         1.2 GB/s      9       Bob Jenkins
29*a28cd43dSSascha Wildner SuperFastHash   1.2 GB/s      1       Paul Hsieh
30*a28cd43dSSascha Wildner CityHash64      1.05 GB/s    10       Pike & Alakuijala
31*a28cd43dSSascha Wildner FNV             0.55 GB/s     5       Fowler, Noll, Vo
32*a28cd43dSSascha Wildner CRC32           0.43 GB/s     9
33*a28cd43dSSascha Wildner MD5-32          0.33 GB/s    10       Ronald L. Rivest
34*a28cd43dSSascha Wildner SHA1-32         0.28 GB/s    10
35*a28cd43dSSascha Wildner 
36*a28cd43dSSascha Wildner Q.Score is a measure of quality of the hash function.
37*a28cd43dSSascha Wildner It depends on successfully passing SMHasher test set.
38*a28cd43dSSascha Wildner 10 is a perfect score.
39*a28cd43dSSascha Wildner 
40*a28cd43dSSascha Wildner A 64-bits version, named XXH64, is available since r35.
41*a28cd43dSSascha Wildner It offers much better speed, but for 64-bits applications only.
42*a28cd43dSSascha Wildner Name     Speed on 64 bits    Speed on 32 bits
43*a28cd43dSSascha Wildner XXH64       13.8 GB/s            1.9 GB/s
44*a28cd43dSSascha Wildner XXH32        6.8 GB/s            6.0 GB/s
45*a28cd43dSSascha Wildner */
46*a28cd43dSSascha Wildner 
47*a28cd43dSSascha Wildner #if defined (__cplusplus)
48*a28cd43dSSascha Wildner extern "C" {
49*a28cd43dSSascha Wildner #endif
50*a28cd43dSSascha Wildner 
51*a28cd43dSSascha Wildner #ifndef XXHASH_H_5627135585666179
52*a28cd43dSSascha Wildner #define XXHASH_H_5627135585666179 1
53*a28cd43dSSascha Wildner 
54*a28cd43dSSascha Wildner 
55*a28cd43dSSascha Wildner /* ****************************
56*a28cd43dSSascha Wildner *  Definitions
57*a28cd43dSSascha Wildner ******************************/
58*a28cd43dSSascha Wildner #include "zstd_deps.h"
59*a28cd43dSSascha Wildner typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;
60*a28cd43dSSascha Wildner 
61*a28cd43dSSascha Wildner 
62*a28cd43dSSascha Wildner /* ****************************
63*a28cd43dSSascha Wildner *  API modifier
64*a28cd43dSSascha Wildner ******************************/
65*a28cd43dSSascha Wildner /** XXH_PRIVATE_API
66*a28cd43dSSascha Wildner *   This is useful if you want to include xxhash functions in `static` mode
67*a28cd43dSSascha Wildner *   in order to inline them, and remove their symbol from the public list.
68*a28cd43dSSascha Wildner *   Methodology :
69*a28cd43dSSascha Wildner *     #define XXH_PRIVATE_API
70*a28cd43dSSascha Wildner *     #include "xxhash.h"
71*a28cd43dSSascha Wildner *   `xxhash.c` is automatically included.
72*a28cd43dSSascha Wildner *   It's not useful to compile and link it as a separate module anymore.
73*a28cd43dSSascha Wildner */
74*a28cd43dSSascha Wildner #ifdef XXH_PRIVATE_API
75*a28cd43dSSascha Wildner #  ifndef XXH_STATIC_LINKING_ONLY
76*a28cd43dSSascha Wildner #    define XXH_STATIC_LINKING_ONLY
77*a28cd43dSSascha Wildner #  endif
78*a28cd43dSSascha Wildner #  if defined(__GNUC__)
79*a28cd43dSSascha Wildner #    define XXH_PUBLIC_API static __inline __attribute__((unused))
80*a28cd43dSSascha Wildner #  elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
81*a28cd43dSSascha Wildner #    define XXH_PUBLIC_API static inline
82*a28cd43dSSascha Wildner #  elif defined(_MSC_VER)
83*a28cd43dSSascha Wildner #    define XXH_PUBLIC_API static __inline
84*a28cd43dSSascha Wildner #  else
85*a28cd43dSSascha Wildner #    define XXH_PUBLIC_API static   /* this version may generate warnings for unused static functions; disable the relevant warning */
86*a28cd43dSSascha Wildner #  endif
87*a28cd43dSSascha Wildner #else
88*a28cd43dSSascha Wildner #  define XXH_PUBLIC_API   /* do nothing */
89*a28cd43dSSascha Wildner #endif /* XXH_PRIVATE_API */
90*a28cd43dSSascha Wildner 
91*a28cd43dSSascha Wildner /*!XXH_NAMESPACE, aka Namespace Emulation :
92*a28cd43dSSascha Wildner 
93*a28cd43dSSascha Wildner If you want to include _and expose_ xxHash functions from within your own library,
94*a28cd43dSSascha Wildner but also want to avoid symbol collisions with another library which also includes xxHash,
95*a28cd43dSSascha Wildner 
96*a28cd43dSSascha Wildner you can use XXH_NAMESPACE, to automatically prefix any public symbol from xxhash library
97*a28cd43dSSascha Wildner with the value of XXH_NAMESPACE (so avoid to keep it NULL and avoid numeric values).
98*a28cd43dSSascha Wildner 
99*a28cd43dSSascha Wildner Note that no change is required within the calling program as long as it includes `xxhash.h` :
100*a28cd43dSSascha Wildner regular symbol name will be automatically translated by this header.
101*a28cd43dSSascha Wildner */
102*a28cd43dSSascha Wildner #ifdef XXH_NAMESPACE
103*a28cd43dSSascha Wildner #  define XXH_CAT(A,B) A##B
104*a28cd43dSSascha Wildner #  define XXH_NAME2(A,B) XXH_CAT(A,B)
105*a28cd43dSSascha Wildner #  define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32)
106*a28cd43dSSascha Wildner #  define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64)
107*a28cd43dSSascha Wildner #  define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber)
108*a28cd43dSSascha Wildner #  define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState)
109*a28cd43dSSascha Wildner #  define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState)
110*a28cd43dSSascha Wildner #  define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState)
111*a28cd43dSSascha Wildner #  define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState)
112*a28cd43dSSascha Wildner #  define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset)
113*a28cd43dSSascha Wildner #  define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset)
114*a28cd43dSSascha Wildner #  define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update)
115*a28cd43dSSascha Wildner #  define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update)
116*a28cd43dSSascha Wildner #  define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest)
117*a28cd43dSSascha Wildner #  define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest)
118*a28cd43dSSascha Wildner #  define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState)
119*a28cd43dSSascha Wildner #  define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState)
120*a28cd43dSSascha Wildner #  define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash)
121*a28cd43dSSascha Wildner #  define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash)
122*a28cd43dSSascha Wildner #  define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical)
123*a28cd43dSSascha Wildner #  define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical)
124*a28cd43dSSascha Wildner #endif
125*a28cd43dSSascha Wildner 
126*a28cd43dSSascha Wildner 
127*a28cd43dSSascha Wildner /* *************************************
128*a28cd43dSSascha Wildner *  Version
129*a28cd43dSSascha Wildner ***************************************/
130*a28cd43dSSascha Wildner #define XXH_VERSION_MAJOR    0
131*a28cd43dSSascha Wildner #define XXH_VERSION_MINOR    6
132*a28cd43dSSascha Wildner #define XXH_VERSION_RELEASE  2
133*a28cd43dSSascha Wildner #define XXH_VERSION_NUMBER  (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE)
134*a28cd43dSSascha Wildner XXH_PUBLIC_API unsigned XXH_versionNumber (void);
135*a28cd43dSSascha Wildner 
136*a28cd43dSSascha Wildner 
137*a28cd43dSSascha Wildner /* ****************************
138*a28cd43dSSascha Wildner *  Simple Hash Functions
139*a28cd43dSSascha Wildner ******************************/
140*a28cd43dSSascha Wildner typedef unsigned int       XXH32_hash_t;
141*a28cd43dSSascha Wildner typedef unsigned long long XXH64_hash_t;
142*a28cd43dSSascha Wildner 
143*a28cd43dSSascha Wildner XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, unsigned int seed);
144*a28cd43dSSascha Wildner /* Begin FreeBSD - This symbol is needed by dll-linked CLI zstd(1). */
145*a28cd43dSSascha Wildner __attribute__((visibility ("default")))
146*a28cd43dSSascha Wildner /* End FreeBSD */
147*a28cd43dSSascha Wildner XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t length, unsigned long long seed);
148*a28cd43dSSascha Wildner 
149*a28cd43dSSascha Wildner /*!
150*a28cd43dSSascha Wildner XXH32() :
151*a28cd43dSSascha Wildner     Calculate the 32-bits hash of sequence "length" bytes stored at memory address "input".
152*a28cd43dSSascha Wildner     The memory between input & input+length must be valid (allocated and read-accessible).
153*a28cd43dSSascha Wildner     "seed" can be used to alter the result predictably.
154*a28cd43dSSascha Wildner     Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s
155*a28cd43dSSascha Wildner XXH64() :
156*a28cd43dSSascha Wildner     Calculate the 64-bits hash of sequence of length "len" stored at memory address "input".
157*a28cd43dSSascha Wildner     "seed" can be used to alter the result predictably.
158*a28cd43dSSascha Wildner     This function runs 2x faster on 64-bits systems, but slower on 32-bits systems (see benchmark).
159*a28cd43dSSascha Wildner */
160*a28cd43dSSascha Wildner 
161*a28cd43dSSascha Wildner 
162*a28cd43dSSascha Wildner /* ****************************
163*a28cd43dSSascha Wildner *  Streaming Hash Functions
164*a28cd43dSSascha Wildner ******************************/
165*a28cd43dSSascha Wildner typedef struct XXH32_state_s XXH32_state_t;   /* incomplete type */
166*a28cd43dSSascha Wildner typedef struct XXH64_state_s XXH64_state_t;   /* incomplete type */
167*a28cd43dSSascha Wildner 
168*a28cd43dSSascha Wildner /*! State allocation, compatible with dynamic libraries */
169*a28cd43dSSascha Wildner 
170*a28cd43dSSascha Wildner XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void);
171*a28cd43dSSascha Wildner XXH_PUBLIC_API XXH_errorcode  XXH32_freeState(XXH32_state_t* statePtr);
172*a28cd43dSSascha Wildner 
173*a28cd43dSSascha Wildner XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void);
174*a28cd43dSSascha Wildner XXH_PUBLIC_API XXH_errorcode  XXH64_freeState(XXH64_state_t* statePtr);
175*a28cd43dSSascha Wildner 
176*a28cd43dSSascha Wildner 
177*a28cd43dSSascha Wildner /* hash streaming */
178*a28cd43dSSascha Wildner 
179*a28cd43dSSascha Wildner XXH_PUBLIC_API XXH_errorcode XXH32_reset  (XXH32_state_t* statePtr, unsigned int seed);
180*a28cd43dSSascha Wildner XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length);
181*a28cd43dSSascha Wildner XXH_PUBLIC_API XXH32_hash_t  XXH32_digest (const XXH32_state_t* statePtr);
182*a28cd43dSSascha Wildner 
183*a28cd43dSSascha Wildner XXH_PUBLIC_API XXH_errorcode XXH64_reset  (XXH64_state_t* statePtr, unsigned long long seed);
184*a28cd43dSSascha Wildner XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length);
185*a28cd43dSSascha Wildner XXH_PUBLIC_API XXH64_hash_t  XXH64_digest (const XXH64_state_t* statePtr);
186*a28cd43dSSascha Wildner 
187*a28cd43dSSascha Wildner /*
188*a28cd43dSSascha Wildner These functions generate the xxHash of an input provided in multiple segments.
189*a28cd43dSSascha Wildner Note that, for small input, they are slower than single-call functions, due to state management.
190*a28cd43dSSascha Wildner For small input, prefer `XXH32()` and `XXH64()` .
191*a28cd43dSSascha Wildner 
192*a28cd43dSSascha Wildner XXH state must first be allocated, using XXH*_createState() .
193*a28cd43dSSascha Wildner 
194*a28cd43dSSascha Wildner Start a new hash by initializing state with a seed, using XXH*_reset().
195*a28cd43dSSascha Wildner 
196*a28cd43dSSascha Wildner Then, feed the hash state by calling XXH*_update() as many times as necessary.
197*a28cd43dSSascha Wildner Obviously, input must be allocated and read accessible.
198*a28cd43dSSascha Wildner The function returns an error code, with 0 meaning OK, and any other value meaning there is an error.
199*a28cd43dSSascha Wildner 
200*a28cd43dSSascha Wildner Finally, a hash value can be produced anytime, by using XXH*_digest().
201*a28cd43dSSascha Wildner This function returns the nn-bits hash as an int or long long.
202*a28cd43dSSascha Wildner 
203*a28cd43dSSascha Wildner It's still possible to continue inserting input into the hash state after a digest,
204*a28cd43dSSascha Wildner and generate some new hashes later on, by calling again XXH*_digest().
205*a28cd43dSSascha Wildner 
206*a28cd43dSSascha Wildner When done, free XXH state space if it was allocated dynamically.
207*a28cd43dSSascha Wildner */
208*a28cd43dSSascha Wildner 
209*a28cd43dSSascha Wildner 
210*a28cd43dSSascha Wildner /* **************************
211*a28cd43dSSascha Wildner *  Utils
212*a28cd43dSSascha Wildner ****************************/
213*a28cd43dSSascha Wildner #if !(defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L))   /* ! C99 */
214*a28cd43dSSascha Wildner #  define restrict   /* disable restrict */
215*a28cd43dSSascha Wildner #endif
216*a28cd43dSSascha Wildner 
217*a28cd43dSSascha Wildner XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* restrict dst_state, const XXH32_state_t* restrict src_state);
218*a28cd43dSSascha Wildner XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* restrict dst_state, const XXH64_state_t* restrict src_state);
219*a28cd43dSSascha Wildner 
220*a28cd43dSSascha Wildner 
221*a28cd43dSSascha Wildner /* **************************
222*a28cd43dSSascha Wildner *  Canonical representation
223*a28cd43dSSascha Wildner ****************************/
224*a28cd43dSSascha Wildner /* Default result type for XXH functions are primitive unsigned 32 and 64 bits.
225*a28cd43dSSascha Wildner *  The canonical representation uses human-readable write convention, aka big-endian (large digits first).
226*a28cd43dSSascha Wildner *  These functions allow transformation of hash result into and from its canonical format.
227*a28cd43dSSascha Wildner *  This way, hash values can be written into a file / memory, and remain comparable on different systems and programs.
228*a28cd43dSSascha Wildner */
229*a28cd43dSSascha Wildner typedef struct { unsigned char digest[4]; } XXH32_canonical_t;
230*a28cd43dSSascha Wildner typedef struct { unsigned char digest[8]; } XXH64_canonical_t;
231*a28cd43dSSascha Wildner 
232*a28cd43dSSascha Wildner XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash);
233*a28cd43dSSascha Wildner XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash);
234*a28cd43dSSascha Wildner 
235*a28cd43dSSascha Wildner XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src);
236*a28cd43dSSascha Wildner XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src);
237*a28cd43dSSascha Wildner 
238*a28cd43dSSascha Wildner #endif /* XXHASH_H_5627135585666179 */
239*a28cd43dSSascha Wildner 
240*a28cd43dSSascha Wildner 
241*a28cd43dSSascha Wildner 
242*a28cd43dSSascha Wildner /* ================================================================================================
243*a28cd43dSSascha Wildner    This section contains definitions which are not guaranteed to remain stable.
244*a28cd43dSSascha Wildner    They may change in future versions, becoming incompatible with a different version of the library.
245*a28cd43dSSascha Wildner    They shall only be used with static linking.
246*a28cd43dSSascha Wildner    Never use these definitions in association with dynamic linking !
247*a28cd43dSSascha Wildner =================================================================================================== */
248*a28cd43dSSascha Wildner #if defined(XXH_STATIC_LINKING_ONLY) && !defined(XXH_STATIC_H_3543687687345)
249*a28cd43dSSascha Wildner #define XXH_STATIC_H_3543687687345
250*a28cd43dSSascha Wildner 
251*a28cd43dSSascha Wildner /* These definitions are only meant to allow allocation of XXH state
252*a28cd43dSSascha Wildner    statically, on stack, or in a struct for example.
253*a28cd43dSSascha Wildner    Do not use members directly. */
254*a28cd43dSSascha Wildner 
255*a28cd43dSSascha Wildner    struct XXH32_state_s {
256*a28cd43dSSascha Wildner        unsigned total_len_32;
257*a28cd43dSSascha Wildner        unsigned large_len;
258*a28cd43dSSascha Wildner        unsigned v1;
259*a28cd43dSSascha Wildner        unsigned v2;
260*a28cd43dSSascha Wildner        unsigned v3;
261*a28cd43dSSascha Wildner        unsigned v4;
262*a28cd43dSSascha Wildner        unsigned mem32[4];   /* buffer defined as U32 for alignment */
263*a28cd43dSSascha Wildner        unsigned memsize;
264*a28cd43dSSascha Wildner        unsigned reserved;   /* never read nor write, will be removed in a future version */
265*a28cd43dSSascha Wildner    };   /* typedef'd to XXH32_state_t */
266*a28cd43dSSascha Wildner 
267*a28cd43dSSascha Wildner    struct XXH64_state_s {
268*a28cd43dSSascha Wildner        unsigned long long total_len;
269*a28cd43dSSascha Wildner        unsigned long long v1;
270*a28cd43dSSascha Wildner        unsigned long long v2;
271*a28cd43dSSascha Wildner        unsigned long long v3;
272*a28cd43dSSascha Wildner        unsigned long long v4;
273*a28cd43dSSascha Wildner        unsigned long long mem64[4];   /* buffer defined as U64 for alignment */
274*a28cd43dSSascha Wildner        unsigned memsize;
275*a28cd43dSSascha Wildner        unsigned reserved[2];          /* never read nor write, will be removed in a future version */
276*a28cd43dSSascha Wildner    };   /* typedef'd to XXH64_state_t */
277*a28cd43dSSascha Wildner 
278*a28cd43dSSascha Wildner 
279*a28cd43dSSascha Wildner #  ifdef XXH_PRIVATE_API
280*a28cd43dSSascha Wildner #    include "xxhash.c"   /* include xxhash functions as `static`, for inlining */
281*a28cd43dSSascha Wildner #  endif
282*a28cd43dSSascha Wildner 
283*a28cd43dSSascha Wildner #endif /* XXH_STATIC_LINKING_ONLY && XXH_STATIC_H_3543687687345 */
284*a28cd43dSSascha Wildner 
285*a28cd43dSSascha Wildner 
286 #if defined (__cplusplus)
287 }
288 #endif
289