1*c03c5b1cSMartin Matuska /* 2*c03c5b1cSMartin Matuska * xxHash - Extremely Fast Hash algorithm 3*c03c5b1cSMartin Matuska * Header File 4*c03c5b1cSMartin Matuska * Copyright (c) 2012-2020, Yann Collet, Facebook, Inc. 5*c03c5b1cSMartin Matuska * 6*c03c5b1cSMartin Matuska * You can contact the author at : 7*c03c5b1cSMartin Matuska * - xxHash source repository : https://github.com/Cyan4973/xxHash 8*c03c5b1cSMartin Matuska * 9*c03c5b1cSMartin Matuska * This source code is licensed under both the BSD-style license (found in the 10*c03c5b1cSMartin Matuska * LICENSE file in the root directory of this source tree) and the GPLv2 (found 11*c03c5b1cSMartin Matuska * in the COPYING file in the root directory of this source tree). 12*c03c5b1cSMartin Matuska * You may select, at your option, one of the above-listed licenses. 13*c03c5b1cSMartin Matuska */ 14*c03c5b1cSMartin Matuska 15*c03c5b1cSMartin Matuska /* Notice extracted from xxHash homepage : 16*c03c5b1cSMartin Matuska 17*c03c5b1cSMartin Matuska xxHash is an extremely fast Hash algorithm, running at RAM speed limits. 18*c03c5b1cSMartin Matuska It also successfully passes all tests from the SMHasher suite. 19*c03c5b1cSMartin Matuska 20*c03c5b1cSMartin Matuska Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz) 21*c03c5b1cSMartin Matuska 22*c03c5b1cSMartin Matuska Name Speed Q.Score Author 23*c03c5b1cSMartin Matuska xxHash 5.4 GB/s 10 24*c03c5b1cSMartin Matuska CrapWow 3.2 GB/s 2 Andrew 25*c03c5b1cSMartin Matuska MumurHash 3a 2.7 GB/s 10 Austin Appleby 26*c03c5b1cSMartin Matuska SpookyHash 2.0 GB/s 10 Bob Jenkins 27*c03c5b1cSMartin Matuska SBox 1.4 GB/s 9 Bret Mulvey 28*c03c5b1cSMartin Matuska Lookup3 1.2 GB/s 9 Bob Jenkins 29*c03c5b1cSMartin Matuska SuperFastHash 1.2 GB/s 1 Paul Hsieh 30*c03c5b1cSMartin Matuska CityHash64 1.05 GB/s 10 Pike & Alakuijala 31*c03c5b1cSMartin Matuska FNV 0.55 GB/s 5 Fowler, Noll, Vo 32*c03c5b1cSMartin Matuska CRC32 0.43 GB/s 9 33*c03c5b1cSMartin Matuska MD5-32 0.33 GB/s 10 Ronald L. Rivest 34*c03c5b1cSMartin Matuska SHA1-32 0.28 GB/s 10 35*c03c5b1cSMartin Matuska 36*c03c5b1cSMartin Matuska Q.Score is a measure of quality of the hash function. 37*c03c5b1cSMartin Matuska It depends on successfully passing SMHasher test set. 38*c03c5b1cSMartin Matuska 10 is a perfect score. 39*c03c5b1cSMartin Matuska 40*c03c5b1cSMartin Matuska A 64-bits version, named XXH64, is available since r35. 41*c03c5b1cSMartin Matuska It offers much better speed, but for 64-bits applications only. 42*c03c5b1cSMartin Matuska Name Speed on 64 bits Speed on 32 bits 43*c03c5b1cSMartin Matuska XXH64 13.8 GB/s 1.9 GB/s 44*c03c5b1cSMartin Matuska XXH32 6.8 GB/s 6.0 GB/s 45*c03c5b1cSMartin Matuska */ 46*c03c5b1cSMartin Matuska 47*c03c5b1cSMartin Matuska #if defined (__cplusplus) 48*c03c5b1cSMartin Matuska extern "C" { 49*c03c5b1cSMartin Matuska #endif 50*c03c5b1cSMartin Matuska 51*c03c5b1cSMartin Matuska #ifndef XXHASH_H_5627135585666179 52*c03c5b1cSMartin Matuska #define XXHASH_H_5627135585666179 1 53*c03c5b1cSMartin Matuska 54*c03c5b1cSMartin Matuska 55*c03c5b1cSMartin Matuska /* **************************** 56*c03c5b1cSMartin Matuska * Definitions 57*c03c5b1cSMartin Matuska ******************************/ 58*c03c5b1cSMartin Matuska #include <stddef.h> /* size_t */ 59*c03c5b1cSMartin Matuska typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode; 60*c03c5b1cSMartin Matuska 61*c03c5b1cSMartin Matuska 62*c03c5b1cSMartin Matuska /* **************************** 63*c03c5b1cSMartin Matuska * API modifier 64*c03c5b1cSMartin Matuska ******************************/ 65*c03c5b1cSMartin Matuska /** XXH_PRIVATE_API 66*c03c5b1cSMartin Matuska * This is useful if you want to include xxhash functions in `static` mode 67*c03c5b1cSMartin Matuska * in order to inline them, and remove their symbol from the public list. 68*c03c5b1cSMartin Matuska * Methodology : 69*c03c5b1cSMartin Matuska * #define XXH_PRIVATE_API 70*c03c5b1cSMartin Matuska * #include "xxhash.h" 71*c03c5b1cSMartin Matuska * `xxhash.c` is automatically included. 72*c03c5b1cSMartin Matuska * It's not useful to compile and link it as a separate module anymore. 73*c03c5b1cSMartin Matuska */ 74*c03c5b1cSMartin Matuska #ifdef XXH_PRIVATE_API 75*c03c5b1cSMartin Matuska # ifndef XXH_STATIC_LINKING_ONLY 76*c03c5b1cSMartin Matuska # define XXH_STATIC_LINKING_ONLY 77*c03c5b1cSMartin Matuska # endif 78*c03c5b1cSMartin Matuska # if defined(__GNUC__) 79*c03c5b1cSMartin Matuska # define XXH_PUBLIC_API static __inline __attribute__((unused)) 80*c03c5b1cSMartin Matuska # elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) 81*c03c5b1cSMartin Matuska # define XXH_PUBLIC_API static inline 82*c03c5b1cSMartin Matuska # elif defined(_MSC_VER) 83*c03c5b1cSMartin Matuska # define XXH_PUBLIC_API static __inline 84*c03c5b1cSMartin Matuska # else 85*c03c5b1cSMartin Matuska # define XXH_PUBLIC_API static /* this version may generate warnings for unused static functions; disable the relevant warning */ 86*c03c5b1cSMartin Matuska # endif 87*c03c5b1cSMartin Matuska #else 88*c03c5b1cSMartin Matuska # define XXH_PUBLIC_API /* do nothing */ 89*c03c5b1cSMartin Matuska #endif /* XXH_PRIVATE_API */ 90*c03c5b1cSMartin Matuska 91*c03c5b1cSMartin Matuska /*!XXH_NAMESPACE, aka Namespace Emulation : 92*c03c5b1cSMartin Matuska 93*c03c5b1cSMartin Matuska If you want to include _and expose_ xxHash functions from within your own library, 94*c03c5b1cSMartin Matuska but also want to avoid symbol collisions with another library which also includes xxHash, 95*c03c5b1cSMartin Matuska 96*c03c5b1cSMartin Matuska you can use XXH_NAMESPACE, to automatically prefix any public symbol from xxhash library 97*c03c5b1cSMartin Matuska with the value of XXH_NAMESPACE (so avoid to keep it NULL and avoid numeric values). 98*c03c5b1cSMartin Matuska 99*c03c5b1cSMartin Matuska Note that no change is required within the calling program as long as it includes `xxhash.h` : 100*c03c5b1cSMartin Matuska regular symbol name will be automatically translated by this header. 101*c03c5b1cSMartin Matuska */ 102*c03c5b1cSMartin Matuska #ifdef XXH_NAMESPACE 103*c03c5b1cSMartin Matuska # define XXH_CAT(A,B) A##B 104*c03c5b1cSMartin Matuska # define XXH_NAME2(A,B) XXH_CAT(A,B) 105*c03c5b1cSMartin Matuska # define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32) 106*c03c5b1cSMartin Matuska # define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64) 107*c03c5b1cSMartin Matuska # define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber) 108*c03c5b1cSMartin Matuska # define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState) 109*c03c5b1cSMartin Matuska # define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState) 110*c03c5b1cSMartin Matuska # define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState) 111*c03c5b1cSMartin Matuska # define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState) 112*c03c5b1cSMartin Matuska # define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset) 113*c03c5b1cSMartin Matuska # define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset) 114*c03c5b1cSMartin Matuska # define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update) 115*c03c5b1cSMartin Matuska # define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update) 116*c03c5b1cSMartin Matuska # define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest) 117*c03c5b1cSMartin Matuska # define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest) 118*c03c5b1cSMartin Matuska # define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState) 119*c03c5b1cSMartin Matuska # define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState) 120*c03c5b1cSMartin Matuska # define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash) 121*c03c5b1cSMartin Matuska # define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash) 122*c03c5b1cSMartin Matuska # define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical) 123*c03c5b1cSMartin Matuska # define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical) 124*c03c5b1cSMartin Matuska #endif 125*c03c5b1cSMartin Matuska 126*c03c5b1cSMartin Matuska 127*c03c5b1cSMartin Matuska /* ************************************* 128*c03c5b1cSMartin Matuska * Version 129*c03c5b1cSMartin Matuska ***************************************/ 130*c03c5b1cSMartin Matuska #define XXH_VERSION_MAJOR 0 131*c03c5b1cSMartin Matuska #define XXH_VERSION_MINOR 6 132*c03c5b1cSMartin Matuska #define XXH_VERSION_RELEASE 2 133*c03c5b1cSMartin Matuska #define XXH_VERSION_NUMBER (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE) 134*c03c5b1cSMartin Matuska XXH_PUBLIC_API unsigned XXH_versionNumber (void); 135*c03c5b1cSMartin Matuska 136*c03c5b1cSMartin Matuska 137*c03c5b1cSMartin Matuska /* **************************** 138*c03c5b1cSMartin Matuska * Simple Hash Functions 139*c03c5b1cSMartin Matuska ******************************/ 140*c03c5b1cSMartin Matuska typedef unsigned int XXH32_hash_t; 141*c03c5b1cSMartin Matuska typedef unsigned long long XXH64_hash_t; 142*c03c5b1cSMartin Matuska 143*c03c5b1cSMartin Matuska XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, unsigned int seed); 144*c03c5b1cSMartin Matuska XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t length, unsigned long long seed); 145*c03c5b1cSMartin Matuska 146*c03c5b1cSMartin Matuska /*! 147*c03c5b1cSMartin Matuska XXH32() : 148*c03c5b1cSMartin Matuska Calculate the 32-bits hash of sequence "length" bytes stored at memory address "input". 149*c03c5b1cSMartin Matuska The memory between input & input+length must be valid (allocated and read-accessible). 150*c03c5b1cSMartin Matuska "seed" can be used to alter the result predictably. 151*c03c5b1cSMartin Matuska Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s 152*c03c5b1cSMartin Matuska XXH64() : 153*c03c5b1cSMartin Matuska Calculate the 64-bits hash of sequence of length "len" stored at memory address "input". 154*c03c5b1cSMartin Matuska "seed" can be used to alter the result predictably. 155*c03c5b1cSMartin Matuska This function runs 2x faster on 64-bits systems, but slower on 32-bits systems (see benchmark). 156*c03c5b1cSMartin Matuska */ 157*c03c5b1cSMartin Matuska 158*c03c5b1cSMartin Matuska 159*c03c5b1cSMartin Matuska /* **************************** 160*c03c5b1cSMartin Matuska * Streaming Hash Functions 161*c03c5b1cSMartin Matuska ******************************/ 162*c03c5b1cSMartin Matuska typedef struct XXH32_state_s XXH32_state_t; /* incomplete type */ 163*c03c5b1cSMartin Matuska typedef struct XXH64_state_s XXH64_state_t; /* incomplete type */ 164*c03c5b1cSMartin Matuska 165*c03c5b1cSMartin Matuska /*! State allocation, compatible with dynamic libraries */ 166*c03c5b1cSMartin Matuska 167*c03c5b1cSMartin Matuska XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void); 168*c03c5b1cSMartin Matuska XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr); 169*c03c5b1cSMartin Matuska 170*c03c5b1cSMartin Matuska XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void); 171*c03c5b1cSMartin Matuska XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr); 172*c03c5b1cSMartin Matuska 173*c03c5b1cSMartin Matuska 174*c03c5b1cSMartin Matuska /* hash streaming */ 175*c03c5b1cSMartin Matuska 176*c03c5b1cSMartin Matuska XXH_PUBLIC_API XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, unsigned int seed); 177*c03c5b1cSMartin Matuska XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length); 178*c03c5b1cSMartin Matuska XXH_PUBLIC_API XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr); 179*c03c5b1cSMartin Matuska 180*c03c5b1cSMartin Matuska XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH64_state_t* statePtr, unsigned long long seed); 181*c03c5b1cSMartin Matuska XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length); 182*c03c5b1cSMartin Matuska XXH_PUBLIC_API XXH64_hash_t XXH64_digest (const XXH64_state_t* statePtr); 183*c03c5b1cSMartin Matuska 184*c03c5b1cSMartin Matuska /* 185*c03c5b1cSMartin Matuska These functions generate the xxHash of an input provided in multiple segments. 186*c03c5b1cSMartin Matuska Note that, for small input, they are slower than single-call functions, due to state management. 187*c03c5b1cSMartin Matuska For small input, prefer `XXH32()` and `XXH64()` . 188*c03c5b1cSMartin Matuska 189*c03c5b1cSMartin Matuska XXH state must first be allocated, using XXH*_createState() . 190*c03c5b1cSMartin Matuska 191*c03c5b1cSMartin Matuska Start a new hash by initializing state with a seed, using XXH*_reset(). 192*c03c5b1cSMartin Matuska 193*c03c5b1cSMartin Matuska Then, feed the hash state by calling XXH*_update() as many times as necessary. 194*c03c5b1cSMartin Matuska Obviously, input must be allocated and read accessible. 195*c03c5b1cSMartin Matuska The function returns an error code, with 0 meaning OK, and any other value meaning there is an error. 196*c03c5b1cSMartin Matuska 197*c03c5b1cSMartin Matuska Finally, a hash value can be produced anytime, by using XXH*_digest(). 198*c03c5b1cSMartin Matuska This function returns the nn-bits hash as an int or long long. 199*c03c5b1cSMartin Matuska 200*c03c5b1cSMartin Matuska It's still possible to continue inserting input into the hash state after a digest, 201*c03c5b1cSMartin Matuska and generate some new hashes later on, by calling again XXH*_digest(). 202*c03c5b1cSMartin Matuska 203*c03c5b1cSMartin Matuska When done, free XXH state space if it was allocated dynamically. 204*c03c5b1cSMartin Matuska */ 205*c03c5b1cSMartin Matuska 206*c03c5b1cSMartin Matuska 207*c03c5b1cSMartin Matuska /* ************************** 208*c03c5b1cSMartin Matuska * Utils 209*c03c5b1cSMartin Matuska ****************************/ 210*c03c5b1cSMartin Matuska #if !(defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) /* ! C99 */ 211*c03c5b1cSMartin Matuska # define restrict /* disable restrict */ 212*c03c5b1cSMartin Matuska #endif 213*c03c5b1cSMartin Matuska 214*c03c5b1cSMartin Matuska XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* restrict dst_state, const XXH32_state_t* restrict src_state); 215*c03c5b1cSMartin Matuska XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* restrict dst_state, const XXH64_state_t* restrict src_state); 216*c03c5b1cSMartin Matuska 217*c03c5b1cSMartin Matuska 218*c03c5b1cSMartin Matuska /* ************************** 219*c03c5b1cSMartin Matuska * Canonical representation 220*c03c5b1cSMartin Matuska ****************************/ 221*c03c5b1cSMartin Matuska /* Default result type for XXH functions are primitive unsigned 32 and 64 bits. 222*c03c5b1cSMartin Matuska * The canonical representation uses human-readable write convention, aka big-endian (large digits first). 223*c03c5b1cSMartin Matuska * These functions allow transformation of hash result into and from its canonical format. 224*c03c5b1cSMartin Matuska * This way, hash values can be written into a file / memory, and remain comparable on different systems and programs. 225*c03c5b1cSMartin Matuska */ 226*c03c5b1cSMartin Matuska typedef struct { unsigned char digest[4]; } XXH32_canonical_t; 227*c03c5b1cSMartin Matuska typedef struct { unsigned char digest[8]; } XXH64_canonical_t; 228*c03c5b1cSMartin Matuska 229*c03c5b1cSMartin Matuska XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash); 230*c03c5b1cSMartin Matuska XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash); 231*c03c5b1cSMartin Matuska 232*c03c5b1cSMartin Matuska XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src); 233*c03c5b1cSMartin Matuska XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src); 234*c03c5b1cSMartin Matuska 235*c03c5b1cSMartin Matuska #endif /* XXHASH_H_5627135585666179 */ 236*c03c5b1cSMartin Matuska 237*c03c5b1cSMartin Matuska 238*c03c5b1cSMartin Matuska 239*c03c5b1cSMartin Matuska /* ================================================================================================ 240*c03c5b1cSMartin Matuska This section contains definitions which are not guaranteed to remain stable. 241*c03c5b1cSMartin Matuska They may change in future versions, becoming incompatible with a different version of the library. 242*c03c5b1cSMartin Matuska They shall only be used with static linking. 243*c03c5b1cSMartin Matuska Never use these definitions in association with dynamic linking ! 244*c03c5b1cSMartin Matuska =================================================================================================== */ 245*c03c5b1cSMartin Matuska #if defined(XXH_STATIC_LINKING_ONLY) && !defined(XXH_STATIC_H_3543687687345) 246*c03c5b1cSMartin Matuska #define XXH_STATIC_H_3543687687345 247*c03c5b1cSMartin Matuska 248*c03c5b1cSMartin Matuska /* These definitions are only meant to allow allocation of XXH state 249*c03c5b1cSMartin Matuska statically, on stack, or in a struct for example. 250*c03c5b1cSMartin Matuska Do not use members directly. */ 251*c03c5b1cSMartin Matuska 252*c03c5b1cSMartin Matuska struct XXH32_state_s { 253*c03c5b1cSMartin Matuska unsigned total_len_32; 254*c03c5b1cSMartin Matuska unsigned large_len; 255*c03c5b1cSMartin Matuska unsigned v1; 256*c03c5b1cSMartin Matuska unsigned v2; 257*c03c5b1cSMartin Matuska unsigned v3; 258*c03c5b1cSMartin Matuska unsigned v4; 259*c03c5b1cSMartin Matuska unsigned mem32[4]; /* buffer defined as U32 for alignment */ 260*c03c5b1cSMartin Matuska unsigned memsize; 261*c03c5b1cSMartin Matuska unsigned reserved; /* never read nor write, will be removed in a future version */ 262*c03c5b1cSMartin Matuska }; /* typedef'd to XXH32_state_t */ 263*c03c5b1cSMartin Matuska 264*c03c5b1cSMartin Matuska struct XXH64_state_s { 265*c03c5b1cSMartin Matuska unsigned long long total_len; 266*c03c5b1cSMartin Matuska unsigned long long v1; 267*c03c5b1cSMartin Matuska unsigned long long v2; 268*c03c5b1cSMartin Matuska unsigned long long v3; 269*c03c5b1cSMartin Matuska unsigned long long v4; 270*c03c5b1cSMartin Matuska unsigned long long mem64[4]; /* buffer defined as U64 for alignment */ 271*c03c5b1cSMartin Matuska unsigned memsize; 272*c03c5b1cSMartin Matuska unsigned reserved[2]; /* never read nor write, will be removed in a future version */ 273*c03c5b1cSMartin Matuska }; /* typedef'd to XXH64_state_t */ 274*c03c5b1cSMartin Matuska 275*c03c5b1cSMartin Matuska 276*c03c5b1cSMartin Matuska # ifdef XXH_PRIVATE_API 277*c03c5b1cSMartin Matuska # include "xxhash.c" /* include xxhash functions as `static`, for inlining */ 278*c03c5b1cSMartin Matuska # endif 279*c03c5b1cSMartin Matuska 280*c03c5b1cSMartin Matuska #endif /* XXH_STATIC_LINKING_ONLY && XXH_STATIC_H_3543687687345 */ 281*c03c5b1cSMartin Matuska 282*c03c5b1cSMartin Matuska 283*c03c5b1cSMartin Matuska #if defined (__cplusplus) 284*c03c5b1cSMartin Matuska } 285*c03c5b1cSMartin Matuska #endif 286