/*
 * xxHash - Extremely Fast Hash algorithm
 * Header File
 * Copyright (c) Yann Collet - Meta Platforms, Inc
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

/* Local adaptations for Zstandard */

#ifndef XXH_NO_XXH3
# define XXH_NO_XXH3
#endif

#ifndef XXH_NAMESPACE
# define XXH_NAMESPACE ZSTD_
#endif

/*!
 * @mainpage xxHash
 *
 * xxHash is an extremely fast non-cryptographic hash algorithm, working at RAM
 * speed limits.
 *
 * It is proposed in four flavors, in three families:
 * 1. @ref XXH32_family
 *   - Classic 32-bit hash function. Simple, compact, and runs on almost all
 *     32-bit and 64-bit systems.
 * 2. @ref XXH64_family
 *   - Classic 64-bit adaptation of XXH32. Just as simple, and runs well on most
 *     64-bit systems (but _not_ 32-bit systems).
 * 3. @ref XXH3_family
 *   - Modern 64-bit and 128-bit hash function family which features improved
 *     strength and performance across the board, especially on smaller data.
 *     It benefits greatly from SIMD and 64-bit without requiring it.
 *
 * Benchmarks
 * ---
 * The reference system uses an Intel i7-9700K CPU, and runs Ubuntu x64 20.04.
 * The open source benchmark program is compiled with clang v10.0 using -O3 flag.
 *
 * | Hash Name            | ISA ext | Width | Large Data Speed | Small Data Velocity |
 * | -------------------- | ------- | ----: | ---------------: | ------------------: |
 * | XXH3_64bits()        | @b AVX2 |    64 |        59.4 GB/s |               133.1 |
 * | MeowHash             | AES-NI  |   128 |        58.2 GB/s |                52.5 |
 * | XXH3_128bits()       | @b AVX2 |   128 |        57.9 GB/s |               118.1 |
 * | CLHash               | PCLMUL  |    64 |        37.1 GB/s |                58.1 |
 * | XXH3_64bits()        | @b SSE2 |    64 |        31.5 GB/s |               133.1 |
 * | XXH3_128bits()       | @b SSE2 |   128 |        29.6 GB/s |               118.1 |
 * | RAM sequential read  |         |   N/A |        28.0 GB/s |                 N/A |
 * | ahash                | AES-NI  |    64 |        22.5 GB/s |               107.2 |
 * | City64               |         |    64 |        22.0 GB/s |                76.6 |
 * | T1ha2                |         |    64 |        22.0 GB/s |                99.0 |
 * | City128              |         |   128 |        21.7 GB/s |                57.7 |
 * | FarmHash             | AES-NI  |    64 |        21.3 GB/s |                71.9 |
 * | XXH64()              |         |    64 |        19.4 GB/s |                71.0 |
 * | SpookyHash           |         |    64 |        19.3 GB/s |                53.2 |
 * | Mum                  |         |    64 |        18.0 GB/s |                67.0 |
 * | CRC32C               | SSE4.2  |    32 |        13.0 GB/s |                57.9 |
 * | XXH32()              |         |    32 |         9.7 GB/s |                71.9 |
 * | City32               |         |    32 |         9.1 GB/s |                66.0 |
 * | Blake3*              | @b AVX2 |   256 |         4.4 GB/s |                 8.1 |
 * | Murmur3              |         |    32 |         3.9 GB/s |                56.1 |
 * | SipHash*             |         |    64 |         3.0 GB/s |                43.2 |
 * | Blake3*              | @b SSE2 |   256 |         2.4 GB/s |                 8.1 |
 * | HighwayHash          |         |    64 |         1.4 GB/s |                 6.0 |
 * | FNV64                |         |    64 |         1.2 GB/s |                62.7 |
 * | Blake2*              |         |   256 |         1.1 GB/s |                 5.1 |
 * | SHA1*                |         |   160 |         0.8 GB/s |                 5.6 |
 * | MD5*                 |         |   128 |         0.6 GB/s |                 7.8 |
 * @note
 *   - Hashes which require a specific ISA extension are noted. SSE2 is also noted,
 *     even though it is mandatory on x64.
 *   - Hashes with an asterisk are cryptographic. Note that MD5 is non-cryptographic
 *     by modern standards.
 *   - Small data velocity is a rough average of algorithm's efficiency for small
 *     data. For more accurate information, see the wiki.
 *   - More benchmarks and strength tests are found on the wiki:
 *     https://github.com/Cyan4973/xxHash/wiki
 *
 * Usage
 * ------
 * All xxHash variants use a similar API. Changing the algorithm is a trivial
 * substitution.
 *
 * @pre
 *    For functions which take an input and length parameter, the following
 *    requirements are assumed:
 *  - The range from [`input`, `input + length`) is valid, readable memory.
 *    - The only exception is if the `length` is `0`, `input` may be `NULL`.
 *  - For C++, the objects must have the *TriviallyCopyable* property, as the
 *    functions access bytes directly as if it was an array of `unsigned char`.
 *
 * @anchor single_shot_example
 * **Single Shot**
 *
 * These functions are stateless functions which hash a contiguous block of memory,
 * immediately returning the result. They are the easiest and usually the fastest
 * option.
 *
 * XXH32(), XXH64(), XXH3_64bits(), XXH3_128bits()
 *
 * @code{.c}
 *   #include <string.h>
 *   #include "xxhash.h"
 *
 *   // Example for a function which hashes a null terminated string with XXH32().
 *   XXH32_hash_t hash_string(const char* string, XXH32_hash_t seed)
 *   {
 *       // NULL pointers are only valid if the length is zero
 *       size_t length = (string == NULL) ? 0 : strlen(string);
 *       return XXH32(string, length, seed);
 *   }
 * @endcode
 *
 *
 * @anchor streaming_example
 * **Streaming**
 *
 * These groups of functions allow incremental hashing of unknown size, even
 * more than what would fit in a size_t.
 *
 * XXH32_reset(), XXH64_reset(), XXH3_64bits_reset(), XXH3_128bits_reset()
 *
 * @code{.c}
 *   #include <stdio.h>
 *   #include <assert.h>
 *   #include "xxhash.h"
 *   // Example for a function which hashes a FILE incrementally with XXH3_64bits().
 *   XXH64_hash_t hashFile(FILE* f)
 *   {
 *       // Allocate a state struct. Do not just use malloc() or new.
 *       XXH3_state_t* state = XXH3_createState();
 *       assert(state != NULL && "Out of memory!");
 *       // Reset the state to start a new hashing session.
 *       XXH3_64bits_reset(state);
 *       char buffer[4096];
 *       size_t count;
 *       // Read the file in chunks
 *       while ((count = fread(buffer, 1, sizeof(buffer), f)) != 0) {
 *           // Run update() as many times as necessary to process the data
 *           XXH3_64bits_update(state, buffer, count);
 *       }
 *       // Retrieve the finalized hash. This will not change the state.
 *       XXH64_hash_t result = XXH3_64bits_digest(state);
 *       // Free the state. Do not use free().
 *       XXH3_freeState(state);
 *       return result;
 *   }
 * @endcode
 *
 * Streaming functions generate the xxHash value from an incremental input.
 * This method is slower than single-call functions, due to state management.
 * For small inputs, prefer `XXH32()` and `XXH64()`, which are better optimized.
 *
 * An XXH state must first be allocated using `XXH*_createState()`.
 *
 * Start a new hash by initializing the state with a seed using `XXH*_reset()`.
 *
 * Then, feed the hash state by calling `XXH*_update()` as many times as necessary.
 *
 * The function returns an error code, with 0 meaning OK, and any other value
 * meaning there is an error.
 *
 * Finally, a hash value can be produced anytime, by using `XXH*_digest()`.
 * This function returns the nn-bit hash as an int or long long.
 *
 * It's still possible to continue inserting input into the hash state after a
 * digest, and generate new hash values later on by invoking `XXH*_digest()`.
 *
 * When done, release the state using `XXH*_freeState()`.
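 *
 * For instance (an illustrative sketch; `part1`/`size1` and `part2`/`size2`
 * are placeholder buffers, not part of the API), intermediate hashes of a
 * growing input can be produced along the way, since `XXH*_digest()` leaves
 * the state untouched:
 * @code{.c}
 *   XXH3_state_t* state = XXH3_createState();
 *   XXH3_64bits_reset(state);
 *   XXH3_64bits_update(state, part1, size1);
 *   XXH64_hash_t h1 = XXH3_64bits_digest(state); // hash of part1
 *   XXH3_64bits_update(state, part2, size2);
 *   XXH64_hash_t h2 = XXH3_64bits_digest(state); // hash of part1 + part2
 *   XXH3_freeState(state);
 * @endcode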
 *
 *
 * @anchor canonical_representation_example
 * **Canonical Representation**
 *
 * The default return values from XXH functions are unsigned 32, 64 and 128 bit
 * integers.
 * This is the simplest and fastest format for further post-processing.
 *
 * However, this leaves open the question of what is the order on the byte level,
 * since little and big endian conventions will store the same number differently.
 *
 * The canonical representation settles this issue by mandating big-endian
 * convention, the same convention as human-readable numbers (large digits first).
 *
 * When writing hash values to storage, sending them over a network, or printing
 * them, it's highly recommended to use the canonical representation to ensure
 * portability across a wider range of systems, present and future.
 *
 * The following functions allow transformation of hash values to and from
 * canonical format.
 *
 * XXH32_canonicalFromHash(), XXH32_hashFromCanonical(),
 * XXH64_canonicalFromHash(), XXH64_hashFromCanonical(),
 * XXH128_canonicalFromHash(), XXH128_hashFromCanonical(),
 *
 * @code{.c}
 *   #include <stdio.h>
 *   #include "xxhash.h"
 *
 *   // Example for a function which prints XXH32_hash_t in human readable format
 *   void printXxh32(XXH32_hash_t hash)
 *   {
 *       XXH32_canonical_t cano;
 *       XXH32_canonicalFromHash(&cano, hash);
 *       size_t i;
 *       for(i = 0; i < sizeof(cano.digest); ++i) {
 *           printf("%02x", cano.digest[i]);
 *       }
 *       printf("\n");
 *   }
 *
 *   // Example for a function which converts XXH32_canonical_t to XXH32_hash_t
 *   XXH32_hash_t convertCanonicalToXxh32(XXH32_canonical_t cano)
 *   {
 *       XXH32_hash_t hash = XXH32_hashFromCanonical(&cano);
 *       return hash;
 *   }
 * @endcode
 *
 *
 * @file xxhash.h
 * xxHash prototypes and implementation
 */

#if defined (__cplusplus)
extern "C" {
#endif

/* ****************************
 *  INLINE mode
 ******************************/
/*!
 * @defgroup public Public API
 * Contains details on the public xxHash functions.
 * @{
 */
#ifdef XXH_DOXYGEN
/*!
 * @brief Gives access to internal state declaration, required for static allocation.
 *
 * Incompatible with dynamic linking, due to risks of ABI changes.
 *
 * Usage:
 * @code{.c}
 *   #define XXH_STATIC_LINKING_ONLY
 *   #include "xxhash.h"
 * @endcode
 */
#  define XXH_STATIC_LINKING_ONLY
/* Do not undef XXH_STATIC_LINKING_ONLY for Doxygen */

/*!
 * @brief Gives access to internal definitions.
 *
 * Usage:
 * @code{.c}
 *   #define XXH_STATIC_LINKING_ONLY
 *   #define XXH_IMPLEMENTATION
 *   #include "xxhash.h"
 * @endcode
 */
#  define XXH_IMPLEMENTATION
/* Do not undef XXH_IMPLEMENTATION for Doxygen */

/*!
 * @brief Exposes the implementation and marks all functions as `inline`.
 *
 * Use these build macros to inline xxhash into the target unit.
 * Inlining improves performance on small inputs, especially when the length is
 * expressed as a compile-time constant:
 *
 *  https://fastcompression.blogspot.com/2018/03/xxhash-for-small-keys-impressive-power.html
 *
 * It also keeps xxHash symbols private to the unit, so they are not exported.
 *
 * Usage:
 * @code{.c}
 *   #define XXH_INLINE_ALL
 *   #include "xxhash.h"
 * @endcode
 * Do not compile and link xxhash.o as a separate object, as it is not useful.
 */
#  define XXH_INLINE_ALL
#  undef XXH_INLINE_ALL
/*!
 * @brief Exposes the implementation without marking functions as inline.
 */
#  define XXH_PRIVATE_API
#  undef XXH_PRIVATE_API
/*!
 * @brief Emulate a namespace by transparently prefixing all symbols.
 *
 * If you want to include _and expose_ xxHash functions from within your own
 * library, but also want to avoid symbol collisions with other libraries which
 * may also include xxHash, you can use @ref XXH_NAMESPACE to automatically prefix
 * any public symbol from xxhash library with the value of @ref XXH_NAMESPACE
 * (therefore, avoid empty or numeric values).
 *
 * Note that no change is required within the calling program as long as it
 * includes `xxhash.h`: Regular symbol names will be automatically translated
 * by this header.
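 *
 * For instance (an illustrative sketch; `MYLIB_` is an arbitrary example
 * prefix, not part of xxHash), a library bundling xxHash could build with:
 * @code{.c}
 *   #define XXH_NAMESPACE MYLIB_
 *   #include "xxhash.h"
 *
 *   // The exported symbol is now MYLIB_XXH64, yet calling code keeps
 *   // writing plain XXH64(): this header renames it transparently.
 *   XXH64_hash_t h = XXH64("data", 4, 0);
 * @endcode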
 */
#  define XXH_NAMESPACE /* YOUR NAME HERE */
#  undef XXH_NAMESPACE
#endif

#if (defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)) \
    && !defined(XXH_INLINE_ALL_31684351384)
   /* this section should be traversed only once */
#  define XXH_INLINE_ALL_31684351384
   /* give access to the advanced API, required to compile implementations */
#  undef XXH_STATIC_LINKING_ONLY   /* avoid macro redef */
#  define XXH_STATIC_LINKING_ONLY
   /* make all functions private */
#  undef XXH_PUBLIC_API
#  if defined(__GNUC__)
#    define XXH_PUBLIC_API static __inline __attribute__((unused))
#  elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
#    define XXH_PUBLIC_API static inline
#  elif defined(_MSC_VER)
#    define XXH_PUBLIC_API static __inline
#  else
     /* note: this version may generate warnings for unused static functions */
#    define XXH_PUBLIC_API static
#  endif

   /*
    * This part deals with the special case where a unit wants to inline xxHash,
    * but "xxhash.h" has previously been included without XXH_INLINE_ALL,
    * such as part of some previously included *.h header file.
    * Without further action, the new include would just be ignored,
    * and functions would effectively _not_ be inlined (silent failure).
    * The following macros solve this situation by prefixing all inlined names,
    * avoiding naming collision with previous inclusions.
    */
   /* Before that, we unconditionally #undef all symbols,
    * in case they were already defined with XXH_NAMESPACE.
    * They will then be redefined for XXH_INLINE_ALL
    */
#  undef XXH_versionNumber
   /* XXH32 */
#  undef XXH32
#  undef XXH32_createState
#  undef XXH32_freeState
#  undef XXH32_reset
#  undef XXH32_update
#  undef XXH32_digest
#  undef XXH32_copyState
#  undef XXH32_canonicalFromHash
#  undef XXH32_hashFromCanonical
   /* XXH64 */
#  undef XXH64
#  undef XXH64_createState
#  undef XXH64_freeState
#  undef XXH64_reset
#  undef XXH64_update
#  undef XXH64_digest
#  undef XXH64_copyState
#  undef XXH64_canonicalFromHash
#  undef XXH64_hashFromCanonical
   /* XXH3_64bits */
#  undef XXH3_64bits
#  undef XXH3_64bits_withSecret
#  undef XXH3_64bits_withSeed
#  undef XXH3_64bits_withSecretandSeed
#  undef XXH3_createState
#  undef XXH3_freeState
#  undef XXH3_copyState
#  undef XXH3_64bits_reset
#  undef XXH3_64bits_reset_withSeed
#  undef XXH3_64bits_reset_withSecret
#  undef XXH3_64bits_update
#  undef XXH3_64bits_digest
#  undef XXH3_generateSecret
   /* XXH3_128bits */
#  undef XXH128
#  undef XXH3_128bits
#  undef XXH3_128bits_withSeed
#  undef XXH3_128bits_withSecret
#  undef XXH3_128bits_reset
#  undef XXH3_128bits_reset_withSeed
#  undef XXH3_128bits_reset_withSecret
#  undef XXH3_128bits_reset_withSecretandSeed
#  undef XXH3_128bits_update
#  undef XXH3_128bits_digest
#  undef XXH128_isEqual
#  undef XXH128_cmp
#  undef XXH128_canonicalFromHash
#  undef XXH128_hashFromCanonical
   /* Finally, free the namespace itself */
#  undef XXH_NAMESPACE

   /* employ the namespace for XXH_INLINE_ALL */
#  define XXH_NAMESPACE XXH_INLINE_
   /*
    * Some identifiers (enums, type names) are not symbols,
    * but they must nonetheless be renamed to avoid redeclaration.
    * Alternative solution: do not redeclare them.
    * However, this requires some #ifdefs, and has a more dispersed impact.
    * Meanwhile, renaming can be achieved in a single place.
    */
#  define XXH_IPREF(Id)   XXH_NAMESPACE ## Id
#  define XXH_OK XXH_IPREF(XXH_OK)
#  define XXH_ERROR XXH_IPREF(XXH_ERROR)
#  define XXH_errorcode XXH_IPREF(XXH_errorcode)
#  define XXH32_canonical_t  XXH_IPREF(XXH32_canonical_t)
#  define XXH64_canonical_t  XXH_IPREF(XXH64_canonical_t)
#  define XXH128_canonical_t XXH_IPREF(XXH128_canonical_t)
#  define XXH32_state_s XXH_IPREF(XXH32_state_s)
#  define XXH32_state_t XXH_IPREF(XXH32_state_t)
#  define XXH64_state_s XXH_IPREF(XXH64_state_s)
#  define XXH64_state_t XXH_IPREF(XXH64_state_t)
#  define XXH3_state_s  XXH_IPREF(XXH3_state_s)
#  define XXH3_state_t  XXH_IPREF(XXH3_state_t)
#  define XXH128_hash_t XXH_IPREF(XXH128_hash_t)
   /* Ensure the header is parsed again, even if it was previously included */
#  undef XXHASH_H_5627135585666179
#  undef XXHASH_H_STATIC_13879238742
#endif /* XXH_INLINE_ALL || XXH_PRIVATE_API */

/* ****************************************************************
 *  Stable API
 *****************************************************************/
#ifndef XXHASH_H_5627135585666179
#define XXHASH_H_5627135585666179 1

/*! @brief Marks a global symbol. */
#if !defined(XXH_INLINE_ALL) && !defined(XXH_PRIVATE_API)
# if defined(WIN32) && defined(_MSC_VER) && (defined(XXH_IMPORT) || defined(XXH_EXPORT))
#  ifdef XXH_EXPORT
#   define XXH_PUBLIC_API __declspec(dllexport)
#  elif XXH_IMPORT
#   define XXH_PUBLIC_API __declspec(dllimport)
#  endif
# else
#  define XXH_PUBLIC_API   /* do nothing */
# endif
#endif

#ifdef XXH_NAMESPACE
# define XXH_CAT(A,B) A##B
# define XXH_NAME2(A,B) XXH_CAT(A,B)
# define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber)
/* XXH32 */
# define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32)
# define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState)
# define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState)
# define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset)
# define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update)
# define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest)
# define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState)
# define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash)
# define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical)
/* XXH64 */
# define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64)
# define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState)
# define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState)
# define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset)
# define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update)
# define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest)
# define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState)
# define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash)
# define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical)
/* XXH3_64bits */
# define XXH3_64bits XXH_NAME2(XXH_NAMESPACE, XXH3_64bits)
# define XXH3_64bits_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSecret)
# define XXH3_64bits_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSeed)
# define XXH3_64bits_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSecretandSeed)
# define XXH3_createState XXH_NAME2(XXH_NAMESPACE, XXH3_createState)
# define XXH3_freeState XXH_NAME2(XXH_NAMESPACE, XXH3_freeState)
# define XXH3_copyState XXH_NAME2(XXH_NAMESPACE, XXH3_copyState)
# define XXH3_64bits_reset XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset)
# define XXH3_64bits_reset_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSeed)
# define XXH3_64bits_reset_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSecret)
# define XXH3_64bits_reset_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSecretandSeed)
# define XXH3_64bits_update XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_update)
# define XXH3_64bits_digest XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_digest)
# define XXH3_generateSecret XXH_NAME2(XXH_NAMESPACE, XXH3_generateSecret)
# define XXH3_generateSecret_fromSeed XXH_NAME2(XXH_NAMESPACE, XXH3_generateSecret_fromSeed)
/* XXH3_128bits */
# define XXH128 XXH_NAME2(XXH_NAMESPACE, XXH128)
# define XXH3_128bits XXH_NAME2(XXH_NAMESPACE, XXH3_128bits)
# define XXH3_128bits_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSeed)
# define XXH3_128bits_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSecret)
# define XXH3_128bits_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSecretandSeed)
# define XXH3_128bits_reset XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset)
# define XXH3_128bits_reset_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSeed)
# define XXH3_128bits_reset_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSecret)
# define XXH3_128bits_reset_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSecretandSeed)
# define XXH3_128bits_update XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_update)
# define XXH3_128bits_digest XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_digest)
# define XXH128_isEqual XXH_NAME2(XXH_NAMESPACE, XXH128_isEqual)
# define XXH128_cmp XXH_NAME2(XXH_NAMESPACE, XXH128_cmp)
# define XXH128_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH128_canonicalFromHash)
# define XXH128_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH128_hashFromCanonical)
#endif


/* *************************************
 *  Compiler specifics
 ***************************************/

/* specific declaration modes for Windows */
#if !defined(XXH_INLINE_ALL) && !defined(XXH_PRIVATE_API)
# if defined(WIN32) && defined(_MSC_VER) && (defined(XXH_IMPORT) || defined(XXH_EXPORT))
#  ifdef XXH_EXPORT
#   define XXH_PUBLIC_API __declspec(dllexport)
#  elif XXH_IMPORT
#   define XXH_PUBLIC_API __declspec(dllimport)
#  endif
# else
#  define XXH_PUBLIC_API   /* do nothing */
# endif
#endif

#if defined (__GNUC__)
# define XXH_CONSTF  __attribute__((const))
# define XXH_PUREF   __attribute__((pure))
# define XXH_MALLOCF __attribute__((malloc))
#else
# define XXH_CONSTF  /* disable */
# define XXH_PUREF
# define XXH_MALLOCF
#endif

/* *************************************
 *  Version
 ***************************************/
#define XXH_VERSION_MAJOR    0
#define XXH_VERSION_MINOR    8
#define XXH_VERSION_RELEASE  2
/*! @brief Version number, encoded as two digits each */
#define XXH_VERSION_NUMBER  (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE)

/*!
 * @brief Obtains the xxHash version.
 *
 * This is mostly useful when xxHash is compiled as a shared library,
 * since the returned value comes from the library, as opposed to header file.
 *
 * @return @ref XXH_VERSION_NUMBER of the invoked library.
 */
XXH_PUBLIC_API XXH_CONSTF unsigned XXH_versionNumber (void);
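
/*!
 * Illustrative sketch (example name, not part of the official API): when
 * linking against a shared libxxhash, the header seen at compile time and the
 * library loaded at run time can be cross-checked like so.
 * @code{.c}
 *   #include <assert.h>
 *   #include "xxhash.h"
 *
 *   void checkXxhashVersion(void)
 *   {
 *       // XXH_VERSION_NUMBER : version of this header (compile time)
 *       // XXH_versionNumber(): version of the linked library (run time)
 *       assert(XXH_versionNumber() == XXH_VERSION_NUMBER);
 *   }
 * @endcode
 */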

/* ****************************
 *  Common basic types
 ******************************/
#include <stddef.h>   /* size_t */
/*!
 * @brief Exit code for the streaming API.
 */
typedef enum {
    XXH_OK = 0, /*!< OK */
    XXH_ERROR   /*!< Error */
} XXH_errorcode;


/*-**********************************************************************
 *  32-bit hash
 ************************************************************************/
#if defined(XXH_DOXYGEN) /* Don't show <stdint.h> include */
/*!
 * @brief An unsigned 32-bit integer.
 *
 * Not necessarily defined to `uint32_t` but functionally equivalent.
 */
typedef uint32_t XXH32_hash_t;

#elif !defined (__VMS) \
  && (defined (__cplusplus) \
  || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
#   ifdef _AIX
#     include <inttypes.h>
#   else
#     include <stdint.h>
#   endif
    typedef uint32_t XXH32_hash_t;

#else
#   include <limits.h>
#   if UINT_MAX == 0xFFFFFFFFUL
      typedef unsigned int XXH32_hash_t;
#   elif ULONG_MAX == 0xFFFFFFFFUL
      typedef unsigned long XXH32_hash_t;
#   else
#     error "unsupported platform: need a 32-bit type"
#   endif
#endif

/*!
 * @}
 *
 * @defgroup XXH32_family XXH32 family
 * @ingroup public
 * Contains functions used in the classic 32-bit xxHash algorithm.
 *
 * @note
 *   XXH32 is useful for older platforms, with no or poor 64-bit performance.
 *   Note that the @ref XXH3_family provides competitive speed for both 32-bit
 *   and 64-bit systems, and offers true 64/128 bit hash results.
 *
 * @see @ref XXH64_family, @ref XXH3_family : Other xxHash families
 * @see @ref XXH32_impl for implementation details
 * @{
 */

/*!
 * @brief Calculates the 32-bit hash of @p input using xxHash32.
 *
 * @param input The block of data to be hashed, at least @p length bytes in size.
 * @param length The length of @p input, in bytes.
 * @param seed The 32-bit seed to alter the hash's output predictably.
 *
 * @pre
 *   The memory between @p input and @p input + @p length must be valid,
 *   readable, contiguous memory. However, if @p length is `0`, @p input may be
 *   `NULL`. In C++, this also must be *TriviallyCopyable*.
 *
 * @return The calculated 32-bit xxHash32 value.
 *
 * @see @ref single_shot_example "Single Shot Example" for an example.
 */
XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32 (const void* input, size_t length, XXH32_hash_t seed);

#ifndef XXH_NO_STREAM
/*!
 * @typedef struct XXH32_state_s XXH32_state_t
 * @brief The opaque state struct for the XXH32 streaming API.
 *
 * @see XXH32_state_s for details.
 */
typedef struct XXH32_state_s XXH32_state_t;

/*!
 * @brief Allocates an @ref XXH32_state_t.
 *
 * @return An allocated pointer of @ref XXH32_state_t on success.
 * @return `NULL` on failure.
 *
 * @note Must be freed with XXH32_freeState().
 */
XXH_PUBLIC_API XXH_MALLOCF XXH32_state_t* XXH32_createState(void);
/*!
 * @brief Frees an @ref XXH32_state_t.
 *
 * @param statePtr A pointer to an @ref XXH32_state_t allocated with @ref XXH32_createState().
 *
 * @return @ref XXH_OK.
 *
 * @note @p statePtr must be allocated with XXH32_createState().
 */
XXH_PUBLIC_API XXH_errorcode  XXH32_freeState(XXH32_state_t* statePtr);
/*!
 * @brief Copies one @ref XXH32_state_t to another.
 *
 * @param dst_state The state to copy to.
 * @param src_state The state to copy from.
 * @pre
 *   @p dst_state and @p src_state must not be `NULL` and must not overlap.
 */
XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dst_state, const XXH32_state_t* src_state);

/*!
 * @brief Resets an @ref XXH32_state_t to begin a new hash.
 *
 * @param statePtr The state struct to reset.
 * @param seed The 32-bit seed to alter the hash result predictably.
 *
 * @pre
 *   @p statePtr must not be `NULL`.
 *
 * @return @ref XXH_OK on success.
 * @return @ref XXH_ERROR on failure.
 *
 * @note This function resets and seeds a state. Call it before @ref XXH32_update().
 */
XXH_PUBLIC_API XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, XXH32_hash_t seed);

/*!
 * @brief Consumes a block of @p input to an @ref XXH32_state_t.
 *
 * @param statePtr The state struct to update.
 * @param input The block of data to be hashed, at least @p length bytes in size.
 * @param length The length of @p input, in bytes.
 *
 * @pre
 *   @p statePtr must not be `NULL`.
 * @pre
 *   The memory between @p input and @p input + @p length must be valid,
 *   readable, contiguous memory. However, if @p length is `0`, @p input may be
 *   `NULL`. In C++, this also must be *TriviallyCopyable*.
 *
 * @return @ref XXH_OK on success.
 * @return @ref XXH_ERROR on failure.
 *
 * @note Call this to incrementally consume blocks of data.
 */
XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length);

/*!
 * @brief Returns the calculated hash value from an @ref XXH32_state_t.
 *
 * @param statePtr The state struct to calculate the hash from.
 *
 * @pre
 *  @p statePtr must not be `NULL`.
 *
 * @return The calculated 32-bit xxHash32 value from that state.
 *
 * @note
 *   Calling XXH32_digest() will not affect @p statePtr, so you can update,
 *   digest, and update again.
 */
XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr);
#endif /* !XXH_NO_STREAM */
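
/*!
 * Putting the XXH32 streaming calls together (an illustrative sketch, with
 * the error codes checked; `hashTwoParts` is an example name, not an API):
 * @code{.c}
 *   // Hashes two buffers as if they formed one contiguous input.
 *   // Returns 0 and stores the hash in *result on success, -1 on failure.
 *   int hashTwoParts(const void* p1, size_t n1,
 *                    const void* p2, size_t n2,
 *                    XXH32_hash_t seed, XXH32_hash_t* result)
 *   {
 *       XXH32_state_t* const state = XXH32_createState();
 *       if (state == NULL) return -1;
 *       if (XXH32_reset(state, seed) == XXH_ERROR
 *        || XXH32_update(state, p1, n1) == XXH_ERROR
 *        || XXH32_update(state, p2, n2) == XXH_ERROR) {
 *           XXH32_freeState(state);
 *           return -1;
 *       }
 *       *result = XXH32_digest(state);
 *       XXH32_freeState(state);
 *       return 0;
 *   }
 * @endcode
 */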

/*******   Canonical representation   *******/

/*!
 * @brief Canonical (big endian) representation of @ref XXH32_hash_t.
 */
typedef struct {
    unsigned char digest[4]; /*!< Hash bytes, big endian */
} XXH32_canonical_t;

/*!
 * @brief Converts an @ref XXH32_hash_t to a big endian @ref XXH32_canonical_t.
 *
 * @param dst  The @ref XXH32_canonical_t pointer to be stored to.
 * @param hash The @ref XXH32_hash_t to be converted.
 *
 * @pre
 *   @p dst must not be `NULL`.
 *
 * @see @ref canonical_representation_example "Canonical Representation Example"
 */
XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash);

/*!
 * @brief Converts an @ref XXH32_canonical_t to a native @ref XXH32_hash_t.
 *
 * @param src The @ref XXH32_canonical_t to convert.
 *
 * @pre
 *   @p src must not be `NULL`.
 *
 * @return The converted hash.
 *
 * @see @ref canonical_representation_example "Canonical Representation Example"
 */
XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src);


/*! @cond Doxygen ignores this part */
#ifdef __has_attribute
# define XXH_HAS_ATTRIBUTE(x) __has_attribute(x)
#else
# define XXH_HAS_ATTRIBUTE(x) 0
#endif
/*! @endcond */

/*! @cond Doxygen ignores this part */
/*
 * C23 __STDC_VERSION__ number hasn't been specified yet. For now
 * leave as `201711L` (C17 + 1).
 * TODO: Update to correct value when it's been specified.
 */
#define XXH_C23_VN 201711L
/*! @endcond */

/*! @cond Doxygen ignores this part */
/* C-language Attributes are added in C23. */
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= XXH_C23_VN) && defined(__has_c_attribute)
# define XXH_HAS_C_ATTRIBUTE(x) __has_c_attribute(x)
#else
# define XXH_HAS_C_ATTRIBUTE(x) 0
#endif
/*! @endcond */

/*! @cond Doxygen ignores this part */
#if defined(__cplusplus) && defined(__has_cpp_attribute)
# define XXH_HAS_CPP_ATTRIBUTE(x) __has_cpp_attribute(x)
#else
# define XXH_HAS_CPP_ATTRIBUTE(x) 0
#endif
/*! @endcond */

/*! @cond Doxygen ignores this part */
/*
 * Define XXH_FALLTHROUGH macro for annotating switch case with the 'fallthrough' attribute
 * introduced in CPP17 and C23.
 * CPP17 : https://en.cppreference.com/w/cpp/language/attributes/fallthrough
 * C23   : https://en.cppreference.com/w/c/language/attributes/fallthrough
 */
#if XXH_HAS_C_ATTRIBUTE(fallthrough) || XXH_HAS_CPP_ATTRIBUTE(fallthrough)
# define XXH_FALLTHROUGH [[fallthrough]]
#elif XXH_HAS_ATTRIBUTE(__fallthrough__)
# define XXH_FALLTHROUGH __attribute__ ((__fallthrough__))
#else
# define XXH_FALLTHROUGH /* fallthrough */
#endif
/*! @endcond */

/*! @cond Doxygen ignores this part */
/*
 * Define XXH_NOESCAPE for annotated pointers in public API.
 * https://clang.llvm.org/docs/AttributeReference.html#noescape
 * As of writing this, only supported by clang.
 */
#if XXH_HAS_ATTRIBUTE(noescape)
# define XXH_NOESCAPE __attribute__((noescape))
#else
# define XXH_NOESCAPE
#endif
/*! @endcond */
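
/*! @cond Doxygen ignores this part */
/*
 * Illustration (not part of the library): XXH_FALLTHROUGH marks a deliberate
 * switch fall-through, keeping -Wimplicit-fallthrough quiet on every dialect.
 * For example, a hypothetical tail-processing switch could be written as:
 *
 *     switch (len) {
 *         case 3: acc += p[2]; XXH_FALLTHROUGH; // intentional fall-through
 *         case 2: acc += p[1]; XXH_FALLTHROUGH;
 *         case 1: acc += p[0]; break;
 *         default: break;
 *     }
 */
/*! @endcond */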


/*!
 * @}
 * @ingroup public
 * @{
 */

#ifndef XXH_NO_LONG_LONG
/*-**********************************************************************
 *  64-bit hash
 ************************************************************************/
#if defined(XXH_DOXYGEN) /* don't include <stdint.h> */
/*!
 * @brief An unsigned 64-bit integer.
 *
 * Not necessarily defined to `uint64_t` but functionally equivalent.
 */
typedef uint64_t XXH64_hash_t;
#elif !defined (__VMS) \
  && (defined (__cplusplus) \
  || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
#  ifdef _AIX
#    include <inttypes.h>
#  else
#    include <stdint.h>
#  endif
   typedef uint64_t XXH64_hash_t;
#else
#  include <limits.h>
#  if defined(__LP64__) && ULONG_MAX == 0xFFFFFFFFFFFFFFFFULL
     /* LP64 ABI says uint64_t is unsigned long */
     typedef unsigned long XXH64_hash_t;
#  else
     /* the following type must have a width of 64-bit */
     typedef unsigned long long XXH64_hash_t;
#  endif
#endif

/*!
 * @}
 *
 * @defgroup XXH64_family XXH64 family
 * @ingroup public
 * @{
 * Contains functions used in the classic 64-bit xxHash algorithm.
 *
 * @note
 *   XXH3 provides competitive speed for both 32-bit and 64-bit systems,
 *   and offers true 64/128 bit hash results.
 *   It provides better speed for systems with vector processing capabilities.
 */

/*!
 * @brief Calculates the 64-bit hash of @p input using xxHash64.
 *
 * @param input The block of data to be hashed, at least @p length bytes in size.
 * @param length The length of @p input, in bytes.
 * @param seed The 64-bit seed to alter the hash's output predictably.
 *
 * @pre
 *   The memory between @p input and @p input + @p length must be valid,
 *   readable, contiguous memory. However, if @p length is `0`, @p input may be
 *   `NULL`. In C++, this also must be *TriviallyCopyable*.
 *
 * @return The calculated 64-bit xxHash64 value.
 *
 * @see @ref single_shot_example "Single Shot Example" for an example.
 */
XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64(XXH_NOESCAPE const void* input, size_t length, XXH64_hash_t seed);

/*******   Streaming   *******/
#ifndef XXH_NO_STREAM
/*!
 * @brief The opaque state struct for the XXH64 streaming API.
 *
 * @see XXH64_state_s for details.
 */
typedef struct XXH64_state_s XXH64_state_t;   /* incomplete type */

/*!
 * @brief Allocates an @ref XXH64_state_t.
 *
 * @return An allocated pointer of @ref XXH64_state_t on success.
 * @return `NULL` on failure.
 *
 * @note Must be freed with XXH64_freeState().
 */
XXH_PUBLIC_API XXH_MALLOCF XXH64_state_t* XXH64_createState(void);

/*!
 * @brief Frees an @ref XXH64_state_t.
 *
 * @param statePtr A pointer to an @ref XXH64_state_t allocated with @ref XXH64_createState().
 *
 * @return @ref XXH_OK.
 *
 * @note @p statePtr must be allocated with XXH64_createState().
 */
XXH_PUBLIC_API XXH_errorcode  XXH64_freeState(XXH64_state_t* statePtr);

/*!
 * @brief Copies one @ref XXH64_state_t to another.
 *
 * @param dst_state The state to copy to.
 * @param src_state The state to copy from.
 * @pre
 *   @p dst_state and @p src_state must not be `NULL` and must not overlap.
 */
XXH_PUBLIC_API void XXH64_copyState(XXH_NOESCAPE XXH64_state_t* dst_state, const XXH64_state_t* src_state);

/*!
 * @brief Resets an @ref XXH64_state_t to begin a new hash.
 *
 * @param statePtr The state struct to reset.
 * @param seed The 64-bit seed to alter the hash result predictably.
 *
 * @pre
 *   @p statePtr must not be `NULL`.
 *
 * @return @ref XXH_OK on success.
 * @return @ref XXH_ERROR on failure.
 *
 * @note This function resets and seeds a state. Call it before @ref XXH64_update().
 */
XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH_NOESCAPE XXH64_state_t* statePtr, XXH64_hash_t seed);

/*!
 * @brief Consumes a block of @p input to an @ref XXH64_state_t.
 *
 * @param statePtr The state struct to update.
 * @param input The block of data to be hashed, at least @p length bytes in size.
 * @param length The length of @p input, in bytes.
 *
 * @pre
 *   @p statePtr must not be `NULL`.
 * @pre
 *   The memory between @p input and @p input + @p length must be valid,
 *   readable, contiguous memory. However, if @p length is `0`, @p input may be
 *   `NULL`. In C++, this also must be *TriviallyCopyable*.
 *
 * @return @ref XXH_OK on success.
 * @return @ref XXH_ERROR on failure.
 *
 * @note Call this to incrementally consume blocks of data.
 */
XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH_NOESCAPE XXH64_state_t* statePtr, XXH_NOESCAPE const void* input, size_t length);

/*!
 * @brief Returns the calculated hash value from an @ref XXH64_state_t.
 *
 * @param statePtr The state struct to calculate the hash from.
 *
 * @pre
 *  @p statePtr must not be `NULL`.
 *
 * @return The calculated 64-bit xxHash64 value from that state.
 *
 * @note
 *   Calling XXH64_digest() will not affect @p statePtr, so you can update,
 *   digest, and update again.
 */
XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64_digest (XXH_NOESCAPE const XXH64_state_t* statePtr);
#endif /* !XXH_NO_STREAM */
/*******   Canonical representation   *******/

/*!
 * @brief Canonical (big endian) representation of @ref XXH64_hash_t.
 */
typedef struct { unsigned char digest[sizeof(XXH64_hash_t)]; } XXH64_canonical_t;

/*!
 * @brief Converts an @ref XXH64_hash_t to a big endian @ref XXH64_canonical_t.
 *
 * @param dst The @ref XXH64_canonical_t pointer to be stored to.
 * @param hash The @ref XXH64_hash_t to be converted.
 *
 * @pre
 *   @p dst must not be `NULL`.
 *
 * @see @ref canonical_representation_example "Canonical Representation Example"
 */
XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH_NOESCAPE XXH64_canonical_t* dst, XXH64_hash_t hash);

/*!
 * @brief Converts an @ref XXH64_canonical_t to a native @ref XXH64_hash_t.
 *
 * @param src The @ref XXH64_canonical_t to convert.
 *
 * @pre
 *   @p src must not be `NULL`.
 *
 * @return The converted hash.
 *
 * @see @ref canonical_representation_example "Canonical Representation Example"
 */
XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64_hashFromCanonical(XXH_NOESCAPE const XXH64_canonical_t* src);
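
/*!
 * Illustrative sketch (example names, not part of the API): storing and
 * re-reading a 64-bit hash in canonical form keeps the byte order portable
 * across little- and big-endian systems.
 * @code{.c}
 *   #include <string.h>
 *   #include "xxhash.h"
 *
 *   // Serializes hash into out[0..7], big endian.
 *   void storeHash64(unsigned char out[8], XXH64_hash_t hash)
 *   {
 *       XXH64_canonical_t cano;
 *       XXH64_canonicalFromHash(&cano, hash);
 *       memcpy(out, cano.digest, sizeof(cano.digest));
 *   }
 *
 *   // Deserializes a hash previously written by storeHash64().
 *   XXH64_hash_t loadHash64(const unsigned char in[8])
 *   {
 *       XXH64_canonical_t cano;
 *       memcpy(cano.digest, in, sizeof(cano.digest));
 *       return XXH64_hashFromCanonical(&cano);
 *   }
 * @endcode
 */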
1024*3117ece4Schristos * @} 1025*3117ece4Schristos * ************************************************************************ 1026*3117ece4Schristos * @defgroup XXH3_family XXH3 family 1027*3117ece4Schristos * @ingroup public 1028*3117ece4Schristos * @{ 1029*3117ece4Schristos * 1030*3117ece4Schristos * XXH3 is a more recent hash algorithm featuring: 1031*3117ece4Schristos * - Improved speed for both small and large inputs 1032*3117ece4Schristos * - True 64-bit and 128-bit outputs 1033*3117ece4Schristos * - SIMD acceleration 1034*3117ece4Schristos * - Improved 32-bit viability 1035*3117ece4Schristos * 1036*3117ece4Schristos * Speed analysis methodology is explained here: 1037*3117ece4Schristos * 1038*3117ece4Schristos * https://fastcompression.blogspot.com/2019/03/presenting-xxh3.html 1039*3117ece4Schristos * 1040*3117ece4Schristos * Compared to XXH64, expect XXH3 to run approximately 1041*3117ece4Schristos * ~2x faster on large inputs and >3x faster on small ones, 1042*3117ece4Schristos * exact differences vary depending on platform. 1043*3117ece4Schristos * 1044*3117ece4Schristos * XXH3's speed benefits greatly from SIMD and 64-bit arithmetic, 1045*3117ece4Schristos * but does not require it. 1046*3117ece4Schristos * Most 32-bit and 64-bit targets that can run XXH32 smoothly can run XXH3 1047*3117ece4Schristos * at competitive speeds, even without vector support. Further details are 1048*3117ece4Schristos * explained in the implementation. 1049*3117ece4Schristos * 1050*3117ece4Schristos * XXH3 has a fast scalar implementation, but it also includes accelerated SIMD 1051*3117ece4Schristos * implementations for many common platforms: 1052*3117ece4Schristos * - AVX512 1053*3117ece4Schristos * - AVX2 1054*3117ece4Schristos * - SSE2 1055*3117ece4Schristos * - ARM NEON 1056*3117ece4Schristos * - WebAssembly SIMD128 1057*3117ece4Schristos * - POWER8 VSX 1058*3117ece4Schristos * - s390x ZVector 1059*3117ece4Schristos * This can be controlled via the @ref XXH_VECTOR macro, but it automatically 1060*3117ece4Schristos * selects the best version according to predefined macros. For the x86 family, an 1061*3117ece4Schristos * automatic runtime dispatcher is included separately in @ref xxh_x86dispatch.c. 1062*3117ece4Schristos * 1063*3117ece4Schristos * XXH3 implementation is portable: 1064*3117ece4Schristos * it has a generic C90 formulation that can be compiled on any platform, 1065*3117ece4Schristos * all implementations generate exactly the same hash value on all platforms. 1066*3117ece4Schristos * Starting from v0.8.0, it's also labelled "stable", meaning that 1067*3117ece4Schristos * any future version will also generate the same hash value. 1068*3117ece4Schristos * 1069*3117ece4Schristos * XXH3 offers 2 variants, _64bits and _128bits. 1070*3117ece4Schristos * 1071*3117ece4Schristos * When only 64 bits are needed, prefer invoking the _64bits variant, as it 1072*3117ece4Schristos * reduces the amount of mixing, resulting in faster speed on small inputs. 1073*3117ece4Schristos * It's also generally simpler to manipulate a scalar return type than a struct. 1074*3117ece4Schristos * 1075*3117ece4Schristos * The API supports one-shot hashing, streaming mode, and custom secrets. 1076*3117ece4Schristos */ 1077*3117ece4Schristos /*-********************************************************************** 1078*3117ece4Schristos * XXH3 64-bit variant 1079*3117ece4Schristos ************************************************************************/ 1080*3117ece4Schristos 1081*3117ece4Schristos /*! 
1082*3117ece4Schristos * @brief Calculates 64-bit unseeded variant of XXH3 hash of @p input. 1083*3117ece4Schristos * 1084*3117ece4Schristos * @param input The block of data to be hashed, at least @p length bytes in size. 1085*3117ece4Schristos * @param length The length of @p input, in bytes. 1086*3117ece4Schristos * 1087*3117ece4Schristos * @pre 1088*3117ece4Schristos * The memory between @p input and @p input + @p length must be valid, 1089*3117ece4Schristos * readable, contiguous memory. However, if @p length is `0`, @p input may be 1090*3117ece4Schristos * `NULL`. In C++, this also must be *TriviallyCopyable*. 1091*3117ece4Schristos * 1092*3117ece4Schristos * @return The calculated 64-bit XXH3 hash value. 1093*3117ece4Schristos * 1094*3117ece4Schristos * @note 1095*3117ece4Schristos * This is equivalent to @ref XXH3_64bits_withSeed() with a seed of `0`, however 1096*3117ece4Schristos * it may have slightly better performance due to constant propagation of the 1097*3117ece4Schristos * defaults. 1098*3117ece4Schristos * 1099*3117ece4Schristos * @see 1100*3117ece4Schristos * XXH3_64bits_withSeed(), XXH3_64bits_withSecret(): other seeding variants 1101*3117ece4Schristos * @see @ref single_shot_example "Single Shot Example" for an example. 1102*3117ece4Schristos */ 1103*3117ece4Schristos XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits(XXH_NOESCAPE const void* input, size_t length); 1104*3117ece4Schristos 1105*3117ece4Schristos /*! 1106*3117ece4Schristos * @brief Calculates 64-bit seeded variant of XXH3 hash of @p input. 1107*3117ece4Schristos * 1108*3117ece4Schristos * @param input The block of data to be hashed, at least @p length bytes in size. 1109*3117ece4Schristos * @param length The length of @p input, in bytes. 1110*3117ece4Schristos * @param seed The 64-bit seed to alter the hash result predictably. 1111*3117ece4Schristos * 1112*3117ece4Schristos * @pre 1113*3117ece4Schristos * The memory between @p input and @p input + @p length must be valid, 1114*3117ece4Schristos * readable, contiguous memory. However, if @p length is `0`, @p input may be 1115*3117ece4Schristos * `NULL`. In C++, this also must be *TriviallyCopyable*. 1116*3117ece4Schristos * 1117*3117ece4Schristos * @return The calculated 64-bit XXH3 hash value. 1118*3117ece4Schristos * 1119*3117ece4Schristos * @note 1120*3117ece4Schristos * seed == 0 produces the same results as @ref XXH3_64bits(). 1121*3117ece4Schristos * 1122*3117ece4Schristos * This variant generates a custom secret on the fly based on default secret 1123*3117ece4Schristos * altered using the @p seed value. 1124*3117ece4Schristos * 1125*3117ece4Schristos * While this operation is decently fast, note that it's not completely free. 1126*3117ece4Schristos * 1127*3117ece4Schristos * @see @ref single_shot_example "Single Shot Example" for an example. 1128*3117ece4Schristos */ 1129*3117ece4Schristos XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits_withSeed(XXH_NOESCAPE const void* input, size_t length, XXH64_hash_t seed); 1130*3117ece4Schristos 1131*3117ece4Schristos /*! 1132*3117ece4Schristos * The bare minimum size for a custom secret. 1133*3117ece4Schristos * 1134*3117ece4Schristos * @see 1135*3117ece4Schristos * XXH3_64bits_withSecret(), XXH3_64bits_reset_withSecret(), 1136*3117ece4Schristos * XXH3_128bits_withSecret(), XXH3_128bits_reset_withSecret(). 1137*3117ece4Schristos */ 1138*3117ece4Schristos #define XXH3_SECRET_SIZE_MIN 136 1139*3117ece4Schristos 1140*3117ece4Schristos /*! 
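 * For illustration, a minimal sketch of the one-shot variants declared above
 * (the input string and the seed value are arbitrary):
 * @code{.c}
 * #include <string.h>
 * #include "xxhash.h"
 *
 * static void example(void)
 * {
 *     const char data[] = "hello world";
 *     // Unseeded: same result as a seed of 0, but slightly faster.
 *     XXH64_hash_t const h1 = XXH3_64bits(data, strlen(data));
 *     // Seeded: predictably alters the result.
 *     XXH64_hash_t const h2 = XXH3_64bits_withSeed(data, strlen(data), 20180101);
 *     (void)h1; (void)h2;
 * }
 * @endcode
 */

/*!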
1141*3117ece4Schristos  * @brief Calculates 64-bit variant of XXH3 with a custom "secret".
1142*3117ece4Schristos  *
1143*3117ece4Schristos  * @param data The block of data to be hashed, at least @p len bytes in size.
1144*3117ece4Schristos  * @param len The length of @p data, in bytes.
1145*3117ece4Schristos  * @param secret The secret data.
1146*3117ece4Schristos  * @param secretSize The length of @p secret, in bytes.
1147*3117ece4Schristos  *
1148*3117ece4Schristos  * @return The calculated 64-bit XXH3 hash value.
1149*3117ece4Schristos  *
1150*3117ece4Schristos  * @pre
1151*3117ece4Schristos  *   The memory between @p data and @p data + @p len must be valid,
1152*3117ece4Schristos  *   readable, contiguous memory. However, if @p len is `0`, @p data may be
1153*3117ece4Schristos  *   `NULL`. In C++, this also must be *TriviallyCopyable*.
1154*3117ece4Schristos  *
1155*3117ece4Schristos  * It's possible to provide any blob of bytes as a "secret" to generate the hash.
1156*3117ece4Schristos  * This makes it more difficult for an external actor to prepare an intentional collision.
1157*3117ece4Schristos  * The main condition is that @p secretSize *must* be large enough (>= @ref XXH3_SECRET_SIZE_MIN).
1158*3117ece4Schristos  * However, the quality of the secret impacts the dispersion of the hash algorithm.
1159*3117ece4Schristos  * Therefore, the secret _must_ look like a bunch of random bytes.
1160*3117ece4Schristos  * Avoid "trivial" or structured data such as repeated sequences or a text document.
1161*3117ece4Schristos  * Whenever in doubt about the "randomness" of the blob of bytes,
1162*3117ece4Schristos  * consider employing @ref XXH3_generateSecret() instead (see below).
1163*3117ece4Schristos  * It will generate a proper high-entropy secret derived from the blob of bytes.
1164*3117ece4Schristos  * Another advantage of using XXH3_generateSecret() is that
1165*3117ece4Schristos  * it guarantees that all bits within the initial blob of bytes
1166*3117ece4Schristos  * will impact every bit of the output.
1167*3117ece4Schristos  * This is not necessarily the case when using the blob of bytes directly
1168*3117ece4Schristos  * because, when hashing _small_ inputs, only a portion of the secret is employed.
1169*3117ece4Schristos  *
1170*3117ece4Schristos  * @see @ref single_shot_example "Single Shot Example" for an example.
1171*3117ece4Schristos  */
1172*3117ece4Schristos XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits_withSecret(XXH_NOESCAPE const void* data, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize);
1173*3117ece4Schristos
1174*3117ece4Schristos
1175*3117ece4Schristos /******* Streaming *******/
1176*3117ece4Schristos #ifndef XXH_NO_STREAM
1177*3117ece4Schristos /*
1178*3117ece4Schristos  * Streaming requires state maintenance.
1179*3117ece4Schristos  * This operation costs memory and CPU.
1180*3117ece4Schristos  * As a consequence, streaming is slower than one-shot hashing.
1181*3117ece4Schristos  * For better performance, prefer one-shot functions whenever applicable.
1182*3117ece4Schristos  */
1183*3117ece4Schristos
1184*3117ece4Schristos /*!
1185*3117ece4Schristos  * @brief The opaque state struct for the XXH3 streaming API.
1186*3117ece4Schristos  *
1187*3117ece4Schristos  * @see XXH3_state_s for details.
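 *
 * A state must be obtained from XXH3_createState() and released with
 * XXH3_freeState(), as sketched below (error handling abbreviated):
 * @code{.c}
 * XXH3_state_t* const state = XXH3_createState();
 * if (state != NULL) {
 *     // ... reset, update, digest (see the functions below) ...
 *     (void)XXH3_freeState(state);
 * }
 * @endcode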
1188*3117ece4Schristos  */
1189*3117ece4Schristos typedef struct XXH3_state_s XXH3_state_t;
1190*3117ece4Schristos XXH_PUBLIC_API XXH_MALLOCF XXH3_state_t* XXH3_createState(void);
1191*3117ece4Schristos XXH_PUBLIC_API XXH_errorcode XXH3_freeState(XXH3_state_t* statePtr);
1192*3117ece4Schristos
1193*3117ece4Schristos /*!
1194*3117ece4Schristos  * @brief Copies one @ref XXH3_state_t to another.
1195*3117ece4Schristos  *
1196*3117ece4Schristos  * @param dst_state The state to copy to.
1197*3117ece4Schristos  * @param src_state The state to copy from.
1198*3117ece4Schristos  * @pre
1199*3117ece4Schristos  *   @p dst_state and @p src_state must not be `NULL` and must not overlap.
1200*3117ece4Schristos  */
1201*3117ece4Schristos XXH_PUBLIC_API void XXH3_copyState(XXH_NOESCAPE XXH3_state_t* dst_state, XXH_NOESCAPE const XXH3_state_t* src_state);
1202*3117ece4Schristos
1203*3117ece4Schristos /*!
1204*3117ece4Schristos  * @brief Resets an @ref XXH3_state_t to begin a new hash.
1205*3117ece4Schristos  *
1206*3117ece4Schristos  * @param statePtr The state struct to reset.
1207*3117ece4Schristos  *
1208*3117ece4Schristos  * @pre
1209*3117ece4Schristos  *   @p statePtr must not be `NULL`.
1210*3117ece4Schristos  *
1211*3117ece4Schristos  * @return @ref XXH_OK on success.
1212*3117ece4Schristos  * @return @ref XXH_ERROR on failure.
1213*3117ece4Schristos  *
1214*3117ece4Schristos  * @note
1215*3117ece4Schristos  *   - This function resets `statePtr` and generates a secret with default parameters.
1216*3117ece4Schristos  *   - Call this function before @ref XXH3_64bits_update().
1217*3117ece4Schristos  *   - Digest will be equivalent to `XXH3_64bits()`.
1218*3117ece4Schristos  *
1219*3117ece4Schristos  */
1220*3117ece4Schristos XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset(XXH_NOESCAPE XXH3_state_t* statePtr);
1221*3117ece4Schristos
1222*3117ece4Schristos /*!
1223*3117ece4Schristos  * @brief Resets an @ref XXH3_state_t with a 64-bit seed to begin a new hash.
1224*3117ece4Schristos  *
1225*3117ece4Schristos  * @param statePtr The state struct to reset.
1226*3117ece4Schristos  * @param seed The 64-bit seed to alter the hash result predictably.
1227*3117ece4Schristos  *
1228*3117ece4Schristos  * @pre
1229*3117ece4Schristos  *   @p statePtr must not be `NULL`.
1230*3117ece4Schristos  *
1231*3117ece4Schristos  * @return @ref XXH_OK on success.
1232*3117ece4Schristos  * @return @ref XXH_ERROR on failure.
1233*3117ece4Schristos  *
1234*3117ece4Schristos  * @note
1235*3117ece4Schristos  *   - This function resets `statePtr` and generates a secret from `seed`.
1236*3117ece4Schristos  *   - Call this function before @ref XXH3_64bits_update().
1237*3117ece4Schristos  *   - Digest will be equivalent to `XXH3_64bits_withSeed()`.
1238*3117ece4Schristos  *
1239*3117ece4Schristos  */
1240*3117ece4Schristos XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH64_hash_t seed);
1241*3117ece4Schristos
1242*3117ece4Schristos /*!
1243*3117ece4Schristos  * @brief Resets an @ref XXH3_state_t with secret data to begin a new hash.
1244*3117ece4Schristos  *
1245*3117ece4Schristos  * @param statePtr The state struct to reset.
1246*3117ece4Schristos  * @param secret The secret data.
1247*3117ece4Schristos  * @param secretSize The length of @p secret, in bytes.
1248*3117ece4Schristos  *
1249*3117ece4Schristos  * @pre
1250*3117ece4Schristos  *   @p statePtr must not be `NULL`.
1251*3117ece4Schristos  *
1252*3117ece4Schristos  * @return @ref XXH_OK on success.
1253*3117ece4Schristos  * @return @ref XXH_ERROR on failure.
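 *
 * For illustration, one possible shape of a secret-based streaming session
 * (a sketch, not part of the library; it returns 0 on failure, which is also
 * a valid hash value, so real code should report errors separately):
 * @code{.c}
 * #include "xxhash.h"
 *
 * static XXH64_hash_t hashWithSecret(const void* data, size_t size,
 *                                    const void* secret, size_t secretSize)
 * {
 *     XXH64_hash_t h = 0;
 *     XXH3_state_t* const state = XXH3_createState();
 *     if (state == NULL) return 0;
 *     if ( XXH3_64bits_reset_withSecret(state, secret, secretSize) == XXH_OK
 *       && XXH3_64bits_update(state, data, size) == XXH_OK ) {
 *         h = XXH3_64bits_digest(state);
 *     }
 *     (void)XXH3_freeState(state);
 *     return h;
 * }
 * @endcode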
1254*3117ece4Schristos * 1255*3117ece4Schristos * @note 1256*3117ece4Schristos * `secret` is referenced, it _must outlive_ the hash streaming session. 1257*3117ece4Schristos * 1258*3117ece4Schristos * Similar to one-shot API, `secretSize` must be >= @ref XXH3_SECRET_SIZE_MIN, 1259*3117ece4Schristos * and the quality of produced hash values depends on secret's entropy 1260*3117ece4Schristos * (secret's content should look like a bunch of random bytes). 1261*3117ece4Schristos * When in doubt about the randomness of a candidate `secret`, 1262*3117ece4Schristos * consider employing `XXH3_generateSecret()` instead (see below). 1263*3117ece4Schristos */ 1264*3117ece4Schristos XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize); 1265*3117ece4Schristos 1266*3117ece4Schristos /*! 1267*3117ece4Schristos * @brief Consumes a block of @p input to an @ref XXH3_state_t. 1268*3117ece4Schristos * 1269*3117ece4Schristos * @param statePtr The state struct to update. 1270*3117ece4Schristos * @param input The block of data to be hashed, at least @p length bytes in size. 1271*3117ece4Schristos * @param length The length of @p input, in bytes. 1272*3117ece4Schristos * 1273*3117ece4Schristos * @pre 1274*3117ece4Schristos * @p statePtr must not be `NULL`. 1275*3117ece4Schristos * @pre 1276*3117ece4Schristos * The memory between @p input and @p input + @p length must be valid, 1277*3117ece4Schristos * readable, contiguous memory. However, if @p length is `0`, @p input may be 1278*3117ece4Schristos * `NULL`. In C++, this also must be *TriviallyCopyable*. 1279*3117ece4Schristos * 1280*3117ece4Schristos * @return @ref XXH_OK on success. 1281*3117ece4Schristos * @return @ref XXH_ERROR on failure. 1282*3117ece4Schristos * 1283*3117ece4Schristos * @note Call this to incrementally consume blocks of data. 1284*3117ece4Schristos */ 1285*3117ece4Schristos XXH_PUBLIC_API XXH_errorcode XXH3_64bits_update (XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* input, size_t length); 1286*3117ece4Schristos 1287*3117ece4Schristos /*! 1288*3117ece4Schristos * @brief Returns the calculated XXH3 64-bit hash value from an @ref XXH3_state_t. 1289*3117ece4Schristos * 1290*3117ece4Schristos * @param statePtr The state struct to calculate the hash from. 1291*3117ece4Schristos * 1292*3117ece4Schristos * @pre 1293*3117ece4Schristos * @p statePtr must not be `NULL`. 1294*3117ece4Schristos * 1295*3117ece4Schristos * @return The calculated XXH3 64-bit hash value from that state. 1296*3117ece4Schristos * 1297*3117ece4Schristos * @note 1298*3117ece4Schristos * Calling XXH3_64bits_digest() will not affect @p statePtr, so you can update, 1299*3117ece4Schristos * digest, and update again. 1300*3117ece4Schristos */ 1301*3117ece4Schristos XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits_digest (XXH_NOESCAPE const XXH3_state_t* statePtr); 1302*3117ece4Schristos #endif /* !XXH_NO_STREAM */ 1303*3117ece4Schristos 1304*3117ece4Schristos /* note : canonical representation of XXH3 is the same as XXH64 1305*3117ece4Schristos * since they both produce XXH64_hash_t values */ 1306*3117ece4Schristos 1307*3117ece4Schristos 1308*3117ece4Schristos /*-********************************************************************** 1309*3117ece4Schristos * XXH3 128-bit variant 1310*3117ece4Schristos ************************************************************************/ 1311*3117ece4Schristos 1312*3117ece4Schristos /*! 
1313*3117ece4Schristos  * @brief The return value from 128-bit hashes.
1314*3117ece4Schristos  *
1315*3117ece4Schristos  * Stored in little endian order, although the fields themselves are in native
1316*3117ece4Schristos  * endianness.
1317*3117ece4Schristos  */
1318*3117ece4Schristos typedef struct {
1319*3117ece4Schristos     XXH64_hash_t low64;  /*!< `value & 0xFFFFFFFFFFFFFFFF` */
1320*3117ece4Schristos     XXH64_hash_t high64; /*!< `value >> 64` */
1321*3117ece4Schristos } XXH128_hash_t;
1322*3117ece4Schristos
1323*3117ece4Schristos /*!
1324*3117ece4Schristos  * @brief Calculates 128-bit unseeded variant of XXH3 of @p data.
1325*3117ece4Schristos  *
1326*3117ece4Schristos  * @param data The block of data to be hashed, at least @p len bytes in size.
1327*3117ece4Schristos  * @param len The length of @p data, in bytes.
1328*3117ece4Schristos  *
1329*3117ece4Schristos  * @return The calculated 128-bit variant of XXH3 value.
1330*3117ece4Schristos  *
1331*3117ece4Schristos  * The 128-bit variant of XXH3 has more strength, but it has a bit of overhead
1332*3117ece4Schristos  * for shorter inputs.
1333*3117ece4Schristos  *
1334*3117ece4Schristos  * This is equivalent to @ref XXH3_128bits_withSeed() with a seed of `0`, however
1335*3117ece4Schristos  * it may have slightly better performance due to constant propagation of the
1336*3117ece4Schristos  * defaults.
1337*3117ece4Schristos  *
1338*3117ece4Schristos  * @see XXH3_128bits_withSeed(), XXH3_128bits_withSecret(): other seeding variants
1339*3117ece4Schristos  * @see @ref single_shot_example "Single Shot Example" for an example.
1340*3117ece4Schristos  */
1341*3117ece4Schristos XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits(XXH_NOESCAPE const void* data, size_t len);
1342*3117ece4Schristos /*! @brief Calculates 128-bit seeded variant of XXH3 hash of @p data.
1343*3117ece4Schristos  *
1344*3117ece4Schristos  * @param data The block of data to be hashed, at least @p len bytes in size.
1345*3117ece4Schristos  * @param len The length of @p data, in bytes.
1346*3117ece4Schristos  * @param seed The 64-bit seed to alter the hash result predictably.
1347*3117ece4Schristos  *
1348*3117ece4Schristos  * @return The calculated 128-bit variant of XXH3 value.
1349*3117ece4Schristos  *
1350*3117ece4Schristos  * @note
1351*3117ece4Schristos  *   seed == 0 produces the same results as @ref XXH3_128bits().
1352*3117ece4Schristos  *
1353*3117ece4Schristos  * This variant generates a custom secret on the fly based on default secret
1354*3117ece4Schristos  * altered using the @p seed value.
1355*3117ece4Schristos  *
1356*3117ece4Schristos  * While this operation is decently fast, note that it's not completely free.
1357*3117ece4Schristos  *
1358*3117ece4Schristos  * @see XXH3_128bits(), XXH3_128bits_withSecret(): other seeding variants
1359*3117ece4Schristos  * @see @ref single_shot_example "Single Shot Example" for an example.
1360*3117ece4Schristos  */
1361*3117ece4Schristos XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_withSeed(XXH_NOESCAPE const void* data, size_t len, XXH64_hash_t seed);
1362*3117ece4Schristos /*!
1363*3117ece4Schristos  * @brief Calculates 128-bit variant of XXH3 with a custom "secret".
1364*3117ece4Schristos  *
1365*3117ece4Schristos  * @param data The block of data to be hashed, at least @p len bytes in size.
1366*3117ece4Schristos  * @param len The length of @p data, in bytes.
1367*3117ece4Schristos  * @param secret The secret data.
1368*3117ece4Schristos  * @param secretSize The length of @p secret, in bytes.
1369*3117ece4Schristos  *
1370*3117ece4Schristos  * @return The calculated 128-bit variant of XXH3 value.
1371*3117ece4Schristos  *
1372*3117ece4Schristos  * It's possible to provide any blob of bytes as a "secret" to generate the hash.
1373*3117ece4Schristos  * This makes it more difficult for an external actor to prepare an intentional collision.
1374*3117ece4Schristos  * The main condition is that @p secretSize *must* be large enough (>= @ref XXH3_SECRET_SIZE_MIN).
1375*3117ece4Schristos  * However, the quality of the secret impacts the dispersion of the hash algorithm.
1376*3117ece4Schristos  * Therefore, the secret _must_ look like a bunch of random bytes.
1377*3117ece4Schristos  * Avoid "trivial" or structured data such as repeated sequences or a text document.
1378*3117ece4Schristos  * Whenever in doubt about the "randomness" of the blob of bytes,
1379*3117ece4Schristos  * consider employing @ref XXH3_generateSecret() instead (see below).
1380*3117ece4Schristos  * It will generate a proper high-entropy secret derived from the blob of bytes.
1381*3117ece4Schristos  * Another advantage of using XXH3_generateSecret() is that
1382*3117ece4Schristos  * it guarantees that all bits within the initial blob of bytes
1383*3117ece4Schristos  * will impact every bit of the output.
1384*3117ece4Schristos  * This is not necessarily the case when using the blob of bytes directly
1385*3117ece4Schristos  * because, when hashing _small_ inputs, only a portion of the secret is employed.
1386*3117ece4Schristos  *
1387*3117ece4Schristos  * @see @ref single_shot_example "Single Shot Example" for an example.
1388*3117ece4Schristos  */
1389*3117ece4Schristos XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_withSecret(XXH_NOESCAPE const void* data, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize);
1390*3117ece4Schristos
1391*3117ece4Schristos /******* Streaming *******/
1392*3117ece4Schristos #ifndef XXH_NO_STREAM
1393*3117ece4Schristos /*
1394*3117ece4Schristos  * Streaming requires state maintenance.
1395*3117ece4Schristos  * This operation costs memory and CPU.
1396*3117ece4Schristos  * As a consequence, streaming is slower than one-shot hashing.
1397*3117ece4Schristos  * For better performance, prefer one-shot functions whenever applicable.
1398*3117ece4Schristos  *
1399*3117ece4Schristos  * XXH3_128bits uses the same XXH3_state_t as XXH3_64bits().
1400*3117ece4Schristos  * Use already declared XXH3_createState() and XXH3_freeState().
1401*3117ece4Schristos  *
1402*3117ece4Schristos  * All reset and streaming functions have the same meaning as their 64-bit counterparts.
1403*3117ece4Schristos  */
1404*3117ece4Schristos
1405*3117ece4Schristos /*!
1406*3117ece4Schristos  * @brief Resets an @ref XXH3_state_t to begin a new hash.
1407*3117ece4Schristos  *
1408*3117ece4Schristos  * @param statePtr The state struct to reset.
1409*3117ece4Schristos  *
1410*3117ece4Schristos  * @pre
1411*3117ece4Schristos  *   @p statePtr must not be `NULL`.
1412*3117ece4Schristos  *
1413*3117ece4Schristos  * @return @ref XXH_OK on success.
1414*3117ece4Schristos  * @return @ref XXH_ERROR on failure.
1415*3117ece4Schristos  *
1416*3117ece4Schristos  * @note
1417*3117ece4Schristos  *   - This function resets `statePtr` and generates a secret with default parameters.
1418*3117ece4Schristos  *   - Call it before @ref XXH3_128bits_update().
1419*3117ece4Schristos  *   - Digest will be equivalent to `XXH3_128bits()`.
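 *
 * For illustration, a minimal unseeded 128-bit streaming sketch
 * (`buffer` and `bufferSize` are assumed inputs; error checks abbreviated):
 * @code{.c}
 * XXH3_state_t* const state = XXH3_createState();
 * if (state != NULL) {
 *     (void)XXH3_128bits_reset(state);
 *     (void)XXH3_128bits_update(state, buffer, bufferSize);
 *     {   XXH128_hash_t const h128 = XXH3_128bits_digest(state);
 *         // h128.low64 and h128.high64 hold the two halves of the result.
 *         (void)h128;
 *     }
 *     (void)XXH3_freeState(state);
 * }
 * @endcode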
1420*3117ece4Schristos  */
1421*3117ece4Schristos XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset(XXH_NOESCAPE XXH3_state_t* statePtr);
1422*3117ece4Schristos
1423*3117ece4Schristos /*!
1424*3117ece4Schristos  * @brief Resets an @ref XXH3_state_t with a 64-bit seed to begin a new hash.
1425*3117ece4Schristos  *
1426*3117ece4Schristos  * @param statePtr The state struct to reset.
1427*3117ece4Schristos  * @param seed The 64-bit seed to alter the hash result predictably.
1428*3117ece4Schristos  *
1429*3117ece4Schristos  * @pre
1430*3117ece4Schristos  *   @p statePtr must not be `NULL`.
1431*3117ece4Schristos  *
1432*3117ece4Schristos  * @return @ref XXH_OK on success.
1433*3117ece4Schristos  * @return @ref XXH_ERROR on failure.
1434*3117ece4Schristos  *
1435*3117ece4Schristos  * @note
1436*3117ece4Schristos  *   - This function resets `statePtr` and generates a secret from `seed`.
1437*3117ece4Schristos  *   - Call it before @ref XXH3_128bits_update().
1438*3117ece4Schristos  *   - Digest will be equivalent to `XXH3_128bits_withSeed()`.
1439*3117ece4Schristos  */
1440*3117ece4Schristos XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH64_hash_t seed);
1441*3117ece4Schristos /*!
1442*3117ece4Schristos  * @brief Resets an @ref XXH3_state_t with secret data to begin a new hash.
1443*3117ece4Schristos  *
1444*3117ece4Schristos  * @param statePtr The state struct to reset.
1445*3117ece4Schristos  * @param secret The secret data.
1446*3117ece4Schristos  * @param secretSize The length of @p secret, in bytes.
1447*3117ece4Schristos  *
1448*3117ece4Schristos  * @pre
1449*3117ece4Schristos  *   @p statePtr must not be `NULL`.
1450*3117ece4Schristos  *
1451*3117ece4Schristos  * @return @ref XXH_OK on success.
1452*3117ece4Schristos  * @return @ref XXH_ERROR on failure.
1453*3117ece4Schristos  *
1454*3117ece4Schristos  * `secret` is referenced, it _must outlive_ the hash streaming session.
1455*3117ece4Schristos  * Similar to one-shot API, `secretSize` must be >= @ref XXH3_SECRET_SIZE_MIN,
1456*3117ece4Schristos  * and the quality of produced hash values depends on secret's entropy
1457*3117ece4Schristos  * (secret's content should look like a bunch of random bytes).
1458*3117ece4Schristos  * When in doubt about the randomness of a candidate `secret`,
1459*3117ece4Schristos  * consider employing `XXH3_generateSecret()` instead (see below).
1460*3117ece4Schristos  */
1461*3117ece4Schristos XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize);
1462*3117ece4Schristos
1463*3117ece4Schristos /*!
1464*3117ece4Schristos  * @brief Consumes a block of @p input to an @ref XXH3_state_t.
1465*3117ece4Schristos  *
1466*3117ece4Schristos  * Call this to incrementally consume blocks of data.
1467*3117ece4Schristos  *
1468*3117ece4Schristos  * @param statePtr The state struct to update.
1469*3117ece4Schristos  * @param input The block of data to be hashed, at least @p length bytes in size.
1470*3117ece4Schristos  * @param length The length of @p input, in bytes.
1471*3117ece4Schristos  *
1472*3117ece4Schristos  * @pre
1473*3117ece4Schristos  *   @p statePtr must not be `NULL`.
1474*3117ece4Schristos  *
1475*3117ece4Schristos  * @return @ref XXH_OK on success.
1476*3117ece4Schristos  * @return @ref XXH_ERROR on failure.
1477*3117ece4Schristos  *
1478*3117ece4Schristos  * @note
1479*3117ece4Schristos  *   The memory between @p input and @p input + @p length must be valid,
1480*3117ece4Schristos  *   readable, contiguous memory. However, if @p length is `0`, @p input may be
1481*3117ece4Schristos  *   `NULL`. In C++, this also must be *TriviallyCopyable*.
1482*3117ece4Schristos  *
1483*3117ece4Schristos  */
1484*3117ece4Schristos XXH_PUBLIC_API XXH_errorcode XXH3_128bits_update (XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* input, size_t length);
1485*3117ece4Schristos
1486*3117ece4Schristos /*!
1487*3117ece4Schristos  * @brief Returns the calculated XXH3 128-bit hash value from an @ref XXH3_state_t.
1488*3117ece4Schristos  *
1489*3117ece4Schristos  * @param statePtr The state struct to calculate the hash from.
1490*3117ece4Schristos  *
1491*3117ece4Schristos  * @pre
1492*3117ece4Schristos  *   @p statePtr must not be `NULL`.
1493*3117ece4Schristos  *
1494*3117ece4Schristos  * @return The calculated XXH3 128-bit hash value from that state.
1495*3117ece4Schristos  *
1496*3117ece4Schristos  * @note
1497*3117ece4Schristos  *   Calling XXH3_128bits_digest() will not affect @p statePtr, so you can update,
1498*3117ece4Schristos  *   digest, and update again.
1499*3117ece4Schristos  *
1500*3117ece4Schristos  */
1501*3117ece4Schristos XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_digest (XXH_NOESCAPE const XXH3_state_t* statePtr);
1502*3117ece4Schristos #endif /* !XXH_NO_STREAM */
1503*3117ece4Schristos
1504*3117ece4Schristos /* The following helper functions make it possible to compare XXH128_hash_t values.
1505*3117ece4Schristos  * Since XXH128_hash_t is a structure, this capability is not offered by the language.
1506*3117ece4Schristos  * Note: For better performance, these functions can be inlined using XXH_INLINE_ALL */
1507*3117ece4Schristos
1508*3117ece4Schristos /*!
1509*3117ece4Schristos  * @brief Check equality of two XXH128_hash_t values
1510*3117ece4Schristos  *
1511*3117ece4Schristos  * @param h1 The 128-bit hash value.
1512*3117ece4Schristos  * @param h2 Another 128-bit hash value.
1513*3117ece4Schristos  *
1514*3117ece4Schristos  * @return `1` if `h1` and `h2` are equal.
1515*3117ece4Schristos  * @return `0` if they are not.
1516*3117ece4Schristos  */
1517*3117ece4Schristos XXH_PUBLIC_API XXH_PUREF int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2);
1518*3117ece4Schristos
1519*3117ece4Schristos /*!
1520*3117ece4Schristos  * @brief Compares two @ref XXH128_hash_t
1521*3117ece4Schristos  *
1522*3117ece4Schristos  * This comparator is compatible with stdlib's `qsort()`/`bsearch()`.
1523*3117ece4Schristos  *
1524*3117ece4Schristos  * @param h128_1 Left-hand side value
1525*3117ece4Schristos  * @param h128_2 Right-hand side value
1526*3117ece4Schristos  *
1527*3117ece4Schristos  * @return >0 if @p h128_1 > @p h128_2
1528*3117ece4Schristos  * @return =0 if @p h128_1 == @p h128_2
1529*3117ece4Schristos  * @return <0 if @p h128_1 < @p h128_2
1530*3117ece4Schristos  */
1531*3117ece4Schristos XXH_PUBLIC_API XXH_PUREF int XXH128_cmp(XXH_NOESCAPE const void* h128_1, XXH_NOESCAPE const void* h128_2);
1532*3117ece4Schristos
1533*3117ece4Schristos
1534*3117ece4Schristos /******* Canonical representation *******/
1535*3117ece4Schristos typedef struct { unsigned char digest[sizeof(XXH128_hash_t)]; } XXH128_canonical_t;
1536*3117ece4Schristos
1537*3117ece4Schristos
1538*3117ece4Schristos /*!
1539*3117ece4Schristos  * @brief Converts an @ref XXH128_hash_t to a big endian @ref XXH128_canonical_t.
1540*3117ece4Schristos  *
1541*3117ece4Schristos  * @param dst The @ref XXH128_canonical_t pointer to be stored to.
1542*3117ece4Schristos  * @param hash The @ref XXH128_hash_t to be converted.
1543*3117ece4Schristos * 1544*3117ece4Schristos * @pre 1545*3117ece4Schristos * @p dst must not be `NULL`. 1546*3117ece4Schristos * @see @ref canonical_representation_example "Canonical Representation Example" 1547*3117ece4Schristos */ 1548*3117ece4Schristos XXH_PUBLIC_API void XXH128_canonicalFromHash(XXH_NOESCAPE XXH128_canonical_t* dst, XXH128_hash_t hash); 1549*3117ece4Schristos 1550*3117ece4Schristos /*! 1551*3117ece4Schristos * @brief Converts an @ref XXH128_canonical_t to a native @ref XXH128_hash_t. 1552*3117ece4Schristos * 1553*3117ece4Schristos * @param src The @ref XXH128_canonical_t to convert. 1554*3117ece4Schristos * 1555*3117ece4Schristos * @pre 1556*3117ece4Schristos * @p src must not be `NULL`. 1557*3117ece4Schristos * 1558*3117ece4Schristos * @return The converted hash. 1559*3117ece4Schristos * @see @ref canonical_representation_example "Canonical Representation Example" 1560*3117ece4Schristos */ 1561*3117ece4Schristos XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH128_hashFromCanonical(XXH_NOESCAPE const XXH128_canonical_t* src); 1562*3117ece4Schristos 1563*3117ece4Schristos 1564*3117ece4Schristos #endif /* !XXH_NO_XXH3 */ 1565*3117ece4Schristos #endif /* XXH_NO_LONG_LONG */ 1566*3117ece4Schristos 1567*3117ece4Schristos /*! 1568*3117ece4Schristos * @} 1569*3117ece4Schristos */ 1570*3117ece4Schristos #endif /* XXHASH_H_5627135585666179 */ 1571*3117ece4Schristos 1572*3117ece4Schristos 1573*3117ece4Schristos 1574*3117ece4Schristos #if defined(XXH_STATIC_LINKING_ONLY) && !defined(XXHASH_H_STATIC_13879238742) 1575*3117ece4Schristos #define XXHASH_H_STATIC_13879238742 1576*3117ece4Schristos /* **************************************************************************** 1577*3117ece4Schristos * This section contains declarations which are not guaranteed to remain stable. 1578*3117ece4Schristos * They may change in future versions, becoming incompatible with a different 1579*3117ece4Schristos * version of the library. 1580*3117ece4Schristos * These declarations should only be used with static linking. 1581*3117ece4Schristos * Never use them in association with dynamic linking! 1582*3117ece4Schristos ***************************************************************************** */ 1583*3117ece4Schristos 1584*3117ece4Schristos /* 1585*3117ece4Schristos * These definitions are only present to allow static allocation 1586*3117ece4Schristos * of XXH states, on stack or in a struct, for example. 1587*3117ece4Schristos * Never **ever** access their members directly. 1588*3117ece4Schristos */ 1589*3117ece4Schristos 1590*3117ece4Schristos /*! 1591*3117ece4Schristos * @internal 1592*3117ece4Schristos * @brief Structure for XXH32 streaming API. 1593*3117ece4Schristos * 1594*3117ece4Schristos * @note This is only defined when @ref XXH_STATIC_LINKING_ONLY, 1595*3117ece4Schristos * @ref XXH_INLINE_ALL, or @ref XXH_IMPLEMENTATION is defined. Otherwise it is 1596*3117ece4Schristos * an opaque type. This allows fields to safely be changed. 1597*3117ece4Schristos * 1598*3117ece4Schristos * Typedef'd to @ref XXH32_state_t. 1599*3117ece4Schristos * Do not access the members of this struct directly. 
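 *
 * For illustration, a sketch of static (stack) allocation, which is the point
 * of exposing this definition (XXH32_reset(), XXH32_update() and
 * XXH32_digest() are declared in the XXH32 section earlier in this header):
 * @code{.c}
 * #define XXH_STATIC_LINKING_ONLY
 * #include "xxhash.h"
 *
 * static XXH32_hash_t hashOnStack(const void* data, size_t size)
 * {
 *     XXH32_state_t state;           // no XXH32_createState() needed
 *     (void)XXH32_reset(&state, 0);  // seed == 0
 *     (void)XXH32_update(&state, data, size);
 *     return XXH32_digest(&state);
 * }
 * @endcode
 *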
1600*3117ece4Schristos * @see XXH64_state_s, XXH3_state_s 1601*3117ece4Schristos */ 1602*3117ece4Schristos struct XXH32_state_s { 1603*3117ece4Schristos XXH32_hash_t total_len_32; /*!< Total length hashed, modulo 2^32 */ 1604*3117ece4Schristos XXH32_hash_t large_len; /*!< Whether the hash is >= 16 (handles @ref total_len_32 overflow) */ 1605*3117ece4Schristos XXH32_hash_t v[4]; /*!< Accumulator lanes */ 1606*3117ece4Schristos XXH32_hash_t mem32[4]; /*!< Internal buffer for partial reads. Treated as unsigned char[16]. */ 1607*3117ece4Schristos XXH32_hash_t memsize; /*!< Amount of data in @ref mem32 */ 1608*3117ece4Schristos XXH32_hash_t reserved; /*!< Reserved field. Do not read nor write to it. */ 1609*3117ece4Schristos }; /* typedef'd to XXH32_state_t */ 1610*3117ece4Schristos 1611*3117ece4Schristos 1612*3117ece4Schristos #ifndef XXH_NO_LONG_LONG /* defined when there is no 64-bit support */ 1613*3117ece4Schristos 1614*3117ece4Schristos /*! 1615*3117ece4Schristos * @internal 1616*3117ece4Schristos * @brief Structure for XXH64 streaming API. 1617*3117ece4Schristos * 1618*3117ece4Schristos * @note This is only defined when @ref XXH_STATIC_LINKING_ONLY, 1619*3117ece4Schristos * @ref XXH_INLINE_ALL, or @ref XXH_IMPLEMENTATION is defined. Otherwise it is 1620*3117ece4Schristos * an opaque type. This allows fields to safely be changed. 1621*3117ece4Schristos * 1622*3117ece4Schristos * Typedef'd to @ref XXH64_state_t. 1623*3117ece4Schristos * Do not access the members of this struct directly. 1624*3117ece4Schristos * @see XXH32_state_s, XXH3_state_s 1625*3117ece4Schristos */ 1626*3117ece4Schristos struct XXH64_state_s { 1627*3117ece4Schristos XXH64_hash_t total_len; /*!< Total length hashed. This is always 64-bit. */ 1628*3117ece4Schristos XXH64_hash_t v[4]; /*!< Accumulator lanes */ 1629*3117ece4Schristos XXH64_hash_t mem64[4]; /*!< Internal buffer for partial reads. Treated as unsigned char[32]. */ 1630*3117ece4Schristos XXH32_hash_t memsize; /*!< Amount of data in @ref mem64 */ 1631*3117ece4Schristos XXH32_hash_t reserved32; /*!< Reserved field, needed for padding anyways*/ 1632*3117ece4Schristos XXH64_hash_t reserved64; /*!< Reserved field. Do not read or write to it. */ 1633*3117ece4Schristos }; /* typedef'd to XXH64_state_t */ 1634*3117ece4Schristos 1635*3117ece4Schristos #ifndef XXH_NO_XXH3 1636*3117ece4Schristos 1637*3117ece4Schristos #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* >= C11 */ 1638*3117ece4Schristos # include <stdalign.h> 1639*3117ece4Schristos # define XXH_ALIGN(n) alignas(n) 1640*3117ece4Schristos #elif defined(__cplusplus) && (__cplusplus >= 201103L) /* >= C++11 */ 1641*3117ece4Schristos /* In C++ alignas() is a keyword */ 1642*3117ece4Schristos # define XXH_ALIGN(n) alignas(n) 1643*3117ece4Schristos #elif defined(__GNUC__) 1644*3117ece4Schristos # define XXH_ALIGN(n) __attribute__ ((aligned(n))) 1645*3117ece4Schristos #elif defined(_MSC_VER) 1646*3117ece4Schristos # define XXH_ALIGN(n) __declspec(align(n)) 1647*3117ece4Schristos #else 1648*3117ece4Schristos # define XXH_ALIGN(n) /* disabled */ 1649*3117ece4Schristos #endif 1650*3117ece4Schristos 1651*3117ece4Schristos /* Old GCC versions only accept the attribute after the type in structures. */ 1652*3117ece4Schristos #if !(defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) /* C11+ */ \ 1653*3117ece4Schristos && ! 
(defined(__cplusplus) && (__cplusplus >= 201103L)) /* >= C++11 */ \
1654*3117ece4Schristos    && defined(__GNUC__)
1655*3117ece4Schristos #   define XXH_ALIGN_MEMBER(align, type) type XXH_ALIGN(align)
1656*3117ece4Schristos #else
1657*3117ece4Schristos #   define XXH_ALIGN_MEMBER(align, type) XXH_ALIGN(align) type
1658*3117ece4Schristos #endif
1659*3117ece4Schristos
1660*3117ece4Schristos /*!
1661*3117ece4Schristos  * @brief The size of the internal XXH3 buffer.
1662*3117ece4Schristos  *
1663*3117ece4Schristos  * This is the optimal update size for incremental hashing.
1664*3117ece4Schristos  *
1665*3117ece4Schristos  * @see XXH3_64bits_update(), XXH3_128bits_update().
1666*3117ece4Schristos  */
1667*3117ece4Schristos #define XXH3_INTERNALBUFFER_SIZE 256
1668*3117ece4Schristos
1669*3117ece4Schristos /*!
1670*3117ece4Schristos  * @internal
1671*3117ece4Schristos  * @brief Default size of the secret buffer (and @ref XXH3_kSecret).
1672*3117ece4Schristos  *
1673*3117ece4Schristos  * This is the size used in @ref XXH3_kSecret and the seeded functions.
1674*3117ece4Schristos  *
1675*3117ece4Schristos  * Not to be confused with @ref XXH3_SECRET_SIZE_MIN.
1676*3117ece4Schristos  */
1677*3117ece4Schristos #define XXH3_SECRET_DEFAULT_SIZE 192
1678*3117ece4Schristos
1679*3117ece4Schristos /*!
1680*3117ece4Schristos  * @internal
1681*3117ece4Schristos  * @brief Structure for XXH3 streaming API.
1682*3117ece4Schristos  *
1683*3117ece4Schristos  * @note This is only defined when @ref XXH_STATIC_LINKING_ONLY,
1684*3117ece4Schristos  * @ref XXH_INLINE_ALL, or @ref XXH_IMPLEMENTATION is defined.
1685*3117ece4Schristos  * Otherwise it is an opaque type.
1686*3117ece4Schristos  * Never use this definition in combination with a dynamic library.
1687*3117ece4Schristos  * This allows fields to safely be changed in the future.
1688*3117ece4Schristos  *
1689*3117ece4Schristos  * @note ** This structure has a strict alignment requirement of 64 bytes!! **
1690*3117ece4Schristos  * Do not allocate this with `malloc()` or `new`;
1691*3117ece4Schristos  * it will not be sufficiently aligned.
1692*3117ece4Schristos  * Use @ref XXH3_createState() and @ref XXH3_freeState(), or stack allocation.
1693*3117ece4Schristos  *
1694*3117ece4Schristos  * Typedef'd to @ref XXH3_state_t.
1695*3117ece4Schristos  * Never access the members of this struct directly.
1696*3117ece4Schristos  *
1697*3117ece4Schristos  * @see XXH3_INITSTATE() for stack initialization.
1698*3117ece4Schristos  * @see XXH3_createState(), XXH3_freeState().
1699*3117ece4Schristos  * @see XXH32_state_s, XXH64_state_s
1700*3117ece4Schristos  */
1701*3117ece4Schristos struct XXH3_state_s {
1702*3117ece4Schristos     XXH_ALIGN_MEMBER(64, XXH64_hash_t acc[8]);
1703*3117ece4Schristos     /*!< The 8 accumulators. See @ref XXH32_state_s::v and @ref XXH64_state_s::v */
1704*3117ece4Schristos     XXH_ALIGN_MEMBER(64, unsigned char customSecret[XXH3_SECRET_DEFAULT_SIZE]);
1705*3117ece4Schristos     /*!< Used to store a custom secret generated from a seed. */
1706*3117ece4Schristos     XXH_ALIGN_MEMBER(64, unsigned char buffer[XXH3_INTERNALBUFFER_SIZE]);
1707*3117ece4Schristos     /*!< The internal buffer. @see XXH32_state_s::mem32 */
1708*3117ece4Schristos     XXH32_hash_t bufferedSize;
1709*3117ece4Schristos     /*!< The amount of memory in @ref buffer, @see XXH32_state_s::memsize */
1710*3117ece4Schristos     XXH32_hash_t useSeed;
1711*3117ece4Schristos     /*!< Reserved field. Needed for padding on 64-bit. */
1712*3117ece4Schristos     size_t nbStripesSoFar;
1713*3117ece4Schristos     /*!< Number of stripes processed.
*/ 1714*3117ece4Schristos XXH64_hash_t totalLen; 1715*3117ece4Schristos /*!< Total length hashed. 64-bit even on 32-bit targets. */ 1716*3117ece4Schristos size_t nbStripesPerBlock; 1717*3117ece4Schristos /*!< Number of stripes per block. */ 1718*3117ece4Schristos size_t secretLimit; 1719*3117ece4Schristos /*!< Size of @ref customSecret or @ref extSecret */ 1720*3117ece4Schristos XXH64_hash_t seed; 1721*3117ece4Schristos /*!< Seed for _withSeed variants. Must be zero otherwise, @see XXH3_INITSTATE() */ 1722*3117ece4Schristos XXH64_hash_t reserved64; 1723*3117ece4Schristos /*!< Reserved field. */ 1724*3117ece4Schristos const unsigned char* extSecret; 1725*3117ece4Schristos /*!< Reference to an external secret for the _withSecret variants, NULL 1726*3117ece4Schristos * for other variants. */ 1727*3117ece4Schristos /* note: there may be some padding at the end due to alignment on 64 bytes */ 1728*3117ece4Schristos }; /* typedef'd to XXH3_state_t */ 1729*3117ece4Schristos 1730*3117ece4Schristos #undef XXH_ALIGN_MEMBER 1731*3117ece4Schristos 1732*3117ece4Schristos /*! 1733*3117ece4Schristos * @brief Initializes a stack-allocated `XXH3_state_s`. 1734*3117ece4Schristos * 1735*3117ece4Schristos * When the @ref XXH3_state_t structure is merely emplaced on stack, 1736*3117ece4Schristos * it should be initialized with XXH3_INITSTATE() or a memset() 1737*3117ece4Schristos * in case its first reset uses XXH3_NNbits_reset_withSeed(). 1738*3117ece4Schristos * This init can be omitted if the first reset uses default or _withSecret mode. 1739*3117ece4Schristos * This operation isn't necessary when the state is created with XXH3_createState(). 1740*3117ece4Schristos * Note that this doesn't prepare the state for a streaming operation, 1741*3117ece4Schristos * it's still necessary to use XXH3_NNbits_reset*() afterwards. 1742*3117ece4Schristos */ 1743*3117ece4Schristos #define XXH3_INITSTATE(XXH3_state_ptr) \ 1744*3117ece4Schristos do { \ 1745*3117ece4Schristos XXH3_state_t* tmp_xxh3_state_ptr = (XXH3_state_ptr); \ 1746*3117ece4Schristos tmp_xxh3_state_ptr->seed = 0; \ 1747*3117ece4Schristos tmp_xxh3_state_ptr->extSecret = NULL; \ 1748*3117ece4Schristos } while(0) 1749*3117ece4Schristos 1750*3117ece4Schristos 1751*3117ece4Schristos /*! 1752*3117ece4Schristos * @brief Calculates the 128-bit hash of @p data using XXH3. 1753*3117ece4Schristos * 1754*3117ece4Schristos * @param data The block of data to be hashed, at least @p len bytes in size. 1755*3117ece4Schristos * @param len The length of @p data, in bytes. 1756*3117ece4Schristos * @param seed The 64-bit seed to alter the hash's output predictably. 1757*3117ece4Schristos * 1758*3117ece4Schristos * @pre 1759*3117ece4Schristos * The memory between @p data and @p data + @p len must be valid, 1760*3117ece4Schristos * readable, contiguous memory. However, if @p len is `0`, @p data may be 1761*3117ece4Schristos * `NULL`. In C++, this also must be *TriviallyCopyable*. 1762*3117ece4Schristos * 1763*3117ece4Schristos * @return The calculated 128-bit XXH3 value. 1764*3117ece4Schristos * 1765*3117ece4Schristos * @see @ref single_shot_example "Single Shot Example" for an example. 1766*3117ece4Schristos */ 1767*3117ece4Schristos XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH128(XXH_NOESCAPE const void* data, size_t len, XXH64_hash_t seed); 1768*3117ece4Schristos 1769*3117ece4Schristos 1770*3117ece4Schristos /* === Experimental API === */ 1771*3117ece4Schristos /* Symbols defined below must be considered tied to a specific library version. 
*/
1772*3117ece4Schristos
1773*3117ece4Schristos /*!
1774*3117ece4Schristos  * @brief Derive a high-entropy secret from any user-defined content, named customSeed.
1775*3117ece4Schristos  *
1776*3117ece4Schristos  * @param secretBuffer A writable buffer for derived high-entropy secret data.
1777*3117ece4Schristos  * @param secretSize Size of secretBuffer, in bytes. Must be >= @ref XXH3_SECRET_SIZE_MIN.
1778*3117ece4Schristos  * @param customSeed A user-defined content.
1779*3117ece4Schristos  * @param customSeedSize Size of customSeed, in bytes.
1780*3117ece4Schristos  *
1781*3117ece4Schristos  * @return @ref XXH_OK on success.
1782*3117ece4Schristos  * @return @ref XXH_ERROR on failure.
1783*3117ece4Schristos  *
1784*3117ece4Schristos  * The generated secret can be used in combination with `*_withSecret()` functions.
1785*3117ece4Schristos  * The `_withSecret()` variants are useful to provide a higher level of protection
1786*3117ece4Schristos  * than a 64-bit seed, as it becomes much more difficult for an external actor to
1787*3117ece4Schristos  * guess how to impact the calculation logic.
1788*3117ece4Schristos  *
1789*3117ece4Schristos  * The function accepts as input a custom seed of any length and any content,
1790*3117ece4Schristos  * and derives from it a high-entropy secret of length @p secretSize into an
1791*3117ece4Schristos  * already allocated buffer @p secretBuffer.
1792*3117ece4Schristos  *
1793*3117ece4Schristos  * The generated secret can then be used with any `*_withSecret()` variant.
1794*3117ece4Schristos  * The functions @ref XXH3_128bits_withSecret(), @ref XXH3_64bits_withSecret(),
1795*3117ece4Schristos  * @ref XXH3_128bits_reset_withSecret() and @ref XXH3_64bits_reset_withSecret()
1796*3117ece4Schristos  * are part of this list. They all accept a `secret` parameter
1797*3117ece4Schristos  * which must be large enough for implementation reasons (>= @ref XXH3_SECRET_SIZE_MIN)
1798*3117ece4Schristos  * _and_ feature very high entropy (consist of random-looking bytes).
1799*3117ece4Schristos  * These conditions can be a high bar to meet, so @ref XXH3_generateSecret() can
1800*3117ece4Schristos  * be employed to ensure proper quality.
1801*3117ece4Schristos  *
1802*3117ece4Schristos  * @p customSeed can be anything. It can have any size, even a small one,
1803*3117ece4Schristos  * and its content can be anything, even "poor entropy" sources such as a bunch
1804*3117ece4Schristos  * of zeroes. The resulting `secret` will nonetheless provide all required qualities.
1805*3117ece4Schristos  *
1806*3117ece4Schristos  * @pre
1807*3117ece4Schristos  *   - @p secretSize must be >= @ref XXH3_SECRET_SIZE_MIN
1808*3117ece4Schristos  *   - When @p customSeedSize > 0, supplying NULL as customSeed is undefined behavior.
1809*3117ece4Schristos  *
1810*3117ece4Schristos  * Example code:
1811*3117ece4Schristos  * @code{.c}
1812*3117ece4Schristos  * #include <stdio.h>
1813*3117ece4Schristos  * #include <stdlib.h>
1814*3117ece4Schristos  * #include <string.h>
1815*3117ece4Schristos  * #define XXH_STATIC_LINKING_ONLY // expose unstable API
1816*3117ece4Schristos  * #include "xxhash.h"
1817*3117ece4Schristos  * // Hashes argv[2] using the entropy from argv[1].
1818*3117ece4Schristos  * int main(int argc, char* argv[])
1819*3117ece4Schristos  * {
1820*3117ece4Schristos  *     char secret[XXH3_SECRET_SIZE_MIN];
1821*3117ece4Schristos  *     if (argc != 3) { return 1; }
1822*3117ece4Schristos  *     XXH3_generateSecret(secret, sizeof(secret), argv[1], strlen(argv[1]));
1823*3117ece4Schristos  *     XXH64_hash_t h = XXH3_64bits_withSecret(
1824*3117ece4Schristos  *          argv[2], strlen(argv[2]),
1825*3117ece4Schristos  *          secret, sizeof(secret)
1826*3117ece4Schristos  *     );
1827*3117ece4Schristos  *     printf("%016llx\n", (unsigned long long) h);
1828*3117ece4Schristos  * }
1829*3117ece4Schristos  * @endcode
1830*3117ece4Schristos  */
1831*3117ece4Schristos XXH_PUBLIC_API XXH_errorcode XXH3_generateSecret(XXH_NOESCAPE void* secretBuffer, size_t secretSize, XXH_NOESCAPE const void* customSeed, size_t customSeedSize);
1832*3117ece4Schristos
1833*3117ece4Schristos /*!
1834*3117ece4Schristos  * @brief Generate the same secret as the _withSeed() variants.
1835*3117ece4Schristos  *
1836*3117ece4Schristos  * @param secretBuffer A writable buffer of @ref XXH3_SECRET_DEFAULT_SIZE bytes
1837*3117ece4Schristos  * @param seed The 64-bit seed to alter the hash result predictably.
1838*3117ece4Schristos  *
1839*3117ece4Schristos  * The generated secret can be used in combination with
1840*3117ece4Schristos  * `*_withSecret()` and `_withSecretandSeed()` variants.
1841*3117ece4Schristos  *
1842*3117ece4Schristos  * Example C++ `std::string` hash class:
1843*3117ece4Schristos  * @code{.cpp}
1844*3117ece4Schristos  * #include <string>
1845*3117ece4Schristos  * #define XXH_STATIC_LINKING_ONLY // expose unstable API
1846*3117ece4Schristos  * #include "xxhash.h"
1847*3117ece4Schristos  * // Slow, seeds each time
1848*3117ece4Schristos  * class HashSlow {
1849*3117ece4Schristos  *     XXH64_hash_t seed;
1850*3117ece4Schristos  * public:
1851*3117ece4Schristos  *     HashSlow(XXH64_hash_t s) : seed{s} {}
1852*3117ece4Schristos  *     size_t operator()(const std::string& x) const {
1853*3117ece4Schristos  *         return size_t{XXH3_64bits_withSeed(x.c_str(), x.length(), seed)};
1854*3117ece4Schristos  *     }
1855*3117ece4Schristos  * };
1856*3117ece4Schristos  * // Fast, caches the seeded secret for future uses.
1857*3117ece4Schristos  * class HashFast {
1858*3117ece4Schristos  *     unsigned char secret[XXH3_SECRET_DEFAULT_SIZE];
1859*3117ece4Schristos  * public:
1860*3117ece4Schristos  *     HashFast(XXH64_hash_t s) {
1861*3117ece4Schristos  *         XXH3_generateSecret_fromSeed(secret, s);
1862*3117ece4Schristos  *     }
1863*3117ece4Schristos  *     size_t operator()(const std::string& x) const {
1864*3117ece4Schristos  *         return size_t{
1865*3117ece4Schristos  *             XXH3_64bits_withSecret(x.c_str(), x.length(), secret, sizeof(secret))
1866*3117ece4Schristos  *         };
1867*3117ece4Schristos  *     }
1868*3117ece4Schristos  * };
1869*3117ece4Schristos  * @endcode
1870*3117ece4Schristos  */
1871*3117ece4Schristos XXH_PUBLIC_API void XXH3_generateSecret_fromSeed(XXH_NOESCAPE void* secretBuffer, XXH64_hash_t seed);
1872*3117ece4Schristos
1873*3117ece4Schristos /*!
1874*3117ece4Schristos  * @brief Calculates 64/128-bit seeded variant of XXH3 hash of @p data.
1875*3117ece4Schristos  *
1876*3117ece4Schristos  * @param data The block of data to be hashed, at least @p len bytes in size.
1877*3117ece4Schristos  * @param len The length of @p data, in bytes.
1878*3117ece4Schristos  * @param secret The secret data.
1879*3117ece4Schristos  * @param secretSize The length of @p secret, in bytes.
1880*3117ece4Schristos  * @param seed The 64-bit seed to alter the hash result predictably.
1881*3117ece4Schristos  *
1882*3117ece4Schristos  * These variants generate hash values using either
1883*3117ece4Schristos  * @p seed for "short" keys (< @ref XXH3_MIDSIZE_MAX = 240 bytes)
1884*3117ece4Schristos  * or @p secret for "large" keys (>= @ref XXH3_MIDSIZE_MAX).
1885*3117ece4Schristos  *
1886*3117ece4Schristos  * This generally benefits speed, compared to `_withSeed()` or `_withSecret()`.
1887*3117ece4Schristos  * `_withSeed()` has to generate the secret on the fly for "large" keys.
1888*3117ece4Schristos  * It's fast, but can be perceptible for "not so large" keys (< 1 KB).
1889*3117ece4Schristos  * `_withSecret()` has to generate the masks on the fly for "small" keys,
1890*3117ece4Schristos  * which requires more instructions than the `_withSeed()` variants.
1891*3117ece4Schristos  * Therefore, the `_withSecretandSeed()` variant combines the best of both worlds.
1892*3117ece4Schristos  *
1893*3117ece4Schristos  * When @p secret has been generated by XXH3_generateSecret_fromSeed(),
1894*3117ece4Schristos  * this variant produces *exactly* the same results as the `_withSeed()` variant,
1895*3117ece4Schristos  * hence offering only a pure speed benefit on "large" input,
1896*3117ece4Schristos  * by skipping the need to regenerate the secret for every large input.
1897*3117ece4Schristos  *
1898*3117ece4Schristos  * Another usage scenario is to hash the secret to a 64-bit hash value,
1899*3117ece4Schristos  * for example with XXH3_64bits(), which then becomes the seed,
1900*3117ece4Schristos  * and then employ both the seed and the secret in _withSecretandSeed().
1901*3117ece4Schristos  * On top of speed, an added benefit is that each bit in the secret
1902*3117ece4Schristos  * has a 50% chance to swap each bit in the output, via its impact on the seed.
1903*3117ece4Schristos  *
1904*3117ece4Schristos  * This is not guaranteed when using the secret directly in "small data" scenarios,
1905*3117ece4Schristos  * because only portions of the secret are employed for small data.
1906*3117ece4Schristos  */
1907*3117ece4Schristos XXH_PUBLIC_API XXH_PUREF XXH64_hash_t
1908*3117ece4Schristos XXH3_64bits_withSecretandSeed(XXH_NOESCAPE const void* data, size_t len,
1909*3117ece4Schristos                               XXH_NOESCAPE const void* secret, size_t secretSize,
1910*3117ece4Schristos                               XXH64_hash_t seed);
1911*3117ece4Schristos /*!
1912*3117ece4Schristos  * @brief Calculates 128-bit seeded variant of XXH3 hash of @p input.
1913*3117ece4Schristos  *
1914*3117ece4Schristos  * @param input The block of data to be hashed, at least @p length bytes in size.
1915*3117ece4Schristos  * @param length The length of @p input, in bytes.
1916*3117ece4Schristos  * @param secret The secret data.
1917*3117ece4Schristos  * @param secretSize The length of @p secret, in bytes.
1918*3117ece4Schristos  * @param seed64 The 64-bit seed to alter the hash result predictably.
1919*3117ece4Schristos  *
1920*3117ece4Schristos  * @return The calculated 128-bit variant of XXH3 value.
1921*3117ece4Schristos  *
1922*3117ece4Schristos  * @see XXH3_64bits_withSecretandSeed()
1923*3117ece4Schristos  */
1924*3117ece4Schristos XXH_PUBLIC_API XXH_PUREF XXH128_hash_t
1925*3117ece4Schristos XXH3_128bits_withSecretandSeed(XXH_NOESCAPE const void* input, size_t length,
1926*3117ece4Schristos                                XXH_NOESCAPE const void* secret, size_t secretSize,
1927*3117ece4Schristos                                XXH64_hash_t seed64);
1928*3117ece4Schristos #ifndef XXH_NO_STREAM
1929*3117ece4Schristos /*!
1930*3117ece4Schristos  * @brief Resets an @ref XXH3_state_t with secret data and a seed
1931*3117ece4Schristos  * to begin a new hash.
1932*3117ece4Schristos  *
1933*3117ece4Schristos  * @param statePtr A pointer to an @ref XXH3_state_t allocated with @ref XXH3_createState().
1934*3117ece4Schristos  * @param secret The secret data.
1935*3117ece4Schristos  * @param secretSize The length of @p secret, in bytes.
1936*3117ece4Schristos  * @param seed64 The 64-bit seed to alter the hash result predictably.
1937*3117ece4Schristos  *
1938*3117ece4Schristos  * @return @ref XXH_OK on success.
1939*3117ece4Schristos  * @return @ref XXH_ERROR on failure.
1940*3117ece4Schristos  *
1941*3117ece4Schristos  * @see XXH3_64bits_withSecretandSeed()
1942*3117ece4Schristos  */
1943*3117ece4Schristos XXH_PUBLIC_API XXH_errorcode
1944*3117ece4Schristos XXH3_64bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr,
1945*3117ece4Schristos                                     XXH_NOESCAPE const void* secret, size_t secretSize,
1946*3117ece4Schristos                                     XXH64_hash_t seed64);
1947*3117ece4Schristos /*!
1948*3117ece4Schristos  * @brief Resets an @ref XXH3_state_t with secret data and a seed to begin a new hash.
1949*3117ece4Schristos  *
1950*3117ece4Schristos  * @param statePtr A pointer to an @ref XXH3_state_t allocated with @ref XXH3_createState().
1951*3117ece4Schristos  * @param secret The secret data.
1952*3117ece4Schristos  * @param secretSize The length of @p secret, in bytes.
1953*3117ece4Schristos  * @param seed64 The 64-bit seed to alter the hash result predictably.
1954*3117ece4Schristos  *
1955*3117ece4Schristos  * @return @ref XXH_OK on success.
1956*3117ece4Schristos  * @return @ref XXH_ERROR on failure.
1957*3117ece4Schristos  *
1958*3117ece4Schristos  * @see XXH3_64bits_withSecretandSeed()
1959*3117ece4Schristos  */
1960*3117ece4Schristos XXH_PUBLIC_API XXH_errorcode
1961*3117ece4Schristos XXH3_128bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr,
1962*3117ece4Schristos                                      XXH_NOESCAPE const void* secret, size_t secretSize,
1963*3117ece4Schristos                                      XXH64_hash_t seed64);
1964*3117ece4Schristos #endif /* !XXH_NO_STREAM */
1965*3117ece4Schristos
1966*3117ece4Schristos #endif /* !XXH_NO_XXH3 */
1967*3117ece4Schristos #endif /* XXH_NO_LONG_LONG */
1968*3117ece4Schristos #if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)
1969*3117ece4Schristos #  define XXH_IMPLEMENTATION
1970*3117ece4Schristos #endif
1971*3117ece4Schristos
1972*3117ece4Schristos #endif /* defined(XXH_STATIC_LINKING_ONLY) && !defined(XXHASH_H_STATIC_13879238742) */
1973*3117ece4Schristos
1974*3117ece4Schristos
1975*3117ece4Schristos /* ======================================================================== */
1976*3117ece4Schristos /* ======================================================================== */
1977*3117ece4Schristos /* ======================================================================== */
1978*3117ece4Schristos
1979*3117ece4Schristos
1980*3117ece4Schristos /*-**********************************************************************
1981*3117ece4Schristos *  xxHash implementation
1982*3117ece4Schristos *-**********************************************************************
1983*3117ece4Schristos * xxHash's implementation used to be hosted inside xxhash.c.
1984*3117ece4Schristos *
1985*3117ece4Schristos * However, inlining requires the implementation to be visible to the compiler,
1986*3117ece4Schristos * hence to be included alongside the header.
1987*3117ece4Schristos * xxhash.c was therefore #included into the header
1988*3117ece4Schristos * whenever inlining was activated.
1989*3117ece4Schristos * This construction created issues with a few build and install systems,
1990*3117ece4Schristos * as it required xxhash.c to be stored in the /include directory.
1991*3117ece4Schristos *
1992*3117ece4Schristos * The xxHash implementation is now directly integrated within xxhash.h.
1993*3117ece4Schristos * As a consequence, xxhash.c is no longer needed in /include.
1994*3117ece4Schristos *
1995*3117ece4Schristos * xxhash.c is still available and is still useful.
1996*3117ece4Schristos * In a "normal" setup, when xxhash is not inlined,
1997*3117ece4Schristos * xxhash.h only exposes the prototypes and public symbols,
1998*3117ece4Schristos * while xxhash.c can be built into an object file xxhash.o
1999*3117ece4Schristos * which can then be linked into the final binary.
2000*3117ece4Schristos ************************************************************************/
2001*3117ece4Schristos
2002*3117ece4Schristos #if ( defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API) \
2003*3117ece4Schristos    || defined(XXH_IMPLEMENTATION) ) && !defined(XXH_IMPLEM_13a8737387)
2004*3117ece4Schristos #  define XXH_IMPLEM_13a8737387
2005*3117ece4Schristos
2006*3117ece4Schristos /* *************************************
2007*3117ece4Schristos *  Tuning parameters
2008*3117ece4Schristos ***************************************/
2009*3117ece4Schristos
2010*3117ece4Schristos /*!
2011*3117ece4Schristos  * @defgroup tuning Tuning parameters
2012*3117ece4Schristos  * @{
2013*3117ece4Schristos  *
2014*3117ece4Schristos  * Various macros to control xxHash's behavior.
2015*3117ece4Schristos  */
2016*3117ece4Schristos #ifdef XXH_DOXYGEN
2017*3117ece4Schristos /*!
2018*3117ece4Schristos  * @brief Define this to disable 64-bit code.
2019*3117ece4Schristos  *
2020*3117ece4Schristos  * Useful if only using the @ref XXH32_family and you have a strict C90 compiler.
2021*3117ece4Schristos  */
2022*3117ece4Schristos #  define XXH_NO_LONG_LONG
2023*3117ece4Schristos #  undef XXH_NO_LONG_LONG /* don't actually */
2024*3117ece4Schristos /*!
2025*3117ece4Schristos  * @brief Controls how unaligned memory is accessed.
2026*3117ece4Schristos  *
2027*3117ece4Schristos  * By default, access to unaligned memory is controlled by `memcpy()`, which is
2028*3117ece4Schristos  * safe and portable.
2029*3117ece4Schristos  *
2030*3117ece4Schristos  * Unfortunately, on some target/compiler combinations, the generated assembly
2031*3117ece4Schristos  * is sub-optimal.
2032*3117ece4Schristos  *
2033*3117ece4Schristos  * The switch below allows selection of a different access method
2034*3117ece4Schristos  * in the search for improved performance.
2035*3117ece4Schristos  *
2036*3117ece4Schristos  * @par Possible options:
2037*3117ece4Schristos  *
2038*3117ece4Schristos  * - `XXH_FORCE_MEMORY_ACCESS=0` (default): `memcpy`
2039*3117ece4Schristos  * @par
2040*3117ece4Schristos  *   Use `memcpy()`. Safe and portable. Note that most modern compilers will
2041*3117ece4Schristos  *   eliminate the function call and treat it as an unaligned access.
2042*3117ece4Schristos  *
2043*3117ece4Schristos  * - `XXH_FORCE_MEMORY_ACCESS=1`: `__attribute__((aligned(1)))`
2044*3117ece4Schristos  * @par
2045*3117ece4Schristos  *   Depends on compiler extensions and is therefore not portable.
2046*3117ece4Schristos  *   This method is safe _if_ your compiler supports it,
2047*3117ece4Schristos  *   and *generally* as fast or faster than `memcpy`.
2048*3117ece4Schristos * 2049*3117ece4Schristos * - `XXH_FORCE_MEMORY_ACCESS=2`: Direct cast 2050*3117ece4Schristos * @par 2051*3117ece4Schristos * Casts directly and dereferences. This method doesn't depend on the 2052*3117ece4Schristos * compiler, but it violates the C standard as it directly dereferences an 2053*3117ece4Schristos * unaligned pointer. It can generate buggy code on targets which do not 2054*3117ece4Schristos * support unaligned memory accesses, but in some circumstances, it's the 2055*3117ece4Schristos * only known way to get the most performance. 2056*3117ece4Schristos * 2057*3117ece4Schristos * - `XXH_FORCE_MEMORY_ACCESS=3`: Byteshift 2058*3117ece4Schristos * @par 2059*3117ece4Schristos * Also portable. This can generate the best code on old compilers which don't 2060*3117ece4Schristos * inline small `memcpy()` calls, and it might also be faster on big-endian 2061*3117ece4Schristos * systems which lack a native byteswap instruction. However, some compilers 2062*3117ece4Schristos * will emit literal byteshifts even if the target supports unaligned access. 2063*3117ece4Schristos * 2064*3117ece4Schristos * 2065*3117ece4Schristos * @warning 2066*3117ece4Schristos * Methods 1 and 2 rely on implementation-defined behavior. Use these with 2067*3117ece4Schristos * care, as what works on one compiler/platform/optimization level may cause 2068*3117ece4Schristos * another to read garbage data or even crash. 2069*3117ece4Schristos * 2070*3117ece4Schristos * See https://fastcompression.blogspot.com/2015/08/accessing-unaligned-memory.html for details. 2071*3117ece4Schristos * 2072*3117ece4Schristos * Prefer these methods in priority order (0 > 3 > 1 > 2). 2073*3117ece4Schristos */ 2074*3117ece4Schristos # define XXH_FORCE_MEMORY_ACCESS 0 2075*3117ece4Schristos 2076*3117ece4Schristos /*! 2077*3117ece4Schristos * @def XXH_SIZE_OPT 2078*3117ece4Schristos * @brief Controls how much xxHash optimizes for size. 2079*3117ece4Schristos * 2080*3117ece4Schristos * xxHash, when compiled, tends to result in a rather large binary size. This 2081*3117ece4Schristos * is mostly due to heavy use of forced inlining and constant folding of the 2082*3117ece4Schristos * @ref XXH3_family to increase performance. 2083*3117ece4Schristos * 2084*3117ece4Schristos * However, some developers prefer size over speed. This option can 2085*3117ece4Schristos * significantly reduce the size of the generated code. When using the `-Os` 2086*3117ece4Schristos * or `-Oz` options on GCC or Clang, this is defined to 1 by default, 2087*3117ece4Schristos * otherwise it is defined to 0. 2088*3117ece4Schristos * 2089*3117ece4Schristos * Most of these size optimizations can be controlled manually. 2090*3117ece4Schristos * 2091*3117ece4Schristos * This is a number from 0 to 2. 2092*3117ece4Schristos * - `XXH_SIZE_OPT` == 0: Default. xxHash makes no size optimizations. Speed 2093*3117ece4Schristos * comes first. 2094*3117ece4Schristos * - `XXH_SIZE_OPT` == 1: Default for `-Os` and `-Oz`. xxHash is more 2095*3117ece4Schristos * conservative and disables hacks that increase code size. It implies the 2096*3117ece4Schristos * options @ref XXH_NO_INLINE_HINTS == 1, @ref XXH_FORCE_ALIGN_CHECK == 0, 2097*3117ece4Schristos * and @ref XXH3_NEON_LANES == 8 if they are not already defined. 2098*3117ece4Schristos * - `XXH_SIZE_OPT` == 2: xxHash tries to make itself as small as possible. 2099*3117ece4Schristos * Performance may cry. For example, the single-shot functions just use the 2100*3117ece4Schristos * streaming API.
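 *
 * As an illustrative sketch (not a prescribed setup), a size-constrained
 * build that compiles the implementation into a single translation unit can
 * pin this macro before the implementation is seen:
 * ```
 * #define XXH_SIZE_OPT 2
 * #define XXH_IMPLEMENTATION
 * #include "xxhash.h"
 * ```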
2101*3117ece4Schristos */ 2102*3117ece4Schristos # define XXH_SIZE_OPT 0 2103*3117ece4Schristos 2104*3117ece4Schristos /*! 2105*3117ece4Schristos * @def XXH_FORCE_ALIGN_CHECK 2106*3117ece4Schristos * @brief If defined to non-zero, adds a special path for aligned inputs (XXH32() 2107*3117ece4Schristos * and XXH64() only). 2108*3117ece4Schristos * 2109*3117ece4Schristos * This is an important performance trick for architectures without decent 2110*3117ece4Schristos * unaligned memory access performance. 2111*3117ece4Schristos * 2112*3117ece4Schristos * It checks for input alignment, and when conditions are met, uses a "fast 2113*3117ece4Schristos * path" employing direct 32-bit/64-bit reads, resulting in _dramatically 2114*3117ece4Schristos * faster_ read speed. 2115*3117ece4Schristos * 2116*3117ece4Schristos * The check costs one initial branch per hash, which is generally negligible, 2117*3117ece4Schristos * but not zero. 2118*3117ece4Schristos * 2119*3117ece4Schristos * Moreover, it's not useful to generate an additional code path if memory 2120*3117ece4Schristos * access uses the same instruction for both aligned and unaligned 2121*3117ece4Schristos * addresses (e.g. x86 and aarch64). 2122*3117ece4Schristos * 2123*3117ece4Schristos * In these cases, the alignment check can be removed by setting this macro to 0. 2124*3117ece4Schristos * Then the code will always use unaligned memory access. 2125*3117ece4Schristos * The alignment check is automatically disabled on x86, x64, ARM64, and some ARM chips, 2126*3117ece4Schristos * platforms known to offer good unaligned memory access performance. 2127*3117ece4Schristos * 2128*3117ece4Schristos * It is also disabled by default when @ref XXH_SIZE_OPT >= 1. 2129*3117ece4Schristos * 2130*3117ece4Schristos * This option does not affect XXH3 (only XXH32 and XXH64). 2131*3117ece4Schristos */ 2132*3117ece4Schristos # define XXH_FORCE_ALIGN_CHECK 0 2133*3117ece4Schristos 2134*3117ece4Schristos /*! 2135*3117ece4Schristos * @def XXH_NO_INLINE_HINTS 2136*3117ece4Schristos * @brief When non-zero, sets all functions to `static`. 2137*3117ece4Schristos * 2138*3117ece4Schristos * By default, xxHash tries to force the compiler to inline almost all internal 2139*3117ece4Schristos * functions. 2140*3117ece4Schristos * 2141*3117ece4Schristos * This can usually improve performance due to reduced jumping and improved 2142*3117ece4Schristos * constant folding, but it significantly increases the size of the binary, which 2143*3117ece4Schristos * might not be favorable. 2144*3117ece4Schristos * 2145*3117ece4Schristos * Additionally, sometimes the forced inlining can be detrimental to performance, 2146*3117ece4Schristos * depending on the architecture. 2147*3117ece4Schristos * 2148*3117ece4Schristos * XXH_NO_INLINE_HINTS marks all internal functions as static, giving the 2149*3117ece4Schristos * compiler full control over whether to inline them or not. 2150*3117ece4Schristos * 2151*3117ece4Schristos * When not optimizing (-O0), when using `-fno-inline` with GCC or Clang, or if 2152*3117ece4Schristos * @ref XXH_SIZE_OPT >= 1, this will automatically be defined. 2153*3117ece4Schristos */ 2154*3117ece4Schristos # define XXH_NO_INLINE_HINTS 0 2155*3117ece4Schristos 2156*3117ece4Schristos /*! 2157*3117ece4Schristos * @def XXH3_INLINE_SECRET 2158*3117ece4Schristos * @brief Determines whether to inline the XXH3 withSecret code.
2159*3117ece4Schristos * 2160*3117ece4Schristos * When the secret size is known, the compiler can improve the performance 2161*3117ece4Schristos * of XXH3_64bits_withSecret() and XXH3_128bits_withSecret(). 2162*3117ece4Schristos * 2163*3117ece4Schristos * However, if the secret size is not known, it doesn't have any benefit. This 2164*3117ece4Schristos * happens when xxHash is compiled into a global symbol. Therefore, if 2165*3117ece4Schristos * @ref XXH_INLINE_ALL is *not* defined, this will be defined to 0. 2166*3117ece4Schristos * 2167*3117ece4Schristos * Additionally, this defaults to 0 on GCC 12+, which has an issue with function pointers 2168*3117ece4Schristos * that are *sometimes* force-inlined on -Og, and it is impossible to automatically 2169*3117ece4Schristos * detect this optimization level. 2170*3117ece4Schristos */ 2171*3117ece4Schristos # define XXH3_INLINE_SECRET 0 2172*3117ece4Schristos 2173*3117ece4Schristos /*! 2174*3117ece4Schristos * @def XXH32_ENDJMP 2175*3117ece4Schristos * @brief Whether to use a jump for `XXH32_finalize`. 2176*3117ece4Schristos * 2177*3117ece4Schristos * By default, `XXH32_finalize` uses multiple branches in the finalizer. 2178*3117ece4Schristos * This is generally the better choice for performance, 2179*3117ece4Schristos * but depending on the exact architecture, a jmp may be preferable. 2180*3117ece4Schristos * 2181*3117ece4Schristos * This setting can only possibly make a difference for very small inputs. 2182*3117ece4Schristos */ 2183*3117ece4Schristos # define XXH32_ENDJMP 0 2184*3117ece4Schristos 2185*3117ece4Schristos /*! 2186*3117ece4Schristos * @internal 2187*3117ece4Schristos * @brief Redefines old internal names. 2188*3117ece4Schristos * 2189*3117ece4Schristos * For compatibility with code that uses xxHash's internals before the names 2190*3117ece4Schristos * were changed to improve namespacing. There is no other reason to use this. 2191*3117ece4Schristos */ 2192*3117ece4Schristos # define XXH_OLD_NAMES 2193*3117ece4Schristos # undef XXH_OLD_NAMES /* don't actually use, it is ugly. */ 2194*3117ece4Schristos 2195*3117ece4Schristos /*! 2196*3117ece4Schristos * @def XXH_NO_STREAM 2197*3117ece4Schristos * @brief Disables the streaming API. 2198*3117ece4Schristos * 2199*3117ece4Schristos * When xxHash is not inlined and the streaming functions are not used, disabling 2200*3117ece4Schristos * them can reduce code size significantly, especially with 2201*3117ece4Schristos * the @ref XXH3_family, which tends to make constant-folded copies of itself. 2202*3117ece4Schristos */ 2203*3117ece4Schristos # define XXH_NO_STREAM 2204*3117ece4Schristos # undef XXH_NO_STREAM /* don't actually */ 2205*3117ece4Schristos #endif /* XXH_DOXYGEN */ 2206*3117ece4Schristos /*! 2207*3117ece4Schristos * @} 2208*3117ece4Schristos */ 2209*3117ece4Schristos 2210*3117ece4Schristos #ifndef XXH_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ 2211*3117ece4Schristos /* Prefer __packed__ structures (method 1) for GCC. 2212*3117ece4Schristos * Exception: ARM < v7 with unaligned access (e.g. Raspbian armhf) still emits byte shifts for method 1, so we use memcpy, 2213*3117ece4Schristos * which for some reason does produce unaligned loads.
*/ 2214*3117ece4Schristos # if defined(__GNUC__) && !(defined(__ARM_ARCH) && __ARM_ARCH < 7 && defined(__ARM_FEATURE_UNALIGNED)) 2215*3117ece4Schristos # define XXH_FORCE_MEMORY_ACCESS 1 2216*3117ece4Schristos # endif 2217*3117ece4Schristos #endif 2218*3117ece4Schristos 2219*3117ece4Schristos #ifndef XXH_SIZE_OPT 2220*3117ece4Schristos /* default to 1 for -Os or -Oz */ 2221*3117ece4Schristos # if (defined(__GNUC__) || defined(__clang__)) && defined(__OPTIMIZE_SIZE__) 2222*3117ece4Schristos # define XXH_SIZE_OPT 1 2223*3117ece4Schristos # else 2224*3117ece4Schristos # define XXH_SIZE_OPT 0 2225*3117ece4Schristos # endif 2226*3117ece4Schristos #endif 2227*3117ece4Schristos 2228*3117ece4Schristos #ifndef XXH_FORCE_ALIGN_CHECK /* can be defined externally */ 2229*3117ece4Schristos /* don't check on sizeopt, x86, aarch64, or arm when unaligned access is available */ 2230*3117ece4Schristos # if XXH_SIZE_OPT >= 1 || \ 2231*3117ece4Schristos defined(__i386) || defined(__x86_64__) || defined(__aarch64__) || defined(__ARM_FEATURE_UNALIGNED) \ 2232*3117ece4Schristos || defined(_M_IX86) || defined(_M_X64) || defined(_M_ARM64) || defined(_M_ARM) /* visual */ 2233*3117ece4Schristos # define XXH_FORCE_ALIGN_CHECK 0 2234*3117ece4Schristos # else 2235*3117ece4Schristos # define XXH_FORCE_ALIGN_CHECK 1 2236*3117ece4Schristos # endif 2237*3117ece4Schristos #endif 2238*3117ece4Schristos 2239*3117ece4Schristos #ifndef XXH_NO_INLINE_HINTS 2240*3117ece4Schristos # if XXH_SIZE_OPT >= 1 || defined(__NO_INLINE__) /* -O0, -fno-inline */ 2241*3117ece4Schristos # define XXH_NO_INLINE_HINTS 1 2242*3117ece4Schristos # else 2243*3117ece4Schristos # define XXH_NO_INLINE_HINTS 0 2244*3117ece4Schristos # endif 2245*3117ece4Schristos #endif 2246*3117ece4Schristos 2247*3117ece4Schristos #ifndef XXH3_INLINE_SECRET 2248*3117ece4Schristos # if (defined(__GNUC__) && !defined(__clang__) && __GNUC__ >= 12) \ 2249*3117ece4Schristos || !defined(XXH_INLINE_ALL) 2250*3117ece4Schristos # define XXH3_INLINE_SECRET 0 2251*3117ece4Schristos # else 2252*3117ece4Schristos # define XXH3_INLINE_SECRET 1 2253*3117ece4Schristos # endif 2254*3117ece4Schristos #endif 2255*3117ece4Schristos 2256*3117ece4Schristos #ifndef XXH32_ENDJMP 2257*3117ece4Schristos /* generally preferable for performance */ 2258*3117ece4Schristos # define XXH32_ENDJMP 0 2259*3117ece4Schristos #endif 2260*3117ece4Schristos 2261*3117ece4Schristos /*! 2262*3117ece4Schristos * @defgroup impl Implementation 2263*3117ece4Schristos * @{ 2264*3117ece4Schristos */ 2265*3117ece4Schristos 2266*3117ece4Schristos 2267*3117ece4Schristos /* ************************************* 2268*3117ece4Schristos * Includes & Memory related functions 2269*3117ece4Schristos ***************************************/ 2270*3117ece4Schristos #if defined(XXH_NO_STREAM) 2271*3117ece4Schristos /* nothing */ 2272*3117ece4Schristos #elif defined(XXH_NO_STDLIB) 2273*3117ece4Schristos 2274*3117ece4Schristos /* When requesting to disable any mention of stdlib, 2275*3117ece4Schristos * the library loses the ability to invoke malloc / free. 2276*3117ece4Schristos * In practice, it means that functions like `XXH*_createState()` 2277*3117ece4Schristos * will always fail and return NULL. 2278*3117ece4Schristos * This flag is useful in situations where 2279*3117ece4Schristos * xxhash.h is integrated into some kernel, embedded, or otherwise limited environment 2280*3117ece4Schristos * without access to dynamic allocation.
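 *
 * For instance (an illustrative sketch; `input` and `inputSize` stand for
 * the caller's data), code built with XXH_NO_STDLIB should expect the NULL
 * result and can fall back to the allocation-free one-shot API:
 * ```
 * XXH32_hash_t h;
 * XXH32_state_t* const state = XXH32_createState();
 * if (state == NULL) {
 *     h = XXH32(input, inputSize, 0);   // one-shot path needs no allocation
 * } else {
 *     XXH32_reset(state, 0);
 *     XXH32_update(state, input, inputSize);
 *     h = XXH32_digest(state);
 *     XXH32_freeState(state);
 * }
 * ```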
2281*3117ece4Schristos */ 2282*3117ece4Schristos 2283*3117ece4Schristos static XXH_CONSTF void* XXH_malloc(size_t s) { (void)s; return NULL; } 2284*3117ece4Schristos static void XXH_free(void* p) { (void)p; } 2285*3117ece4Schristos 2286*3117ece4Schristos #else 2287*3117ece4Schristos 2288*3117ece4Schristos /* 2289*3117ece4Schristos * Modify the local functions below should you wish to use 2290*3117ece4Schristos * different memory routines for malloc() and free() 2291*3117ece4Schristos */ 2292*3117ece4Schristos #include <stdlib.h> 2293*3117ece4Schristos 2294*3117ece4Schristos /*! 2295*3117ece4Schristos * @internal 2296*3117ece4Schristos * @brief Modify this function to use a different routine than malloc(). 2297*3117ece4Schristos */ 2298*3117ece4Schristos static XXH_MALLOCF void* XXH_malloc(size_t s) { return malloc(s); } 2299*3117ece4Schristos 2300*3117ece4Schristos /*! 2301*3117ece4Schristos * @internal 2302*3117ece4Schristos * @brief Modify this function to use a different routine than free(). 2303*3117ece4Schristos */ 2304*3117ece4Schristos static void XXH_free(void* p) { free(p); } 2305*3117ece4Schristos 2306*3117ece4Schristos #endif /* XXH_NO_STDLIB */ 2307*3117ece4Schristos 2308*3117ece4Schristos #include <string.h> 2309*3117ece4Schristos 2310*3117ece4Schristos /*! 2311*3117ece4Schristos * @internal 2312*3117ece4Schristos * @brief Modify this function to use a different routine than memcpy(). 2313*3117ece4Schristos */ 2314*3117ece4Schristos static void* XXH_memcpy(void* dest, const void* src, size_t size) 2315*3117ece4Schristos { 2316*3117ece4Schristos return memcpy(dest,src,size); 2317*3117ece4Schristos } 2318*3117ece4Schristos 2319*3117ece4Schristos #include <limits.h> /* ULLONG_MAX */ 2320*3117ece4Schristos 2321*3117ece4Schristos 2322*3117ece4Schristos /* ************************************* 2323*3117ece4Schristos * Compiler Specific Options 2324*3117ece4Schristos ***************************************/ 2325*3117ece4Schristos #ifdef _MSC_VER /* Visual Studio warning fix */ 2326*3117ece4Schristos # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ 2327*3117ece4Schristos #endif 2328*3117ece4Schristos 2329*3117ece4Schristos #if XXH_NO_INLINE_HINTS /* disable inlining hints */ 2330*3117ece4Schristos # if defined(__GNUC__) || defined(__clang__) 2331*3117ece4Schristos # define XXH_FORCE_INLINE static __attribute__((unused)) 2332*3117ece4Schristos # else 2333*3117ece4Schristos # define XXH_FORCE_INLINE static 2334*3117ece4Schristos # endif 2335*3117ece4Schristos # define XXH_NO_INLINE static 2336*3117ece4Schristos /* enable inlining hints */ 2337*3117ece4Schristos #elif defined(__GNUC__) || defined(__clang__) 2338*3117ece4Schristos # define XXH_FORCE_INLINE static __inline__ __attribute__((always_inline, unused)) 2339*3117ece4Schristos # define XXH_NO_INLINE static __attribute__((noinline)) 2340*3117ece4Schristos #elif defined(_MSC_VER) /* Visual Studio */ 2341*3117ece4Schristos # define XXH_FORCE_INLINE static __forceinline 2342*3117ece4Schristos # define XXH_NO_INLINE static __declspec(noinline) 2343*3117ece4Schristos #elif defined (__cplusplus) \ 2344*3117ece4Schristos || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) /* C99 */ 2345*3117ece4Schristos # define XXH_FORCE_INLINE static inline 2346*3117ece4Schristos # define XXH_NO_INLINE static 2347*3117ece4Schristos #else 2348*3117ece4Schristos # define XXH_FORCE_INLINE static 2349*3117ece4Schristos # define XXH_NO_INLINE static 2350*3117ece4Schristos #endif 2351*3117ece4Schristos 
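/*
 * Illustration for the note above XXH_malloc() and XXH_free(): a vendored
 * copy may swap in custom memory routines by editing those two wrappers,
 * e.g. routing them to a pool allocator. Here my_pool_alloc and my_pool_free
 * are hypothetical application-provided functions, not part of xxHash:
 *
 *     static XXH_MALLOCF void* XXH_malloc(size_t s) { return my_pool_alloc(s); }
 *     static void XXH_free(void* p) { my_pool_free(p); }
 */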
2352*3117ece4Schristos #if XXH3_INLINE_SECRET 2353*3117ece4Schristos # define XXH3_WITH_SECRET_INLINE XXH_FORCE_INLINE 2354*3117ece4Schristos #else 2355*3117ece4Schristos # define XXH3_WITH_SECRET_INLINE XXH_NO_INLINE 2356*3117ece4Schristos #endif 2357*3117ece4Schristos 2358*3117ece4Schristos 2359*3117ece4Schristos /* ************************************* 2360*3117ece4Schristos * Debug 2361*3117ece4Schristos ***************************************/ 2362*3117ece4Schristos /*! 2363*3117ece4Schristos * @ingroup tuning 2364*3117ece4Schristos * @def XXH_DEBUGLEVEL 2365*3117ece4Schristos * @brief Sets the debugging level. 2366*3117ece4Schristos * 2367*3117ece4Schristos * XXH_DEBUGLEVEL is expected to be defined externally, typically via the 2368*3117ece4Schristos * compiler's command line options. The value must be a number. 2369*3117ece4Schristos */ 2370*3117ece4Schristos #ifndef XXH_DEBUGLEVEL 2371*3117ece4Schristos # ifdef DEBUGLEVEL /* backwards compat */ 2372*3117ece4Schristos # define XXH_DEBUGLEVEL DEBUGLEVEL 2373*3117ece4Schristos # else 2374*3117ece4Schristos # define XXH_DEBUGLEVEL 0 2375*3117ece4Schristos # endif 2376*3117ece4Schristos #endif 2377*3117ece4Schristos 2378*3117ece4Schristos #if (XXH_DEBUGLEVEL>=1) 2379*3117ece4Schristos # include <assert.h> /* note: can still be disabled with NDEBUG */ 2380*3117ece4Schristos # define XXH_ASSERT(c) assert(c) 2381*3117ece4Schristos #else 2382*3117ece4Schristos # if defined(__INTEL_COMPILER) 2383*3117ece4Schristos # define XXH_ASSERT(c) XXH_ASSUME((unsigned char) (c)) 2384*3117ece4Schristos # else 2385*3117ece4Schristos # define XXH_ASSERT(c) XXH_ASSUME(c) 2386*3117ece4Schristos # endif 2387*3117ece4Schristos #endif 2388*3117ece4Schristos 2389*3117ece4Schristos /* note: use after variable declarations */ 2390*3117ece4Schristos #ifndef XXH_STATIC_ASSERT 2391*3117ece4Schristos # if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* C11 */ 2392*3117ece4Schristos # define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { _Static_assert((c),m); } while(0) 2393*3117ece4Schristos # elif defined(__cplusplus) && (__cplusplus >= 201103L) /* C++11 */ 2394*3117ece4Schristos # define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { static_assert((c),m); } while(0) 2395*3117ece4Schristos # else 2396*3117ece4Schristos # define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { struct xxh_sa { char x[(c) ? 1 : -1]; }; } while(0) 2397*3117ece4Schristos # endif 2398*3117ece4Schristos # define XXH_STATIC_ASSERT(c) XXH_STATIC_ASSERT_WITH_MESSAGE((c),#c) 2399*3117ece4Schristos #endif 2400*3117ece4Schristos 2401*3117ece4Schristos /*! 2402*3117ece4Schristos * @internal 2403*3117ece4Schristos * @def XXH_COMPILER_GUARD(var) 2404*3117ece4Schristos * @brief Used to prevent unwanted optimizations for @p var. 2405*3117ece4Schristos * 2406*3117ece4Schristos * It uses an empty GCC inline assembly statement with a register constraint 2407*3117ece4Schristos * which forces @p var into a general purpose register (eg eax, ebx, ecx 2408*3117ece4Schristos * on x86) and marks it as modified. 2409*3117ece4Schristos * 2410*3117ece4Schristos * This is used in a few places to avoid unwanted autovectorization (e.g. 2411*3117ece4Schristos * XXH32_round()). All vectorization we want is explicit via intrinsics, 2412*3117ece4Schristos * and _usually_ isn't wanted elsewhere. 2413*3117ece4Schristos * 2414*3117ece4Schristos * We also use it to prevent unwanted constant folding for AArch64 in 2415*3117ece4Schristos * XXH3_initCustomSecret_scalar(). 
2416*3117ece4Schristos */ 2417*3117ece4Schristos #if defined(__GNUC__) || defined(__clang__) 2418*3117ece4Schristos # define XXH_COMPILER_GUARD(var) __asm__("" : "+r" (var)) 2419*3117ece4Schristos #else 2420*3117ece4Schristos # define XXH_COMPILER_GUARD(var) ((void)0) 2421*3117ece4Schristos #endif 2422*3117ece4Schristos 2423*3117ece4Schristos /* Specifically for NEON vectors which use the "w" constraint, on 2424*3117ece4Schristos * Clang. */ 2425*3117ece4Schristos #if defined(__clang__) && defined(__ARM_ARCH) && !defined(__wasm__) 2426*3117ece4Schristos # define XXH_COMPILER_GUARD_CLANG_NEON(var) __asm__("" : "+w" (var)) 2427*3117ece4Schristos #else 2428*3117ece4Schristos # define XXH_COMPILER_GUARD_CLANG_NEON(var) ((void)0) 2429*3117ece4Schristos #endif 2430*3117ece4Schristos 2431*3117ece4Schristos /* ************************************* 2432*3117ece4Schristos * Basic Types 2433*3117ece4Schristos ***************************************/ 2434*3117ece4Schristos #if !defined (__VMS) \ 2435*3117ece4Schristos && (defined (__cplusplus) \ 2436*3117ece4Schristos || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) 2437*3117ece4Schristos # ifdef _AIX 2438*3117ece4Schristos # include <inttypes.h> 2439*3117ece4Schristos # else 2440*3117ece4Schristos # include <stdint.h> 2441*3117ece4Schristos # endif 2442*3117ece4Schristos typedef uint8_t xxh_u8; 2443*3117ece4Schristos #else 2444*3117ece4Schristos typedef unsigned char xxh_u8; 2445*3117ece4Schristos #endif 2446*3117ece4Schristos typedef XXH32_hash_t xxh_u32; 2447*3117ece4Schristos 2448*3117ece4Schristos #ifdef XXH_OLD_NAMES 2449*3117ece4Schristos # warning "XXH_OLD_NAMES is planned to be removed starting v0.9. If the program depends on it, consider moving away from it by employing newer type names directly" 2450*3117ece4Schristos # define BYTE xxh_u8 2451*3117ece4Schristos # define U8 xxh_u8 2452*3117ece4Schristos # define U32 xxh_u32 2453*3117ece4Schristos #endif 2454*3117ece4Schristos 2455*3117ece4Schristos /* *** Memory access *** */ 2456*3117ece4Schristos 2457*3117ece4Schristos /*! 2458*3117ece4Schristos * @internal 2459*3117ece4Schristos * @fn xxh_u32 XXH_read32(const void* ptr) 2460*3117ece4Schristos * @brief Reads an unaligned 32-bit integer from @p ptr in native endianness. 2461*3117ece4Schristos * 2462*3117ece4Schristos * Affected by @ref XXH_FORCE_MEMORY_ACCESS. 2463*3117ece4Schristos * 2464*3117ece4Schristos * @param ptr The pointer to read from. 2465*3117ece4Schristos * @return The 32-bit native endian integer from the bytes at @p ptr. 2466*3117ece4Schristos */ 2467*3117ece4Schristos 2468*3117ece4Schristos /*! 2469*3117ece4Schristos * @internal 2470*3117ece4Schristos * @fn xxh_u32 XXH_readLE32(const void* ptr) 2471*3117ece4Schristos * @brief Reads an unaligned 32-bit little endian integer from @p ptr. 2472*3117ece4Schristos * 2473*3117ece4Schristos * Affected by @ref XXH_FORCE_MEMORY_ACCESS. 2474*3117ece4Schristos * 2475*3117ece4Schristos * @param ptr The pointer to read from. 2476*3117ece4Schristos * @return The 32-bit little endian integer from the bytes at @p ptr. 2477*3117ece4Schristos */ 2478*3117ece4Schristos 2479*3117ece4Schristos /*! 2480*3117ece4Schristos * @internal 2481*3117ece4Schristos * @fn xxh_u32 XXH_readBE32(const void* ptr) 2482*3117ece4Schristos * @brief Reads an unaligned 32-bit big endian integer from @p ptr. 2483*3117ece4Schristos * 2484*3117ece4Schristos * Affected by @ref XXH_FORCE_MEMORY_ACCESS. 2485*3117ece4Schristos * 2486*3117ece4Schristos * @param ptr The pointer to read from. 
2487*3117ece4Schristos * @return The 32-bit big endian integer from the bytes at @p ptr. 2488*3117ece4Schristos */ 2489*3117ece4Schristos 2490*3117ece4Schristos /*! 2491*3117ece4Schristos * @internal 2492*3117ece4Schristos * @fn xxh_u32 XXH_readLE32_align(const void* ptr, XXH_alignment align) 2493*3117ece4Schristos * @brief Like @ref XXH_readLE32(), but has an option for aligned reads. 2494*3117ece4Schristos * 2495*3117ece4Schristos * Affected by @ref XXH_FORCE_MEMORY_ACCESS. 2496*3117ece4Schristos * Note that when @ref XXH_FORCE_ALIGN_CHECK == 0, the @p align parameter is 2497*3117ece4Schristos * always @ref XXH_alignment::XXH_unaligned. 2498*3117ece4Schristos * 2499*3117ece4Schristos * @param ptr The pointer to read from. 2500*3117ece4Schristos * @param align Whether @p ptr is aligned. 2501*3117ece4Schristos * @pre 2502*3117ece4Schristos * If @p align == @ref XXH_alignment::XXH_aligned, @p ptr must be 4-byte 2503*3117ece4Schristos * aligned. 2504*3117ece4Schristos * @return The 32-bit little endian integer from the bytes at @p ptr. 2505*3117ece4Schristos */ 2506*3117ece4Schristos 2507*3117ece4Schristos #if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3)) 2508*3117ece4Schristos /* 2509*3117ece4Schristos * Manual byteshift. Best for old compilers which don't inline memcpy. 2510*3117ece4Schristos * We actually directly use XXH_readLE32 and XXH_readBE32. 2511*3117ece4Schristos */ 2512*3117ece4Schristos #elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2)) 2513*3117ece4Schristos 2514*3117ece4Schristos /* 2515*3117ece4Schristos * Force direct memory access. Only works on CPUs which support unaligned memory 2516*3117ece4Schristos * access in hardware. 2517*3117ece4Schristos */ 2518*3117ece4Schristos static xxh_u32 XXH_read32(const void* memPtr) { return *(const xxh_u32*) memPtr; } 2519*3117ece4Schristos 2520*3117ece4Schristos #elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1)) 2521*3117ece4Schristos 2522*3117ece4Schristos /* 2523*3117ece4Schristos * __attribute__((aligned(1))) is supported by gcc and clang. Originally the 2524*3117ece4Schristos * documentation claimed that it only increased the alignment, but actually it 2525*3117ece4Schristos * can decrease it on gcc, clang, and icc: 2526*3117ece4Schristos * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=69502, 2527*3117ece4Schristos * https://gcc.godbolt.org/z/xYez1j67Y. 2528*3117ece4Schristos */ 2529*3117ece4Schristos #ifdef XXH_OLD_NAMES 2530*3117ece4Schristos typedef union { xxh_u32 u32; } __attribute__((packed)) unalign; 2531*3117ece4Schristos #endif 2532*3117ece4Schristos static xxh_u32 XXH_read32(const void* ptr) 2533*3117ece4Schristos { 2534*3117ece4Schristos typedef __attribute__((aligned(1))) xxh_u32 xxh_unalign32; 2535*3117ece4Schristos return *((const xxh_unalign32*)ptr); 2536*3117ece4Schristos } 2537*3117ece4Schristos 2538*3117ece4Schristos #else 2539*3117ece4Schristos 2540*3117ece4Schristos /* 2541*3117ece4Schristos * Portable and safe solution. Generally efficient.
2542*3117ece4Schristos * see: https://fastcompression.blogspot.com/2015/08/accessing-unaligned-memory.html 2543*3117ece4Schristos */ 2544*3117ece4Schristos static xxh_u32 XXH_read32(const void* memPtr) 2545*3117ece4Schristos { 2546*3117ece4Schristos xxh_u32 val; 2547*3117ece4Schristos XXH_memcpy(&val, memPtr, sizeof(val)); 2548*3117ece4Schristos return val; 2549*3117ece4Schristos } 2550*3117ece4Schristos 2551*3117ece4Schristos #endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */ 2552*3117ece4Schristos 2553*3117ece4Schristos 2554*3117ece4Schristos /* *** Endianness *** */ 2555*3117ece4Schristos 2556*3117ece4Schristos /*! 2557*3117ece4Schristos * @ingroup tuning 2558*3117ece4Schristos * @def XXH_CPU_LITTLE_ENDIAN 2559*3117ece4Schristos * @brief Whether the target is little endian. 2560*3117ece4Schristos * 2561*3117ece4Schristos * Defined to 1 if the target is little endian, or 0 if it is big endian. 2562*3117ece4Schristos * It can be defined externally, for example on the compiler command line. 2563*3117ece4Schristos * 2564*3117ece4Schristos * If it is not defined, 2565*3117ece4Schristos * a runtime check (which is usually constant folded) is used instead. 2566*3117ece4Schristos * 2567*3117ece4Schristos * @note 2568*3117ece4Schristos * This is not necessarily defined to an integer constant. 2569*3117ece4Schristos * 2570*3117ece4Schristos * @see XXH_isLittleEndian() for the runtime check. 2571*3117ece4Schristos */ 2572*3117ece4Schristos #ifndef XXH_CPU_LITTLE_ENDIAN 2573*3117ece4Schristos /* 2574*3117ece4Schristos * Try to detect endianness automatically, to avoid the nonstandard behavior 2575*3117ece4Schristos * in `XXH_isLittleEndian()` 2576*3117ece4Schristos */ 2577*3117ece4Schristos # if defined(_WIN32) /* Windows is always little endian */ \ 2578*3117ece4Schristos || defined(__LITTLE_ENDIAN__) \ 2579*3117ece4Schristos || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) 2580*3117ece4Schristos # define XXH_CPU_LITTLE_ENDIAN 1 2581*3117ece4Schristos # elif defined(__BIG_ENDIAN__) \ 2582*3117ece4Schristos || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) 2583*3117ece4Schristos # define XXH_CPU_LITTLE_ENDIAN 0 2584*3117ece4Schristos # else 2585*3117ece4Schristos /*! 2586*3117ece4Schristos * @internal 2587*3117ece4Schristos * @brief Runtime check for @ref XXH_CPU_LITTLE_ENDIAN. 2588*3117ece4Schristos * 2589*3117ece4Schristos * Most compilers will constant fold this. 2590*3117ece4Schristos */ 2591*3117ece4Schristos static int XXH_isLittleEndian(void) 2592*3117ece4Schristos { 2593*3117ece4Schristos /* 2594*3117ece4Schristos * Portable and well-defined behavior. 2595*3117ece4Schristos * Don't use static: it is detrimental to performance. 
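 *
 * The check stores the 32-bit value 1 and inspects its first byte:
 * on a little endian target the low-order byte comes first, so c[0] == 1,
 * while on a big endian target c[0] holds the high-order byte and reads 0.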
2596*3117ece4Schristos */ 2597*3117ece4Schristos const union { xxh_u32 u; xxh_u8 c[4]; } one = { 1 }; 2598*3117ece4Schristos return one.c[0]; 2599*3117ece4Schristos } 2600*3117ece4Schristos # define XXH_CPU_LITTLE_ENDIAN XXH_isLittleEndian() 2601*3117ece4Schristos # endif 2602*3117ece4Schristos #endif 2603*3117ece4Schristos 2604*3117ece4Schristos 2605*3117ece4Schristos 2606*3117ece4Schristos 2607*3117ece4Schristos /* **************************************** 2608*3117ece4Schristos * Compiler-specific Functions and Macros 2609*3117ece4Schristos ******************************************/ 2610*3117ece4Schristos #define XXH_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) 2611*3117ece4Schristos 2612*3117ece4Schristos #ifdef __has_builtin 2613*3117ece4Schristos # define XXH_HAS_BUILTIN(x) __has_builtin(x) 2614*3117ece4Schristos #else 2615*3117ece4Schristos # define XXH_HAS_BUILTIN(x) 0 2616*3117ece4Schristos #endif 2617*3117ece4Schristos 2618*3117ece4Schristos 2619*3117ece4Schristos 2620*3117ece4Schristos /* 2621*3117ece4Schristos * C23 and future versions have standard "unreachable()". 2622*3117ece4Schristos * Once it has been implemented reliably we can add it as an 2623*3117ece4Schristos * additional case: 2624*3117ece4Schristos * 2625*3117ece4Schristos * ``` 2626*3117ece4Schristos * #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= XXH_C23_VN) 2627*3117ece4Schristos * # include <stddef.h> 2628*3117ece4Schristos * # ifdef unreachable 2629*3117ece4Schristos * # define XXH_UNREACHABLE() unreachable() 2630*3117ece4Schristos * # endif 2631*3117ece4Schristos * #endif 2632*3117ece4Schristos * ``` 2633*3117ece4Schristos * 2634*3117ece4Schristos * Note C++23 also has std::unreachable() which can be detected 2635*3117ece4Schristos * as follows: 2636*3117ece4Schristos * ``` 2637*3117ece4Schristos * #if defined(__cpp_lib_unreachable) && (__cpp_lib_unreachable >= 202202L) 2638*3117ece4Schristos * # include <utility> 2639*3117ece4Schristos * # define XXH_UNREACHABLE() std::unreachable() 2640*3117ece4Schristos * #endif 2641*3117ece4Schristos * ``` 2642*3117ece4Schristos * NB: `__cpp_lib_unreachable` is defined in the `<version>` header. 2643*3117ece4Schristos * We don't use that as including `<utility>` in `extern "C"` blocks 2644*3117ece4Schristos * doesn't work on GCC12 2645*3117ece4Schristos */ 2646*3117ece4Schristos 2647*3117ece4Schristos #if XXH_HAS_BUILTIN(__builtin_unreachable) 2648*3117ece4Schristos # define XXH_UNREACHABLE() __builtin_unreachable() 2649*3117ece4Schristos 2650*3117ece4Schristos #elif defined(_MSC_VER) 2651*3117ece4Schristos # define XXH_UNREACHABLE() __assume(0) 2652*3117ece4Schristos 2653*3117ece4Schristos #else 2654*3117ece4Schristos # define XXH_UNREACHABLE() 2655*3117ece4Schristos #endif 2656*3117ece4Schristos 2657*3117ece4Schristos #if XXH_HAS_BUILTIN(__builtin_assume) 2658*3117ece4Schristos # define XXH_ASSUME(c) __builtin_assume(c) 2659*3117ece4Schristos #else 2660*3117ece4Schristos # define XXH_ASSUME(c) if (!(c)) { XXH_UNREACHABLE(); } 2661*3117ece4Schristos #endif 2662*3117ece4Schristos 2663*3117ece4Schristos /*! 2664*3117ece4Schristos * @internal 2665*3117ece4Schristos * @def XXH_rotl32(x,r) 2666*3117ece4Schristos * @brief 32-bit rotate left. 2667*3117ece4Schristos * 2668*3117ece4Schristos * @param x The 32-bit integer to be rotated. 2669*3117ece4Schristos * @param r The number of bits to rotate. 
2670*3117ece4Schristos * @pre 2671*3117ece4Schristos * @p r > 0 && @p r < 32 2672*3117ece4Schristos * @note 2673*3117ece4Schristos * @p x and @p r may be evaluated multiple times. 2674*3117ece4Schristos * @return The rotated result. 2675*3117ece4Schristos */ 2676*3117ece4Schristos #if !defined(NO_CLANG_BUILTIN) && XXH_HAS_BUILTIN(__builtin_rotateleft32) \ 2677*3117ece4Schristos && XXH_HAS_BUILTIN(__builtin_rotateleft64) 2678*3117ece4Schristos # define XXH_rotl32 __builtin_rotateleft32 2679*3117ece4Schristos # define XXH_rotl64 __builtin_rotateleft64 2680*3117ece4Schristos /* Note: although _rotl exists for minGW (GCC under windows), performance seems poor */ 2681*3117ece4Schristos #elif defined(_MSC_VER) 2682*3117ece4Schristos # define XXH_rotl32(x,r) _rotl(x,r) 2683*3117ece4Schristos # define XXH_rotl64(x,r) _rotl64(x,r) 2684*3117ece4Schristos #else 2685*3117ece4Schristos # define XXH_rotl32(x,r) (((x) << (r)) | ((x) >> (32 - (r)))) 2686*3117ece4Schristos # define XXH_rotl64(x,r) (((x) << (r)) | ((x) >> (64 - (r)))) 2687*3117ece4Schristos #endif 2688*3117ece4Schristos 2689*3117ece4Schristos /*! 2690*3117ece4Schristos * @internal 2691*3117ece4Schristos * @fn xxh_u32 XXH_swap32(xxh_u32 x) 2692*3117ece4Schristos * @brief A 32-bit byteswap. 2693*3117ece4Schristos * 2694*3117ece4Schristos * @param x The 32-bit integer to byteswap. 2695*3117ece4Schristos * @return @p x, byteswapped. 2696*3117ece4Schristos */ 2697*3117ece4Schristos #if defined(_MSC_VER) /* Visual Studio */ 2698*3117ece4Schristos # define XXH_swap32 _byteswap_ulong 2699*3117ece4Schristos #elif XXH_GCC_VERSION >= 403 2700*3117ece4Schristos # define XXH_swap32 __builtin_bswap32 2701*3117ece4Schristos #else 2702*3117ece4Schristos static xxh_u32 XXH_swap32 (xxh_u32 x) 2703*3117ece4Schristos { 2704*3117ece4Schristos return ((x << 24) & 0xff000000 ) | 2705*3117ece4Schristos ((x << 8) & 0x00ff0000 ) | 2706*3117ece4Schristos ((x >> 8) & 0x0000ff00 ) | 2707*3117ece4Schristos ((x >> 24) & 0x000000ff ); 2708*3117ece4Schristos } 2709*3117ece4Schristos #endif 2710*3117ece4Schristos 2711*3117ece4Schristos 2712*3117ece4Schristos /* *************************** 2713*3117ece4Schristos * Memory reads 2714*3117ece4Schristos *****************************/ 2715*3117ece4Schristos 2716*3117ece4Schristos /*! 2717*3117ece4Schristos * @internal 2718*3117ece4Schristos * @brief Enum to indicate whether a pointer is aligned. 2719*3117ece4Schristos */ 2720*3117ece4Schristos typedef enum { 2721*3117ece4Schristos XXH_aligned, /*!< Aligned */ 2722*3117ece4Schristos XXH_unaligned /*!< Possibly unaligned */ 2723*3117ece4Schristos } XXH_alignment; 2724*3117ece4Schristos 2725*3117ece4Schristos /* 2726*3117ece4Schristos * XXH_FORCE_MEMORY_ACCESS==3 is an endian-independent byteshift load. 2727*3117ece4Schristos * 2728*3117ece4Schristos * This is ideal for older compilers which don't inline memcpy. 
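 *
 * Worked example: given the byte sequence {0x01, 0x02, 0x03, 0x04},
 * XXH_readLE32() below assembles 0x04030201 and XXH_readBE32() assembles
 * 0x01020304, regardless of the host's native byte order.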
2729*3117ece4Schristos */ 2730*3117ece4Schristos #if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3)) 2731*3117ece4Schristos 2732*3117ece4Schristos XXH_FORCE_INLINE xxh_u32 XXH_readLE32(const void* memPtr) 2733*3117ece4Schristos { 2734*3117ece4Schristos const xxh_u8* bytePtr = (const xxh_u8 *)memPtr; 2735*3117ece4Schristos return bytePtr[0] 2736*3117ece4Schristos | ((xxh_u32)bytePtr[1] << 8) 2737*3117ece4Schristos | ((xxh_u32)bytePtr[2] << 16) 2738*3117ece4Schristos | ((xxh_u32)bytePtr[3] << 24); 2739*3117ece4Schristos } 2740*3117ece4Schristos 2741*3117ece4Schristos XXH_FORCE_INLINE xxh_u32 XXH_readBE32(const void* memPtr) 2742*3117ece4Schristos { 2743*3117ece4Schristos const xxh_u8* bytePtr = (const xxh_u8 *)memPtr; 2744*3117ece4Schristos return bytePtr[3] 2745*3117ece4Schristos | ((xxh_u32)bytePtr[2] << 8) 2746*3117ece4Schristos | ((xxh_u32)bytePtr[1] << 16) 2747*3117ece4Schristos | ((xxh_u32)bytePtr[0] << 24); 2748*3117ece4Schristos } 2749*3117ece4Schristos 2750*3117ece4Schristos #else 2751*3117ece4Schristos XXH_FORCE_INLINE xxh_u32 XXH_readLE32(const void* ptr) 2752*3117ece4Schristos { 2753*3117ece4Schristos return XXH_CPU_LITTLE_ENDIAN ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr)); 2754*3117ece4Schristos } 2755*3117ece4Schristos 2756*3117ece4Schristos static xxh_u32 XXH_readBE32(const void* ptr) 2757*3117ece4Schristos { 2758*3117ece4Schristos return XXH_CPU_LITTLE_ENDIAN ? XXH_swap32(XXH_read32(ptr)) : XXH_read32(ptr); 2759*3117ece4Schristos } 2760*3117ece4Schristos #endif 2761*3117ece4Schristos 2762*3117ece4Schristos XXH_FORCE_INLINE xxh_u32 2763*3117ece4Schristos XXH_readLE32_align(const void* ptr, XXH_alignment align) 2764*3117ece4Schristos { 2765*3117ece4Schristos if (align==XXH_unaligned) { 2766*3117ece4Schristos return XXH_readLE32(ptr); 2767*3117ece4Schristos } else { 2768*3117ece4Schristos return XXH_CPU_LITTLE_ENDIAN ? *(const xxh_u32*)ptr : XXH_swap32(*(const xxh_u32*)ptr); 2769*3117ece4Schristos } 2770*3117ece4Schristos } 2771*3117ece4Schristos 2772*3117ece4Schristos 2773*3117ece4Schristos /* ************************************* 2774*3117ece4Schristos * Misc 2775*3117ece4Schristos ***************************************/ 2776*3117ece4Schristos /*! @ingroup public */ 2777*3117ece4Schristos XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; } 2778*3117ece4Schristos 2779*3117ece4Schristos 2780*3117ece4Schristos /* ******************************************************************* 2781*3117ece4Schristos * 32-bit hash functions 2782*3117ece4Schristos *********************************************************************/ 2783*3117ece4Schristos /*! 2784*3117ece4Schristos * @} 2785*3117ece4Schristos * @defgroup XXH32_impl XXH32 implementation 2786*3117ece4Schristos * @ingroup impl 2787*3117ece4Schristos * 2788*3117ece4Schristos * Details on the XXH32 implementation. 
2789*3117ece4Schristos * @{ 2790*3117ece4Schristos */ 2791*3117ece4Schristos /* #define instead of static const, to be used as initializers */ 2792*3117ece4Schristos #define XXH_PRIME32_1 0x9E3779B1U /*!< 0b10011110001101110111100110110001 */ 2793*3117ece4Schristos #define XXH_PRIME32_2 0x85EBCA77U /*!< 0b10000101111010111100101001110111 */ 2794*3117ece4Schristos #define XXH_PRIME32_3 0xC2B2AE3DU /*!< 0b11000010101100101010111000111101 */ 2795*3117ece4Schristos #define XXH_PRIME32_4 0x27D4EB2FU /*!< 0b00100111110101001110101100101111 */ 2796*3117ece4Schristos #define XXH_PRIME32_5 0x165667B1U /*!< 0b00010110010101100110011110110001 */ 2797*3117ece4Schristos 2798*3117ece4Schristos #ifdef XXH_OLD_NAMES 2799*3117ece4Schristos # define PRIME32_1 XXH_PRIME32_1 2800*3117ece4Schristos # define PRIME32_2 XXH_PRIME32_2 2801*3117ece4Schristos # define PRIME32_3 XXH_PRIME32_3 2802*3117ece4Schristos # define PRIME32_4 XXH_PRIME32_4 2803*3117ece4Schristos # define PRIME32_5 XXH_PRIME32_5 2804*3117ece4Schristos #endif 2805*3117ece4Schristos 2806*3117ece4Schristos /*! 2807*3117ece4Schristos * @internal 2808*3117ece4Schristos * @brief Normal stripe processing routine. 2809*3117ece4Schristos * 2810*3117ece4Schristos * This shuffles the bits so that any bit from @p input impacts several bits in 2811*3117ece4Schristos * @p acc. 2812*3117ece4Schristos * 2813*3117ece4Schristos * @param acc The accumulator lane. 2814*3117ece4Schristos * @param input The stripe of input to mix. 2815*3117ece4Schristos * @return The mixed accumulator lane. 2816*3117ece4Schristos */ 2817*3117ece4Schristos static xxh_u32 XXH32_round(xxh_u32 acc, xxh_u32 input) 2818*3117ece4Schristos { 2819*3117ece4Schristos acc += input * XXH_PRIME32_2; 2820*3117ece4Schristos acc = XXH_rotl32(acc, 13); 2821*3117ece4Schristos acc *= XXH_PRIME32_1; 2822*3117ece4Schristos #if (defined(__SSE4_1__) || defined(__aarch64__) || defined(__wasm_simd128__)) && !defined(XXH_ENABLE_AUTOVECTORIZE) 2823*3117ece4Schristos /* 2824*3117ece4Schristos * UGLY HACK: 2825*3117ece4Schristos * A compiler fence is the only thing that prevents GCC and Clang from 2826*3117ece4Schristos * autovectorizing the XXH32 loop (pragmas and attributes don't work for some 2827*3117ece4Schristos * reason) without globally disabling SSE4.1. 2828*3117ece4Schristos * 2829*3117ece4Schristos * The reason we want to avoid vectorization is that despite working on 2830*3117ece4Schristos * 4 integers at a time, there are multiple factors slowing XXH32 down on 2831*3117ece4Schristos * SSE4: 2832*3117ece4Schristos * - There's a ridiculous amount of lag from pmulld (10 cycles of latency on 2833*3117ece4Schristos * newer chips!) making it slightly slower to multiply four integers at 2834*3117ece4Schristos * once compared to four integers independently. Even on Sandy/Ivy Bridge, 2835*3117ece4Schristos * where pmulld was fastest, it is still not worth going into SSE 2836*3117ece4Schristos * just to multiply unless doing a long operation.
2837*3117ece4Schristos * 2838*3117ece4Schristos * - Four instructions are required to rotate, 2839*3117ece4Schristos * movdqa tmp, v // not required with VEX encoding 2840*3117ece4Schristos * pslld tmp, 13 // tmp <<= 13 2841*3117ece4Schristos * psrld v, 19 // v >>= 19 2842*3117ece4Schristos * por v, tmp // v |= tmp 2843*3117ece4Schristos * compared to one for scalar: 2844*3117ece4Schristos * roll v, 13 // reliably fast across the board 2845*3117ece4Schristos * shldl v, v, 13 // Sandy Bridge and later prefer this for some reason 2846*3117ece4Schristos * 2847*3117ece4Schristos * - Instruction level parallelism is actually more beneficial here because 2848*3117ece4Schristos * the SIMD version serializes this operation: while v1 is rotating, v2 2849*3117ece4Schristos * can load data, while v3 can multiply. SSE forces them to operate 2850*3117ece4Schristos * together. 2851*3117ece4Schristos * 2852*3117ece4Schristos * This is also enabled on AArch64, as Clang is *very aggressive* in vectorizing 2853*3117ece4Schristos * the loop. NEON is only faster on the A53, and with the newer cores, it is less 2854*3117ece4Schristos * than half the speed. 2855*3117ece4Schristos * 2856*3117ece4Schristos * Additionally, this is used on WASM SIMD128 because it JITs to the same 2857*3117ece4Schristos * SIMD instructions and has the same issue. 2858*3117ece4Schristos */ 2859*3117ece4Schristos XXH_COMPILER_GUARD(acc); 2860*3117ece4Schristos #endif 2861*3117ece4Schristos return acc; 2862*3117ece4Schristos } 2863*3117ece4Schristos 2864*3117ece4Schristos /*! 2865*3117ece4Schristos * @internal 2866*3117ece4Schristos * @brief Mixes all bits to finalize the hash. 2867*3117ece4Schristos * 2868*3117ece4Schristos * The final mix ensures that all input bits have a chance to impact any bit in 2869*3117ece4Schristos * the output digest, resulting in an unbiased distribution. 2870*3117ece4Schristos * 2871*3117ece4Schristos * @param hash The hash to avalanche. 2872*3117ece4Schristos * @return The avalanched hash. 2873*3117ece4Schristos */ 2874*3117ece4Schristos static xxh_u32 XXH32_avalanche(xxh_u32 hash) 2875*3117ece4Schristos { 2876*3117ece4Schristos hash ^= hash >> 15; 2877*3117ece4Schristos hash *= XXH_PRIME32_2; 2878*3117ece4Schristos hash ^= hash >> 13; 2879*3117ece4Schristos hash *= XXH_PRIME32_3; 2880*3117ece4Schristos hash ^= hash >> 16; 2881*3117ece4Schristos return hash; 2882*3117ece4Schristos } 2883*3117ece4Schristos 2884*3117ece4Schristos #define XXH_get32bits(p) XXH_readLE32_align(p, align) 2885*3117ece4Schristos 2886*3117ece4Schristos /*! 2887*3117ece4Schristos * @internal 2888*3117ece4Schristos * @brief Processes the last 0-15 bytes of @p ptr. 2889*3117ece4Schristos * 2890*3117ece4Schristos * There may be up to 15 bytes remaining to consume from the input. 2891*3117ece4Schristos * This final stage will digest them to ensure that all input bytes are present 2892*3117ece4Schristos * in the final mix. 2893*3117ece4Schristos * 2894*3117ece4Schristos * @param hash The hash to finalize. 2895*3117ece4Schristos * @param ptr The pointer to the remaining input. 2896*3117ece4Schristos * @param len The remaining length, modulo 16. 2897*3117ece4Schristos * @param align Whether @p ptr is aligned. 2898*3117ece4Schristos * @return The finalized hash. 2899*3117ece4Schristos * @see XXH64_finalize().
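 *
 * Worked example: after hashing 37 bytes, len % 16 == 5 bytes remain, so the
 * finalizer performs one 4-byte step and then one 1-byte step before the
 * final avalanche.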
2900*3117ece4Schristos */ 2901*3117ece4Schristos static XXH_PUREF xxh_u32 2902*3117ece4Schristos XXH32_finalize(xxh_u32 hash, const xxh_u8* ptr, size_t len, XXH_alignment align) 2903*3117ece4Schristos { 2904*3117ece4Schristos #define XXH_PROCESS1 do { \ 2905*3117ece4Schristos hash += (*ptr++) * XXH_PRIME32_5; \ 2906*3117ece4Schristos hash = XXH_rotl32(hash, 11) * XXH_PRIME32_1; \ 2907*3117ece4Schristos } while (0) 2908*3117ece4Schristos 2909*3117ece4Schristos #define XXH_PROCESS4 do { \ 2910*3117ece4Schristos hash += XXH_get32bits(ptr) * XXH_PRIME32_3; \ 2911*3117ece4Schristos ptr += 4; \ 2912*3117ece4Schristos hash = XXH_rotl32(hash, 17) * XXH_PRIME32_4; \ 2913*3117ece4Schristos } while (0) 2914*3117ece4Schristos 2915*3117ece4Schristos if (ptr==NULL) XXH_ASSERT(len == 0); 2916*3117ece4Schristos 2917*3117ece4Schristos /* Compact rerolled version; generally faster */ 2918*3117ece4Schristos if (!XXH32_ENDJMP) { 2919*3117ece4Schristos len &= 15; 2920*3117ece4Schristos while (len >= 4) { 2921*3117ece4Schristos XXH_PROCESS4; 2922*3117ece4Schristos len -= 4; 2923*3117ece4Schristos } 2924*3117ece4Schristos while (len > 0) { 2925*3117ece4Schristos XXH_PROCESS1; 2926*3117ece4Schristos --len; 2927*3117ece4Schristos } 2928*3117ece4Schristos return XXH32_avalanche(hash); 2929*3117ece4Schristos } else { 2930*3117ece4Schristos switch(len&15) /* or switch(bEnd - p) */ { 2931*3117ece4Schristos case 12: XXH_PROCESS4; 2932*3117ece4Schristos XXH_FALLTHROUGH; /* fallthrough */ 2933*3117ece4Schristos case 8: XXH_PROCESS4; 2934*3117ece4Schristos XXH_FALLTHROUGH; /* fallthrough */ 2935*3117ece4Schristos case 4: XXH_PROCESS4; 2936*3117ece4Schristos return XXH32_avalanche(hash); 2937*3117ece4Schristos 2938*3117ece4Schristos case 13: XXH_PROCESS4; 2939*3117ece4Schristos XXH_FALLTHROUGH; /* fallthrough */ 2940*3117ece4Schristos case 9: XXH_PROCESS4; 2941*3117ece4Schristos XXH_FALLTHROUGH; /* fallthrough */ 2942*3117ece4Schristos case 5: XXH_PROCESS4; 2943*3117ece4Schristos XXH_PROCESS1; 2944*3117ece4Schristos return XXH32_avalanche(hash); 2945*3117ece4Schristos 2946*3117ece4Schristos case 14: XXH_PROCESS4; 2947*3117ece4Schristos XXH_FALLTHROUGH; /* fallthrough */ 2948*3117ece4Schristos case 10: XXH_PROCESS4; 2949*3117ece4Schristos XXH_FALLTHROUGH; /* fallthrough */ 2950*3117ece4Schristos case 6: XXH_PROCESS4; 2951*3117ece4Schristos XXH_PROCESS1; 2952*3117ece4Schristos XXH_PROCESS1; 2953*3117ece4Schristos return XXH32_avalanche(hash); 2954*3117ece4Schristos 2955*3117ece4Schristos case 15: XXH_PROCESS4; 2956*3117ece4Schristos XXH_FALLTHROUGH; /* fallthrough */ 2957*3117ece4Schristos case 11: XXH_PROCESS4; 2958*3117ece4Schristos XXH_FALLTHROUGH; /* fallthrough */ 2959*3117ece4Schristos case 7: XXH_PROCESS4; 2960*3117ece4Schristos XXH_FALLTHROUGH; /* fallthrough */ 2961*3117ece4Schristos case 3: XXH_PROCESS1; 2962*3117ece4Schristos XXH_FALLTHROUGH; /* fallthrough */ 2963*3117ece4Schristos case 2: XXH_PROCESS1; 2964*3117ece4Schristos XXH_FALLTHROUGH; /* fallthrough */ 2965*3117ece4Schristos case 1: XXH_PROCESS1; 2966*3117ece4Schristos XXH_FALLTHROUGH; /* fallthrough */ 2967*3117ece4Schristos case 0: return XXH32_avalanche(hash); 2968*3117ece4Schristos } 2969*3117ece4Schristos XXH_ASSERT(0); 2970*3117ece4Schristos return hash; /* reaching this point is deemed impossible */ 2971*3117ece4Schristos } 2972*3117ece4Schristos } 2973*3117ece4Schristos 2974*3117ece4Schristos #ifdef XXH_OLD_NAMES 2975*3117ece4Schristos # define PROCESS1 XXH_PROCESS1 2976*3117ece4Schristos # define PROCESS4 XXH_PROCESS4 2977*3117ece4Schristos #else 
2978*3117ece4Schristos # undef XXH_PROCESS1 2979*3117ece4Schristos # undef XXH_PROCESS4 2980*3117ece4Schristos #endif 2981*3117ece4Schristos 2982*3117ece4Schristos /*! 2983*3117ece4Schristos * @internal 2984*3117ece4Schristos * @brief The implementation for @ref XXH32(). 2985*3117ece4Schristos * 2986*3117ece4Schristos * @param input , len , seed Directly passed from @ref XXH32(). 2987*3117ece4Schristos * @param align Whether @p input is aligned. 2988*3117ece4Schristos * @return The calculated hash. 2989*3117ece4Schristos */ 2990*3117ece4Schristos XXH_FORCE_INLINE XXH_PUREF xxh_u32 2991*3117ece4Schristos XXH32_endian_align(const xxh_u8* input, size_t len, xxh_u32 seed, XXH_alignment align) 2992*3117ece4Schristos { 2993*3117ece4Schristos xxh_u32 h32; 2994*3117ece4Schristos 2995*3117ece4Schristos if (input==NULL) XXH_ASSERT(len == 0); 2996*3117ece4Schristos 2997*3117ece4Schristos if (len>=16) { 2998*3117ece4Schristos const xxh_u8* const bEnd = input + len; 2999*3117ece4Schristos const xxh_u8* const limit = bEnd - 15; 3000*3117ece4Schristos xxh_u32 v1 = seed + XXH_PRIME32_1 + XXH_PRIME32_2; 3001*3117ece4Schristos xxh_u32 v2 = seed + XXH_PRIME32_2; 3002*3117ece4Schristos xxh_u32 v3 = seed + 0; 3003*3117ece4Schristos xxh_u32 v4 = seed - XXH_PRIME32_1; 3004*3117ece4Schristos 3005*3117ece4Schristos do { 3006*3117ece4Schristos v1 = XXH32_round(v1, XXH_get32bits(input)); input += 4; 3007*3117ece4Schristos v2 = XXH32_round(v2, XXH_get32bits(input)); input += 4; 3008*3117ece4Schristos v3 = XXH32_round(v3, XXH_get32bits(input)); input += 4; 3009*3117ece4Schristos v4 = XXH32_round(v4, XXH_get32bits(input)); input += 4; 3010*3117ece4Schristos } while (input < limit); 3011*3117ece4Schristos 3012*3117ece4Schristos h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) 3013*3117ece4Schristos + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18); 3014*3117ece4Schristos } else { 3015*3117ece4Schristos h32 = seed + XXH_PRIME32_5; 3016*3117ece4Schristos } 3017*3117ece4Schristos 3018*3117ece4Schristos h32 += (xxh_u32)len; 3019*3117ece4Schristos 3020*3117ece4Schristos return XXH32_finalize(h32, input, len&15, align); 3021*3117ece4Schristos } 3022*3117ece4Schristos 3023*3117ece4Schristos /*! @ingroup XXH32_family */ 3024*3117ece4Schristos XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t len, XXH32_hash_t seed) 3025*3117ece4Schristos { 3026*3117ece4Schristos #if !defined(XXH_NO_STREAM) && XXH_SIZE_OPT >= 2 3027*3117ece4Schristos /* Simple version, good for code maintenance, but unfortunately slow for small inputs */ 3028*3117ece4Schristos XXH32_state_t state; 3029*3117ece4Schristos XXH32_reset(&state, seed); 3030*3117ece4Schristos XXH32_update(&state, (const xxh_u8*)input, len); 3031*3117ece4Schristos return XXH32_digest(&state); 3032*3117ece4Schristos #else 3033*3117ece4Schristos if (XXH_FORCE_ALIGN_CHECK) { 3034*3117ece4Schristos if ((((size_t)input) & 3) == 0) { /* Input is 4-bytes aligned, leverage the speed benefit */ 3035*3117ece4Schristos return XXH32_endian_align((const xxh_u8*)input, len, seed, XXH_aligned); 3036*3117ece4Schristos } } 3037*3117ece4Schristos 3038*3117ece4Schristos return XXH32_endian_align((const xxh_u8*)input, len, seed, XXH_unaligned); 3039*3117ece4Schristos #endif 3040*3117ece4Schristos } 3041*3117ece4Schristos 3042*3117ece4Schristos 3043*3117ece4Schristos 3044*3117ece4Schristos /******* Hash streaming *******/ 3045*3117ece4Schristos #ifndef XXH_NO_STREAM 3046*3117ece4Schristos /*! 
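 * @note
 * A minimal streaming round-trip with the functions below (a sketch:
 * `buffer` and `bufferSize` stand for the caller's input; seed 0 is
 * arbitrary):
 * ```
 * XXH32_hash_t hash = 0;
 * XXH32_state_t* const state = XXH32_createState();
 * if (state != NULL) {
 *     if (XXH32_reset(state, 0) == XXH_OK) {
 *         XXH32_update(state, buffer, bufferSize);  // may be called many times
 *         hash = XXH32_digest(state);
 *     }
 *     XXH32_freeState(state);  // release the state when done
 * }
 * ```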
@ingroup XXH32_family */ 3047*3117ece4Schristos XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void) 3048*3117ece4Schristos { 3049*3117ece4Schristos return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t)); 3050*3117ece4Schristos } 3051*3117ece4Schristos /*! @ingroup XXH32_family */ 3052*3117ece4Schristos XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr) 3053*3117ece4Schristos { 3054*3117ece4Schristos XXH_free(statePtr); 3055*3117ece4Schristos return XXH_OK; 3056*3117ece4Schristos } 3057*3117ece4Schristos 3058*3117ece4Schristos /*! @ingroup XXH32_family */ 3059*3117ece4Schristos XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dstState, const XXH32_state_t* srcState) 3060*3117ece4Schristos { 3061*3117ece4Schristos XXH_memcpy(dstState, srcState, sizeof(*dstState)); 3062*3117ece4Schristos } 3063*3117ece4Schristos 3064*3117ece4Schristos /*! @ingroup XXH32_family */ 3065*3117ece4Schristos XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, XXH32_hash_t seed) 3066*3117ece4Schristos { 3067*3117ece4Schristos XXH_ASSERT(statePtr != NULL); 3068*3117ece4Schristos memset(statePtr, 0, sizeof(*statePtr)); 3069*3117ece4Schristos statePtr->v[0] = seed + XXH_PRIME32_1 + XXH_PRIME32_2; 3070*3117ece4Schristos statePtr->v[1] = seed + XXH_PRIME32_2; 3071*3117ece4Schristos statePtr->v[2] = seed + 0; 3072*3117ece4Schristos statePtr->v[3] = seed - XXH_PRIME32_1; 3073*3117ece4Schristos return XXH_OK; 3074*3117ece4Schristos } 3075*3117ece4Schristos 3076*3117ece4Schristos 3077*3117ece4Schristos /*! @ingroup XXH32_family */ 3078*3117ece4Schristos XXH_PUBLIC_API XXH_errorcode 3079*3117ece4Schristos XXH32_update(XXH32_state_t* state, const void* input, size_t len) 3080*3117ece4Schristos { 3081*3117ece4Schristos if (input==NULL) { 3082*3117ece4Schristos XXH_ASSERT(len == 0); 3083*3117ece4Schristos return XXH_OK; 3084*3117ece4Schristos } 3085*3117ece4Schristos 3086*3117ece4Schristos { const xxh_u8* p = (const xxh_u8*)input; 3087*3117ece4Schristos const xxh_u8* const bEnd = p + len; 3088*3117ece4Schristos 3089*3117ece4Schristos state->total_len_32 += (XXH32_hash_t)len; 3090*3117ece4Schristos state->large_len |= (XXH32_hash_t)((len>=16) | (state->total_len_32>=16)); 3091*3117ece4Schristos 3092*3117ece4Schristos if (state->memsize + len < 16) { /* fill in tmp buffer */ 3093*3117ece4Schristos XXH_memcpy((xxh_u8*)(state->mem32) + state->memsize, input, len); 3094*3117ece4Schristos state->memsize += (XXH32_hash_t)len; 3095*3117ece4Schristos return XXH_OK; 3096*3117ece4Schristos } 3097*3117ece4Schristos 3098*3117ece4Schristos if (state->memsize) { /* some data left from previous update */ 3099*3117ece4Schristos XXH_memcpy((xxh_u8*)(state->mem32) + state->memsize, input, 16-state->memsize); 3100*3117ece4Schristos { const xxh_u32* p32 = state->mem32; 3101*3117ece4Schristos state->v[0] = XXH32_round(state->v[0], XXH_readLE32(p32)); p32++; 3102*3117ece4Schristos state->v[1] = XXH32_round(state->v[1], XXH_readLE32(p32)); p32++; 3103*3117ece4Schristos state->v[2] = XXH32_round(state->v[2], XXH_readLE32(p32)); p32++; 3104*3117ece4Schristos state->v[3] = XXH32_round(state->v[3], XXH_readLE32(p32)); 3105*3117ece4Schristos } 3106*3117ece4Schristos p += 16-state->memsize; 3107*3117ece4Schristos state->memsize = 0; 3108*3117ece4Schristos } 3109*3117ece4Schristos 3110*3117ece4Schristos if (p <= bEnd-16) { 3111*3117ece4Schristos const xxh_u8* const limit = bEnd - 16; 3112*3117ece4Schristos 3113*3117ece4Schristos do { 3114*3117ece4Schristos state->v[0] = XXH32_round(state->v[0], XXH_readLE32(p)); p+=4; 
3115*3117ece4Schristos state->v[1] = XXH32_round(state->v[1], XXH_readLE32(p)); p+=4; 3116*3117ece4Schristos state->v[2] = XXH32_round(state->v[2], XXH_readLE32(p)); p+=4; 3117*3117ece4Schristos state->v[3] = XXH32_round(state->v[3], XXH_readLE32(p)); p+=4; 3118*3117ece4Schristos } while (p<=limit); 3119*3117ece4Schristos 3120*3117ece4Schristos } 3121*3117ece4Schristos 3122*3117ece4Schristos if (p < bEnd) { 3123*3117ece4Schristos XXH_memcpy(state->mem32, p, (size_t)(bEnd-p)); 3124*3117ece4Schristos state->memsize = (unsigned)(bEnd-p); 3125*3117ece4Schristos } 3126*3117ece4Schristos } 3127*3117ece4Schristos 3128*3117ece4Schristos return XXH_OK; 3129*3117ece4Schristos } 3130*3117ece4Schristos 3131*3117ece4Schristos 3132*3117ece4Schristos /*! @ingroup XXH32_family */ 3133*3117ece4Schristos XXH_PUBLIC_API XXH32_hash_t XXH32_digest(const XXH32_state_t* state) 3134*3117ece4Schristos { 3135*3117ece4Schristos xxh_u32 h32; 3136*3117ece4Schristos 3137*3117ece4Schristos if (state->large_len) { 3138*3117ece4Schristos h32 = XXH_rotl32(state->v[0], 1) 3139*3117ece4Schristos + XXH_rotl32(state->v[1], 7) 3140*3117ece4Schristos + XXH_rotl32(state->v[2], 12) 3141*3117ece4Schristos + XXH_rotl32(state->v[3], 18); 3142*3117ece4Schristos } else { 3143*3117ece4Schristos h32 = state->v[2] /* == seed */ + XXH_PRIME32_5; 3144*3117ece4Schristos } 3145*3117ece4Schristos 3146*3117ece4Schristos h32 += state->total_len_32; 3147*3117ece4Schristos 3148*3117ece4Schristos return XXH32_finalize(h32, (const xxh_u8*)state->mem32, state->memsize, XXH_aligned); 3149*3117ece4Schristos } 3150*3117ece4Schristos #endif /* !XXH_NO_STREAM */ 3151*3117ece4Schristos 3152*3117ece4Schristos /******* Canonical representation *******/ 3153*3117ece4Schristos 3154*3117ece4Schristos /*! @ingroup XXH32_family */ 3155*3117ece4Schristos XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash) 3156*3117ece4Schristos { 3157*3117ece4Schristos XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t)); 3158*3117ece4Schristos if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash); 3159*3117ece4Schristos XXH_memcpy(dst, &hash, sizeof(*dst)); 3160*3117ece4Schristos } 3161*3117ece4Schristos /*! @ingroup XXH32_family */ 3162*3117ece4Schristos XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src) 3163*3117ece4Schristos { 3164*3117ece4Schristos return XXH_readBE32(src); 3165*3117ece4Schristos } 3166*3117ece4Schristos 3167*3117ece4Schristos 3168*3117ece4Schristos #ifndef XXH_NO_LONG_LONG 3169*3117ece4Schristos 3170*3117ece4Schristos /* ******************************************************************* 3171*3117ece4Schristos * 64-bit hash functions 3172*3117ece4Schristos *********************************************************************/ 3173*3117ece4Schristos /*! 3174*3117ece4Schristos * @} 3175*3117ece4Schristos * @ingroup impl 3176*3117ece4Schristos * @{ 3177*3117ece4Schristos */ 3178*3117ece4Schristos /******* Memory access *******/ 3179*3117ece4Schristos 3180*3117ece4Schristos typedef XXH64_hash_t xxh_u64; 3181*3117ece4Schristos 3182*3117ece4Schristos #ifdef XXH_OLD_NAMES 3183*3117ece4Schristos # define U64 xxh_u64 3184*3117ece4Schristos #endif 3185*3117ece4Schristos 3186*3117ece4Schristos #if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3)) 3187*3117ece4Schristos /* 3188*3117ece4Schristos * Manual byteshift. Best for old compilers which don't inline memcpy. 3189*3117ece4Schristos * We actually directly use XXH_readLE64 and XXH_readBE64. 

#ifndef XXH_NO_LONG_LONG

/* *******************************************************************
*  64-bit hash functions
*********************************************************************/
/*!
 * @}
 * @ingroup impl
 * @{
 */
/******* Memory access *******/

typedef XXH64_hash_t xxh_u64;

#ifdef XXH_OLD_NAMES
#  define U64 xxh_u64
#endif

#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3))
/*
 * Manual byteshift. Best for old compilers which don't inline memcpy.
 * We actually directly use XXH_readLE64 and XXH_readBE64.
 */
#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))

/* Force direct memory access. Only works on CPUs which support unaligned memory access in hardware */
static xxh_u64 XXH_read64(const void* memPtr)
{
    return *(const xxh_u64*) memPtr;
}

#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))

/*
 * __attribute__((aligned(1))) is supported by gcc and clang. Originally the
 * documentation claimed that it only increased the alignment, but actually it
 * can decrease it on gcc, clang, and icc:
 * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=69502,
 * https://gcc.godbolt.org/z/xYez1j67Y.
 */
#ifdef XXH_OLD_NAMES
typedef union { xxh_u32 u32; xxh_u64 u64; } __attribute__((packed)) unalign64;
#endif
static xxh_u64 XXH_read64(const void* ptr)
{
    typedef __attribute__((aligned(1))) xxh_u64 xxh_unalign64;
    return *((const xxh_unalign64*)ptr);
}

#else

/*
 * Portable and safe solution. Generally efficient.
 * see: https://fastcompression.blogspot.com/2015/08/accessing-unaligned-memory.html
 */
static xxh_u64 XXH_read64(const void* memPtr)
{
    xxh_u64 val;
    XXH_memcpy(&val, memPtr, sizeof(val));
    return val;
}

#endif   /* XXH_FORCE_DIRECT_MEMORY_ACCESS */

#if defined(_MSC_VER)     /* Visual Studio */
#  define XXH_swap64 _byteswap_uint64
#elif XXH_GCC_VERSION >= 403
#  define XXH_swap64 __builtin_bswap64
#else
static xxh_u64 XXH_swap64(xxh_u64 x)
{
    return  ((x << 56) & 0xff00000000000000ULL) |
            ((x << 40) & 0x00ff000000000000ULL) |
            ((x << 24) & 0x0000ff0000000000ULL) |
            ((x << 8)  & 0x000000ff00000000ULL) |
            ((x >> 8)  & 0x00000000ff000000ULL) |
            ((x >> 24) & 0x0000000000ff0000ULL) |
            ((x >> 40) & 0x000000000000ff00ULL) |
            ((x >> 56) & 0x00000000000000ffULL);
}
#endif

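/*
 * Illustrative note (not part of the library): with a modern optimizing
 * compiler, the XXH_memcpy() inside the portable XXH_read64() above typically
 * compiles down to a single (possibly unaligned) 64-bit load, so the "safe"
 * default usually costs nothing compared to the XXH_FORCE_MEMORY_ACCESS
 * overrides.
 */
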
/* XXH_FORCE_MEMORY_ACCESS==3 is an endian-independent byteshift load. */
#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3))

XXH_FORCE_INLINE xxh_u64 XXH_readLE64(const void* memPtr)
{
    const xxh_u8* bytePtr = (const xxh_u8 *)memPtr;
    return bytePtr[0]
         | ((xxh_u64)bytePtr[1] << 8)
         | ((xxh_u64)bytePtr[2] << 16)
         | ((xxh_u64)bytePtr[3] << 24)
         | ((xxh_u64)bytePtr[4] << 32)
         | ((xxh_u64)bytePtr[5] << 40)
         | ((xxh_u64)bytePtr[6] << 48)
         | ((xxh_u64)bytePtr[7] << 56);
}

XXH_FORCE_INLINE xxh_u64 XXH_readBE64(const void* memPtr)
{
    const xxh_u8* bytePtr = (const xxh_u8 *)memPtr;
    return bytePtr[7]
         | ((xxh_u64)bytePtr[6] << 8)
         | ((xxh_u64)bytePtr[5] << 16)
         | ((xxh_u64)bytePtr[4] << 24)
         | ((xxh_u64)bytePtr[3] << 32)
         | ((xxh_u64)bytePtr[2] << 40)
         | ((xxh_u64)bytePtr[1] << 48)
         | ((xxh_u64)bytePtr[0] << 56);
}

#else
XXH_FORCE_INLINE xxh_u64 XXH_readLE64(const void* ptr)
{
    return XXH_CPU_LITTLE_ENDIAN ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr));
}

static xxh_u64 XXH_readBE64(const void* ptr)
{
    return XXH_CPU_LITTLE_ENDIAN ? XXH_swap64(XXH_read64(ptr)) : XXH_read64(ptr);
}
#endif

XXH_FORCE_INLINE xxh_u64
XXH_readLE64_align(const void* ptr, XXH_alignment align)
{
    if (align==XXH_unaligned)
        return XXH_readLE64(ptr);
    else
        return XXH_CPU_LITTLE_ENDIAN ? *(const xxh_u64*)ptr : XXH_swap64(*(const xxh_u64*)ptr);
}

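/*
 * Illustrative note (not part of the library): for the byte sequence
 * { 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 }, XXH_readLE64() returns
 * 0x0807060504030201 and XXH_readBE64() returns 0x0102030405060708, regardless
 * of the host's native endianness.
 */
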
/******* xxh64 *******/
/*!
 * @}
 * @defgroup XXH64_impl XXH64 implementation
 * @ingroup impl
 *
 * Details on the XXH64 implementation.
 * @{
 */
/* #define rather than static const, to be used as initializers */
#define XXH_PRIME64_1  0x9E3779B185EBCA87ULL  /*!< 0b1001111000110111011110011011000110000101111010111100101010000111 */
#define XXH_PRIME64_2  0xC2B2AE3D27D4EB4FULL  /*!< 0b1100001010110010101011100011110100100111110101001110101101001111 */
#define XXH_PRIME64_3  0x165667B19E3779F9ULL  /*!< 0b0001011001010110011001111011000110011110001101110111100111111001 */
#define XXH_PRIME64_4  0x85EBCA77C2B2AE63ULL  /*!< 0b1000010111101011110010100111011111000010101100101010111001100011 */
#define XXH_PRIME64_5  0x27D4EB2F165667C5ULL  /*!< 0b0010011111010100111010110010111100010110010101100110011111000101 */

#ifdef XXH_OLD_NAMES
#  define PRIME64_1 XXH_PRIME64_1
#  define PRIME64_2 XXH_PRIME64_2
#  define PRIME64_3 XXH_PRIME64_3
#  define PRIME64_4 XXH_PRIME64_4
#  define PRIME64_5 XXH_PRIME64_5
#endif

/*! @copydoc XXH32_round */
static xxh_u64 XXH64_round(xxh_u64 acc, xxh_u64 input)
{
    acc += input * XXH_PRIME64_2;
    acc  = XXH_rotl64(acc, 31);
    acc *= XXH_PRIME64_1;
#if (defined(__AVX512F__)) && !defined(XXH_ENABLE_AUTOVECTORIZE)
    /*
     * DISABLE AUTOVECTORIZATION:
     * A compiler fence is used to prevent GCC and Clang from
     * autovectorizing the XXH64 loop (pragmas and attributes don't work for some
     * reason) without globally disabling AVX512.
     *
     * Autovectorization of XXH64 tends to be detrimental,
     * though the exact outcome may vary with the exact CPU and compiler version.
     * For information, it has been reported as detrimental for Skylake-X,
     * but possibly beneficial for Zen4.
     *
     * The default is to disable auto-vectorization,
     * but you can opt in by defining the `XXH_ENABLE_AUTOVECTORIZE` build macro.
     */
    XXH_COMPILER_GUARD(acc);
#endif
    return acc;
}

static xxh_u64 XXH64_mergeRound(xxh_u64 acc, xxh_u64 val)
{
    val  = XXH64_round(0, val);
    acc ^= val;
    acc  = acc * XXH_PRIME64_1 + XXH_PRIME64_4;
    return acc;
}

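/*
 * Illustrative note (not part of the library): for inputs of 32 bytes or more,
 * XXH64 runs four independent accumulators (one XXH64_round() per 8-byte lane),
 * then folds them together with XXH64_mergeRound(). Roughly:
 *
 *     acc[i] = XXH64_round(acc[i], lane[i]);       // per 32-byte stripe
 *     h64    = rotl(acc[0], 1) + rotl(acc[1], 7)
 *            + rotl(acc[2], 12) + rotl(acc[3], 18); // combine
 *     h64    = XXH64_mergeRound(h64, acc[i]);       // once per accumulator
 */
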
/*! @copydoc XXH32_avalanche */
static xxh_u64 XXH64_avalanche(xxh_u64 hash)
{
    hash ^= hash >> 33;
    hash *= XXH_PRIME64_2;
    hash ^= hash >> 29;
    hash *= XXH_PRIME64_3;
    hash ^= hash >> 32;
    return hash;
}


#define XXH_get64bits(p) XXH_readLE64_align(p, align)

/*!
 * @internal
 * @brief Processes the last 0-31 bytes of @p ptr.
 *
 * There may be up to 31 bytes remaining to consume from the input.
 * This final stage will digest them to ensure that all input bytes are present
 * in the final mix.
 *
 * @param hash The hash to finalize.
 * @param ptr The pointer to the remaining input.
 * @param len The remaining length, modulo 32.
 * @param align Whether @p ptr is aligned.
 * @return The finalized hash
 * @see XXH32_finalize().
 */
static XXH_PUREF xxh_u64
XXH64_finalize(xxh_u64 hash, const xxh_u8* ptr, size_t len, XXH_alignment align)
{
    if (ptr==NULL) XXH_ASSERT(len == 0);
    len &= 31;
    while (len >= 8) {
        xxh_u64 const k1 = XXH64_round(0, XXH_get64bits(ptr));
        ptr += 8;
        hash ^= k1;
        hash  = XXH_rotl64(hash,27) * XXH_PRIME64_1 + XXH_PRIME64_4;
        len -= 8;
    }
    if (len >= 4) {
        hash ^= (xxh_u64)(XXH_get32bits(ptr)) * XXH_PRIME64_1;
        ptr += 4;
        hash = XXH_rotl64(hash, 23) * XXH_PRIME64_2 + XXH_PRIME64_3;
        len -= 4;
    }
    while (len > 0) {
        hash ^= (*ptr++) * XXH_PRIME64_5;
        hash = XXH_rotl64(hash, 11) * XXH_PRIME64_1;
        --len;
    }
    return  XXH64_avalanche(hash);
}

#ifdef XXH_OLD_NAMES
#  define PROCESS1_64 XXH_PROCESS1_64
#  define PROCESS4_64 XXH_PROCESS4_64
#  define PROCESS8_64 XXH_PROCESS8_64
#else
#  undef XXH_PROCESS1_64
#  undef XXH_PROCESS4_64
#  undef XXH_PROCESS8_64
#endif

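/*
 * Illustrative note (not part of the library): XXH64_finalize() drains the
 * remainder in 8-, then 4-, then 1-byte steps. For example, with len == 31 it
 * consumes 3 x 8 bytes, then 1 x 4 bytes, then 3 x 1 byte, so every tail
 * length from 0 to 31 maps to a distinct, fully-mixed sequence of operations.
 */
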
/*!
 * @internal
 * @brief The implementation for @ref XXH64().
 *
 * @param input , len , seed Directly passed from @ref XXH64().
 * @param align Whether @p input is aligned.
 * @return The calculated hash.
 */
XXH_FORCE_INLINE XXH_PUREF xxh_u64
XXH64_endian_align(const xxh_u8* input, size_t len, xxh_u64 seed, XXH_alignment align)
{
    xxh_u64 h64;
    if (input==NULL) XXH_ASSERT(len == 0);

    if (len>=32) {
        const xxh_u8* const bEnd = input + len;
        const xxh_u8* const limit = bEnd - 31;
        xxh_u64 v1 = seed + XXH_PRIME64_1 + XXH_PRIME64_2;
        xxh_u64 v2 = seed + XXH_PRIME64_2;
        xxh_u64 v3 = seed + 0;
        xxh_u64 v4 = seed - XXH_PRIME64_1;

        do {
            v1 = XXH64_round(v1, XXH_get64bits(input)); input+=8;
            v2 = XXH64_round(v2, XXH_get64bits(input)); input+=8;
            v3 = XXH64_round(v3, XXH_get64bits(input)); input+=8;
            v4 = XXH64_round(v4, XXH_get64bits(input)); input+=8;
        } while (input<limit);

        h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
        h64 = XXH64_mergeRound(h64, v1);
        h64 = XXH64_mergeRound(h64, v2);
        h64 = XXH64_mergeRound(h64, v3);
        h64 = XXH64_mergeRound(h64, v4);

    } else {
        h64  = seed + XXH_PRIME64_5;
    }

    h64 += (xxh_u64) len;

    return XXH64_finalize(h64, input, len, align);
}


/*! @ingroup XXH64_family */
XXH_PUBLIC_API XXH64_hash_t XXH64 (XXH_NOESCAPE const void* input, size_t len, XXH64_hash_t seed)
{
#if !defined(XXH_NO_STREAM) && XXH_SIZE_OPT >= 2
    /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
    XXH64_state_t state;
    XXH64_reset(&state, seed);
    XXH64_update(&state, (const xxh_u8*)input, len);
    return XXH64_digest(&state);
#else
    if (XXH_FORCE_ALIGN_CHECK) {
        if ((((size_t)input) & 7)==0) {  /* Input is aligned, let's leverage the speed advantage */
            return XXH64_endian_align((const xxh_u8*)input, len, seed, XXH_aligned);
    }   }

    return XXH64_endian_align((const xxh_u8*)input, len, seed, XXH_unaligned);

#endif
}

/******* Hash Streaming *******/
#ifndef XXH_NO_STREAM
/*! @ingroup XXH64_family*/
XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void)
{
    return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t));
}
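
/*
 * Illustrative sketch (not part of the library): as with XXH32, the XXH64
 * streaming API matches the one-shot function for any split of the input.
 * The function name below is hypothetical.
 *
 *     XXH64_hash_t hash_streamed(const void* buf, size_t len, XXH64_hash_t seed)
 *     {
 *         XXH64_state_t* const st = XXH64_createState();
 *         XXH64_hash_t h64 = 0;
 *         if (st != NULL) {
 *             (void)XXH64_reset(st, seed);
 *             (void)XXH64_update(st, buf, len);
 *             h64 = XXH64_digest(st);
 *             (void)XXH64_freeState(st);
 *         }
 *         return h64;   // equals XXH64(buf, len, seed)
 *     }
 */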
/*! @ingroup XXH64_family */
XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr)
{
    XXH_free(statePtr);
    return XXH_OK;
}

/*! @ingroup XXH64_family */
XXH_PUBLIC_API void XXH64_copyState(XXH_NOESCAPE XXH64_state_t* dstState, const XXH64_state_t* srcState)
{
    XXH_memcpy(dstState, srcState, sizeof(*dstState));
}

/*! @ingroup XXH64_family */
XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH_NOESCAPE XXH64_state_t* statePtr, XXH64_hash_t seed)
{
    XXH_ASSERT(statePtr != NULL);
    memset(statePtr, 0, sizeof(*statePtr));
    statePtr->v[0] = seed + XXH_PRIME64_1 + XXH_PRIME64_2;
    statePtr->v[1] = seed + XXH_PRIME64_2;
    statePtr->v[2] = seed + 0;
    statePtr->v[3] = seed - XXH_PRIME64_1;
    return XXH_OK;
}

/*! @ingroup XXH64_family */
XXH_PUBLIC_API XXH_errorcode
XXH64_update (XXH_NOESCAPE XXH64_state_t* state, XXH_NOESCAPE const void* input, size_t len)
{
    if (input==NULL) {
        XXH_ASSERT(len == 0);
        return XXH_OK;
    }

    {   const xxh_u8* p = (const xxh_u8*)input;
        const xxh_u8* const bEnd = p + len;

        state->total_len += len;

        if (state->memsize + len < 32) {  /* fill in tmp buffer */
            XXH_memcpy(((xxh_u8*)state->mem64) + state->memsize, input, len);
            state->memsize += (xxh_u32)len;
            return XXH_OK;
        }

        if (state->memsize) {   /* tmp buffer is full */
            XXH_memcpy(((xxh_u8*)state->mem64) + state->memsize, input, 32-state->memsize);
            state->v[0] = XXH64_round(state->v[0], XXH_readLE64(state->mem64+0));
            state->v[1] = XXH64_round(state->v[1], XXH_readLE64(state->mem64+1));
            state->v[2] = XXH64_round(state->v[2], XXH_readLE64(state->mem64+2));
            state->v[3] = XXH64_round(state->v[3], XXH_readLE64(state->mem64+3));
            p += 32 - state->memsize;
            state->memsize = 0;
        }

        if (p+32 <= bEnd) {
            const xxh_u8* const limit = bEnd - 32;

            do {
                state->v[0] = XXH64_round(state->v[0], XXH_readLE64(p)); p+=8;
                state->v[1] = XXH64_round(state->v[1], XXH_readLE64(p)); p+=8;
                state->v[2] = XXH64_round(state->v[2], XXH_readLE64(p)); p+=8;
                state->v[3] = XXH64_round(state->v[3], XXH_readLE64(p)); p+=8;
            } while (p<=limit);

        }

        if (p < bEnd) {
            XXH_memcpy(state->mem64, p, (size_t)(bEnd-p));
            state->memsize = (unsigned)(bEnd-p);
        }
    }

    return XXH_OK;
}


/*! @ingroup XXH64_family */
XXH_PUBLIC_API XXH64_hash_t XXH64_digest(XXH_NOESCAPE const XXH64_state_t* state)
{
    xxh_u64 h64;

    if (state->total_len >= 32) {
        h64 = XXH_rotl64(state->v[0], 1) + XXH_rotl64(state->v[1], 7) + XXH_rotl64(state->v[2], 12) + XXH_rotl64(state->v[3], 18);
        h64 = XXH64_mergeRound(h64, state->v[0]);
        h64 = XXH64_mergeRound(h64, state->v[1]);
        h64 = XXH64_mergeRound(h64, state->v[2]);
        h64 = XXH64_mergeRound(h64, state->v[3]);
    } else {
        h64  = state->v[2] /*seed*/ + XXH_PRIME64_5;
    }

    h64 += (xxh_u64) state->total_len;

    return XXH64_finalize(h64, (const xxh_u8*)state->mem64, (size_t)state->total_len, XXH_aligned);
}
#endif /* !XXH_NO_STREAM */

/******* Canonical representation *******/

/*! @ingroup XXH64_family */
XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH_NOESCAPE XXH64_canonical_t* dst, XXH64_hash_t hash)
{
    XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t));
    if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash);
    XXH_memcpy(dst, &hash, sizeof(*dst));
}

/*! @ingroup XXH64_family */
XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(XXH_NOESCAPE const XXH64_canonical_t* src)
{
    return XXH_readBE64(src);
}

#ifndef XXH_NO_XXH3

/* *********************************************************************
*  XXH3
*  New generation hash designed for speed on small keys and vectorization
************************************************************************ */
/*!
 * @}
 * @defgroup XXH3_impl XXH3 implementation
 * @ingroup impl
 * @{
 */

/* === Compiler specifics === */

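/*
 * Illustrative note (not part of the library): XXH_RESTRICT, defined below,
 * maps to C99 `restrict` (or a compiler-specific `__restrict`) so that the
 * vectorized loops can assume the input and secret buffers do not alias,
 * which lets compilers keep loaded values in registers instead of reloading
 * them after every store.
 */
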
#if ((defined(sun) || defined(__sun)) && __cplusplus) /* Solaris includes __STDC_VERSION__ with C++. Tested with GCC 5.5 */
#  define XXH_RESTRICT   /* disable */
#elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* >= C99 */
#  define XXH_RESTRICT   restrict
#elif (defined (__GNUC__) && ((__GNUC__ > 3) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))) \
   || (defined (__clang__)) \
   || (defined (_MSC_VER) && (_MSC_VER >= 1400)) \
   || (defined (__INTEL_COMPILER) && (__INTEL_COMPILER >= 1300))
/*
 * There are a LOT more compilers that recognize __restrict but this
 * covers the major ones.
 */
#  define XXH_RESTRICT   __restrict
#else
#  define XXH_RESTRICT   /* disable */
#endif

#if (defined(__GNUC__) && (__GNUC__ >= 3)) \
  || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) \
  || defined(__clang__)
#    define XXH_likely(x) __builtin_expect(x, 1)
#    define XXH_unlikely(x) __builtin_expect(x, 0)
#else
#    define XXH_likely(x) (x)
#    define XXH_unlikely(x) (x)
#endif

#ifndef XXH_HAS_INCLUDE
#  ifdef __has_include
/*
 * Not defined as XXH_HAS_INCLUDE(x) (function-like) because
 * this causes segfaults in Apple Clang 4.2 (on Mac OS X 10.7 Lion)
 */
#    define XXH_HAS_INCLUDE __has_include
#  else
#    define XXH_HAS_INCLUDE(x) 0
#  endif
#endif

#if defined(__GNUC__) || defined(__clang__)
#  if defined(__ARM_FEATURE_SVE)
#    include <arm_sve.h>
#  endif
#  if defined(__ARM_NEON__) || defined(__ARM_NEON) \
   || (defined(_M_ARM) && _M_ARM >= 7) \
   || defined(_M_ARM64) || defined(_M_ARM64EC) \
   || (defined(__wasm_simd128__) && XXH_HAS_INCLUDE(<arm_neon.h>)) /* WASM SIMD128 via SIMDe */
#    define inline __inline__  /* circumvent a clang bug */
#    include <arm_neon.h>
#    undef inline
#  elif defined(__AVX2__)
#    include <immintrin.h>
#  elif defined(__SSE2__)
#    include <emmintrin.h>
#  endif
#endif

#if defined(_MSC_VER)
#  include <intrin.h>
#endif

/*
 * One goal of XXH3 is to make it fast on both 32-bit and 64-bit, while
 * remaining a true 64-bit/128-bit hash function.
 *
 * This is done by prioritizing a subset of 64-bit operations that can be
 * emulated without too many steps on the average 32-bit machine.
 *
 * For example, these two lines seem similar, and run equally fast on 64-bit:
 *
 *   xxh_u64 x;
 *   x ^= (x >> 47); // good
 *   x ^= (x >> 13); // bad
 *
 * However, to a 32-bit machine, there is a major difference.
 *
 * x ^= (x >> 47) looks like this:
 *
 *   x.lo ^= (x.hi >> (47 - 32));
 *
 * while x ^= (x >> 13) looks like this:
 *
 *   // note: funnel shifts are not usually cheap.
 *   x.lo ^= (x.lo >> 13) | (x.hi << (32 - 13));
 *   x.hi ^= (x.hi >> 13);
 *
 * The first one is significantly faster than the second, simply because the
 * shift is larger than 32. This means:
 *  - All the bits we need are in the upper 32 bits, so we can ignore the lower
 *    32 bits in the shift.
 *  - The shift result will always fit in the lower 32 bits, and therefore,
 *    we can ignore the upper 32 bits in the xor.
 *
 * Thanks to this optimization, XXH3 only requires these features to be efficient:
 *
 *  - Usable unaligned access
 *  - A 32-bit or 64-bit ALU
 *    - If 32-bit, a decent ADC instruction
 *  - A 32- or 64-bit multiply with a 64-bit result
 *  - For the 128-bit variant, a decent byteswap helps short inputs.
 *
 * The first two are already required by XXH32, and almost all 32-bit and 64-bit
 * platforms which can run XXH32 can run XXH3 efficiently.
 *
 * Thumb-1, the classic 16-bit only subset of ARM's instruction set, is one
 * notable exception.
 *
 * First of all, Thumb-1 lacks support for the UMULL instruction which
 * performs the important long multiply. This means numerous __aeabi_lmul
 * calls.
 *
 * Second of all, the 8 functional registers are just not enough.
 * Setup for __aeabi_lmul, byteshift loads, pointers, and all arithmetic need
 * Lo registers, and this shuffling results in thousands more MOVs than A32.
 *
 * A32 and T32 don't have this limitation. They can access all 14 registers,
 * do a 32->64 multiply with UMULL, and the flexible operand allowing free
 * shifts is helpful, too.
 *
 * Therefore, we do a quick sanity check.
 *
 * If compiling Thumb-1 for a target which supports ARM instructions, we will
 * emit a warning, as it is not a "sane" platform to compile for.
 *
 * Usually, if this happens, it is because of an accident and you probably need
 * to specify -march, as you likely meant to compile for a newer architecture.
 *
 * Credit: large sections of the vectorial and asm source code paths
 *         have been contributed by @easyaspi314
 */
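
/*
 * Illustrative sketch (not part of the library): what the ">= 32" shift trick
 * above looks like as explicit 32-bit code. The struct and function names are
 * hypothetical.
 *
 *     typedef struct { xxh_u32 lo, hi; } u64_as_2x32;
 *
 *     static u64_as_2x32 xorshift47_32bit(u64_as_2x32 x)
 *     {
 *         x.lo ^= (x.hi >> (47 - 32));   // one shift, one xor; hi is untouched
 *         return x;
 *     }
 */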

#if defined(__thumb__) && !defined(__thumb2__) && defined(__ARM_ARCH_ISA_ARM)
#   warning "XXH3 is highly inefficient without ARM or Thumb-2."
#endif

/* ==========================================
 * Vectorization detection
 * ========================================== */

#ifdef XXH_DOXYGEN
/*!
 * @ingroup tuning
 * @brief Overrides the vectorization implementation chosen for XXH3.
 *
 * Can be defined to 0 to disable SIMD or any of the values mentioned in
 * @ref XXH_VECTOR_TYPE.
 *
 * If this is not defined, it uses predefined macros to determine the best
 * implementation.
 */
#  define XXH_VECTOR XXH_SCALAR
/*!
 * @ingroup tuning
 * @brief Possible values for @ref XXH_VECTOR.
 *
 * Note that these are actually implemented as macros.
 *
 * If this is not defined, it is detected automatically.
 * internal macro XXH_X86DISPATCH overrides this.
 */
enum XXH_VECTOR_TYPE /* fake enum */ {
    XXH_SCALAR = 0,  /*!< Portable scalar version */
    XXH_SSE2   = 1,  /*!<
                      * SSE2 for Pentium 4, Opteron, all x86_64.
                      *
                      * @note SSE2 is also guaranteed on Windows 10, macOS, and
                      * Android x86.
                      */
    XXH_AVX2   = 2,  /*!< AVX2 for Haswell and Bulldozer */
    XXH_AVX512 = 3,  /*!< AVX512 for Skylake and Icelake */
    XXH_NEON   = 4,  /*!<
                      * NEON for most ARMv7-A, all AArch64, and WASM SIMD128
                      * via the SIMDeverywhere polyfill provided with the
                      * Emscripten SDK.
                      */
    XXH_VSX    = 5,  /*!< VSX and ZVector for POWER8/z13 (64-bit) */
    XXH_SVE    = 6,  /*!< SVE for some ARMv8-A and ARMv9-A */
};
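
/*
 * Illustrative note (not part of the library): the selection can be forced at
 * build time, e.g. to benchmark the scalar path on an AVX2-capable machine:
 *
 *     cc -O3 -mavx2 -DXXH_VECTOR=XXH_SCALAR -c xxhash.c
 *
 * The file name is hypothetical; any translation unit that includes this
 * header behaves the same way.
 */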
/*!
 * @ingroup tuning
 * @brief Selects the minimum alignment for XXH3's accumulators.
 *
 * When using SIMD, this should match the alignment required for said vector
 * type, so, for example, 32 for AVX2.
 *
 * Default: Auto detected.
 */
#  define XXH_ACC_ALIGN 8
#endif

/* Actual definition */
#ifndef XXH_DOXYGEN
#  define XXH_SCALAR 0
#  define XXH_SSE2   1
#  define XXH_AVX2   2
#  define XXH_AVX512 3
#  define XXH_NEON   4
#  define XXH_VSX    5
#  define XXH_SVE    6
#endif

#ifndef XXH_VECTOR    /* can be defined on command line */
#  if defined(__ARM_FEATURE_SVE)
#    define XXH_VECTOR XXH_SVE
#  elif ( \
        defined(__ARM_NEON__) || defined(__ARM_NEON) /* gcc */ \
     || defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC) /* msvc */ \
     || (defined(__wasm_simd128__) && XXH_HAS_INCLUDE(<arm_neon.h>)) /* wasm simd128 via SIMDe */ \
   ) && ( \
        defined(_WIN32) || defined(__LITTLE_ENDIAN__) /* little endian only */ \
    || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) \
   )
#    define XXH_VECTOR XXH_NEON
#  elif defined(__AVX512F__)
#    define XXH_VECTOR XXH_AVX512
#  elif defined(__AVX2__)
#    define XXH_VECTOR XXH_AVX2
#  elif defined(__SSE2__) || defined(_M_AMD64) || defined(_M_X64) || (defined(_M_IX86_FP) && (_M_IX86_FP == 2))
#    define XXH_VECTOR XXH_SSE2
#  elif (defined(__PPC64__) && defined(__POWER8_VECTOR__)) \
     || (defined(__s390x__) && defined(__VEC__)) \
     && defined(__GNUC__) /* TODO: IBM XL */
#    define XXH_VECTOR XXH_VSX
#  else
#    define XXH_VECTOR XXH_SCALAR
#  endif
#endif

/* __ARM_FEATURE_SVE is only supported by GCC & Clang. */
#if (XXH_VECTOR == XXH_SVE) && !defined(__ARM_FEATURE_SVE)
#  ifdef _MSC_VER
#    pragma warning(once : 4606)
#  else
#    warning "__ARM_FEATURE_SVE isn't supported. Use SCALAR instead."
#  endif
#  undef XXH_VECTOR
#  define XXH_VECTOR XXH_SCALAR
#endif

/*
 * Controls the alignment of the accumulator,
 * for compatibility with aligned vector loads, which are usually faster.
 */
#ifndef XXH_ACC_ALIGN
#  if defined(XXH_X86DISPATCH)
#     define XXH_ACC_ALIGN 64  /* for compatibility with avx512 */
#  elif XXH_VECTOR == XXH_SCALAR  /* scalar */
#     define XXH_ACC_ALIGN 8
#  elif XXH_VECTOR == XXH_SSE2  /* sse2 */
#     define XXH_ACC_ALIGN 16
#  elif XXH_VECTOR == XXH_AVX2  /* avx2 */
#     define XXH_ACC_ALIGN 32
#  elif XXH_VECTOR == XXH_NEON  /* neon */
#     define XXH_ACC_ALIGN 16
#  elif XXH_VECTOR == XXH_VSX   /* vsx */
#     define XXH_ACC_ALIGN 16
#  elif XXH_VECTOR == XXH_AVX512  /* avx512 */
#     define XXH_ACC_ALIGN 64
#  elif XXH_VECTOR == XXH_SVE   /* sve */
#     define XXH_ACC_ALIGN 64
#  endif
#endif

#if defined(XXH_X86DISPATCH) || XXH_VECTOR == XXH_SSE2 \
    || XXH_VECTOR == XXH_AVX2 || XXH_VECTOR == XXH_AVX512
#  define XXH_SEC_ALIGN XXH_ACC_ALIGN
#elif XXH_VECTOR == XXH_SVE
#  define XXH_SEC_ALIGN XXH_ACC_ALIGN
#else
#  define XXH_SEC_ALIGN 8
#endif

#if defined(__GNUC__) || defined(__clang__)
#  define XXH_ALIASING __attribute__((may_alias))
#else
#  define XXH_ALIASING /* nothing */
#endif

/*
 * UGLY HACK:
 * GCC usually generates the best code with -O3 for xxHash.
 *
 * However, when targeting AVX2, it is overzealous in its unrolling resulting
 * in code roughly 3/4 the speed of Clang.
 *
 * There are other issues, such as GCC splitting _mm256_loadu_si256 into
 * _mm_loadu_si128 + _mm256_inserti128_si256. This is an optimization which
 * only applies to Sandy and Ivy Bridge... which don't even support AVX2.
 *
 * That is why when compiling the AVX2 version, it is recommended to use either
 *   -O2 -mavx2 -march=haswell
 * or
 *   -O2 -mavx2 -mno-avx256-split-unaligned-load
 * for decent performance, or to use Clang instead.
 *
 * Fortunately, we can control the first one with a pragma that forces GCC into
 * -O2, but the other one we can't control without "failed to inline always
 * inline function due to target mismatch" warnings.
 */
#if XXH_VECTOR == XXH_AVX2 /* AVX2 */ \
  && defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \
  && defined(__OPTIMIZE__) && XXH_SIZE_OPT <= 0 /* respect -O0 and -Os */
#  pragma GCC push_options
#  pragma GCC optimize("-O2")
#endif

#if XXH_VECTOR == XXH_NEON

/*
 * UGLY HACK: While AArch64 GCC on Linux does not seem to care, on macOS, GCC -O3
 * optimizes out the entire hashLong loop because of the aliasing violation.
 *
 * However, GCC is also inefficient at load-store optimization with vld1q/vst1q,
 * so the only option is to mark it as aliasing.
 */
typedef uint64x2_t xxh_aliasing_uint64x2_t XXH_ALIASING;

/*!
 * @internal
 * @brief `vld1q_u64` but faster and alignment-safe.
 *
 * On AArch64, unaligned access is always safe, but on ARMv7-a, it is only
 * *conditionally* safe (`vld1` has an alignment bit like `movdq[ua]` in x86).
 *
 * GCC for AArch64 sees `vld1q_u8` as an intrinsic instead of a load, so it
 * prohibits load-store optimizations. Therefore, a direct dereference is used.
 *
 * Otherwise, `vld1q_u8` is used with `vreinterpretq_u8_u64` to do a safe
 * unaligned load.
 */
#if defined(__aarch64__) && defined(__GNUC__) && !defined(__clang__)
XXH_FORCE_INLINE uint64x2_t XXH_vld1q_u64(void const* ptr) /* silence -Wcast-align */
{
    return *(xxh_aliasing_uint64x2_t const *)ptr;
}
#else
XXH_FORCE_INLINE uint64x2_t XXH_vld1q_u64(void const* ptr)
{
    return vreinterpretq_u64_u8(vld1q_u8((uint8_t const*)ptr));
}
#endif

/*!
 * @internal
 * @brief `vmlal_u32` on low and high halves of a vector.
 *
 * This is a workaround for AArch64 GCC < 11 which implemented arm_neon.h with
 * inline assembly and was therefore incapable of merging the `vget_{low, high}_u32`
 * with `vmlal_u32`.
 */
#if defined(__aarch64__) && defined(__GNUC__) && !defined(__clang__) && __GNUC__ < 11
XXH_FORCE_INLINE uint64x2_t
XXH_vmlal_low_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs)
{
    /* Inline assembly is the only way */
    __asm__("umlal %0.2d, %1.2s, %2.2s" : "+w" (acc) : "w" (lhs), "w" (rhs));
    return acc;
}
XXH_FORCE_INLINE uint64x2_t
XXH_vmlal_high_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs)
{
    /* This intrinsic works as expected */
    return vmlal_high_u32(acc, lhs, rhs);
}
#else
/* Portable intrinsic versions */
XXH_FORCE_INLINE uint64x2_t
XXH_vmlal_low_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs)
{
    return vmlal_u32(acc, vget_low_u32(lhs), vget_low_u32(rhs));
}
/*! @copydoc XXH_vmlal_low_u32
 * Assume the compiler converts this to vmlal_high_u32 on aarch64 */
XXH_FORCE_INLINE uint64x2_t
XXH_vmlal_high_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs)
{
    return vmlal_u32(acc, vget_high_u32(lhs), vget_high_u32(rhs));
}
#endif

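/*
 * Illustrative note (not part of the library): both helpers compute a widening
 * multiply-accumulate on two 32-bit lanes. In scalar terms,
 * XXH_vmlal_low_u32(acc, lhs, rhs) is roughly:
 *
 *     acc.lane[0] += (xxh_u64)lhs.lane[0] * rhs.lane[0];
 *     acc.lane[1] += (xxh_u64)lhs.lane[1] * rhs.lane[1];
 *
 * and XXH_vmlal_high_u32() does the same with lanes 2 and 3.
 */
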
/*!
 * @ingroup tuning
 * @brief Controls the NEON to scalar ratio for XXH3
 *
 * This can be set to 2, 4, 6, or 8.
 *
 * ARM Cortex CPUs are _very_ sensitive to how their pipelines are used.
 *
 * For example, the Cortex-A73 can dispatch 3 micro-ops per cycle, but only 2 of those
 * can be NEON. If you are only using NEON instructions, you are only using 2/3 of the CPU
 * bandwidth.
 *
 * This is even more noticeable on the more advanced cores like the Cortex-A76 which
 * can dispatch 8 micro-ops per cycle, but still only 2 NEON micro-ops at once.
 *
 * Therefore, to make the most out of the pipeline, it is beneficial to run 6 NEON lanes
 * and 2 scalar lanes, which is chosen by default.
 *
 * This does not apply to Apple processors or 32-bit processors, which run better with
 * full NEON. These will default to 8. Additionally, size-optimized builds run 8 lanes.
 *
 * This change benefits CPUs with large micro-op buffers without negatively affecting
 * most other CPUs:
 *
 *  | Chipset               | Dispatch type       | NEON only | 6:2 hybrid | Diff. |
 *  |:----------------------|:--------------------|----------:|-----------:|------:|
 *  | Snapdragon 730 (A76)  | 2 NEON/8 micro-ops  |  8.8 GB/s |  10.1 GB/s |  ~16% |
 *  | Snapdragon 835 (A73)  | 2 NEON/3 micro-ops  |  5.1 GB/s |   5.3 GB/s |   ~5% |
 *  | Marvell PXA1928 (A53) | In-order dual-issue |  1.9 GB/s |   1.9 GB/s |    0% |
 *  | Apple M1              | 4 NEON/8 micro-ops  | 37.3 GB/s |  36.1 GB/s |  ~-3% |
 *
 * It also seems to fix some bad codegen on GCC, making it almost as fast as clang.
 *
 * When using WASM SIMD128, if this is 2 or 6, SIMDe will scalarize 2 of the lanes,
 * meaning it effectively becomes a worse version of 4.
 *
 * @see XXH3_accumulate_512_neon()
 */
# ifndef XXH3_NEON_LANES
#  if (defined(__aarch64__) || defined(__arm64__) || defined(_M_ARM64) || defined(_M_ARM64EC)) \
   && !defined(__APPLE__) && XXH_SIZE_OPT <= 0
#   define XXH3_NEON_LANES 6
#  else
#   define XXH3_NEON_LANES XXH_ACC_NB
#  endif
# endif
#endif  /* XXH_VECTOR == XXH_NEON */

/*
 * VSX and Z Vector helpers.
 *
 * This is very messy, and any pull requests to clean this up are welcome.
 *
 * There are a lot of problems with supporting VSX and s390x, due to
 * inconsistent intrinsics, spotty coverage, and multiple endiannesses.
 */
#if XXH_VECTOR == XXH_VSX
/* Annoyingly, these headers _may_ define three macros: `bool`, `vector`,
 * and `pixel`. This is a problem for obvious reasons.
 *
 * These keywords are unnecessary; the spec literally says they are
 * equivalent to `__bool`, `__vector`, and `__pixel` and may be undef'd
 * after including the header.
 *
 * We use pragma push_macro/pop_macro to keep the namespace clean. */
#  pragma push_macro("bool")
#  pragma push_macro("vector")
#  pragma push_macro("pixel")
/* silence potential macro redefined warnings */
#  undef bool
#  undef vector
#  undef pixel

#  if defined(__s390x__)
#    include <s390intrin.h>
#  else
#    include <altivec.h>
#  endif

/* Restore the original macro values, if applicable. */
#  pragma pop_macro("pixel")
#  pragma pop_macro("vector")
#  pragma pop_macro("bool")

typedef __vector unsigned long long xxh_u64x2;
typedef __vector unsigned char xxh_u8x16;
typedef __vector unsigned xxh_u32x4;

/*
 * UGLY HACK: Similar to aarch64 macOS GCC, s390x GCC has the same aliasing issue.
 */
typedef xxh_u64x2 xxh_aliasing_u64x2 XXH_ALIASING;

#  ifndef XXH_VSX_BE
#    if defined(__BIG_ENDIAN__) \
  || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
#      define XXH_VSX_BE 1
#    elif defined(__VEC_ELEMENT_REG_ORDER__) && __VEC_ELEMENT_REG_ORDER__ == __ORDER_BIG_ENDIAN__
#      warning "-maltivec=be is not recommended. Please use native endianness."
#      define XXH_VSX_BE 1
#    else
#      define XXH_VSX_BE 0
#    endif
#  endif /* !defined(XXH_VSX_BE) */

#  if XXH_VSX_BE
#    if defined(__POWER9_VECTOR__) || (defined(__clang__) && defined(__s390x__))
#      define XXH_vec_revb vec_revb
#    else
/*!
 * A polyfill for POWER9's vec_revb().
 */
XXH_FORCE_INLINE xxh_u64x2 XXH_vec_revb(xxh_u64x2 val)
{
    xxh_u8x16 const vByteSwap = { 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00,
                                  0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08 };
    return vec_perm(val, val, vByteSwap);
}
#    endif
#  endif /* XXH_VSX_BE */

/*!
 * Performs an unaligned vector load and byte swaps it on big endian.
 */
XXH_FORCE_INLINE xxh_u64x2 XXH_vec_loadu(const void *ptr)
{
    xxh_u64x2 ret;
    XXH_memcpy(&ret, ptr, sizeof(xxh_u64x2));
#  if XXH_VSX_BE
    ret = XXH_vec_revb(ret);
#  endif
    return ret;
}

/*
 * vec_mulo and vec_mule are very problematic intrinsics on PowerPC.
 *
 * These intrinsics weren't added until GCC 8, despite existing for a while,
 * and they are endian dependent. Also, their meanings swap depending on the version.
 */
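/*
 * Illustrative note (not part of the library): vec_mule() multiplies the
 * even-indexed 32-bit elements of its operands and vec_mulo() the odd-indexed
 * ones, each producing two widened 64-bit products. Roughly:
 *
 *     result.lane[i] = (xxh_u64)a.lane[2*i + k] * b.lane[2*i + k];
 *
 * with k == 0 for "even" and k == 1 for "odd"; the element numbering itself
 * is what depends on the target's endianness.
 */
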
#  if defined(__s390x__)
 /* s390x is always big endian, no issue on this platform */
#    define XXH_vec_mulo vec_mulo
#    define XXH_vec_mule vec_mule
#  elif defined(__clang__) && XXH_HAS_BUILTIN(__builtin_altivec_vmuleuw) && !defined(__ibmxl__)
/* Clang has a better way to control this, we can just use the builtin which doesn't swap. */
 /* The IBM XL Compiler (which defines __clang__) only implements the vec_* operations */
#    define XXH_vec_mulo __builtin_altivec_vmulouw
#    define XXH_vec_mule __builtin_altivec_vmuleuw
#  else
/* gcc needs inline assembly */
/* Adapted from https://github.com/google/highwayhash/blob/master/highwayhash/hh_vsx.h. */
XXH_FORCE_INLINE xxh_u64x2 XXH_vec_mulo(xxh_u32x4 a, xxh_u32x4 b)
{
    xxh_u64x2 result;
    __asm__("vmulouw %0, %1, %2" : "=v" (result) : "v" (a), "v" (b));
    return result;
}
XXH_FORCE_INLINE xxh_u64x2 XXH_vec_mule(xxh_u32x4 a, xxh_u32x4 b)
{
    xxh_u64x2 result;
    __asm__("vmuleuw %0, %1, %2" : "=v" (result) : "v" (a), "v" (b));
    return result;
}
#  endif /* XXH_vec_mulo, XXH_vec_mule */
#endif /* XXH_VECTOR == XXH_VSX */

#if XXH_VECTOR == XXH_SVE
#define ACCRND(acc, offset) \
do { \
    svuint64_t input_vec = svld1_u64(mask, xinput + offset); \
    svuint64_t secret_vec = svld1_u64(mask, xsecret + offset); \
    svuint64_t mixed = sveor_u64_x(mask, secret_vec, input_vec); \
    svuint64_t swapped = svtbl_u64(input_vec, kSwap); \
    svuint64_t mixed_lo = svextw_u64_x(mask, mixed); \
    svuint64_t mixed_hi = svlsr_n_u64_x(mask, mixed, 32); \
    svuint64_t mul = svmad_u64_x(mask, mixed_lo, mixed_hi, swapped); \
    acc = svadd_u64_x(mask, acc, mul); \
} while (0)
#endif /* XXH_VECTOR == XXH_SVE */

/* prefetch
 * can be disabled by defining the XXH_NO_PREFETCH build macro */
#if defined(XXH_NO_PREFETCH)
#  define XXH_PREFETCH(ptr)  (void)(ptr)  /* disabled */
#else
#  if XXH_SIZE_OPT >= 1
#    define XXH_PREFETCH(ptr) (void)(ptr)
#  elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))  /* _mm_prefetch() not defined outside of x86/x64 */
#    include <mmintrin.h>   /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
#    define XXH_PREFETCH(ptr)  _mm_prefetch((const char*)(ptr), _MM_HINT_T0)
#  elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
#    define XXH_PREFETCH(ptr)  __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
#  else
#    define XXH_PREFETCH(ptr) (void)(ptr)  /* disabled */
#  endif
#endif  /* XXH_NO_PREFETCH */

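/*
 * Illustrative note (not part of the library): the ACCRND macro defined above
 * is one XXH3 accumulate step, vectorized with SVE. Per 64-bit lane it
 * computes roughly:
 *
 *     mixed = input ^ secret;
 *     acc  += (xxh_u64)(xxh_u32)mixed * (mixed >> 32)   // 32x32->64 multiply
 *           + swapped;                                  // pairwise-swapped input lanes
 *
 * `mask`, `xinput`, `xsecret`, and `kSwap` are locals expected to be in scope
 * at the expansion site.
 */
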

/* ==========================================
 * XXH3 default settings
 * ========================================== */

#define XXH_SECRET_DEFAULT_SIZE 192   /* minimum XXH3_SECRET_SIZE_MIN */

#if (XXH_SECRET_DEFAULT_SIZE < XXH3_SECRET_SIZE_MIN)
#  error "default keyset is not large enough"
#endif

/*! Pseudorandom secret taken directly from FARSH. */
XXH_ALIGN(64) static const xxh_u8 XXH3_kSecret[XXH_SECRET_DEFAULT_SIZE] = {
    0xb8, 0xfe, 0x6c, 0x39, 0x23, 0xa4, 0x4b, 0xbe, 0x7c, 0x01, 0x81, 0x2c, 0xf7, 0x21, 0xad, 0x1c,
    0xde, 0xd4, 0x6d, 0xe9, 0x83, 0x90, 0x97, 0xdb, 0x72, 0x40, 0xa4, 0xa4, 0xb7, 0xb3, 0x67, 0x1f,
    0xcb, 0x79, 0xe6, 0x4e, 0xcc, 0xc0, 0xe5, 0x78, 0x82, 0x5a, 0xd0, 0x7d, 0xcc, 0xff, 0x72, 0x21,
    0xb8, 0x08, 0x46, 0x74, 0xf7, 0x43, 0x24, 0x8e, 0xe0, 0x35, 0x90, 0xe6, 0x81, 0x3a, 0x26, 0x4c,
    0x3c, 0x28, 0x52, 0xbb, 0x91, 0xc3, 0x00, 0xcb, 0x88, 0xd0, 0x65, 0x8b, 0x1b, 0x53, 0x2e, 0xa3,
    0x71, 0x64, 0x48, 0x97, 0xa2, 0x0d, 0xf9, 0x4e, 0x38, 0x19, 0xef, 0x46, 0xa9, 0xde, 0xac, 0xd8,
    0xa8, 0xfa, 0x76, 0x3f, 0xe3, 0x9c, 0x34, 0x3f, 0xf9, 0xdc, 0xbb, 0xc7, 0xc7, 0x0b, 0x4f, 0x1d,
    0x8a, 0x51, 0xe0, 0x4b, 0xcd, 0xb4, 0x59, 0x31, 0xc8, 0x9f, 0x7e, 0xc9, 0xd9, 0x78, 0x73, 0x64,
    0xea, 0xc5, 0xac, 0x83, 0x34, 0xd3, 0xeb, 0xc3, 0xc5, 0x81, 0xa0, 0xff, 0xfa, 0x13, 0x63, 0xeb,
    0x17, 0x0d, 0xdd, 0x51, 0xb7, 0xf0, 0xda, 0x49, 0xd3, 0x16, 0x55, 0x26, 0x29, 0xd4, 0x68, 0x9e,
    0x2b, 0x16, 0xbe, 0x58, 0x7d, 0x47, 0xa1, 0xfc, 0x8f, 0xf8, 0xb8, 0xd1, 0x7a, 0xd0, 0x31, 0xce,
    0x45, 0xcb, 0x3a, 0x8f, 0x95, 0x16, 0x04, 0x28, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e,
};

static const xxh_u64 PRIME_MX1 = 0x165667919E3779F9ULL;  /*!< 0b0001011001010110011001111001000110011110001101110111100111111001 */
static const xxh_u64 PRIME_MX2 = 0x9FB21C651E98DF25ULL;  /*!< 0b1001111110110010000111000110010100011110100110001101111100100101 */

#ifdef XXH_OLD_NAMES
#  define kSecret XXH3_kSecret
#endif

#ifdef XXH_DOXYGEN
/*!
 * @brief Calculates a 32-bit to 64-bit long multiply.
 *
 * Implemented as a macro.
 *
 * Wraps `__emulu` on MSVC x86 because it tends to call `__allmul` when it doesn't
 * need to (but it shouldn't need to anyways, it is about 7 instructions to do
 * a 64x64 multiply...). Since we know that this will _always_ emit `MULL`, we
 * use that instead of the normal method.
 *
 * If you are compiling for platforms like Thumb-1 and don't have a better option,
 * you may also want to write your own long multiply routine here.
 *
 * @param x, y Numbers to be multiplied
 * @return 64-bit product of the low 32 bits of @p x and @p y.
 */
XXH_FORCE_INLINE xxh_u64
XXH_mult32to64(xxh_u64 x, xxh_u64 y)
{
   return (x & 0xFFFFFFFF) * (y & 0xFFFFFFFF);
}
#elif defined(_MSC_VER) && defined(_M_IX86)
#  define XXH_mult32to64(x, y) __emulu((unsigned)(x), (unsigned)(y))
#else
/*
 * Downcast + upcast is usually better than masking on older compilers like
 * GCC 4.2 (especially 32-bit ones), all without affecting newer compilers.
 *
 * The other method, (x & 0xFFFFFFFF) * (y & 0xFFFFFFFF), will AND both operands
 * and perform a full 64x64 multiply -- entirely redundant on 32-bit.
 */
#  define XXH_mult32to64(x, y) ((xxh_u64)(xxh_u32)(x) * (xxh_u64)(xxh_u32)(y))
#endif
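
/*
 * Illustrative note (not part of the library): only the low 32 bits of each
 * operand participate in XXH_mult32to64(), so the product can never overflow.
 * At the extreme, XXH_mult32to64(0xFFFFFFFF, 0xFFFFFFFF) == 0xFFFFFFFE00000001ULL.
 */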
#ifdef XXH_DOXYGEN
/*!
 * @brief Calculates a 32-bit to 64-bit long multiply.
 *
 * Implemented as a macro.
 *
 * Wraps `__emulu` on MSVC x86 because it tends to call `__allmul` when it
 * doesn't need to (even though it shouldn't: a 64x64 multiply is only about
 * 7 instructions). Since we know that this will _always_ emit `MULL`, we
 * use that instead of the normal method.
 *
 * If you are compiling for platforms like Thumb-1 and don't have a better
 * option, you may also want to write your own long multiply routine here.
 *
 * @param x, y Numbers to be multiplied
 * @return 64-bit product of the low 32 bits of @p x and @p y.
 */
XXH_FORCE_INLINE xxh_u64
XXH_mult32to64(xxh_u64 x, xxh_u64 y)
{
    return (x & 0xFFFFFFFF) * (y & 0xFFFFFFFF);
}
#elif defined(_MSC_VER) && defined(_M_IX86)
#  define XXH_mult32to64(x, y) __emulu((unsigned)(x), (unsigned)(y))
#else
/*
 * Downcast + upcast is usually better than masking on older compilers like
 * GCC 4.2 (especially 32-bit ones), all without affecting newer compilers.
 *
 * The other method, (x & 0xFFFFFFFF) * (y & 0xFFFFFFFF), will AND both operands
 * and perform a full 64x64 multiply -- entirely redundant on 32-bit.
 */
#  define XXH_mult32to64(x, y) ((xxh_u64)(xxh_u32)(x) * (xxh_u64)(xxh_u32)(y))
#endif
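
/*
 * A minimal sanity-check sketch (illustration only, not compiled into the
 * library; the helper name is hypothetical): all three forms below agree
 * because XXH_mult32to64() only uses the low 32 bits of each operand.
 */
#if 0
#include <assert.h>
static void XXH_example_mult32to64(void)
{
    xxh_u64 const x = 0x100000003ULL;   /* low 32 bits: 3 */
    xxh_u64 const y = 0x200000005ULL;   /* low 32 bits: 5 */
    assert(XXH_mult32to64(x, y) == 15);
    assert(XXH_mult32to64(x, y) == (x & 0xFFFFFFFF) * (y & 0xFFFFFFFF));
    assert(XXH_mult32to64(x, y) == (xxh_u64)(xxh_u32)x * (xxh_u64)(xxh_u32)y);
}
#endif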

/*!
 * @brief Calculates a 64->128-bit long multiply.
 *
 * Uses `__uint128_t` and `_umul128` if available, otherwise uses a scalar
 * version.
 *
 * @param lhs , rhs The 64-bit integers to be multiplied
 * @return The 128-bit result represented in an @ref XXH128_hash_t.
 */
static XXH128_hash_t
XXH_mult64to128(xxh_u64 lhs, xxh_u64 rhs)
{
    /*
     * GCC/Clang __uint128_t method.
     *
     * On most 64-bit targets, GCC and Clang define a __uint128_t type.
     * This is usually the best way as it usually uses a native long 64-bit
     * multiply, such as MULQ on x86_64 or MUL + UMULH on aarch64.
     *
     * Usually.
     *
     * Despite being a 32-bit platform, Clang (and Emscripten) define this
     * type anyway, without having the arithmetic for it. This results in a
     * laggy compiler builtin call which calculates a full 128-bit multiply.
     * In that case it is best to use the portable one.
     * https://github.com/Cyan4973/xxHash/issues/211#issuecomment-515575677
     */
#if (defined(__GNUC__) || defined(__clang__)) && !defined(__wasm__) \
    && defined(__SIZEOF_INT128__) \
    || (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128)

    __uint128_t const product = (__uint128_t)lhs * (__uint128_t)rhs;
    XXH128_hash_t r128;
    r128.low64  = (xxh_u64)(product);
    r128.high64 = (xxh_u64)(product >> 64);
    return r128;

    /*
     * MSVC for x64's _umul128 method.
     *
     * xxh_u64 _umul128(xxh_u64 Multiplier, xxh_u64 Multiplicand, xxh_u64 *HighProduct);
     *
     * This compiles to single operand MUL on x64.
     */
#elif (defined(_M_X64) || defined(_M_IA64)) && !defined(_M_ARM64EC)

#ifndef _MSC_VER
#  pragma intrinsic(_umul128)
#endif
    xxh_u64 product_high;
    xxh_u64 const product_low = _umul128(lhs, rhs, &product_high);
    XXH128_hash_t r128;
    r128.low64  = product_low;
    r128.high64 = product_high;
    return r128;

    /*
     * MSVC for ARM64's __umulh method.
     *
     * This compiles to the same MUL + UMULH as GCC/Clang's __uint128_t method.
     */
#elif defined(_M_ARM64) || defined(_M_ARM64EC)

#ifndef _MSC_VER
#  pragma intrinsic(__umulh)
#endif
    XXH128_hash_t r128;
    r128.low64  = lhs * rhs;
    r128.high64 = __umulh(lhs, rhs);
    return r128;

#else
    /*
     * Portable scalar method. Optimized for 32-bit and 64-bit ALUs.
     *
     * This is a fast and simple grade school multiply, which is shown below
     * with base 10 arithmetic instead of base 0x100000000.
     *
     *           9 3 // D2 lhs = 93
     *         x 7 5 // D2 rhs = 75
     *     ----------
     *           1 5 // D2 lo_lo = (93 % 10) * (75 % 10) = 15
     *         4 5 | // D2 hi_lo = (93 / 10) * (75 % 10) = 45
     *         2 1 | // D2 lo_hi = (93 % 10) * (75 / 10) = 21
     *       + 6 3 | | // D2 hi_hi = (93 / 10) * (75 / 10) = 63
     *     ---------
     *         2 7 | // D2 cross = (15 / 10) + (45 % 10) + 21 = 27
     *       + 6 7 | | // D2 upper = (27 / 10) + (45 / 10) + 63 = 67
     *     ---------
     *       6 9 7 5 // D4 res = (27 * 10) + (15 % 10) + (67 * 100) = 6975
     *
     * The reasons for adding the products like this are:
     *  1. It avoids manual carry tracking. Just like how
     *     (9 * 9) + 9 + 9 = 99, the same applies with this for UINT64_MAX.
     *     This avoids a lot of complexity.
     *
     *  2. It hints for, and on Clang compiles to, the powerful UMAAL
     *     instruction, available in ARM's Digital Signal Processing extension
     *     in 32-bit ARMv6 and later, which is shown below:
     *
     *         void UMAAL(xxh_u32 *RdLo, xxh_u32 *RdHi, xxh_u32 Rn, xxh_u32 Rm)
     *         {
     *             xxh_u64 product = (xxh_u64)*RdLo * (xxh_u64)*RdHi + Rn + Rm;
     *             *RdLo = (xxh_u32)(product & 0xFFFFFFFF);
     *             *RdHi = (xxh_u32)(product >> 32);
     *         }
     *
     *     This instruction was designed for efficient long multiplication, and
     *     allows this to be calculated in only 4 instructions at speeds
     *     comparable to some 64-bit ALUs.
     *
     *  3. It isn't terrible on other platforms. Usually this will be a couple
     *     of 32-bit ADD/ADCs.
     */

    /* First calculate all of the cross products. */
    xxh_u64 const lo_lo = XXH_mult32to64(lhs & 0xFFFFFFFF, rhs & 0xFFFFFFFF);
    xxh_u64 const hi_lo = XXH_mult32to64(lhs >> 32,        rhs & 0xFFFFFFFF);
    xxh_u64 const lo_hi = XXH_mult32to64(lhs & 0xFFFFFFFF, rhs >> 32);
    xxh_u64 const hi_hi = XXH_mult32to64(lhs >> 32,        rhs >> 32);

    /* Now add the products together. These will never overflow. */
    xxh_u64 const cross = (lo_lo >> 32) + (hi_lo & 0xFFFFFFFF) + lo_hi;
    xxh_u64 const upper = (hi_lo >> 32) + (cross >> 32)        + hi_hi;
    xxh_u64 const lower = (cross << 32) | (lo_lo & 0xFFFFFFFF);

    XXH128_hash_t r128;
    r128.low64  = lower;
    r128.high64 = upper;
    return r128;
#endif
}
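
/*
 * A hand-checkable sketch (illustration only, not compiled into the library;
 * the helper name is hypothetical): (2^32 - 1)^2 = 2^64 - 2^33 + 1 fits in
 * the low word, while (2^64 - 1)^2 = 2^128 - 2^65 + 1 exercises the high one.
 */
#if 0
#include <assert.h>
static void XXH_example_mult64to128(void)
{
    XXH128_hash_t const small = XXH_mult64to128(0xFFFFFFFFULL, 0xFFFFFFFFULL);
    XXH128_hash_t const big   = XXH_mult64to128(~0ULL, ~0ULL);
    assert(small.high64 == 0 && small.low64 == 0xFFFFFFFE00000001ULL);
    assert(big.high64 == 0xFFFFFFFFFFFFFFFEULL && big.low64 == 1);
}
#endif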

/*!
 * @brief Calculates a 64-bit to 128-bit multiply, then XOR folds it.
 *
 * The reason for the separate function is to prevent passing too many structs
 * around by value. This will hopefully inline the multiply, but we don't force it.
 *
 * @param lhs , rhs The 64-bit integers to multiply
 * @return The low 64 bits of the product XOR'd by the high 64 bits.
 * @see XXH_mult64to128()
 */
static xxh_u64
XXH3_mul128_fold64(xxh_u64 lhs, xxh_u64 rhs)
{
    XXH128_hash_t product = XXH_mult64to128(lhs, rhs);
    return product.low64 ^ product.high64;
}
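
/*
 * Folding sketch (illustration only, not compiled into the library; the
 * helper name is hypothetical), reusing the (2^64 - 1)^2 value from above:
 * 0xFFFFFFFFFFFFFFFE ^ 0x0000000000000001 == 0xFFFFFFFFFFFFFFFF.
 */
#if 0
#include <assert.h>
static void XXH_example_fold64(void)
{
    assert(XXH3_mul128_fold64(~0ULL, ~0ULL) == ~0ULL);
}
#endif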

/*! Seems to produce slightly better code on GCC for some reason. */
XXH_FORCE_INLINE XXH_CONSTF xxh_u64 XXH_xorshift64(xxh_u64 v64, int shift)
{
    XXH_ASSERT(0 <= shift && shift < 64);
    return v64 ^ (v64 >> shift);
}

/*
 * This is a fast avalanche stage,
 * suitable when input bits are already partially mixed.
 */
static XXH64_hash_t XXH3_avalanche(xxh_u64 h64)
{
    h64  = XXH_xorshift64(h64, 37);
    h64 *= PRIME_MX1;
    h64  = XXH_xorshift64(h64, 32);
    return h64;
}

/*
 * This is a stronger avalanche,
 * inspired by Pelle Evensen's rrmxmx,
 * preferable when input has not been previously mixed.
 */
static XXH64_hash_t XXH3_rrmxmx(xxh_u64 h64, xxh_u64 len)
{
    /* this mix is inspired by Pelle Evensen's rrmxmx */
    h64 ^= XXH_rotl64(h64, 49) ^ XXH_rotl64(h64, 24);
    h64 *= PRIME_MX2;
    h64 ^= (h64 >> 35) + len;
    h64 *= PRIME_MX2;
    return XXH_xorshift64(h64, 28);
}
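
/*
 * Why no entropy is lost (a sketch, not compiled into the library; the
 * helper name is hypothetical): for shift >= 32, XXH_xorshift64 is an
 * involution, since ((v ^ (v >> s)) >> s) == (v >> s) when 2*s >= 64.
 * Each step of XXH3_avalanche (an xorshift, or a multiply by an odd
 * constant) is therefore a bijection on 64-bit values.
 */
#if 0
#include <assert.h>
static void XXH_example_xorshift_involution(void)
{
    xxh_u64 const v = 0x0123456789ABCDEFULL;
    assert(XXH_xorshift64(XXH_xorshift64(v, 37), 37) == v);
}
#endif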


/* ==========================================
 * Short keys
 * ==========================================
 * One of the shortcomings of XXH32 and XXH64 was that their performance was
 * sub-optimal on short lengths. They used an iterative algorithm which
 * strongly favored lengths that were a multiple of 4 or 8.
 *
 * Instead of iterating over individual inputs, we use a set of single shot
 * functions which piece together a range of lengths and operate in constant time.
 *
 * Additionally, the number of multiplies has been significantly reduced. This
 * reduces latency, especially when emulating 64-bit multiplies on 32-bit.
 *
 * Depending on the platform, this may or may not be faster than XXH32, but it
 * is almost guaranteed to be faster than XXH64.
 */

/*
 * At very short lengths, there isn't enough input to fully hide secrets, or use
 * the entire secret.
 *
 * There is also only a limited amount of mixing we can do before significantly
 * impacting performance.
 *
 * Therefore, we use different sections of the secret and always mix two secret
 * samples with an XOR. This should have no effect on performance on the
 * seedless or withSeed variants because everything _should_ be constant folded
 * by modern compilers.
 *
 * The XOR mixing hides individual parts of the secret and increases entropy.
 *
 * This adds an extra layer of strength for custom secrets.
 */
XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t
XXH3_len_1to3_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
{
    XXH_ASSERT(input != NULL);
    XXH_ASSERT(1 <= len && len <= 3);
    XXH_ASSERT(secret != NULL);
    /*
     * len = 1: combined = { input[0], 0x01, input[0], input[0] }
     * len = 2: combined = { input[1], 0x02, input[0], input[1] }
     * len = 3: combined = { input[2], 0x03, input[0], input[1] }
     */
    {   xxh_u8  const c1 = input[0];
        xxh_u8  const c2 = input[len >> 1];
        xxh_u8  const c3 = input[len - 1];
        xxh_u32 const combined = ((xxh_u32)c1 << 16) | ((xxh_u32)c2  << 24)
                               | ((xxh_u32)c3 <<  0) | ((xxh_u32)len <<  8);
        xxh_u64 const bitflip = (XXH_readLE32(secret) ^ XXH_readLE32(secret+4)) + seed;
        xxh_u64 const keyed = (xxh_u64)combined ^ bitflip;
        return XXH64_avalanche(keyed);
    }
}

XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t
XXH3_len_4to8_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
{
    XXH_ASSERT(input != NULL);
    XXH_ASSERT(secret != NULL);
    XXH_ASSERT(4 <= len && len <= 8);
    seed ^= (xxh_u64)XXH_swap32((xxh_u32)seed) << 32;
    {   xxh_u32 const input1 = XXH_readLE32(input);
        xxh_u32 const input2 = XXH_readLE32(input + len - 4);
        xxh_u64 const bitflip = (XXH_readLE64(secret+8) ^ XXH_readLE64(secret+16)) - seed;
        xxh_u64 const input64 = input2 + (((xxh_u64)input1) << 32);
        xxh_u64 const keyed = input64 ^ bitflip;
        return XXH3_rrmxmx(keyed, len);
    }
}

XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t
XXH3_len_9to16_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
{
    XXH_ASSERT(input != NULL);
    XXH_ASSERT(secret != NULL);
    XXH_ASSERT(9 <= len && len <= 16);
    {   xxh_u64 const bitflip1 = (XXH_readLE64(secret+24) ^ XXH_readLE64(secret+32)) + seed;
        xxh_u64 const bitflip2 = (XXH_readLE64(secret+40) ^ XXH_readLE64(secret+48)) - seed;
        xxh_u64 const input_lo = XXH_readLE64(input)           ^ bitflip1;
        xxh_u64 const input_hi = XXH_readLE64(input + len - 8) ^ bitflip2;
        xxh_u64 const acc = len
                          + XXH_swap64(input_lo) + input_hi
                          + XXH3_mul128_fold64(input_lo, input_hi);
        return XXH3_avalanche(acc);
    }
}
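
/*
 * Worked example for the byte layout above (illustration only, not compiled
 * into the library; the helper name is hypothetical): for input "AB",
 * combined == 0x42410242, i.e. bytes { 'B', 0x02, 'A', 'B' } from least to
 * most significant -- matching { input[1], 0x02, input[0], input[1] }.
 */
#if 0
#include <assert.h>
static void XXH_example_len2_combined(void)
{
    xxh_u8 const input[2] = { 0x41, 0x42 };   /* "AB" */
    size_t const len = 2;
    xxh_u32 const combined = ((xxh_u32)input[0]        << 16)
                           | ((xxh_u32)input[len >> 1] << 24)
                           | ((xxh_u32)input[len - 1]  <<  0)
                           | ((xxh_u32)len             <<  8);
    assert(combined == 0x42410242);
}
#endif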

XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t
XXH3_len_0to16_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
{
    XXH_ASSERT(len <= 16);
    {   if (XXH_likely(len >  8)) return XXH3_len_9to16_64b(input, len, secret, seed);
        if (XXH_likely(len >= 4)) return XXH3_len_4to8_64b(input, len, secret, seed);
        if (len) return XXH3_len_1to3_64b(input, len, secret, seed);
        return XXH64_avalanche(seed ^ (XXH_readLE64(secret+56) ^ XXH_readLE64(secret+64)));
    }
}
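
/*
 * Dispatch sketch (illustration only, not compiled into the library; the
 * helper name is hypothetical): each call below takes a different
 * constant-time path inside XXH3_len_0to16_64b(). Note that when
 * XXH_NAMESPACE is set (as in this Zstandard adaptation), the public symbol
 * carries the namespace prefix.
 */
#if 0
static void XXH_example_dispatch(void)
{
    XXH64_hash_t const h0  = XXH3_64bits("", 0);              /* len == 0: secret-only path */
    XXH64_hash_t const h2  = XXH3_64bits("ab", 2);            /* len 1..3  */
    XXH64_hash_t const h6  = XXH3_64bits("abcdef", 6);        /* len 4..8  */
    XXH64_hash_t const h12 = XXH3_64bits("abcdefghijkl", 12); /* len 9..16 */
    (void)h0; (void)h2; (void)h6; (void)h12;
}
#endif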

/*
 * DISCLAIMER: There are known *seed-dependent* multicollisions here due to
 * multiplication by zero, affecting hashes of lengths 17 to 240.
 *
 * However, they are very unlikely.
 *
 * Keep this in mind when using the unseeded XXH3_64bits() variant: As with all
 * unseeded non-cryptographic hashes, it does not attempt to defend itself
 * against specially crafted inputs, only random inputs.
 *
 * Compared to classic UMAC where a 1 in 2^31 chance of 4 consecutive bytes
 * cancelling out the secret is taken an arbitrary number of times (addressed
 * in XXH3_accumulate_512), this collision is very unlikely with random inputs
 * and/or proper seeding:
 *
 * This only has a 1 in 2^63 chance of 8 consecutive bytes cancelling out, in a
 * function that is only called up to 16 times per hash with up to 240 bytes of
 * input.
 *
 * This is not too bad for a non-cryptographic hash function, especially with
 * only 64 bit outputs.
 *
 * The 128-bit variant (which trades some speed for strength) is NOT affected
 * by this, although it is always a good idea to use a proper seed if you care
 * about strength.
 */
XXH_FORCE_INLINE xxh_u64 XXH3_mix16B(const xxh_u8* XXH_RESTRICT input,
                                     const xxh_u8* XXH_RESTRICT secret, xxh_u64 seed64)
{
#if defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \
  && defined(__i386__) && defined(__SSE2__)  /* x86 + SSE2 */ \
  && !defined(XXH_ENABLE_AUTOVECTORIZE)      /* Define to disable like XXH32 hack */
    /*
     * UGLY HACK:
     * GCC for x86 tends to autovectorize the 128-bit multiply, resulting in
     * slower code.
     *
     * By forcing seed64 into a register, we disrupt the cost model and
     * cause it to scalarize. See `XXH32_round()`
     *
     * FIXME: Clang's output is still _much_ faster -- On an AMD Ryzen 3600,
     * XXH3_64bits @ len=240 runs at 4.6 GB/s with Clang 9, but 3.3 GB/s on
     * GCC 9.2, despite both emitting scalar code.
     *
     * GCC generates much better scalar code than Clang for the rest of XXH3,
     * which is why finding a more optimal codepath is of interest.
     */
    XXH_COMPILER_GUARD(seed64);
#endif
    {   xxh_u64 const input_lo = XXH_readLE64(input);
        xxh_u64 const input_hi = XXH_readLE64(input+8);
        return XXH3_mul128_fold64(
            input_lo ^ (XXH_readLE64(secret)   + seed64),
            input_hi ^ (XXH_readLE64(secret+8) - seed64)
        );
    }
}

/* For mid range keys, XXH3 uses a Mum-hash variant. */
XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t
XXH3_len_17to128_64b(const xxh_u8* XXH_RESTRICT input, size_t len,
                     const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
                     XXH64_hash_t seed)
{
    XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize;
    XXH_ASSERT(16 < len && len <= 128);

    {   xxh_u64 acc = len * XXH_PRIME64_1;
#if XXH_SIZE_OPT >= 1
        /* Smaller and cleaner, but slightly slower. */
        unsigned int i = (unsigned int)(len - 1) / 32;
        do {
            acc += XXH3_mix16B(input+16 * i, secret+32*i, seed);
            acc += XXH3_mix16B(input+len-16*(i+1), secret+32*i+16, seed);
        } while (i-- != 0);
#else
        if (len > 32) {
            if (len > 64) {
                if (len > 96) {
                    acc += XXH3_mix16B(input+48, secret+96, seed);
                    acc += XXH3_mix16B(input+len-64, secret+112, seed);
                }
                acc += XXH3_mix16B(input+32, secret+64, seed);
                acc += XXH3_mix16B(input+len-48, secret+80, seed);
            }
            acc += XXH3_mix16B(input+16, secret+32, seed);
            acc += XXH3_mix16B(input+len-32, secret+48, seed);
        }
        acc += XXH3_mix16B(input+0, secret+0, seed);
        acc += XXH3_mix16B(input+len-16, secret+16, seed);
#endif
        return XXH3_avalanche(acc);
    }
}
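
/*
 * Coverage sketch for the branch ladder above (illustration only, not
 * compiled into the library; the helper name is hypothetical): blocks are
 * mixed in symmetric front/back pairs, so for len == 100 all four pairs
 * fire and every input byte is covered at least once (overlap is fine).
 */
#if 0
#include <stdio.h>
static void XXH_example_17to128_pairs(size_t len)   /* 17 <= len <= 128 */
{
    if (len > 96) printf("mix16B(input+48), mix16B(input+len-64)\n");
    if (len > 64) printf("mix16B(input+32), mix16B(input+len-48)\n");
    if (len > 32) printf("mix16B(input+16), mix16B(input+len-32)\n");
    printf("mix16B(input+0),  mix16B(input+len-16)\n");
}
#endif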

/*!
 * @brief Maximum size of "short" key in bytes.
 */
#define XXH3_MIDSIZE_MAX 240

XXH_NO_INLINE XXH_PUREF XXH64_hash_t
XXH3_len_129to240_64b(const xxh_u8* XXH_RESTRICT input, size_t len,
                      const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
                      XXH64_hash_t seed)
{
    XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize;
    XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX);

#define XXH3_MIDSIZE_STARTOFFSET 3
#define XXH3_MIDSIZE_LASTOFFSET  17

    {   xxh_u64 acc = len * XXH_PRIME64_1;
        xxh_u64 acc_end;
        unsigned int const nbRounds = (unsigned int)len / 16;
        unsigned int i;
        XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX);
        for (i=0; i<8; i++) {
            acc += XXH3_mix16B(input+(16*i), secret+(16*i), seed);
        }
        /* last bytes */
        acc_end = XXH3_mix16B(input + len - 16, secret + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET, seed);
        XXH_ASSERT(nbRounds >= 8);
        acc = XXH3_avalanche(acc);
#if defined(__clang__)                                /* Clang */ \
    && (defined(__ARM_NEON) || defined(__ARM_NEON__)) /* NEON */ \
    && !defined(XXH_ENABLE_AUTOVECTORIZE)             /* Define to disable */
        /*
         * UGLY HACK:
         * Clang for ARMv7-A tries to vectorize this loop, similar to GCC x86.
         * Everywhere else, it uses scalar code.
         *
         * For 64->128-bit multiplies, even if the NEON was 100% optimal, it
         * would still be slower than UMAAL (see XXH_mult64to128).
         *
         * Unfortunately, Clang doesn't handle the long multiplies properly and
         * converts them to the nonexistent "vmulq_u64" intrinsic, which is then
         * scalarized into an ugly mess of VMOV.32 instructions.
         *
         * This mess is difficult to avoid without turning autovectorization
         * off completely, but the other cases are usually relatively minor
         * and/or not worth it to fix.
         *
         * This loop is the easiest to fix, as unlike XXH32, this pragma
         * _actually works_ because it is a loop vectorization instead of an
         * SLP vectorization.
         */
#pragma clang loop vectorize(disable)
#endif
        for (i=8 ; i < nbRounds; i++) {
            /*
             * Prevents Clang from unrolling the acc loop and interleaving it with this one.
             */
            XXH_COMPILER_GUARD(acc);
            acc_end += XXH3_mix16B(input+(16*i), secret+(16*(i-8)) + XXH3_MIDSIZE_STARTOFFSET, seed);
        }
        return XXH3_avalanche(acc + acc_end);
    }
}


/* =======     Long Keys     ======= */

#define XXH_STRIPE_LEN 64
#define XXH_SECRET_CONSUME_RATE 8   /* nb of secret bytes consumed at each accumulation */
#define XXH_ACC_NB (XXH_STRIPE_LEN / sizeof(xxh_u64))

#ifdef XXH_OLD_NAMES
#  define STRIPE_LEN XXH_STRIPE_LEN
#  define ACC_NB XXH_ACC_NB
#endif

#ifndef XXH_PREFETCH_DIST
#  ifdef __clang__
#    define XXH_PREFETCH_DIST 320
#  else
#    if (XXH_VECTOR == XXH_AVX512)
#      define XXH_PREFETCH_DIST 512
#    else
#      define XXH_PREFETCH_DIST 384
#    endif
#  endif  /* __clang__ */
#endif  /* XXH_PREFETCH_DIST */

/*
 * This macro generates an XXH3_accumulate() function.
 * The two arguments select the name suffix and target attribute.
 *
 * The name of this symbol is XXH3_accumulate_<name>() and it calls
 * XXH3_accumulate_512_<name>().
 *
 * It may be useful to hand implement this function if the compiler fails to
 * optimize the inline function.
 */
#define XXH3_ACCUMULATE_TEMPLATE(name)                      \
void                                                        \
XXH3_accumulate_##name(xxh_u64* XXH_RESTRICT acc,           \
                       const xxh_u8* XXH_RESTRICT input,    \
                       const xxh_u8* XXH_RESTRICT secret,   \
                       size_t nbStripes)                    \
{                                                           \
    size_t n;                                               \
    for (n = 0; n < nbStripes; n++ ) {                      \
        const xxh_u8* const in = input + n*XXH_STRIPE_LEN;  \
        XXH_PREFETCH(in + XXH_PREFETCH_DIST);               \
        XXH3_accumulate_512_##name(                         \
                 acc,                                       \
                 in,                                        \
                 secret + n*XXH_SECRET_CONSUME_RATE);       \
    }                                                       \
}
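
/*
 * Expansion sketch (illustration only): modulo line continuations,
 * XXH3_ACCUMULATE_TEMPLATE(scalar) generates the function below, which
 * walks the input stripe by stripe, prefetching ahead and advancing the
 * secret by XXH_SECRET_CONSUME_RATE bytes per stripe.
 */
#if 0
void
XXH3_accumulate_scalar(xxh_u64* XXH_RESTRICT acc,
                       const xxh_u8* XXH_RESTRICT input,
                       const xxh_u8* XXH_RESTRICT secret,
                       size_t nbStripes)
{
    size_t n;
    for (n = 0; n < nbStripes; n++ ) {
        const xxh_u8* const in = input + n*XXH_STRIPE_LEN;
        XXH_PREFETCH(in + XXH_PREFETCH_DIST);
        XXH3_accumulate_512_scalar(acc, in, secret + n*XXH_SECRET_CONSUME_RATE);
    }
}
#endif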

XXH_FORCE_INLINE void XXH_writeLE64(void* dst, xxh_u64 v64)
{
    if (!XXH_CPU_LITTLE_ENDIAN) v64 = XXH_swap64(v64);
    XXH_memcpy(dst, &v64, sizeof(v64));
}

/* Several intrinsic functions below are supposed to accept __int64 as argument,
 * as documented in https://software.intel.com/sites/landingpage/IntrinsicsGuide/ .
 * However, several environments do not define the __int64 type,
 * requiring a workaround.
 */
#if !defined (__VMS) \
  && (defined (__cplusplus) \
  || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
    typedef int64_t xxh_i64;
#else
    /* the following type must have a width of 64-bit */
    typedef long long xxh_i64;
#endif


/*
 * XXH3_accumulate_512 is the tightest loop for long inputs, and it is the most optimized.
 *
 * It is a hardened version of UMAC, based off of FARSH's implementation.
 *
 * This was chosen because it adapts quite well to 32-bit, 64-bit, and SIMD
 * implementations, and it is ridiculously fast.
 *
 * We harden it by mixing the original input into the accumulators as well as the product.
 *
 * This means that in the (relatively likely) case of a multiply by zero, the
 * original input is preserved.
 *
 * On 128-bit inputs, we swap 64-bit pairs when we add the input to improve
 * cross-pollination, as otherwise the upper and lower halves would be
 * essentially independent.
 *
 * This doesn't matter on 64-bit hashes since they all get merged together in
 * the end, so we skip the extra step.
 *
 * Both XXH3_64bits and XXH3_128bits use this subroutine.
 */
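
/*
 * Per-lane scalar sketch of one accumulate step (illustration only, not
 * compiled into the library; the helper name is hypothetical, but the logic
 * mirrors the scalar round used by the XXH_SCALAR code path):
 */
#if 0
static void XXH_example_accumulateLane(xxh_u64* xacc,
                                       const xxh_u8* xinput,
                                       const xxh_u8* xsecret,
                                       size_t lane)
{
    xxh_u64 const data_val = XXH_readLE64(xinput + lane * 8);
    xxh_u64 const data_key = data_val ^ XXH_readLE64(xsecret + lane * 8);
    xacc[lane ^ 1] += data_val;   /* swapped add: the raw input is preserved */
    xacc[lane]     += XXH_mult32to64(data_key & 0xFFFFFFFF, data_key >> 32);
}
#endif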

#if (XXH_VECTOR == XXH_AVX512) \
    || (defined(XXH_DISPATCH_AVX512) && XXH_DISPATCH_AVX512 != 0)

#ifndef XXH_TARGET_AVX512
# define XXH_TARGET_AVX512  /* disable attribute target */
#endif

XXH_FORCE_INLINE XXH_TARGET_AVX512 void
XXH3_accumulate_512_avx512(void* XXH_RESTRICT acc,
                           const void* XXH_RESTRICT input,
                           const void* XXH_RESTRICT secret)
{
    __m512i* const xacc = (__m512i *) acc;
    XXH_ASSERT((((size_t)acc) & 63) == 0);
    XXH_STATIC_ASSERT(XXH_STRIPE_LEN == sizeof(__m512i));

    {
        /* data_vec    = input[0]; */
        __m512i const data_vec    = _mm512_loadu_si512   (input);
        /* key_vec     = secret[0]; */
        __m512i const key_vec     = _mm512_loadu_si512   (secret);
        /* data_key    = data_vec ^ key_vec; */
        __m512i const data_key    = _mm512_xor_si512     (data_vec, key_vec);
        /* data_key_lo = data_key >> 32; */
        __m512i const data_key_lo = _mm512_srli_epi64    (data_key, 32);
        /* product     = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */
        __m512i const product     = _mm512_mul_epu32     (data_key, data_key_lo);
        /* xacc[0] += swap(data_vec); */
        __m512i const data_swap = _mm512_shuffle_epi32(data_vec, (_MM_PERM_ENUM)_MM_SHUFFLE(1, 0, 3, 2));
        __m512i const sum       = _mm512_add_epi64(*xacc, data_swap);
        /* xacc[0] += product; */
        *xacc = _mm512_add_epi64(product, sum);
    }
}
XXH_FORCE_INLINE XXH_TARGET_AVX512 XXH3_ACCUMULATE_TEMPLATE(avx512)

/*
 * XXH3_scrambleAcc: Scrambles the accumulators to improve mixing.
 *
 * Multiplication isn't perfect, as explained by Google in HighwayHash:
 *
 *  // Multiplication mixes/scrambles bytes 0-7 of the 64-bit result to
 *  // varying degrees. In descending order of goodness, bytes
 *  // 3 4 2 5 1 6 0 7 have quality 228 224 164 160 100 96 36 32.
 *  // As expected, the upper and lower bytes are much worse.
 *
 * Source: https://github.com/google/highwayhash/blob/0aaf66b/highwayhash/hh_avx2.h#L291
 *
 * Since our algorithm uses a pseudorandom secret to add some variance into the
 * mix, we don't need to (or want to) mix as often or as much as HighwayHash does.
 *
 * This isn't as tight as XXH3_accumulate, but still written in SIMD to avoid
 * extraction.
 *
 * Both XXH3_64bits and XXH3_128bits use this subroutine.
 */
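
/*
 * Per-lane scalar sketch of the scramble step (illustration only, not
 * compiled into the library; the helper name is hypothetical, but the logic
 * mirrors the scalar scramble round used by the XXH_SCALAR code path):
 */
#if 0
static void XXH_example_scrambleLane(xxh_u64* xacc, const xxh_u8* xsecret, size_t lane)
{
    xxh_u64 acc64 = xacc[lane];
    acc64  = XXH_xorshift64(acc64, 47);          /* acc ^= acc >> 47 */
    acc64 ^= XXH_readLE64(xsecret + lane * 8);   /* acc ^= secret    */
    acc64 *= XXH_PRIME32_1;                      /* acc *= PRIME32_1 */
    xacc[lane] = acc64;
}
#endif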

XXH_FORCE_INLINE XXH_TARGET_AVX512 void
XXH3_scrambleAcc_avx512(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
{
    XXH_ASSERT((((size_t)acc) & 63) == 0);
    XXH_STATIC_ASSERT(XXH_STRIPE_LEN == sizeof(__m512i));
    {   __m512i* const xacc = (__m512i*) acc;
        const __m512i prime32 = _mm512_set1_epi32((int)XXH_PRIME32_1);

        /* xacc[0] ^= (xacc[0] >> 47) */
        __m512i const acc_vec     = *xacc;
        __m512i const shifted     = _mm512_srli_epi64    (acc_vec, 47);
        /* xacc[0] ^= secret; */
        __m512i const key_vec     = _mm512_loadu_si512   (secret);
        __m512i const data_key    = _mm512_ternarylogic_epi32(key_vec, acc_vec, shifted, 0x96 /* key_vec ^ acc_vec ^ shifted */);

        /* xacc[0] *= XXH_PRIME32_1; */
        __m512i const data_key_hi = _mm512_srli_epi64    (data_key, 32);
        __m512i const prod_lo     = _mm512_mul_epu32     (data_key, prime32);
        __m512i const prod_hi     = _mm512_mul_epu32     (data_key_hi, prime32);
        *xacc = _mm512_add_epi64(prod_lo, _mm512_slli_epi64(prod_hi, 32));
    }
}

XXH_FORCE_INLINE XXH_TARGET_AVX512 void
XXH3_initCustomSecret_avx512(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
{
    XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 63) == 0);
    XXH_STATIC_ASSERT(XXH_SEC_ALIGN == 64);
    XXH_ASSERT(((size_t)customSecret & 63) == 0);
    (void)(&XXH_writeLE64);
    {   int const nbRounds = XXH_SECRET_DEFAULT_SIZE / sizeof(__m512i);
        __m512i const seed_pos = _mm512_set1_epi64((xxh_i64)seed64);
        __m512i const seed     = _mm512_mask_sub_epi64(seed_pos, 0xAA, _mm512_set1_epi8(0), seed_pos);

        const __m512i* const src  = (const __m512i*) ((const void*) XXH3_kSecret);
              __m512i* const dest = (      __m512i*) customSecret;
        int i;
        XXH_ASSERT(((size_t)src & 63) == 0);  /* control alignment */
        XXH_ASSERT(((size_t)dest & 63) == 0);
        for (i=0; i < nbRounds; ++i) {
            dest[i] = _mm512_add_epi64(_mm512_load_si512(src + i), seed);
        }
    }
}

#endif
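
/*
 * What the seeded secret derivation computes, in scalar form (illustration
 * only, not compiled into the library; the helper name is hypothetical):
 * seed64 is added to each even 64-bit word of XXH3_kSecret and subtracted
 * from each odd one (the 0xAA lane mask above selects the odd lanes).
 */
#if 0
static void XXH_example_initSecretScalar(xxh_u8* customSecret, xxh_u64 seed64)
{
    int const nbRounds = XXH_SECRET_DEFAULT_SIZE / 16;
    int i;
    for (i = 0; i < nbRounds; i++) {
        XXH_writeLE64(customSecret + 16*i,     XXH_readLE64(XXH3_kSecret + 16*i)     + seed64);
        XXH_writeLE64(customSecret + 16*i + 8, XXH_readLE64(XXH3_kSecret + 16*i + 8) - seed64);
    }
}
#endif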

#if (XXH_VECTOR == XXH_AVX2) \
    || (defined(XXH_DISPATCH_AVX2) && XXH_DISPATCH_AVX2 != 0)

#ifndef XXH_TARGET_AVX2
# define XXH_TARGET_AVX2  /* disable attribute target */
#endif

XXH_FORCE_INLINE XXH_TARGET_AVX2 void
XXH3_accumulate_512_avx2( void* XXH_RESTRICT acc,
                    const void* XXH_RESTRICT input,
                    const void* XXH_RESTRICT secret)
{
    XXH_ASSERT((((size_t)acc) & 31) == 0);
    {   __m256i* const xacc = (__m256i *) acc;
        /* Unaligned. This is mainly for pointer arithmetic, and because
         * _mm256_loadu_si256 requires a const __m256i * pointer for some reason. */
        const __m256i* const xinput  = (const __m256i *) input;
        /* Unaligned. This is mainly for pointer arithmetic, and because
         * _mm256_loadu_si256 requires a const __m256i * pointer for some reason. */
        const __m256i* const xsecret = (const __m256i *) secret;

        size_t i;
        for (i=0; i < XXH_STRIPE_LEN/sizeof(__m256i); i++) {
            /* data_vec    = xinput[i]; */
            __m256i const data_vec    = _mm256_loadu_si256   (xinput+i);
            /* key_vec     = xsecret[i]; */
            __m256i const key_vec     = _mm256_loadu_si256   (xsecret+i);
            /* data_key    = data_vec ^ key_vec; */
            __m256i const data_key    = _mm256_xor_si256     (data_vec, key_vec);
            /* data_key_lo = data_key >> 32; */
            __m256i const data_key_lo = _mm256_srli_epi64    (data_key, 32);
            /* product     = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */
            __m256i const product     = _mm256_mul_epu32     (data_key, data_key_lo);
            /* xacc[i] += swap(data_vec); */
            __m256i const data_swap = _mm256_shuffle_epi32(data_vec, _MM_SHUFFLE(1, 0, 3, 2));
            __m256i const sum       = _mm256_add_epi64(xacc[i], data_swap);
            /* xacc[i] += product; */
            xacc[i] = _mm256_add_epi64(product, sum);
        }
    }
}
XXH_FORCE_INLINE XXH_TARGET_AVX2 XXH3_ACCUMULATE_TEMPLATE(avx2)

XXH_FORCE_INLINE XXH_TARGET_AVX2 void
XXH3_scrambleAcc_avx2(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
{
    XXH_ASSERT((((size_t)acc) & 31) == 0);
    {   __m256i* const xacc = (__m256i*) acc;
        /* Unaligned. This is mainly for pointer arithmetic, and because
         * _mm256_loadu_si256 requires a const __m256i * pointer for some reason. */
        const __m256i* const xsecret = (const __m256i *) secret;
        const __m256i prime32 = _mm256_set1_epi32((int)XXH_PRIME32_1);

        size_t i;
        for (i=0; i < XXH_STRIPE_LEN/sizeof(__m256i); i++) {
            /* xacc[i] ^= (xacc[i] >> 47) */
            __m256i const acc_vec     = xacc[i];
            __m256i const shifted     = _mm256_srli_epi64    (acc_vec, 47);
            __m256i const data_vec    = _mm256_xor_si256     (acc_vec, shifted);
            /* xacc[i] ^= xsecret; */
            __m256i const key_vec     = _mm256_loadu_si256   (xsecret+i);
            __m256i const data_key    = _mm256_xor_si256     (data_vec, key_vec);

            /* xacc[i] *= XXH_PRIME32_1; */
            __m256i const data_key_hi = _mm256_srli_epi64    (data_key, 32);
            __m256i const prod_lo     = _mm256_mul_epu32     (data_key, prime32);
            __m256i const prod_hi     = _mm256_mul_epu32     (data_key_hi, prime32);
            xacc[i] = _mm256_add_epi64(prod_lo, _mm256_slli_epi64(prod_hi, 32));
        }
    }
}

XXH_FORCE_INLINE XXH_TARGET_AVX2 void XXH3_initCustomSecret_avx2(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
{
    XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 31) == 0);
    XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE / sizeof(__m256i)) == 6);
    XXH_STATIC_ASSERT(XXH_SEC_ALIGN <= 64);
    (void)(&XXH_writeLE64);
    XXH_PREFETCH(customSecret);
    {   __m256i const seed = _mm256_set_epi64x((xxh_i64)(0U - seed64), (xxh_i64)seed64, (xxh_i64)(0U - seed64), (xxh_i64)seed64);

        const __m256i* const src  = (const __m256i*) ((const void*) XXH3_kSecret);
              __m256i*       dest = (      __m256i*) customSecret;

#       if defined(__GNUC__) || defined(__clang__)
        /*
         * On GCC & Clang, marking 'dest' as modified causes the compiler to:
         *   - not extract the secret from SSE registers in the internal loop
         *   - use fewer common registers, avoiding pushing them onto the stack
         */
        XXH_COMPILER_GUARD(dest);
#       endif
        XXH_ASSERT(((size_t)src & 31) == 0);  /* control alignment */
        XXH_ASSERT(((size_t)dest & 31) == 0);

        /* GCC -O2 needs the loop unrolled manually. */
        dest[0] = _mm256_add_epi64(_mm256_load_si256(src+0), seed);
        dest[1] = _mm256_add_epi64(_mm256_load_si256(src+1), seed);
        dest[2] = _mm256_add_epi64(_mm256_load_si256(src+2), seed);
        dest[3] = _mm256_add_epi64(_mm256_load_si256(src+3), seed);
        dest[4] = _mm256_add_epi64(_mm256_load_si256(src+4), seed);
        dest[5] = _mm256_add_epi64(_mm256_load_si256(src+5), seed);
    }
}

#endif

/* x86dispatch always generates SSE2 */
#if (XXH_VECTOR == XXH_SSE2) || defined(XXH_X86DISPATCH)

#ifndef XXH_TARGET_SSE2
# define XXH_TARGET_SSE2  /* disable attribute target */
#endif

XXH_FORCE_INLINE XXH_TARGET_SSE2 void
XXH3_accumulate_512_sse2( void* XXH_RESTRICT acc,
                    const void* XXH_RESTRICT input,
                    const void* XXH_RESTRICT secret)
{
    /* SSE2 is just a half-scale version of the AVX2 version. */
    XXH_ASSERT((((size_t)acc) & 15) == 0);
    {   __m128i* const xacc = (__m128i *) acc;
        /* Unaligned. This is mainly for pointer arithmetic, and because
         * _mm_loadu_si128 requires a const __m128i * pointer for some reason. */
        const __m128i* const xinput  = (const __m128i *) input;
        /* Unaligned. This is mainly for pointer arithmetic, and because
         * _mm_loadu_si128 requires a const __m128i * pointer for some reason. */
        const __m128i* const xsecret = (const __m128i *) secret;

        size_t i;
        for (i=0; i < XXH_STRIPE_LEN/sizeof(__m128i); i++) {
            /* data_vec    = xinput[i]; */
            __m128i const data_vec    = _mm_loadu_si128   (xinput+i);
            /* key_vec     = xsecret[i]; */
            __m128i const key_vec     = _mm_loadu_si128   (xsecret+i);
            /* data_key    = data_vec ^ key_vec; */
            __m128i const data_key    = _mm_xor_si128     (data_vec, key_vec);
            /* data_key_lo = data_key >> 32; */
            __m128i const data_key_lo = _mm_shuffle_epi32 (data_key, _MM_SHUFFLE(0, 3, 0, 1));
            /* product     = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */
            __m128i const product     = _mm_mul_epu32     (data_key, data_key_lo);
            /* xacc[i] += swap(data_vec); */
            __m128i const data_swap = _mm_shuffle_epi32(data_vec, _MM_SHUFFLE(1,0,3,2));
            __m128i const sum       = _mm_add_epi64(xacc[i], data_swap);
            /* xacc[i] += product; */
            xacc[i] = _mm_add_epi64(product, sum);
        }
    }
}
XXH_FORCE_INLINE XXH_TARGET_SSE2 XXH3_ACCUMULATE_TEMPLATE(sse2)

XXH_FORCE_INLINE XXH_TARGET_SSE2 void
XXH3_scrambleAcc_sse2(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
{
    XXH_ASSERT((((size_t)acc) & 15) == 0);
    {   __m128i* const xacc = (__m128i*) acc;
        /* Unaligned. This is mainly for pointer arithmetic, and because
         * _mm_loadu_si128 requires a const __m128i * pointer for some reason. */
        const __m128i* const xsecret = (const __m128i *) secret;
        const __m128i prime32 = _mm_set1_epi32((int)XXH_PRIME32_1);

        size_t i;
        for (i=0; i < XXH_STRIPE_LEN/sizeof(__m128i); i++) {
            /* xacc[i] ^= (xacc[i] >> 47) */
            __m128i const acc_vec     = xacc[i];
            __m128i const shifted     = _mm_srli_epi64    (acc_vec, 47);
            __m128i const data_vec    = _mm_xor_si128     (acc_vec, shifted);
            /* xacc[i] ^= xsecret[i]; */
            __m128i const key_vec     = _mm_loadu_si128   (xsecret+i);
            __m128i const data_key    = _mm_xor_si128     (data_vec, key_vec);

            /* xacc[i] *= XXH_PRIME32_1; */
            __m128i const data_key_hi = _mm_shuffle_epi32 (data_key, _MM_SHUFFLE(0, 3, 0, 1));
            __m128i const prod_lo     = _mm_mul_epu32     (data_key, prime32);
            __m128i const prod_hi     = _mm_mul_epu32     (data_key_hi, prime32);
            xacc[i] = _mm_add_epi64(prod_lo, _mm_slli_epi64(prod_hi, 32));
        }
    }
}

XXH_FORCE_INLINE XXH_TARGET_SSE2 void XXH3_initCustomSecret_sse2(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
{
    XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 15) == 0);
    (void)(&XXH_writeLE64);
    {   int const nbRounds = XXH_SECRET_DEFAULT_SIZE / sizeof(__m128i);

#       if defined(_MSC_VER) && defined(_M_IX86) && _MSC_VER < 1900
        /* MSVC 32-bit mode does not support _mm_set_epi64x before 2015 */
        XXH_ALIGN(16) const xxh_i64 seed64x2[2] = { (xxh_i64)seed64, (xxh_i64)(0U - seed64) };
        __m128i const seed = _mm_load_si128((__m128i const*)seed64x2);
#       else
        __m128i const seed = _mm_set_epi64x((xxh_i64)(0U - seed64), (xxh_i64)seed64);
#       endif
        int i;

        const void* const src16 = XXH3_kSecret;
        __m128i* dst16 = (__m128i*) customSecret;
#       if defined(__GNUC__) || defined(__clang__)
        /*
         * On GCC & Clang, marking 'dest' as modified causes the compiler to:
         *   - not extract the secret from SSE registers in the internal loop
         *   - use fewer common registers, avoiding pushing them onto the stack
         */
        XXH_COMPILER_GUARD(dst16);
#       endif
        XXH_ASSERT(((size_t)src16 & 15) == 0);  /* control alignment */
        XXH_ASSERT(((size_t)dst16 & 15) == 0);

        for (i=0; i < nbRounds; ++i) {
            dst16[i] = _mm_add_epi64(_mm_load_si128((const __m128i *)src16+i), seed);
        }
    }
}

#endif
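
/*
 * A note on the SSE2 shuffles above (illustration only, not compiled into
 * the library; the helper name is hypothetical): _mm_mul_epu32 reads the
 * even 32-bit lanes {0, 2}, so _MM_SHUFFLE(0, 3, 0, 1) is used to place the
 * odd (high) 32-bit word of each 64-bit lane into those positions.
 */
#if 0
#include <assert.h>
static void XXH_example_sse2_shuffle(void)
{
    /* 64-bit lanes { 0x0000000200000003, 0x0000000500000007 } */
    __m128i const x        = _mm_set_epi32(5, 7, 2, 3);
    __m128i const hi_lanes = _mm_shuffle_epi32(x, _MM_SHUFFLE(0, 3, 0, 1));
    /* lanes 0 and 2 now hold the high words; multiply low*high per lane */
    __m128i const product  = _mm_mul_epu32(x, hi_lanes);
    xxh_u64 out[2];
    _mm_storeu_si128((__m128i*)out, product);
    assert(out[0] == 6 && out[1] == 35);   /* 3*2 and 7*5 */
}
#endif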

#if (XXH_VECTOR == XXH_NEON)

/* forward declarations for the scalar routines */
XXH_FORCE_INLINE void
XXH3_scalarRound(void* XXH_RESTRICT acc, void const* XXH_RESTRICT input,
                 void const* XXH_RESTRICT secret, size_t lane);

XXH_FORCE_INLINE void
XXH3_scalarScrambleRound(void* XXH_RESTRICT acc,
                         void const* XXH_RESTRICT secret, size_t lane);

/*!
 * @internal
 * @brief The bulk processing loop for NEON and WASM SIMD128.
 *
 * The NEON code path is actually partially scalar when running on AArch64. This
 * is to optimize the pipelining and can have up to 15% speedup depending on the
 * CPU, and it also mitigates some GCC codegen issues.
 *
 * @see XXH3_NEON_LANES for configuring this and details about this optimization.
 *
 * NEON's 32-bit to 64-bit long multiply takes a half vector of 32-bit
 * integers instead of the other platforms, which mask full 64-bit vectors,
 * so the setup is more complicated than just shifting right.
 *
 * Additionally, there is an optimization for 4 lanes at once noted below.
 *
 * Since, as stated, the most optimal amount of lanes for Cortexes is 6,
 * there need to be *three* versions of the accumulate operation used
 * for the remaining 2 lanes.
 *
 * WASM's SIMD128 uses SIMDe's arm_neon.h polyfill because the intrinsics overlap
 * nearly perfectly.
 */

XXH_FORCE_INLINE void
XXH3_accumulate_512_neon( void* XXH_RESTRICT acc,
                    const void* XXH_RESTRICT input,
                    const void* XXH_RESTRICT secret)
{
    XXH_ASSERT((((size_t)acc) & 15) == 0);
    XXH_STATIC_ASSERT(XXH3_NEON_LANES > 0 && XXH3_NEON_LANES <= XXH_ACC_NB && XXH3_NEON_LANES % 2 == 0);
    {   /* GCC for darwin arm64 does not like aliasing here */
        xxh_aliasing_uint64x2_t* const xacc = (xxh_aliasing_uint64x2_t*) acc;
        /* We don't use a uint32x4_t pointer because it causes bus errors on ARMv7. */
        uint8_t const* xinput  = (const uint8_t *) input;
        uint8_t const* xsecret = (const uint8_t *) secret;

        size_t i;
#ifdef __wasm_simd128__
        /*
         * On WASM SIMD128, Clang emits direct address loads when XXH3_kSecret
         * is constant propagated, which results in it converting it to this
         * inside the loop:
         *
         *    a = v128.load(XXH3_kSecret +  0 + $secret_offset, offset = 0)
         *    b = v128.load(XXH3_kSecret + 16 + $secret_offset, offset = 0)
         *    ...
         *
         * This requires a full 32-bit address immediate (and therefore a 6 byte
         * instruction) as well as an add for each offset.
         *
         * Putting an asm guard prevents it from folding (at the cost of losing
         * the alignment hint), and uses the free offset in `v128.load` instead
         * of adding secret_offset each time, which overall reduces code size by
         * about a kilobyte and improves performance.
         */
        XXH_COMPILER_GUARD(xsecret);
#endif
        /* Scalar lanes use the normal scalarRound routine */
        for (i = XXH3_NEON_LANES; i < XXH_ACC_NB; i++) {
            XXH3_scalarRound(acc, input, secret, i);
        }
        i = 0;
        /* 4 NEON lanes at a time. */
        for (; i+1 < XXH3_NEON_LANES / 2; i+=2) {
            /* data_vec = xinput[i]; */
            uint64x2_t data_vec_1 = XXH_vld1q_u64(xinput  + (i * 16));
            uint64x2_t data_vec_2 = XXH_vld1q_u64(xinput  + ((i+1) * 16));
            /* key_vec  = xsecret[i]; */
            uint64x2_t key_vec_1  = XXH_vld1q_u64(xsecret + (i * 16));
            uint64x2_t key_vec_2  = XXH_vld1q_u64(xsecret + ((i+1) * 16));
            /* data_swap = swap(data_vec) */
            uint64x2_t data_swap_1 = vextq_u64(data_vec_1, data_vec_1, 1);
            uint64x2_t data_swap_2 = vextq_u64(data_vec_2, data_vec_2, 1);
            /* data_key = data_vec ^ key_vec; */
            uint64x2_t data_key_1 = veorq_u64(data_vec_1, key_vec_1);
            uint64x2_t data_key_2 = veorq_u64(data_vec_2, key_vec_2);

            /*
             * If we reinterpret the 64x2 vectors as 32x4 vectors, we can use a
             * de-interleave operation for 4 lanes in 1 step with `vuzpq_u32` to
             * get one vector with the low 32 bits of each lane, and one vector
             * with the high 32 bits of each lane.
             *
             * The intrinsic returns a double vector because the original ARMv7-a
             * instruction modified both arguments in place. AArch64 and SIMD128 emit
             * two instructions from this intrinsic.
             *
             *  [ dk11L | dk11H | dk12L | dk12H ] -> [ dk11L | dk12L | dk21L | dk22L ]
             *  [ dk21L | dk21H | dk22L | dk22H ] -> [ dk11H | dk12H | dk21H | dk22H ]
             */
            uint32x4x2_t unzipped = vuzpq_u32(
                vreinterpretq_u32_u64(data_key_1),
                vreinterpretq_u32_u64(data_key_2)
            );
            /* data_key_lo = data_key & 0xFFFFFFFF */
            uint32x4_t data_key_lo = unzipped.val[0];
            /* data_key_hi = data_key >> 32 */
            uint32x4_t data_key_hi = unzipped.val[1];
5242*3117ece4Schristos * 5243*3117ece4Schristos * sum = data_swap + (u64x2) data_key_lo * (u64x2) data_key_hi 5244*3117ece4Schristos */ 5245*3117ece4Schristos uint64x2_t sum_1 = XXH_vmlal_low_u32(data_swap_1, data_key_lo, data_key_hi); 5246*3117ece4Schristos uint64x2_t sum_2 = XXH_vmlal_high_u32(data_swap_2, data_key_lo, data_key_hi); 5247*3117ece4Schristos /* 5248*3117ece4Schristos * Clang reorders 5249*3117ece4Schristos * a += b * c; // umlal swap.2d, dkl.2s, dkh.2s 5250*3117ece4Schristos * c += a; // add acc.2d, acc.2d, swap.2d 5251*3117ece4Schristos * to 5252*3117ece4Schristos * c += a; // add acc.2d, acc.2d, swap.2d 5253*3117ece4Schristos * c += b * c; // umlal acc.2d, dkl.2s, dkh.2s 5254*3117ece4Schristos * 5255*3117ece4Schristos * While it would make sense in theory since the addition is faster, 5256*3117ece4Schristos * for reasons likely related to umlal being limited to certain NEON 5257*3117ece4Schristos * pipelines, this is worse. A compiler guard fixes this. 5258*3117ece4Schristos */ 5259*3117ece4Schristos XXH_COMPILER_GUARD_CLANG_NEON(sum_1); 5260*3117ece4Schristos XXH_COMPILER_GUARD_CLANG_NEON(sum_2); 5261*3117ece4Schristos /* xacc[i] = acc_vec + sum; */ 5262*3117ece4Schristos xacc[i] = vaddq_u64(xacc[i], sum_1); 5263*3117ece4Schristos xacc[i+1] = vaddq_u64(xacc[i+1], sum_2); 5264*3117ece4Schristos } 5265*3117ece4Schristos /* Operate on the remaining NEON lanes 2 at a time. */ 5266*3117ece4Schristos for (; i < XXH3_NEON_LANES / 2; i++) { 5267*3117ece4Schristos /* data_vec = xinput[i]; */ 5268*3117ece4Schristos uint64x2_t data_vec = XXH_vld1q_u64(xinput + (i * 16)); 5269*3117ece4Schristos /* key_vec = xsecret[i]; */ 5270*3117ece4Schristos uint64x2_t key_vec = XXH_vld1q_u64(xsecret + (i * 16)); 5271*3117ece4Schristos /* acc_vec_2 = swap(data_vec) */ 5272*3117ece4Schristos uint64x2_t data_swap = vextq_u64(data_vec, data_vec, 1); 5273*3117ece4Schristos /* data_key = data_vec ^ key_vec; */ 5274*3117ece4Schristos uint64x2_t data_key = veorq_u64(data_vec, key_vec); 5275*3117ece4Schristos /* For two lanes, just use VMOVN and VSHRN. */ 5276*3117ece4Schristos /* data_key_lo = data_key & 0xFFFFFFFF; */ 5277*3117ece4Schristos uint32x2_t data_key_lo = vmovn_u64(data_key); 5278*3117ece4Schristos /* data_key_hi = data_key >> 32; */ 5279*3117ece4Schristos uint32x2_t data_key_hi = vshrn_n_u64(data_key, 32); 5280*3117ece4Schristos /* sum = data_swap + (u64x2) data_key_lo * (u64x2) data_key_hi; */ 5281*3117ece4Schristos uint64x2_t sum = vmlal_u32(data_swap, data_key_lo, data_key_hi); 5282*3117ece4Schristos /* Same Clang workaround as before */ 5283*3117ece4Schristos XXH_COMPILER_GUARD_CLANG_NEON(sum); 5284*3117ece4Schristos /* xacc[i] = acc_vec + sum; */ 5285*3117ece4Schristos xacc[i] = vaddq_u64 (xacc[i], sum); 5286*3117ece4Schristos } 5287*3117ece4Schristos } 5288*3117ece4Schristos } 5289*3117ece4Schristos XXH_FORCE_INLINE XXH3_ACCUMULATE_TEMPLATE(neon) 5290*3117ece4Schristos 5291*3117ece4Schristos XXH_FORCE_INLINE void 5292*3117ece4Schristos XXH3_scrambleAcc_neon(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret) 5293*3117ece4Schristos { 5294*3117ece4Schristos XXH_ASSERT((((size_t)acc) & 15) == 0); 5295*3117ece4Schristos 5296*3117ece4Schristos { xxh_aliasing_uint64x2_t* xacc = (xxh_aliasing_uint64x2_t*) acc; 5297*3117ece4Schristos uint8_t const* xsecret = (uint8_t const*) secret; 5298*3117ece4Schristos 5299*3117ece4Schristos size_t i; 5300*3117ece4Schristos /* WASM uses operator overloads and doesn't need these. 
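/*
 * Illustrative sketch (not compiled): the scalar math that each 2-lane NEON
 * group above vectorizes. The helper name is hypothetical; only the
 * arithmetic matters: each 64-bit lane accumulates its neighbor's data word
 * plus the 32x32->64 product of the low and high halves of data ^ key.
 */
#if 0
static void XXH3_neon_lane_pair_model(uint64_t acc[2],
                                      const uint64_t data[2],
                                      const uint64_t key[2])
{
    size_t j;
    for (j = 0; j < 2; j++) {
        uint64_t const data_key = data[j] ^ key[j];
        uint64_t const swapped  = data[j ^ 1];            /* vextq_u64(v, v, 1) */
        uint64_t const product  = (uint64_t)(uint32_t)data_key
                                * (data_key >> 32);       /* vmlal_u32 */
        acc[j] += swapped + product;
    }
}
#endif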
XXH_FORCE_INLINE void
XXH3_scrambleAcc_neon(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
{
    XXH_ASSERT((((size_t)acc) & 15) == 0);

    {   xxh_aliasing_uint64x2_t* xacc = (xxh_aliasing_uint64x2_t*) acc;
        uint8_t const* xsecret = (uint8_t const*) secret;

        size_t i;
        /* WASM uses operator overloads and doesn't need these. */
#ifndef __wasm_simd128__
        /* { prime32_1, prime32_1 } */
        uint32x2_t const kPrimeLo = vdup_n_u32(XXH_PRIME32_1);
        /* { 0, prime32_1, 0, prime32_1 } */
        uint32x4_t const kPrimeHi = vreinterpretq_u32_u64(vdupq_n_u64((xxh_u64)XXH_PRIME32_1 << 32));
#endif

        /* AArch64 uses both scalar and neon at the same time */
        for (i = XXH3_NEON_LANES; i < XXH_ACC_NB; i++) {
            XXH3_scalarScrambleRound(acc, secret, i);
        }
        for (i=0; i < XXH3_NEON_LANES / 2; i++) {
            /* xacc[i] ^= (xacc[i] >> 47); */
            uint64x2_t acc_vec  = xacc[i];
            uint64x2_t shifted  = vshrq_n_u64(acc_vec, 47);
            uint64x2_t data_vec = veorq_u64(acc_vec, shifted);

            /* xacc[i] ^= xsecret[i]; */
            uint64x2_t key_vec  = XXH_vld1q_u64(xsecret + (i * 16));
            uint64x2_t data_key = veorq_u64(data_vec, key_vec);
            /* xacc[i] *= XXH_PRIME32_1 */
#ifdef __wasm_simd128__
            /* SIMD128 has multiply by u64x2, use it instead of expanding and scalarizing */
            xacc[i] = data_key * XXH_PRIME32_1;
#else
            /*
             * Expanded version with portable NEON intrinsics
             *
             *    lo(x) * lo(y) + (hi(x) * lo(y) << 32)
             *
             * prod_hi = hi(data_key) * lo(prime) << 32
             *
             * Since we only need 32 bits of this multiply a trick can be used, reinterpreting the vector
             * as a uint32x4_t and multiplying by { 0, prime, 0, prime } to cancel out the unwanted bits
             * and avoid the shift.
             */
            uint32x4_t prod_hi = vmulq_u32 (vreinterpretq_u32_u64(data_key), kPrimeHi);
            /* Extract low bits for vmlal_u32 */
            uint32x2_t data_key_lo = vmovn_u64(data_key);
            /* xacc[i] = prod_hi + lo(data_key) * XXH_PRIME32_1; */
            xacc[i] = vmlal_u32(vreinterpretq_u64_u32(prod_hi), data_key_lo, kPrimeLo);
#endif
        }
    }
}
#endif
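/*
 * Illustrative sketch (not compiled): the 64x32 -> 64-bit multiply that the
 * NEON scramble above decomposes. Only the low 64 bits of x * p are kept,
 * so hi(x) * p only needs its low 32 bits, which is exactly what the
 * { 0, prime, 0, prime } reinterpret trick computes without a shift.
 */
#if 0
static uint64_t XXH3_mult64x32_model(uint64_t x, uint32_t p)
{
    uint64_t const lo = (uint64_t)(uint32_t)x * p;          /* vmlal_u32 part */
    uint64_t const hi = (uint64_t)(uint32_t)(x >> 32) * p;  /* kPrimeHi part  */
    return lo + (hi << 32);   /* == x * p modulo 2^64 */
}
#endif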
#if (XXH_VECTOR == XXH_VSX)

XXH_FORCE_INLINE void
XXH3_accumulate_512_vsx(  void* XXH_RESTRICT acc,
                    const void* XXH_RESTRICT input,
                    const void* XXH_RESTRICT secret)
{
    /* presumed aligned */
    xxh_aliasing_u64x2* const xacc = (xxh_aliasing_u64x2*) acc;
    xxh_u8 const* const xinput  = (xxh_u8 const*) input;   /* no alignment restriction */
    xxh_u8 const* const xsecret = (xxh_u8 const*) secret;  /* no alignment restriction */
    xxh_u64x2 const v32 = { 32, 32 };
    size_t i;
    for (i = 0; i < XXH_STRIPE_LEN / sizeof(xxh_u64x2); i++) {
        /* data_vec = xinput[i]; */
        xxh_u64x2 const data_vec = XXH_vec_loadu(xinput + 16*i);
        /* key_vec = xsecret[i]; */
        xxh_u64x2 const key_vec  = XXH_vec_loadu(xsecret + 16*i);
        xxh_u64x2 const data_key = data_vec ^ key_vec;
        /* shuffled = (data_key << 32) | (data_key >> 32); */
        xxh_u32x4 const shuffled = (xxh_u32x4)vec_rl(data_key, v32);
        /* product = ((xxh_u64x2)data_key & 0xFFFFFFFF) * ((xxh_u64x2)shuffled & 0xFFFFFFFF); */
        xxh_u64x2 const product  = XXH_vec_mulo((xxh_u32x4)data_key, shuffled);
        /* acc_vec = xacc[i]; */
        xxh_u64x2 acc_vec        = xacc[i];
        acc_vec += product;

        /* swap high and low halves */
#ifdef __s390x__
        acc_vec += vec_permi(data_vec, data_vec, 2);
#else
        acc_vec += vec_xxpermdi(data_vec, data_vec, 2);
#endif
        xacc[i] = acc_vec;
    }
}
XXH_FORCE_INLINE XXH3_ACCUMULATE_TEMPLATE(vsx)

XXH_FORCE_INLINE void
XXH3_scrambleAcc_vsx(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
{
    XXH_ASSERT((((size_t)acc) & 15) == 0);

    {   xxh_aliasing_u64x2* const xacc = (xxh_aliasing_u64x2*) acc;
        const xxh_u8* const xsecret = (const xxh_u8*) secret;
        /* constants */
        xxh_u64x2 const v32 = { 32, 32 };
        xxh_u64x2 const v47 = { 47, 47 };
        xxh_u32x4 const prime = { XXH_PRIME32_1, XXH_PRIME32_1, XXH_PRIME32_1, XXH_PRIME32_1 };
        size_t i;
        for (i = 0; i < XXH_STRIPE_LEN / sizeof(xxh_u64x2); i++) {
            /* xacc[i] ^= (xacc[i] >> 47); */
            xxh_u64x2 const acc_vec  = xacc[i];
            xxh_u64x2 const data_vec = acc_vec ^ (acc_vec >> v47);

            /* xacc[i] ^= xsecret[i]; */
            xxh_u64x2 const key_vec  = XXH_vec_loadu(xsecret + 16*i);
            xxh_u64x2 const data_key = data_vec ^ key_vec;

            /* xacc[i] *= XXH_PRIME32_1 */
            /* prod_lo = ((xxh_u64x2)data_key & 0xFFFFFFFF) * ((xxh_u64x2)prime & 0xFFFFFFFF); */
            xxh_u64x2 const prod_even = XXH_vec_mule((xxh_u32x4)data_key, prime);
            /* prod_hi = ((xxh_u64x2)data_key >> 32) * ((xxh_u64x2)prime >> 32); */
            xxh_u64x2 const prod_odd  = XXH_vec_mulo((xxh_u32x4)data_key, prime);
            xacc[i] = prod_odd + (prod_even << v32);
    }   }
}

#endif
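/*
 * Illustrative sketch (not compiled): the scalar recombination performed by
 * the VSX scramble above. XXH_vec_mule/XXH_vec_mulo pair up the 32-bit
 * halves of each 64-bit element (which half is "even" vs "odd" depends on
 * endianness, hence the wrappers); arithmetically, each lane computes:
 */
#if 0
static uint64_t XXH3_vsx_scramble_lane_model(uint64_t acc, uint64_t key)
{
    uint64_t v = acc ^ (acc >> 47);
    v ^= key;
    /* prod_odd + (prod_even << 32) is a plain 64-bit multiply by the prime */
    return v * (uint64_t)XXH_PRIME32_1;
}
#endif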
#if (XXH_VECTOR == XXH_SVE)

XXH_FORCE_INLINE void
XXH3_accumulate_512_sve( void* XXH_RESTRICT acc,
                   const void* XXH_RESTRICT input,
                   const void* XXH_RESTRICT secret)
{
    uint64_t *xacc = (uint64_t *)acc;
    const uint64_t *xinput = (const uint64_t *)(const void *)input;
    const uint64_t *xsecret = (const uint64_t *)(const void *)secret;
    svuint64_t kSwap = sveor_n_u64_z(svptrue_b64(), svindex_u64(0, 1), 1);
    uint64_t element_count = svcntd();
    if (element_count >= 8) {
        svbool_t mask = svptrue_pat_b64(SV_VL8);
        svuint64_t vacc = svld1_u64(mask, xacc);
        ACCRND(vacc, 0);
        svst1_u64(mask, xacc, vacc);
    } else if (element_count == 2) {   /* sve128 */
        svbool_t mask = svptrue_pat_b64(SV_VL2);
        svuint64_t acc0 = svld1_u64(mask, xacc + 0);
        svuint64_t acc1 = svld1_u64(mask, xacc + 2);
        svuint64_t acc2 = svld1_u64(mask, xacc + 4);
        svuint64_t acc3 = svld1_u64(mask, xacc + 6);
        ACCRND(acc0, 0);
        ACCRND(acc1, 2);
        ACCRND(acc2, 4);
        ACCRND(acc3, 6);
        svst1_u64(mask, xacc + 0, acc0);
        svst1_u64(mask, xacc + 2, acc1);
        svst1_u64(mask, xacc + 4, acc2);
        svst1_u64(mask, xacc + 6, acc3);
    } else {
        svbool_t mask = svptrue_pat_b64(SV_VL4);
        svuint64_t acc0 = svld1_u64(mask, xacc + 0);
        svuint64_t acc1 = svld1_u64(mask, xacc + 4);
        ACCRND(acc0, 0);
        ACCRND(acc1, 4);
        svst1_u64(mask, xacc + 0, acc0);
        svst1_u64(mask, xacc + 4, acc1);
    }
}
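/*
 * Illustrative sketch (not compiled): SVE vectors have a run-time width, so
 * the function above queries svcntd() (the number of 64-bit elements per
 * vector) and splits the 8 accumulator lanes accordingly:
 */
#if 0
uint64_t const element_count = svcntd();
if (element_count >= 8) {
    /* >= 512-bit vectors: all 8 lanes in one register */
} else if (element_count == 2) {
    /* 128-bit vectors: four 2-lane groups */
} else {
    /* 256-bit vectors: two 4-lane groups */
}
#endif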
XXH_FORCE_INLINE void
XXH3_accumulate_sve(xxh_u64* XXH_RESTRICT acc,
               const xxh_u8* XXH_RESTRICT input,
               const xxh_u8* XXH_RESTRICT secret,
               size_t nbStripes)
{
    if (nbStripes != 0) {
        uint64_t *xacc = (uint64_t *)acc;
        const uint64_t *xinput = (const uint64_t *)(const void *)input;
        const uint64_t *xsecret = (const uint64_t *)(const void *)secret;
        svuint64_t kSwap = sveor_n_u64_z(svptrue_b64(), svindex_u64(0, 1), 1);
        uint64_t element_count = svcntd();
        if (element_count >= 8) {
            svbool_t mask = svptrue_pat_b64(SV_VL8);
            svuint64_t vacc = svld1_u64(mask, xacc + 0);
            do {
                /* svprfd(svbool_t, void *, enum svfprop); */
                svprfd(mask, xinput + 128, SV_PLDL1STRM);
                ACCRND(vacc, 0);
                xinput += 8;
                xsecret += 1;
                nbStripes--;
            } while (nbStripes != 0);

            svst1_u64(mask, xacc + 0, vacc);
        } else if (element_count == 2) { /* sve128 */
            svbool_t mask = svptrue_pat_b64(SV_VL2);
            svuint64_t acc0 = svld1_u64(mask, xacc + 0);
            svuint64_t acc1 = svld1_u64(mask, xacc + 2);
            svuint64_t acc2 = svld1_u64(mask, xacc + 4);
            svuint64_t acc3 = svld1_u64(mask, xacc + 6);
            do {
                svprfd(mask, xinput + 128, SV_PLDL1STRM);
                ACCRND(acc0, 0);
                ACCRND(acc1, 2);
                ACCRND(acc2, 4);
                ACCRND(acc3, 6);
                xinput += 8;
                xsecret += 1;
                nbStripes--;
            } while (nbStripes != 0);

            svst1_u64(mask, xacc + 0, acc0);
            svst1_u64(mask, xacc + 2, acc1);
            svst1_u64(mask, xacc + 4, acc2);
            svst1_u64(mask, xacc + 6, acc3);
        } else {
            svbool_t mask = svptrue_pat_b64(SV_VL4);
            svuint64_t acc0 = svld1_u64(mask, xacc + 0);
            svuint64_t acc1 = svld1_u64(mask, xacc + 4);
            do {
                svprfd(mask, xinput + 128, SV_PLDL1STRM);
                ACCRND(acc0, 0);
                ACCRND(acc1, 4);
                xinput += 8;
                xsecret += 1;
                nbStripes--;
            } while (nbStripes != 0);

            svst1_u64(mask, xacc + 0, acc0);
            svst1_u64(mask, xacc + 4, acc1);
        }
    }
}

#endif

/* scalar variants - universal */
#if defined(__aarch64__) && (defined(__GNUC__) || defined(__clang__))
/*
 * In XXH3_scalarRound(), GCC and Clang have a similar codegen issue, where they
 * emit an excess mask and a full 64-bit multiply-add (MADD X-form).
 *
 * While this might not seem like much, as AArch64 is a 64-bit architecture, only
 * big Cortex designs have a full 64-bit multiplier.
 *
 * On the little cores, the smaller 32-bit multiplier is used, and full 64-bit
 * multiplies expand to 2-3 multiplies in microcode. This has a major penalty
 * of up to 4 latency cycles and 2 stall cycles in the multiply pipeline.
 *
 * Thankfully, AArch64 still provides the 32-bit long multiply-add (UMADDL) which does
 * not have this penalty and does the mask automatically.
 */
XXH_FORCE_INLINE xxh_u64
XXH_mult32to64_add64(xxh_u64 lhs, xxh_u64 rhs, xxh_u64 acc)
{
    xxh_u64 ret;
    /* note: %x = 64-bit register, %w = 32-bit register */
    __asm__("umaddl %x0, %w1, %w2, %x3" : "=r" (ret) : "r" (lhs), "r" (rhs), "r" (acc));
    return ret;
}
#else
XXH_FORCE_INLINE xxh_u64
XXH_mult32to64_add64(xxh_u64 lhs, xxh_u64 rhs, xxh_u64 acc)
{
    return XXH_mult32to64((xxh_u32)lhs, (xxh_u32)rhs) + acc;
}
#endif

/*!
 * @internal
 * @brief Scalar round for @ref XXH3_accumulate_512_scalar().
 *
 * This is extracted to its own function because the NEON path uses a combination
 * of NEON and scalar.
 */
XXH_FORCE_INLINE void
XXH3_scalarRound(void* XXH_RESTRICT acc,
                 void const* XXH_RESTRICT input,
                 void const* XXH_RESTRICT secret,
                 size_t lane)
{
    xxh_u64* xacc = (xxh_u64*) acc;
    xxh_u8 const* xinput  = (xxh_u8 const*) input;
    xxh_u8 const* xsecret = (xxh_u8 const*) secret;
    XXH_ASSERT(lane < XXH_ACC_NB);
    XXH_ASSERT(((size_t)acc & (XXH_ACC_ALIGN-1)) == 0);
    {
        xxh_u64 const data_val = XXH_readLE64(xinput + lane * 8);
        xxh_u64 const data_key = data_val ^ XXH_readLE64(xsecret + lane * 8);
        xacc[lane ^ 1] += data_val; /* swap adjacent lanes */
        xacc[lane] = XXH_mult32to64_add64(data_key /* & 0xFFFFFFFF */, data_key >> 32, xacc[lane]);
    }
}
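/*
 * Illustrative check (not compiled): both XXH_mult32to64_add64 variants
 * compute acc + (lhs mod 2^32) * (rhs mod 2^32); the inline asm merely
 * forces the single-instruction UMADDL form that compilers tend to miss.
 */
#if 0
static void XXH_mult32to64_add64_model(void)
{
    xxh_u64 const lhs = 0x123456789ABCDEF0ULL;
    xxh_u64 const rhs = 0x0FEDCBA987654321ULL;
    xxh_u64 const acc = 42;
    XXH_ASSERT(XXH_mult32to64_add64(lhs, rhs, acc)
            == (xxh_u64)(xxh_u32)lhs * (xxh_u32)rhs + acc);
}
#endif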
/*!
 * @internal
 * @brief Processes a 64 byte block of data using the scalar path.
 */
XXH_FORCE_INLINE void
XXH3_accumulate_512_scalar(void* XXH_RESTRICT acc,
                     const void* XXH_RESTRICT input,
                     const void* XXH_RESTRICT secret)
{
    size_t i;
    /* ARM GCC refuses to unroll this loop, resulting in a 24% slowdown on ARMv6. */
#if defined(__GNUC__) && !defined(__clang__) \
  && (defined(__arm__) || defined(__thumb2__)) \
  && defined(__ARM_FEATURE_UNALIGNED) /* no unaligned access just wastes bytes */ \
  && XXH_SIZE_OPT <= 0
#  pragma GCC unroll 8
#endif
    for (i=0; i < XXH_ACC_NB; i++) {
        XXH3_scalarRound(acc, input, secret, i);
    }
}
XXH_FORCE_INLINE XXH3_ACCUMULATE_TEMPLATE(scalar)

/*!
 * @internal
 * @brief Scalar scramble step for @ref XXH3_scrambleAcc_scalar().
 *
 * This is extracted to its own function because the NEON path uses a combination
 * of NEON and scalar.
 */
XXH_FORCE_INLINE void
XXH3_scalarScrambleRound(void* XXH_RESTRICT acc,
                         void const* XXH_RESTRICT secret,
                         size_t lane)
{
    xxh_u64* const xacc = (xxh_u64*) acc;   /* presumed aligned */
    const xxh_u8* const xsecret = (const xxh_u8*) secret;   /* no alignment restriction */
    XXH_ASSERT((((size_t)acc) & (XXH_ACC_ALIGN-1)) == 0);
    XXH_ASSERT(lane < XXH_ACC_NB);
    {
        xxh_u64 const key64 = XXH_readLE64(xsecret + lane * 8);
        xxh_u64 acc64 = xacc[lane];
        acc64 = XXH_xorshift64(acc64, 47);
        acc64 ^= key64;
        acc64 *= XXH_PRIME32_1;
        xacc[lane] = acc64;
    }
}

/*!
 * @internal
 * @brief Scrambles the accumulators after a large chunk has been read
 */
XXH_FORCE_INLINE void
XXH3_scrambleAcc_scalar(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
{
    size_t i;
    for (i=0; i < XXH_ACC_NB; i++) {
        XXH3_scalarScrambleRound(acc, secret, i);
    }
}
XXH_FORCE_INLINE void
XXH3_initCustomSecret_scalar(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
{
    /*
     * We need a separate pointer for the hack below,
     * which requires a non-const pointer.
     * Any decent compiler will optimize this out otherwise.
     */
    const xxh_u8* kSecretPtr = XXH3_kSecret;
    XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 15) == 0);

#if defined(__GNUC__) && defined(__aarch64__)
    /*
     * UGLY HACK:
     * GCC and Clang generate a bunch of MOV/MOVK pairs for aarch64, and they are
     * placed sequentially, in order, at the top of the unrolled loop.
     *
     * While MOVK is great for generating constants (2 cycles for a 64-bit
     * constant compared to 4 cycles for LDR), it fights for bandwidth with
     * the arithmetic instructions.
     *
     *   I   L   S
     *  MOVK
     *  MOVK
     *  MOVK
     *  MOVK
     *  ADD
     *  SUB      STR
     *           STR
     * By forcing loads from memory (as the asm line causes the compiler to assume
     * that XXH3_kSecretPtr has been changed), the pipelines are used more
     * efficiently:
     *   I   L   S
     *      LDR
     *  ADD LDR
     *  SUB      STR
     *           STR
     *
     * See XXH3_NEON_LANES for details on the pipeline.
     *
     * XXH3_64bits_withSeed, len == 256, Snapdragon 835
     *   without hack: 2654.4 MB/s
     *   with hack:    3202.9 MB/s
     */
    XXH_COMPILER_GUARD(kSecretPtr);
#endif
    {   int const nbRounds = XXH_SECRET_DEFAULT_SIZE / 16;
        int i;
        for (i=0; i < nbRounds; i++) {
            /*
             * The asm hack causes the compiler to assume that kSecretPtr aliases with
             * customSecret, and on aarch64, this prevented LDP from merging two
             * loads together for free. Putting the loads together before the stores
             * properly generates LDP.
             */
            xxh_u64 lo = XXH_readLE64(kSecretPtr + 16*i)     + seed64;
            xxh_u64 hi = XXH_readLE64(kSecretPtr + 16*i + 8) - seed64;
            XXH_writeLE64((xxh_u8*)customSecret + 16*i,     lo);
            XXH_writeLE64((xxh_u8*)customSecret + 16*i + 8, hi);
    }   }
}


typedef void (*XXH3_f_accumulate)(xxh_u64* XXH_RESTRICT, const xxh_u8* XXH_RESTRICT, const xxh_u8* XXH_RESTRICT, size_t);
typedef void (*XXH3_f_scrambleAcc)(void* XXH_RESTRICT, const void*);
typedef void (*XXH3_f_initCustomSecret)(void* XXH_RESTRICT, xxh_u64);
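/*
 * Illustrative sketch (not compiled): these typedefs let the long-hash
 * drivers below take the accumulate/scramble/secret-init steps as
 * parameters, so a single XXH_FORCE_INLINE body gets stamped out per ISA.
 * The scalar variants always match the signatures:
 */
#if 0
static void XXH3_dispatch_example(void)
{
    XXH3_f_accumulate       f_acc      = XXH3_accumulate_scalar;
    XXH3_f_scrambleAcc      f_scramble = XXH3_scrambleAcc_scalar;
    XXH3_f_initCustomSecret f_initSec  = XXH3_initCustomSecret_scalar;
    (void)f_acc; (void)f_scramble; (void)f_initSec;
}
#endif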
#if (XXH_VECTOR == XXH_AVX512)

#define XXH3_accumulate_512 XXH3_accumulate_512_avx512
#define XXH3_accumulate     XXH3_accumulate_avx512
#define XXH3_scrambleAcc    XXH3_scrambleAcc_avx512
#define XXH3_initCustomSecret XXH3_initCustomSecret_avx512

#elif (XXH_VECTOR == XXH_AVX2)

#define XXH3_accumulate_512 XXH3_accumulate_512_avx2
#define XXH3_accumulate     XXH3_accumulate_avx2
#define XXH3_scrambleAcc    XXH3_scrambleAcc_avx2
#define XXH3_initCustomSecret XXH3_initCustomSecret_avx2

#elif (XXH_VECTOR == XXH_SSE2)

#define XXH3_accumulate_512 XXH3_accumulate_512_sse2
#define XXH3_accumulate     XXH3_accumulate_sse2
#define XXH3_scrambleAcc    XXH3_scrambleAcc_sse2
#define XXH3_initCustomSecret XXH3_initCustomSecret_sse2

#elif (XXH_VECTOR == XXH_NEON)

#define XXH3_accumulate_512 XXH3_accumulate_512_neon
#define XXH3_accumulate     XXH3_accumulate_neon
#define XXH3_scrambleAcc    XXH3_scrambleAcc_neon
#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar

#elif (XXH_VECTOR == XXH_VSX)

#define XXH3_accumulate_512 XXH3_accumulate_512_vsx
#define XXH3_accumulate     XXH3_accumulate_vsx
#define XXH3_scrambleAcc    XXH3_scrambleAcc_vsx
#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar

#elif (XXH_VECTOR == XXH_SVE)
#define XXH3_accumulate_512 XXH3_accumulate_512_sve
#define XXH3_accumulate     XXH3_accumulate_sve
#define XXH3_scrambleAcc    XXH3_scrambleAcc_scalar
#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar

#else /* scalar */

#define XXH3_accumulate_512 XXH3_accumulate_512_scalar
#define XXH3_accumulate     XXH3_accumulate_scalar
#define XXH3_scrambleAcc    XXH3_scrambleAcc_scalar
#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar

#endif

#if XXH_SIZE_OPT >= 1 /* don't do SIMD for initialization */
#  undef XXH3_initCustomSecret
#  define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
#endif

XXH_FORCE_INLINE void
XXH3_hashLong_internal_loop(xxh_u64* XXH_RESTRICT acc,
                      const xxh_u8* XXH_RESTRICT input, size_t len,
                      const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
                            XXH3_f_accumulate f_acc,
                            XXH3_f_scrambleAcc f_scramble)
{
    size_t const nbStripesPerBlock = (secretSize - XXH_STRIPE_LEN) / XXH_SECRET_CONSUME_RATE;
    size_t const block_len = XXH_STRIPE_LEN * nbStripesPerBlock;
    size_t const nb_blocks = (len - 1) / block_len;

    size_t n;

    XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN);

    for (n = 0; n < nb_blocks; n++) {
        f_acc(acc, input + n*block_len, secret, nbStripesPerBlock);
        f_scramble(acc, secret + secretSize - XXH_STRIPE_LEN);
    }

    /* last partial block */
    XXH_ASSERT(len > XXH_STRIPE_LEN);
    {   size_t const nbStripes = ((len - 1) - (block_len * nb_blocks)) / XXH_STRIPE_LEN;
        XXH_ASSERT(nbStripes <= (secretSize / XXH_SECRET_CONSUME_RATE));
        f_acc(acc, input + nb_blocks*block_len, secret, nbStripes);

        /* last stripe */
        {   const xxh_u8* const p = input + len - XXH_STRIPE_LEN;
#define XXH_SECRET_LASTACC_START 7  /* not aligned on 8, last secret is different from acc & scrambler */
            XXH3_accumulate_512(acc, p, secret + secretSize - XXH_STRIPE_LEN - XXH_SECRET_LASTACC_START);
    }   }
}
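/*
 * Worked example (assuming the usual defaults defined earlier in this file:
 * XXH_STRIPE_LEN == 64, XXH_SECRET_CONSUME_RATE == 8, and a 192-byte default
 * secret): nbStripesPerBlock = (192 - 64) / 8 = 16 stripes, so
 * block_len = 16 * 64 = 1024 bytes. Each 1 KB block walks the secret at
 * 8 bytes per stripe and ends with one scramble against the secret's last
 * 64 bytes.
 */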
XXH_FORCE_INLINE xxh_u64
XXH3_mix2Accs(const xxh_u64* XXH_RESTRICT acc, const xxh_u8* XXH_RESTRICT secret)
{
    return XXH3_mul128_fold64(
               acc[0] ^ XXH_readLE64(secret),
               acc[1] ^ XXH_readLE64(secret+8) );
}

static XXH64_hash_t
XXH3_mergeAccs(const xxh_u64* XXH_RESTRICT acc, const xxh_u8* XXH_RESTRICT secret, xxh_u64 start)
{
    xxh_u64 result64 = start;
    size_t i = 0;

    for (i = 0; i < 4; i++) {
        result64 += XXH3_mix2Accs(acc+2*i, secret + 16*i);
#if defined(__clang__)                                /* Clang */ \
    && (defined(__arm__) || defined(__thumb__))       /* ARMv7 */ \
    && (defined(__ARM_NEON) || defined(__ARM_NEON__)) /* NEON */  \
    && !defined(XXH_ENABLE_AUTOVECTORIZE)             /* Define to disable */
        /*
         * UGLY HACK:
         * Prevent autovectorization on Clang ARMv7-a. Exact same problem as
         * the one in XXH3_len_129to240_64b. Speeds up shorter keys > 240b.
         * XXH3_64bits, len == 256, Snapdragon 835:
         *   without hack: 2063.7 MB/s
         *   with hack:    2560.7 MB/s
         */
        XXH_COMPILER_GUARD(result64);
#endif
    }

    return XXH3_avalanche(result64);
}

#define XXH3_INIT_ACC { XXH_PRIME32_3, XXH_PRIME64_1, XXH_PRIME64_2, XXH_PRIME64_3, \
                        XXH_PRIME64_4, XXH_PRIME32_2, XXH_PRIME64_5, XXH_PRIME32_1 }

XXH_FORCE_INLINE XXH64_hash_t
XXH3_hashLong_64b_internal(const void* XXH_RESTRICT input, size_t len,
                           const void* XXH_RESTRICT secret, size_t secretSize,
                           XXH3_f_accumulate f_acc,
                           XXH3_f_scrambleAcc f_scramble)
{
    XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[XXH_ACC_NB] = XXH3_INIT_ACC;

    XXH3_hashLong_internal_loop(acc, (const xxh_u8*)input, len, (const xxh_u8*)secret, secretSize, f_acc, f_scramble);

    /* converge into final hash */
    XXH_STATIC_ASSERT(sizeof(acc) == 64);
    /* do not align on 8, so that the secret is different from the accumulator */
#define XXH_SECRET_MERGEACCS_START 11
    XXH_ASSERT(secretSize >= sizeof(acc) + XXH_SECRET_MERGEACCS_START);
    return XXH3_mergeAccs(acc, (const xxh_u8*)secret + XXH_SECRET_MERGEACCS_START, (xxh_u64)len * XXH_PRIME64_1);
}

/*
 * It's important for performance to transmit secret's size (when it's static)
 * so that the compiler can properly optimize the vectorized loop.
 * This makes a big performance difference for "medium" keys (<1 KB) when using AVX instruction set.
 * When the secret size is unknown, or on GCC 12 where the mix of NO_INLINE and FORCE_INLINE
 * breaks -Og, this is XXH_NO_INLINE.
 */
XXH3_WITH_SECRET_INLINE XXH64_hash_t
XXH3_hashLong_64b_withSecret(const void* XXH_RESTRICT input, size_t len,
                             XXH64_hash_t seed64, const xxh_u8* XXH_RESTRICT secret, size_t secretLen)
{
    (void)seed64;
    return XXH3_hashLong_64b_internal(input, len, secret, secretLen, XXH3_accumulate, XXH3_scrambleAcc);
}
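/*
 * Illustrative sketch (not compiled): XXH3_mix2Accs() above folds two lanes
 * into 64 bits via XXH3_mul128_fold64(), which is the full 64x64 -> 128-bit
 * multiply with its halves XORed together. Where unsigned __int128 exists,
 * it is equivalent to:
 */
#if 0
static xxh_u64 XXH3_mul128_fold64_model(xxh_u64 a, xxh_u64 b)
{
    unsigned __int128 const product = (unsigned __int128)a * b;
    return (xxh_u64)product ^ (xxh_u64)(product >> 64);
}
#endif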
/*
 * It's preferable for performance that XXH3_hashLong is not inlined,
 * as it results in a smaller function for small data, easier on the instruction cache.
 * Note that inside this no_inline function, we do inline the internal loop,
 * and provide a statically defined secret size to allow optimization of the vector loop.
 */
XXH_NO_INLINE XXH_PUREF XXH64_hash_t
XXH3_hashLong_64b_default(const void* XXH_RESTRICT input, size_t len,
                          XXH64_hash_t seed64, const xxh_u8* XXH_RESTRICT secret, size_t secretLen)
{
    (void)seed64; (void)secret; (void)secretLen;
    return XXH3_hashLong_64b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_accumulate, XXH3_scrambleAcc);
}

/*
 * XXH3_hashLong_64b_withSeed():
 * Generate a custom key based on alteration of default XXH3_kSecret with the seed,
 * and then use this key for long mode hashing.
 *
 * This operation is decently fast but nonetheless costs a little bit of time.
 * Try to avoid it whenever possible (typically when seed==0).
 *
 * It's important for performance that XXH3_hashLong is not inlined. Not sure
 * why (uop cache maybe?), but the difference is large and easily measurable.
 */
XXH_FORCE_INLINE XXH64_hash_t
XXH3_hashLong_64b_withSeed_internal(const void* input, size_t len,
                                    XXH64_hash_t seed,
                                    XXH3_f_accumulate f_acc,
                                    XXH3_f_scrambleAcc f_scramble,
                                    XXH3_f_initCustomSecret f_initSec)
{
#if XXH_SIZE_OPT <= 0
    if (seed == 0)
        return XXH3_hashLong_64b_internal(input, len,
                                          XXH3_kSecret, sizeof(XXH3_kSecret),
                                          f_acc, f_scramble);
#endif
    {   XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE];
        f_initSec(secret, seed);
        return XXH3_hashLong_64b_internal(input, len, secret, sizeof(secret),
                                          f_acc, f_scramble);
    }
}

/*
 * It's important for performance that XXH3_hashLong is not inlined.
 */
XXH_NO_INLINE XXH64_hash_t
XXH3_hashLong_64b_withSeed(const void* XXH_RESTRICT input, size_t len,
                           XXH64_hash_t seed, const xxh_u8* XXH_RESTRICT secret, size_t secretLen)
{
    (void)secret; (void)secretLen;
    return XXH3_hashLong_64b_withSeed_internal(input, len, seed,
                XXH3_accumulate, XXH3_scrambleAcc, XXH3_initCustomSecret);
}


typedef XXH64_hash_t (*XXH3_hashLong64_f)(const void* XXH_RESTRICT, size_t,
                                          XXH64_hash_t, const xxh_u8* XXH_RESTRICT, size_t);

XXH_FORCE_INLINE XXH64_hash_t
XXH3_64bits_internal(const void* XXH_RESTRICT input, size_t len,
                     XXH64_hash_t seed64, const void* XXH_RESTRICT secret, size_t secretLen,
                     XXH3_hashLong64_f f_hashLong)
{
    XXH_ASSERT(secretLen >= XXH3_SECRET_SIZE_MIN);
    /*
     * If an action is to be taken when the `secretLen` condition is not respected,
     * it should be done here.
     * For now, it's a contract pre-condition.
     * Adding a check and a branch here would cost performance at every hash.
     * Also, note that the function signature doesn't offer room to return an error.
     */
    if (len <= 16)
        return XXH3_len_0to16_64b((const xxh_u8*)input, len, (const xxh_u8*)secret, seed64);
    if (len <= 128)
        return XXH3_len_17to128_64b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64);
    if (len <= XXH3_MIDSIZE_MAX)
        return XXH3_len_129to240_64b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64);
    return f_hashLong(input, len, seed64, (const xxh_u8*)secret, secretLen);
}


/* === Public entry point === */

/*! @ingroup XXH3_family */
XXH_PUBLIC_API XXH64_hash_t XXH3_64bits(XXH_NOESCAPE const void* input, size_t length)
{
    return XXH3_64bits_internal(input, length, 0, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_default);
}

/*! @ingroup XXH3_family */
XXH_PUBLIC_API XXH64_hash_t
XXH3_64bits_withSecret(XXH_NOESCAPE const void* input, size_t length, XXH_NOESCAPE const void* secret, size_t secretSize)
{
    return XXH3_64bits_internal(input, length, 0, secret, secretSize, XXH3_hashLong_64b_withSecret);
}
/*! @ingroup XXH3_family */
XXH_PUBLIC_API XXH64_hash_t
XXH3_64bits_withSeed(XXH_NOESCAPE const void* input, size_t length, XXH64_hash_t seed)
{
    return XXH3_64bits_internal(input, length, seed, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_withSeed);
}

XXH_PUBLIC_API XXH64_hash_t
XXH3_64bits_withSecretandSeed(XXH_NOESCAPE const void* input, size_t length, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed)
{
    if (length <= XXH3_MIDSIZE_MAX)
        return XXH3_64bits_internal(input, length, seed, XXH3_kSecret, sizeof(XXH3_kSecret), NULL);
    return XXH3_hashLong_64b_withSecret(input, length, seed, (const xxh_u8*)secret, secretSize);
}
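/*
 * Usage sketch (not compiled): the one-shot entry points above. The buffer
 * and seed values are arbitrary examples.
 */
#if 0
static void XXH3_oneShot_example(void)
{
    const char data[] = "example input";
    XXH64_hash_t const h1 = XXH3_64bits(data, sizeof(data)-1);
    XXH64_hash_t const h2 = XXH3_64bits_withSeed(data, sizeof(data)-1, 0xC0FFEEULL);
    (void)h1; (void)h2;
}
#endif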
/* ===  XXH3 streaming  === */
#ifndef XXH_NO_STREAM
/*
 * Allocates a pointer that is always aligned to @p align.
 *
 * This must be freed with `XXH_alignedFree()`.
 *
 * malloc typically guarantees 16 byte alignment on 64-bit systems and 8 byte
 * alignment on 32-bit. This isn't enough for the 32-byte aligned loads in AVX2,
 * or, on 32-bit, for the 16-byte aligned loads in SSE2 and NEON.
 *
 * This underalignment previously caused a rather obvious crash which went
 * completely unnoticed due to XXH3_createState() not actually being tested.
 * Credit to RedSpah for noticing this bug.
 *
 * The alignment is done manually: functions like posix_memalign or _mm_malloc
 * are avoided: to maintain portability, we would have to write a fallback
 * like this anyways, and besides, testing for the existence of library
 * functions without relying on external build tools is impossible.
 *
 * The method is simple: overallocate, manually align, and store the offset
 * to the original behind the returned pointer.
 *
 * Align must be a power of 2 and 8 <= align <= 128.
 */
static XXH_MALLOCF void* XXH_alignedMalloc(size_t s, size_t align)
{
    XXH_ASSERT(align <= 128 && align >= 8); /* range check */
    XXH_ASSERT((align & (align-1)) == 0);   /* power of 2 */
    XXH_ASSERT(s != 0 && s < (s + align));  /* empty/overflow */
    {   /* Overallocate to make room for manual realignment and an offset byte */
        xxh_u8* base = (xxh_u8*)XXH_malloc(s + align);
        if (base != NULL) {
            /*
             * Get the offset needed to align this pointer.
             *
             * Even if the returned pointer is aligned, there will always be
             * at least one byte to store the offset to the original pointer.
             */
            size_t offset = align - ((size_t)base & (align - 1)); /* base % align */
            /* Add the offset for the now-aligned pointer */
            xxh_u8* ptr = base + offset;

            XXH_ASSERT((size_t)ptr % align == 0);

            /* Store the offset immediately before the returned pointer. */
            ptr[-1] = (xxh_u8)offset;
            return ptr;
        }
        return NULL;
    }
}
/*
 * Frees an aligned pointer allocated by XXH_alignedMalloc(). Don't pass
 * normal malloc'd pointers, XXH_alignedMalloc has a specific data layout.
 */
static void XXH_alignedFree(void* p)
{
    if (p != NULL) {
        xxh_u8* ptr = (xxh_u8*)p;
        /* Get the offset byte we added in XXH_alignedMalloc. */
        xxh_u8 offset = ptr[-1];
        /* Free the original malloc'd pointer */
        xxh_u8* base = ptr - offset;
        XXH_free(base);
    }
}
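/*
 * Layout sketch for XXH_alignedMalloc()/XXH_alignedFree() above (comment
 * only, values illustrative). For align = 16, the allocation always leaves
 * 1..align bytes of slack before the returned pointer:
 *
 *   base                ptr = base + offset      (ptr % align == 0)
 *    v                   v
 *    [ slack ... offset ][ s usable bytes ................ ]
 *                  ^-- ptr[-1] stores offset (1..align)
 *
 * XXH_alignedFree() reads ptr[-1] and frees base = ptr - offset.
 */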
/*! @ingroup XXH3_family */
/*!
 * @brief Allocate an @ref XXH3_state_t.
 *
 * @return An allocated pointer of @ref XXH3_state_t on success.
 * @return `NULL` on failure.
 *
 * @note Must be freed with XXH3_freeState().
 */
XXH_PUBLIC_API XXH3_state_t* XXH3_createState(void)
{
    XXH3_state_t* const state = (XXH3_state_t*)XXH_alignedMalloc(sizeof(XXH3_state_t), 64);
    if (state==NULL) return NULL;
    XXH3_INITSTATE(state);
    return state;
}

/*! @ingroup XXH3_family */
/*!
 * @brief Frees an @ref XXH3_state_t.
 *
 * @param statePtr A pointer to an @ref XXH3_state_t allocated with @ref XXH3_createState().
 *
 * @return @ref XXH_OK.
 *
 * @note Must be allocated with XXH3_createState().
 */
XXH_PUBLIC_API XXH_errorcode XXH3_freeState(XXH3_state_t* statePtr)
{
    XXH_alignedFree(statePtr);
    return XXH_OK;
}

/*! @ingroup XXH3_family */
XXH_PUBLIC_API void
XXH3_copyState(XXH_NOESCAPE XXH3_state_t* dst_state, XXH_NOESCAPE const XXH3_state_t* src_state)
{
    XXH_memcpy(dst_state, src_state, sizeof(*dst_state));
}

static void
XXH3_reset_internal(XXH3_state_t* statePtr,
                    XXH64_hash_t seed,
                    const void* secret, size_t secretSize)
{
    size_t const initStart = offsetof(XXH3_state_t, bufferedSize);
    size_t const initLength = offsetof(XXH3_state_t, nbStripesPerBlock) - initStart;
    XXH_ASSERT(offsetof(XXH3_state_t, nbStripesPerBlock) > initStart);
    XXH_ASSERT(statePtr != NULL);
    /* set members from bufferedSize to nbStripesPerBlock (excluded) to 0 */
    memset((char*)statePtr + initStart, 0, initLength);
    statePtr->acc[0] = XXH_PRIME32_3;
    statePtr->acc[1] = XXH_PRIME64_1;
    statePtr->acc[2] = XXH_PRIME64_2;
    statePtr->acc[3] = XXH_PRIME64_3;
    statePtr->acc[4] = XXH_PRIME64_4;
    statePtr->acc[5] = XXH_PRIME32_2;
    statePtr->acc[6] = XXH_PRIME64_5;
    statePtr->acc[7] = XXH_PRIME32_1;
    statePtr->seed = seed;
    statePtr->useSeed = (seed != 0);
    statePtr->extSecret = (const unsigned char*)secret;
    XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN);
    statePtr->secretLimit = secretSize - XXH_STRIPE_LEN;
    statePtr->nbStripesPerBlock = statePtr->secretLimit / XXH_SECRET_CONSUME_RATE;
}

/*! @ingroup XXH3_family */
XXH_PUBLIC_API XXH_errorcode
XXH3_64bits_reset(XXH_NOESCAPE XXH3_state_t* statePtr)
{
    if (statePtr == NULL) return XXH_ERROR;
    XXH3_reset_internal(statePtr, 0, XXH3_kSecret, XXH_SECRET_DEFAULT_SIZE);
    return XXH_OK;
}

/*! @ingroup XXH3_family */
XXH_PUBLIC_API XXH_errorcode
XXH3_64bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize)
{
    if (statePtr == NULL) return XXH_ERROR;
    XXH3_reset_internal(statePtr, 0, secret, secretSize);
    if (secret == NULL) return XXH_ERROR;
    if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR;
    return XXH_OK;
}
/*! @ingroup XXH3_family */
XXH_PUBLIC_API XXH_errorcode
XXH3_64bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH64_hash_t seed)
{
    if (statePtr == NULL) return XXH_ERROR;
    if (seed==0) return XXH3_64bits_reset(statePtr);
    if ((seed != statePtr->seed) || (statePtr->extSecret != NULL))
        XXH3_initCustomSecret(statePtr->customSecret, seed);
    XXH3_reset_internal(statePtr, seed, NULL, XXH_SECRET_DEFAULT_SIZE);
    return XXH_OK;
}

/*! @ingroup XXH3_family */
XXH_PUBLIC_API XXH_errorcode
XXH3_64bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed64)
{
    if (statePtr == NULL) return XXH_ERROR;
    if (secret == NULL) return XXH_ERROR;
    if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR;
    XXH3_reset_internal(statePtr, seed64, secret, secretSize);
    statePtr->useSeed = 1; /* always, even if seed64==0 */
    return XXH_OK;
}

/*!
 * @internal
 * @brief Processes a large input for XXH3_update() and XXH3_digest_long().
 *
 * Unlike XXH3_hashLong_internal_loop(), this can process data that overlaps a block.
 *
 * @param acc                Pointer to the 8 accumulator lanes
 * @param nbStripesSoFarPtr  In/out pointer to the number of leftover stripes in the block
 * @param nbStripesPerBlock  Number of stripes in a block
 * @param input              Input pointer
 * @param nbStripes          Number of stripes to process
 * @param secret             Secret pointer
 * @param secretLimit        Offset of the last block in @p secret
 * @param f_acc              Pointer to an XXH3_accumulate implementation
 * @param f_scramble         Pointer to an XXH3_scrambleAcc implementation
 * @return                   Pointer past the end of @p input after processing
 */
XXH_FORCE_INLINE const xxh_u8 *
XXH3_consumeStripes(xxh_u64* XXH_RESTRICT acc,
                    size_t* XXH_RESTRICT nbStripesSoFarPtr, size_t nbStripesPerBlock,
                    const xxh_u8* XXH_RESTRICT input, size_t nbStripes,
                    const xxh_u8* XXH_RESTRICT secret, size_t secretLimit,
                    XXH3_f_accumulate f_acc,
                    XXH3_f_scrambleAcc f_scramble)
{
    const xxh_u8* initialSecret = secret + *nbStripesSoFarPtr * XXH_SECRET_CONSUME_RATE;
    /* Process full blocks */
    if (nbStripes >= (nbStripesPerBlock - *nbStripesSoFarPtr)) {
        /* Process the initial partial block... */
        size_t nbStripesThisIter = nbStripesPerBlock - *nbStripesSoFarPtr;

        do {
            /* Accumulate and scramble */
            f_acc(acc, input, initialSecret, nbStripesThisIter);
            f_scramble(acc, secret + secretLimit);
            input += nbStripesThisIter * XXH_STRIPE_LEN;
            nbStripes -= nbStripesThisIter;
            /* Then continue the loop with the full block size */
            nbStripesThisIter = nbStripesPerBlock;
            initialSecret = secret;
        } while (nbStripes >= nbStripesPerBlock);
        *nbStripesSoFarPtr = 0;
    }
    /* Process a partial block */
    if (nbStripes > 0) {
        f_acc(acc, input, initialSecret, nbStripes);
        input += nbStripes * XXH_STRIPE_LEN;
        *nbStripesSoFarPtr += nbStripes;
    }
    /* Return end pointer */
    return input;
}

#ifndef XXH3_STREAM_USE_STACK
# if XXH_SIZE_OPT <= 0 && !defined(__clang__) /* clang doesn't need additional stack space */
#   define XXH3_STREAM_USE_STACK 1
# endif
#endif
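/*
 * Worked example for XXH3_consumeStripes() above (comment only, assuming the
 * default nbStripesPerBlock of 16): with *nbStripesSoFarPtr == 10 and
 * nbStripes == 40, the first do/while iteration finishes the current block
 * (6 stripes + scramble), the next two iterations each consume a full block
 * (16 stripes + scramble), and the trailing partial call accumulates the
 * remaining 2 stripes, leaving *nbStripesSoFarPtr == 2.
 */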
/*
 * Both XXH3_64bits_update and XXH3_128bits_update use this routine.
 */
XXH_FORCE_INLINE XXH_errorcode
XXH3_update(XXH3_state_t* XXH_RESTRICT const state,
            const xxh_u8* XXH_RESTRICT input, size_t len,
            XXH3_f_accumulate f_acc,
            XXH3_f_scrambleAcc f_scramble)
{
    if (input==NULL) {
        XXH_ASSERT(len == 0);
        return XXH_OK;
    }

    XXH_ASSERT(state != NULL);
    {   const xxh_u8* const bEnd = input + len;
        const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret;
#if defined(XXH3_STREAM_USE_STACK) && XXH3_STREAM_USE_STACK >= 1
        /* For some reason, gcc and MSVC seem to suffer greatly
         * when operating on the accumulators directly in state memory.
         * Operating on stack space seems to enable proper optimization.
         * clang, on the other hand, doesn't seem to need this trick */
        XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[8];
        XXH_memcpy(acc, state->acc, sizeof(acc));
#else
        xxh_u64* XXH_RESTRICT const acc = state->acc;
#endif
        state->totalLen += len;
        XXH_ASSERT(state->bufferedSize <= XXH3_INTERNALBUFFER_SIZE);

        /* small input : just fill in tmp buffer */
        if (len <= XXH3_INTERNALBUFFER_SIZE - state->bufferedSize) {
            XXH_memcpy(state->buffer + state->bufferedSize, input, len);
            state->bufferedSize += (XXH32_hash_t)len;
            return XXH_OK;
        }

        /* total input is now > XXH3_INTERNALBUFFER_SIZE */
#define XXH3_INTERNALBUFFER_STRIPES (XXH3_INTERNALBUFFER_SIZE / XXH_STRIPE_LEN)
        XXH_STATIC_ASSERT(XXH3_INTERNALBUFFER_SIZE % XXH_STRIPE_LEN == 0);   /* clean multiple */

        /*
         * Internal buffer is partially filled (always, except at beginning)
         * Complete it, then consume it.
         */
        if (state->bufferedSize) {
            size_t const loadSize = XXH3_INTERNALBUFFER_SIZE - state->bufferedSize;
            XXH_memcpy(state->buffer + state->bufferedSize, input, loadSize);
            input += loadSize;
            XXH3_consumeStripes(acc,
                               &state->nbStripesSoFar, state->nbStripesPerBlock,
                                state->buffer, XXH3_INTERNALBUFFER_STRIPES,
                                secret, state->secretLimit,
                                f_acc, f_scramble);
            state->bufferedSize = 0;
        }
        XXH_ASSERT(input < bEnd);
        if (bEnd - input > XXH3_INTERNALBUFFER_SIZE) {
            size_t nbStripes = (size_t)(bEnd - 1 - input) / XXH_STRIPE_LEN;
            input = XXH3_consumeStripes(acc,
                                       &state->nbStripesSoFar, state->nbStripesPerBlock,
                                        input, nbStripes,
                                        secret, state->secretLimit,
                                        f_acc, f_scramble);
            XXH_memcpy(state->buffer + sizeof(state->buffer) - XXH_STRIPE_LEN, input - XXH_STRIPE_LEN, XXH_STRIPE_LEN);

        }
        /* Some remaining input (always) : buffer it */
        XXH_ASSERT(input < bEnd);
        XXH_ASSERT(bEnd - input <= XXH3_INTERNALBUFFER_SIZE);
        XXH_ASSERT(state->bufferedSize == 0);
        XXH_memcpy(state->buffer, input, (size_t)(bEnd-input));
        state->bufferedSize = (XXH32_hash_t)(bEnd-input);
#if defined(XXH3_STREAM_USE_STACK) && XXH3_STREAM_USE_STACK >= 1
        /* save stack accumulators into state */
        XXH_memcpy(state->acc, acc, sizeof(acc));
#endif
    }

    return XXH_OK;
}
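/*
 * Usage sketch (not compiled): the streaming API that XXH3_update() backs,
 * using only the public functions defined in this section. Error handling
 * is elided for brevity.
 */
#if 0
static XXH64_hash_t XXH3_streaming_example(const void* p1, size_t n1,
                                           const void* p2, size_t n2)
{
    XXH64_hash_t h;
    XXH3_state_t* const state = XXH3_createState();
    (void)XXH3_64bits_reset(state);
    (void)XXH3_64bits_update(state, p1, n1);
    (void)XXH3_64bits_update(state, p2, n2);
    h = XXH3_64bits_digest(state);
    (void)XXH3_freeState(state);
    return h;
}
#endif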

/*! @ingroup XXH3_family */
XXH_PUBLIC_API XXH_errorcode
XXH3_64bits_update(XXH_NOESCAPE XXH3_state_t* state, XXH_NOESCAPE const void* input, size_t len)
{
    return XXH3_update(state, (const xxh_u8*)input, len,
                       XXH3_accumulate, XXH3_scrambleAcc);
}


XXH_FORCE_INLINE void
XXH3_digest_long (XXH64_hash_t* acc,
                  const XXH3_state_t* state,
                  const unsigned char* secret)
{
    xxh_u8 lastStripe[XXH_STRIPE_LEN];
    const xxh_u8* lastStripePtr;

    /*
     * Digest on a local copy. This way, the state remains unaltered, and it can
     * continue ingesting more input afterwards.
     */
    XXH_memcpy(acc, state->acc, sizeof(state->acc));
    if (state->bufferedSize >= XXH_STRIPE_LEN) {
        /* Consume remaining stripes then point to remaining data in buffer */
        size_t const nbStripes = (state->bufferedSize - 1) / XXH_STRIPE_LEN;
        size_t nbStripesSoFar = state->nbStripesSoFar;
        XXH3_consumeStripes(acc,
                           &nbStripesSoFar, state->nbStripesPerBlock,
                            state->buffer, nbStripes,
                            secret, state->secretLimit,
                            XXH3_accumulate, XXH3_scrambleAcc);
        lastStripePtr = state->buffer + state->bufferedSize - XXH_STRIPE_LEN;
    } else {  /* bufferedSize < XXH_STRIPE_LEN */
        /* Copy to temp buffer */
        size_t const catchupSize = XXH_STRIPE_LEN - state->bufferedSize;
        XXH_ASSERT(state->bufferedSize > 0);  /* there is always some input buffered */
        XXH_memcpy(lastStripe, state->buffer + sizeof(state->buffer) - catchupSize, catchupSize);
        XXH_memcpy(lastStripe + catchupSize, state->buffer, state->bufferedSize);
        lastStripePtr = lastStripe;
    }
    /* Last stripe */
    XXH3_accumulate_512(acc,
                        lastStripePtr,
                        secret + state->secretLimit - XXH_SECRET_LASTACC_START);
}
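
/*
 * Sketch, excluded from compilation, of the property documented above:
 * since digest works on a local copy of the accumulators, a caller may read
 * an intermediate hash and keep streaming. `part1`/`part2` are placeholders
 * for any two input fragments; the function name is hypothetical.
 */
#if 0
#include "xxhash.h"

static void running_hash_demo(const void* part1, size_t size1,
                              const void* part2, size_t size2)
{
    XXH64_hash_t prefixHash, fullHash;
    XXH3_state_t* const st = XXH3_createState();
    if (st == NULL) return;
    (void)XXH3_64bits_reset(st);
    (void)XXH3_64bits_update(st, part1, size1);
    prefixHash = XXH3_64bits_digest(st);        /* hash of part1 only */
    (void)XXH3_64bits_update(st, part2, size2); /* state was not consumed */
    fullHash   = XXH3_64bits_digest(st);        /* hash of part1 + part2 */
    (void)prefixHash; (void)fullHash;
    XXH3_freeState(st);
}
#endif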

/*! @ingroup XXH3_family */
XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_digest (XXH_NOESCAPE const XXH3_state_t* state)
{
    const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret;
    if (state->totalLen > XXH3_MIDSIZE_MAX) {
        XXH_ALIGN(XXH_ACC_ALIGN) XXH64_hash_t acc[XXH_ACC_NB];
        XXH3_digest_long(acc, state, secret);
        return XXH3_mergeAccs(acc,
                              secret + XXH_SECRET_MERGEACCS_START,
                              (xxh_u64)state->totalLen * XXH_PRIME64_1);
    }
    /* totalLen <= XXH3_MIDSIZE_MAX: digesting a short input */
    if (state->useSeed)
        return XXH3_64bits_withSeed(state->buffer, (size_t)state->totalLen, state->seed);
    return XXH3_64bits_withSecret(state->buffer, (size_t)(state->totalLen),
                                  secret, state->secretLimit + XXH_STRIPE_LEN);
}
#endif /* !XXH_NO_STREAM */


/* ==========================================
 * XXH3 128 bits (a.k.a XXH128)
 * ==========================================
 * XXH3's 128-bit variant has better mixing and strength than the 64-bit variant,
 * even without counting the significantly larger output size.
 *
 * For example, extra steps are taken to avoid the seed-dependent collisions
 * in 17-240 byte inputs (see XXH3_mix16B and XXH128_mix32B).
 *
 * This strength naturally comes at the cost of some speed, especially on short
 * lengths. Note that hashing long inputs is about as fast as the 64-bit version,
 * because the long-input path is only a slight modification of the 64-bit loop.
 *
 * XXH128 is also more oriented towards 64-bit machines. It is still extremely
 * fast for a _128-bit_ hash on 32-bit platforms (it usually beats XXH64).
 */

XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t
XXH3_len_1to3_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
{
    /* A doubled version of 1to3_64b with different constants. */
    XXH_ASSERT(input != NULL);
    XXH_ASSERT(1 <= len && len <= 3);
    XXH_ASSERT(secret != NULL);
    /*
     * len = 1: combinedl = { input[0], 0x01, input[0], input[0] }
     * len = 2: combinedl = { input[1], 0x02, input[0], input[1] }
     * len = 3: combinedl = { input[2], 0x03, input[0], input[1] }
     */
    {   xxh_u8 const c1 = input[0];
        xxh_u8 const c2 = input[len >> 1];
        xxh_u8 const c3 = input[len - 1];
        xxh_u32 const combinedl = ((xxh_u32)c1 << 16) | ((xxh_u32)c2 << 24)
                                | ((xxh_u32)c3 <<  0) | ((xxh_u32)len << 8);
        xxh_u32 const combinedh = XXH_rotl32(XXH_swap32(combinedl), 13);
        xxh_u64 const bitflipl = (XXH_readLE32(secret) ^ XXH_readLE32(secret+4)) + seed;
        xxh_u64 const bitfliph = (XXH_readLE32(secret+8) ^ XXH_readLE32(secret+12)) - seed;
        xxh_u64 const keyed_lo = (xxh_u64)combinedl ^ bitflipl;
        xxh_u64 const keyed_hi = (xxh_u64)combinedh ^ bitfliph;
        XXH128_hash_t h128;
        h128.low64  = XXH64_avalanche(keyed_lo);
        h128.high64 = XXH64_avalanche(keyed_hi);
        return h128;
    }
}
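
/*
 * Worked example, excluded from compilation, of the byte layout documented
 * above. For the 2-byte input { 'a', 'b' }, the little-endian bytes of
 * `combinedl` are { input[1], 0x02, input[0], input[1] }. The function name
 * is hypothetical.
 */
#if 0
#include <assert.h>
#include <stdint.h>

static void check_combinedl_layout(void)
{
    const uint8_t in[2] = { 'a', 'b' };   /* 0x61, 0x62 */
    size_t const len = 2;
    uint32_t const c1 = in[0], c2 = in[len >> 1], c3 = in[len - 1];
    uint32_t const combinedl = (c1 << 16) | (c2 << 24) | (c3 << 0) | ((uint32_t)len << 8);
    /* bytes, least significant first: 0x62 ('b'), 0x02, 0x61 ('a'), 0x62 ('b') */
    assert(combinedl == 0x62610262U);
}
#endif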

XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t
XXH3_len_4to8_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
{
    XXH_ASSERT(input != NULL);
    XXH_ASSERT(secret != NULL);
    XXH_ASSERT(4 <= len && len <= 8);
    seed ^= (xxh_u64)XXH_swap32((xxh_u32)seed) << 32;
    {   xxh_u32 const input_lo = XXH_readLE32(input);
        xxh_u32 const input_hi = XXH_readLE32(input + len - 4);
        xxh_u64 const input_64 = input_lo + ((xxh_u64)input_hi << 32);
        xxh_u64 const bitflip = (XXH_readLE64(secret+16) ^ XXH_readLE64(secret+24)) + seed;
        xxh_u64 const keyed = input_64 ^ bitflip;

        /* Shift len to the left: the added term stays even, so the multiplier
         * (odd prime + even term) stays odd; an even multiplier would discard
         * low-bit entropy. */
        XXH128_hash_t m128 = XXH_mult64to128(keyed, XXH_PRIME64_1 + (len << 2));

        m128.high64 += (m128.low64 << 1);
        m128.low64  ^= (m128.high64 >> 3);

        m128.low64   = XXH_xorshift64(m128.low64, 35);
        m128.low64  *= PRIME_MX2;
        m128.low64   = XXH_xorshift64(m128.low64, 28);
        m128.high64  = XXH3_avalanche(m128.high64);
        return m128;
    }
}
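
/*
 * Assumption-check sketch, excluded from compilation, for the comment above:
 * `len << 2` is always even, so XXH_PRIME64_1 + (len << 2) remains odd for
 * every len in [4,8]. The constant below is the value of XXH_PRIME64_1;
 * the function name is hypothetical.
 */
#if 0
#include <assert.h>
#include <stdint.h>

static void check_multiplier_is_odd(void)
{
    const uint64_t prime64_1 = 0x9E3779B185EBCA87ULL;  /* XXH_PRIME64_1 */
    uint64_t len;
    for (len = 4; len <= 8; len++)
        assert(((prime64_1 + (len << 2)) & 1) == 1);   /* odd + even = odd */
}
#endif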

XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t
XXH3_len_9to16_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
{
    XXH_ASSERT(input != NULL);
    XXH_ASSERT(secret != NULL);
    XXH_ASSERT(9 <= len && len <= 16);
    {   xxh_u64 const bitflipl = (XXH_readLE64(secret+32) ^ XXH_readLE64(secret+40)) - seed;
        xxh_u64 const bitfliph = (XXH_readLE64(secret+48) ^ XXH_readLE64(secret+56)) + seed;
        xxh_u64 const input_lo = XXH_readLE64(input);
        xxh_u64       input_hi = XXH_readLE64(input + len - 8);
        XXH128_hash_t m128 = XXH_mult64to128(input_lo ^ input_hi ^ bitflipl, XXH_PRIME64_1);
        /*
         * Put len in the middle of m128 to ensure that the length gets mixed to
         * both the low and high bits in the 128x64 multiply below.
         */
        m128.low64 += (xxh_u64)(len - 1) << 54;
        input_hi   ^= bitfliph;
        /*
         * Add the high 32 bits of input_hi to the high 32 bits of m128, then
         * add the long product of the low 32 bits of input_hi and XXH_PRIME32_2 to
         * the high 64 bits of m128.
         *
         * The best approach to this operation is different on 32-bit and 64-bit.
         */
        if (sizeof(void *) < sizeof(xxh_u64)) { /* 32-bit */
            /*
             * 32-bit optimized version, which is more readable.
             *
             * On 32-bit, it removes an ADC and delays a dependency between the two
             * halves of m128.high64, but it generates an extra mask on 64-bit.
             */
            m128.high64 += (input_hi & 0xFFFFFFFF00000000ULL) + XXH_mult32to64((xxh_u32)input_hi, XXH_PRIME32_2);
        } else {
            /*
             * 64-bit optimized (albeit more confusing) version.
             *
             * Uses some properties of addition and multiplication to remove the mask:
             *
             * Let:
             *    a = input_hi.lo = (input_hi & 0x00000000FFFFFFFF)
             *    b = input_hi.hi = (input_hi & 0xFFFFFFFF00000000)
             *    c = XXH_PRIME32_2
             *
             *    b + (a * c)
             * Inverse Property: x + y - x == y
             *    b + (a * (1 + c - 1))
             * Distributive Property: x * (y + z) == (x * y) + (x * z)
             *    b + (a * 1) + (a * (c - 1))
             * Identity Property: x * 1 == x
             *    b + a + (a * (c - 1))
             *
             * Substitute a, b, and c:
             *    input_hi.hi + input_hi.lo + ((xxh_u64)input_hi.lo * (XXH_PRIME32_2 - 1))
             *
             * Since input_hi.hi + input_hi.lo == input_hi, we get this:
             *    input_hi + ((xxh_u64)input_hi.lo * (XXH_PRIME32_2 - 1))
             */
            m128.high64 += input_hi + XXH_mult32to64((xxh_u32)input_hi, XXH_PRIME32_2 - 1);
        }
        /* m128 ^= XXH_swap64(m128 >> 64); */
        m128.low64 ^= XXH_swap64(m128.high64);

        {   /* 128x64 multiply: h128 = m128 * XXH_PRIME64_2; */
            XXH128_hash_t h128 = XXH_mult64to128(m128.low64, XXH_PRIME64_2);
            h128.high64 += m128.high64 * XXH_PRIME64_2;

            h128.low64  = XXH3_avalanche(h128.low64);
            h128.high64 = XXH3_avalanche(h128.high64);
            return h128;
    }   }
}
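
/*
 * Verification sketch, excluded from compilation, of the mask-removal
 * identity derived in the comment above: with a = low 32 bits and
 * b = high 32 bits of input_hi, b + a*c == input_hi + a*(c-1) mod 2^64.
 * The constant below is the value of XXH_PRIME32_2; the function name is
 * hypothetical.
 */
#if 0
#include <assert.h>
#include <stdint.h>

static void check_mask_removal(uint64_t input_hi)
{
    const uint64_t c = 0x85EBCA77U;  /* XXH_PRIME32_2 */
    uint64_t const a = (uint32_t)input_hi;
    uint64_t const masked   = (input_hi & 0xFFFFFFFF00000000ULL) + a * c;
    uint64_t const unmasked = input_hi + a * (c - 1);
    assert(masked == unmasked);      /* both sides overflow identically mod 2^64 */
}
#endif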

/*
 * Assumption: `secret` size is >= XXH3_SECRET_SIZE_MIN
 */
XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t
XXH3_len_0to16_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
{
    XXH_ASSERT(len <= 16);
    {   if (len > 8) return XXH3_len_9to16_128b(input, len, secret, seed);
        if (len >= 4) return XXH3_len_4to8_128b(input, len, secret, seed);
        if (len) return XXH3_len_1to3_128b(input, len, secret, seed);
        {   XXH128_hash_t h128;
            xxh_u64 const bitflipl = XXH_readLE64(secret+64) ^ XXH_readLE64(secret+72);
            xxh_u64 const bitfliph = XXH_readLE64(secret+80) ^ XXH_readLE64(secret+88);
            h128.low64  = XXH64_avalanche(seed ^ bitflipl);
            h128.high64 = XXH64_avalanche(seed ^ bitfliph);
            return h128;
    }   }
}

/*
 * A bit slower than XXH3_mix16B, but handles multiply by zero better.
 */
XXH_FORCE_INLINE XXH128_hash_t
XXH128_mix32B(XXH128_hash_t acc, const xxh_u8* input_1, const xxh_u8* input_2,
              const xxh_u8* secret, XXH64_hash_t seed)
{
    acc.low64  += XXH3_mix16B (input_1, secret+0, seed);
    acc.low64  ^= XXH_readLE64(input_2) + XXH_readLE64(input_2 + 8);
    acc.high64 += XXH3_mix16B (input_2, secret+16, seed);
    acc.high64 ^= XXH_readLE64(input_1) + XXH_readLE64(input_1 + 8);
    return acc;
}


XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t
XXH3_len_17to128_128b(const xxh_u8* XXH_RESTRICT input, size_t len,
                      const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
                      XXH64_hash_t seed)
{
    XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize;
    XXH_ASSERT(16 < len && len <= 128);

    {   XXH128_hash_t acc;
        acc.low64 = len * XXH_PRIME64_1;
        acc.high64 = 0;

#if XXH_SIZE_OPT >= 1
        {
            /* Smaller, but slightly slower. */
            unsigned int i = (unsigned int)(len - 1) / 32;
            do {
                acc = XXH128_mix32B(acc, input+16*i, input+len-16*(i+1), secret+32*i, seed);
            } while (i-- != 0);
        }
#else
        if (len > 32) {
            if (len > 64) {
                if (len > 96) {
                    acc = XXH128_mix32B(acc, input+48, input+len-64, secret+96, seed);
                }
                acc = XXH128_mix32B(acc, input+32, input+len-48, secret+64, seed);
            }
            acc = XXH128_mix32B(acc, input+16, input+len-32, secret+32, seed);
        }
        acc = XXH128_mix32B(acc, input, input+len-16, secret, seed);
#endif
        {   XXH128_hash_t h128;
            h128.low64  = acc.low64 + acc.high64;
            h128.high64 = (acc.low64    * XXH_PRIME64_1)
                        + (acc.high64   * XXH_PRIME64_4)
                        + ((len - seed) * XXH_PRIME64_2);
            h128.low64  = XXH3_avalanche(h128.low64);
            h128.high64 = (XXH64_hash_t)0 - XXH3_avalanche(h128.high64);
            return h128;
        }
    }
}

XXH_NO_INLINE XXH_PUREF XXH128_hash_t
XXH3_len_129to240_128b(const xxh_u8* XXH_RESTRICT input, size_t len,
                       const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
                       XXH64_hash_t seed)
{
    XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize;
    XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX);

    {   XXH128_hash_t acc;
        unsigned i;
        acc.low64 = len * XXH_PRIME64_1;
        acc.high64 = 0;
        /*
         * We set `i` as offset + 32. We do this so that unchanged
         * `len` can be used as upper bound. This reaches a sweet spot
         * where both x86 and aarch64 get simple address generation and
         * good codegen for the loop.
         */
        for (i = 32; i < 160; i += 32) {
            acc = XXH128_mix32B(acc,
                                input  + i - 32,
                                input  + i - 16,
                                secret + i - 32,
                                seed);
        }
        acc.low64 = XXH3_avalanche(acc.low64);
        acc.high64 = XXH3_avalanche(acc.high64);
        /*
         * NB: `i <= len` will duplicate the last 32 bytes if
         * len % 32 was zero. This is an unfortunate necessity to keep
         * the hash result stable.
         */
        for (i=160; i <= len; i += 32) {
            acc = XXH128_mix32B(acc,
                                input + i - 32,
                                input + i - 16,
                                secret + XXH3_MIDSIZE_STARTOFFSET + i - 160,
                                seed);
        }
        /* last bytes */
        acc = XXH128_mix32B(acc,
                            input + len - 16,
                            input + len - 32,
                            secret + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET - 16,
                            (XXH64_hash_t)0 - seed);

        {   XXH128_hash_t h128;
            h128.low64  = acc.low64 + acc.high64;
            h128.high64 = (acc.low64    * XXH_PRIME64_1)
                        + (acc.high64   * XXH_PRIME64_4)
                        + ((len - seed) * XXH_PRIME64_2);
            h128.low64  = XXH3_avalanche(h128.low64);
            h128.high64 = (XXH64_hash_t)0 - XXH3_avalanche(h128.high64);
            return h128;
        }
    }
}

XXH_FORCE_INLINE XXH128_hash_t
XXH3_hashLong_128b_internal(const void* XXH_RESTRICT input, size_t len,
                            const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
                            XXH3_f_accumulate f_acc,
                            XXH3_f_scrambleAcc f_scramble)
{
    XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[XXH_ACC_NB] = XXH3_INIT_ACC;

    XXH3_hashLong_internal_loop(acc, (const xxh_u8*)input, len, secret, secretSize, f_acc, f_scramble);

    /* converge into final hash */
    XXH_STATIC_ASSERT(sizeof(acc) == 64);
    XXH_ASSERT(secretSize >= sizeof(acc) + XXH_SECRET_MERGEACCS_START);
    {   XXH128_hash_t h128;
        h128.low64  = XXH3_mergeAccs(acc,
                                     secret + XXH_SECRET_MERGEACCS_START,
                                     (xxh_u64)len * XXH_PRIME64_1);
        h128.high64 = XXH3_mergeAccs(acc,
                                     secret + secretSize
                                            - sizeof(acc) - XXH_SECRET_MERGEACCS_START,
                                     ~((xxh_u64)len * XXH_PRIME64_2));
        return h128;
    }
}

/*
 * It's important for performance that XXH3_hashLong() is not inlined.
 */
XXH_NO_INLINE XXH_PUREF XXH128_hash_t
XXH3_hashLong_128b_default(const void* XXH_RESTRICT input, size_t len,
                           XXH64_hash_t seed64,
                           const void* XXH_RESTRICT secret, size_t secretLen)
{
    (void)seed64; (void)secret; (void)secretLen;
    return XXH3_hashLong_128b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret),
                                       XXH3_accumulate, XXH3_scrambleAcc);
}

/*
 * It's important for performance to pass @p secretLen (when it's static)
 * to the compiler, so that it can properly optimize the vectorized loop.
 *
 * When the secret size is unknown, or on GCC 12 where the mix of NO_INLINE and FORCE_INLINE
 * breaks -Og, this is XXH_NO_INLINE.
 */
XXH3_WITH_SECRET_INLINE XXH128_hash_t
XXH3_hashLong_128b_withSecret(const void* XXH_RESTRICT input, size_t len,
                              XXH64_hash_t seed64,
                              const void* XXH_RESTRICT secret, size_t secretLen)
{
    (void)seed64;
    return XXH3_hashLong_128b_internal(input, len, (const xxh_u8*)secret, secretLen,
                                       XXH3_accumulate, XXH3_scrambleAcc);
}

XXH_FORCE_INLINE XXH128_hash_t
XXH3_hashLong_128b_withSeed_internal(const void* XXH_RESTRICT input, size_t len,
                                     XXH64_hash_t seed64,
                                     XXH3_f_accumulate f_acc,
                                     XXH3_f_scrambleAcc f_scramble,
                                     XXH3_f_initCustomSecret f_initSec)
{
    if (seed64 == 0)
        return XXH3_hashLong_128b_internal(input, len,
                                           XXH3_kSecret, sizeof(XXH3_kSecret),
                                           f_acc, f_scramble);
    {   XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE];
        f_initSec(secret, seed64);
        return XXH3_hashLong_128b_internal(input, len, (const xxh_u8*)secret, sizeof(secret),
                                           f_acc, f_scramble);
    }
}

/*
 * It's important for performance that XXH3_hashLong is not inlined.
 */
XXH_NO_INLINE XXH128_hash_t
XXH3_hashLong_128b_withSeed(const void* input, size_t len,
                            XXH64_hash_t seed64, const void* XXH_RESTRICT secret, size_t secretLen)
{
    (void)secret; (void)secretLen;
    return XXH3_hashLong_128b_withSeed_internal(input, len, seed64,
                                                XXH3_accumulate, XXH3_scrambleAcc, XXH3_initCustomSecret);
}

typedef XXH128_hash_t (*XXH3_hashLong128_f)(const void* XXH_RESTRICT, size_t,
                                            XXH64_hash_t, const void* XXH_RESTRICT, size_t);

XXH_FORCE_INLINE XXH128_hash_t
XXH3_128bits_internal(const void* input, size_t len,
                      XXH64_hash_t seed64, const void* XXH_RESTRICT secret, size_t secretLen,
                      XXH3_hashLong128_f f_hl128)
{
    XXH_ASSERT(secretLen >= XXH3_SECRET_SIZE_MIN);
    /*
     * If any action is to be taken when `secret` conditions are not respected,
     * it should be done here.
     * For now, it's a contract pre-condition.
     * Adding a check and a branch here would cost performance at every hash.
     */
    if (len <= 16)
        return XXH3_len_0to16_128b((const xxh_u8*)input, len, (const xxh_u8*)secret, seed64);
    if (len <= 128)
        return XXH3_len_17to128_128b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64);
    if (len <= XXH3_MIDSIZE_MAX)
        return XXH3_len_129to240_128b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64);
    return f_hl128(input, len, seed64, secret, secretLen);
}


/* ===   Public XXH128 API   === */

/*! @ingroup XXH3_family */
XXH_PUBLIC_API XXH128_hash_t XXH3_128bits(XXH_NOESCAPE const void* input, size_t len)
{
    return XXH3_128bits_internal(input, len, 0,
                                 XXH3_kSecret, sizeof(XXH3_kSecret),
                                 XXH3_hashLong_128b_default);
}

/*! @ingroup XXH3_family */
XXH_PUBLIC_API XXH128_hash_t
XXH3_128bits_withSecret(XXH_NOESCAPE const void* input, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize)
{
    return XXH3_128bits_internal(input, len, 0,
                                 (const xxh_u8*)secret, secretSize,
                                 XXH3_hashLong_128b_withSecret);
}

/*! @ingroup XXH3_family */
XXH_PUBLIC_API XXH128_hash_t
XXH3_128bits_withSeed(XXH_NOESCAPE const void* input, size_t len, XXH64_hash_t seed)
{
    return XXH3_128bits_internal(input, len, seed,
                                 XXH3_kSecret, sizeof(XXH3_kSecret),
                                 XXH3_hashLong_128b_withSeed);
}

/*! @ingroup XXH3_family */
XXH_PUBLIC_API XXH128_hash_t
XXH3_128bits_withSecretandSeed(XXH_NOESCAPE const void* input, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed)
{
    if (len <= XXH3_MIDSIZE_MAX)
        return XXH3_128bits_internal(input, len, seed, XXH3_kSecret, sizeof(XXH3_kSecret), NULL);
    return XXH3_hashLong_128b_withSecret(input, len, seed, secret, secretSize);
}

/*! @ingroup XXH3_family */
XXH_PUBLIC_API XXH128_hash_t
XXH128(XXH_NOESCAPE const void* input, size_t len, XXH64_hash_t seed)
{
    return XXH3_128bits_withSeed(input, len, seed);
}
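
/*
 * One-shot usage sketch, excluded from compilation: hashing a buffer with
 * the public 128-bit API above. Seed 0 and the unseeded variant produce the
 * same result, which the sketch checks with XXH128_isEqual() (defined
 * further below). `data`/`size` and the function name are placeholders.
 */
#if 0
#include <assert.h>
#include "xxhash.h"

static void oneshot_128_demo(const void* data, size_t size)
{
    XXH128_hash_t const h1 = XXH128(data, size, 0);     /* seeded with 0 */
    XXH128_hash_t const h2 = XXH3_128bits(data, size);  /* unseeded */
    assert(XXH128_isEqual(h1, h2));
}
#endif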


/* ===   XXH3 128-bit streaming   === */
#ifndef XXH_NO_STREAM
/*
 * All initialization and update functions are identical to the 64-bit streaming variant.
 * The only difference is the finalization routine.
 */

/*! @ingroup XXH3_family */
XXH_PUBLIC_API XXH_errorcode
XXH3_128bits_reset(XXH_NOESCAPE XXH3_state_t* statePtr)
{
    return XXH3_64bits_reset(statePtr);
}

/*! @ingroup XXH3_family */
XXH_PUBLIC_API XXH_errorcode
XXH3_128bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize)
{
    return XXH3_64bits_reset_withSecret(statePtr, secret, secretSize);
}

/*! @ingroup XXH3_family */
XXH_PUBLIC_API XXH_errorcode
XXH3_128bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH64_hash_t seed)
{
    return XXH3_64bits_reset_withSeed(statePtr, seed);
}

/*! @ingroup XXH3_family */
XXH_PUBLIC_API XXH_errorcode
XXH3_128bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed)
{
    return XXH3_64bits_reset_withSecretandSeed(statePtr, secret, secretSize, seed);
}

/*! @ingroup XXH3_family */
XXH_PUBLIC_API XXH_errorcode
XXH3_128bits_update(XXH_NOESCAPE XXH3_state_t* state, XXH_NOESCAPE const void* input, size_t len)
{
    return XXH3_64bits_update(state, input, len);
}

/*! @ingroup XXH3_family */
XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_digest (XXH_NOESCAPE const XXH3_state_t* state)
{
    const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret;
    if (state->totalLen > XXH3_MIDSIZE_MAX) {
        XXH_ALIGN(XXH_ACC_ALIGN) XXH64_hash_t acc[XXH_ACC_NB];
        XXH3_digest_long(acc, state, secret);
        XXH_ASSERT(state->secretLimit + XXH_STRIPE_LEN >= sizeof(acc) + XXH_SECRET_MERGEACCS_START);
        {   XXH128_hash_t h128;
            h128.low64  = XXH3_mergeAccs(acc,
                                         secret + XXH_SECRET_MERGEACCS_START,
                                         (xxh_u64)state->totalLen * XXH_PRIME64_1);
            h128.high64 = XXH3_mergeAccs(acc,
                                         secret + state->secretLimit + XXH_STRIPE_LEN
                                                - sizeof(acc) - XXH_SECRET_MERGEACCS_START,
                                         ~((xxh_u64)state->totalLen * XXH_PRIME64_2));
            return h128;
        }
    }
    /* len <= XXH3_MIDSIZE_MAX : short code */
    if (state->seed)
        return XXH3_128bits_withSeed(state->buffer, (size_t)state->totalLen, state->seed);
    return XXH3_128bits_withSecret(state->buffer, (size_t)(state->totalLen),
                                   secret, state->secretLimit + XXH_STRIPE_LEN);
}
#endif /* !XXH_NO_STREAM */
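
/*
 * Streaming sketch, excluded from compilation: as noted above, the 128-bit
 * streaming API mirrors the 64-bit one, differing only in finalization.
 * The function name and chunk-array shape are hypothetical.
 */
#if 0
#include "xxhash.h"

static XXH128_hash_t hash_chunks_128(const void* const* chunks,
                                     const size_t* sizes, size_t nbChunks)
{
    XXH128_hash_t h128 = { 0, 0 };
    size_t n;
    XXH3_state_t* const st = XXH3_createState();
    if (st == NULL) return h128;
    (void)XXH3_128bits_reset(st);
    for (n = 0; n < nbChunks; n++)
        (void)XXH3_128bits_update(st, chunks[n], sizes[n]);
    h128 = XXH3_128bits_digest(st);
    XXH3_freeState(st);
    return h128;
}
#endif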
/* 128-bit utility functions */

#include <string.h>   /* memcmp, memcpy */

/* return : 1 if equal, 0 if different */
/*! @ingroup XXH3_family */
XXH_PUBLIC_API int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2)
{
    /* note : XXH128_hash_t is compact, it has no padding byte */
    return !(memcmp(&h1, &h2, sizeof(h1)));
}

/* This prototype is compatible with stdlib's qsort().
 * @return : >0 if *h128_1  > *h128_2
 *           <0 if *h128_1  < *h128_2
 *           =0 if *h128_1 == *h128_2 */
/*! @ingroup XXH3_family */
XXH_PUBLIC_API int XXH128_cmp(XXH_NOESCAPE const void* h128_1, XXH_NOESCAPE const void* h128_2)
{
    XXH128_hash_t const h1 = *(const XXH128_hash_t*)h128_1;
    XXH128_hash_t const h2 = *(const XXH128_hash_t*)h128_2;
    int const hcmp = (h1.high64 > h2.high64) - (h2.high64 > h1.high64);
    /* note : bets that, in most cases, hash values are different */
    if (hcmp) return hcmp;
    return (h1.low64 > h2.low64) - (h2.low64 > h1.low64);
}


/*======   Canonical representation   ======*/
/*! @ingroup XXH3_family */
XXH_PUBLIC_API void
XXH128_canonicalFromHash(XXH_NOESCAPE XXH128_canonical_t* dst, XXH128_hash_t hash)
{
    XXH_STATIC_ASSERT(sizeof(XXH128_canonical_t) == sizeof(XXH128_hash_t));
    if (XXH_CPU_LITTLE_ENDIAN) {
        hash.high64 = XXH_swap64(hash.high64);
        hash.low64  = XXH_swap64(hash.low64);
    }
    XXH_memcpy(dst, &hash.high64, sizeof(hash.high64));
    XXH_memcpy((char*)dst + sizeof(hash.high64), &hash.low64, sizeof(hash.low64));
}

/*! @ingroup XXH3_family */
XXH_PUBLIC_API XXH128_hash_t
XXH128_hashFromCanonical(XXH_NOESCAPE const XXH128_canonical_t* src)
{
    XXH128_hash_t h;
    h.high64 = XXH_readBE64(src);
    h.low64  = XXH_readBE64(src->digest + 8);
    return h;
}
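
/*
 * Round-trip sketch, excluded from compilation: the canonical form is a
 * fixed big-endian byte layout suitable for storage or transmission, and
 * XXH128_hashFromCanonical() inverts XXH128_canonicalFromHash() exactly.
 * The function name and `data`/`size` are placeholders.
 */
#if 0
#include <assert.h>
#include "xxhash.h"

static void canonical_roundtrip_demo(const void* data, size_t size)
{
    XXH128_hash_t const h = XXH128(data, size, 0);
    XXH128_canonical_t canon;   /* 16 raw bytes, big-endian */
    XXH128_canonicalFromHash(&canon, h);
    assert(XXH128_isEqual(h, XXH128_hashFromCanonical(&canon)));
}
#endif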


/* ==========================================
 * Secret generators
 * ==========================================
 */
#define XXH_MIN(x, y) (((x) > (y)) ? (y) : (x))

XXH_FORCE_INLINE void XXH3_combine16(void* dst, XXH128_hash_t h128)
{
    XXH_writeLE64( dst, XXH_readLE64(dst) ^ h128.low64 );
    XXH_writeLE64( (char*)dst+8, XXH_readLE64((char*)dst+8) ^ h128.high64 );
}

/*! @ingroup XXH3_family */
XXH_PUBLIC_API XXH_errorcode
XXH3_generateSecret(XXH_NOESCAPE void* secretBuffer, size_t secretSize, XXH_NOESCAPE const void* customSeed, size_t customSeedSize)
{
#if (XXH_DEBUGLEVEL >= 1)
    XXH_ASSERT(secretBuffer != NULL);
    XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN);
#else
    /* production mode, assert() is disabled */
    if (secretBuffer == NULL) return XXH_ERROR;
    if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR;
#endif

    if (customSeedSize == 0) {
        customSeed = XXH3_kSecret;
        customSeedSize = XXH_SECRET_DEFAULT_SIZE;
    }
#if (XXH_DEBUGLEVEL >= 1)
    XXH_ASSERT(customSeed != NULL);
#else
    if (customSeed == NULL) return XXH_ERROR;
#endif

    /* Fill secretBuffer with a copy of customSeed - repeat as needed */
    {   size_t pos = 0;
        while (pos < secretSize) {
            size_t const toCopy = XXH_MIN((secretSize - pos), customSeedSize);
            memcpy((char*)secretBuffer + pos, customSeed, toCopy);
            pos += toCopy;
    }   }

    {   size_t const nbSeg16 = secretSize / 16;
        size_t n;
        XXH128_canonical_t scrambler;
        XXH128_canonicalFromHash(&scrambler, XXH128(customSeed, customSeedSize, 0));
        for (n=0; n<nbSeg16; n++) {
            XXH128_hash_t const h128 = XXH128(&scrambler, sizeof(scrambler), n);
            XXH3_combine16((char*)secretBuffer + n*16, h128);
        }
        /* last segment */
        XXH3_combine16((char*)secretBuffer + secretSize - 16, XXH128_hashFromCanonical(&scrambler));
    }
    return XXH_OK;
}
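
/*
 * Usage sketch, excluded from compilation: deriving a custom secret from
 * arbitrary seed material, then hashing with it. The buffer only needs to be
 * >= XXH3_SECRET_SIZE_MIN; the 192-byte size below matches the default
 * secret size. The function name is hypothetical.
 */
#if 0
#include "xxhash.h"

static XXH128_hash_t hash_with_custom_secret(const void* data, size_t size,
                                             const void* seedMaterial, size_t seedSize)
{
    unsigned char secret[192];   /* >= XXH3_SECRET_SIZE_MIN */
    if (XXH3_generateSecret(secret, sizeof(secret), seedMaterial, seedSize) != XXH_OK) {
        XXH128_hash_t const zero = { 0, 0 };
        return zero;             /* invalid parameters */
    }
    return XXH3_128bits_withSecret(data, size, secret, sizeof(secret));
}
#endif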

/*! @ingroup XXH3_family */
XXH_PUBLIC_API void
XXH3_generateSecret_fromSeed(XXH_NOESCAPE void* secretBuffer, XXH64_hash_t seed)
{
    XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE];
    XXH3_initCustomSecret(secret, seed);
    XXH_ASSERT(secretBuffer != NULL);
    memcpy(secretBuffer, secret, XXH_SECRET_DEFAULT_SIZE);
}


/* Pop our optimization override from above */
#if XXH_VECTOR == XXH_AVX2 /* AVX2 */ \
  && defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \
  && defined(__OPTIMIZE__) && XXH_SIZE_OPT <= 0 /* respect -O0 and -Os */
#  pragma GCC pop_options
#endif

#endif  /* XXH_NO_LONG_LONG */

#endif  /* XXH_NO_XXH3 */

/*!
 * @}
 */
#endif  /* XXH_IMPLEMENTATION */


#if defined (__cplusplus)
}  /* extern "C" */
#endif