1*e4b17023SJohn Marino // Definition of _Hash_bytes. -*- C++ -*- 2*e4b17023SJohn Marino 3*e4b17023SJohn Marino // Copyright (C) 2010, 2011 Free Software Foundation, Inc. 4*e4b17023SJohn Marino // 5*e4b17023SJohn Marino // This file is part of the GNU ISO C++ Library. This library is free 6*e4b17023SJohn Marino // software; you can redistribute it and/or modify it under the 7*e4b17023SJohn Marino // terms of the GNU General Public License as published by the 8*e4b17023SJohn Marino // Free Software Foundation; either version 3, or (at your option) 9*e4b17023SJohn Marino // any later version. 10*e4b17023SJohn Marino 11*e4b17023SJohn Marino // This library is distributed in the hope that it will be useful, 12*e4b17023SJohn Marino // but WITHOUT ANY WARRANTY; without even the implied warranty of 13*e4b17023SJohn Marino // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14*e4b17023SJohn Marino // GNU General Public License for more details. 15*e4b17023SJohn Marino 16*e4b17023SJohn Marino // Under Section 7 of GPL version 3, you are granted additional 17*e4b17023SJohn Marino // permissions described in the GCC Runtime Library Exception, version 18*e4b17023SJohn Marino // 3.1, as published by the Free Software Foundation. 19*e4b17023SJohn Marino 20*e4b17023SJohn Marino // You should have received a copy of the GNU General Public License and 21*e4b17023SJohn Marino // a copy of the GCC Runtime Library Exception along with this program; 22*e4b17023SJohn Marino // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 23*e4b17023SJohn Marino // <http://www.gnu.org/licenses/>. 24*e4b17023SJohn Marino 25*e4b17023SJohn Marino // This file defines Hash_bytes, a primitive used for defining hash 26*e4b17023SJohn Marino // functions. Based on public domain MurmurHashUnaligned2, by Austin 27*e4b17023SJohn Marino // Appleby. http://murmurhash.googlepages.com/ 28*e4b17023SJohn Marino 29*e4b17023SJohn Marino // This file also defines _Fnv_hash_bytes, another primitive with 30*e4b17023SJohn Marino // exactly the same interface but using a different hash algorithm, 31*e4b17023SJohn Marino // Fowler / Noll / Vo (FNV) Hash (type FNV-1a). The Murmur hash 32*e4b17023SJohn Marino // function apears to be better in both speed and hash quality, and 33*e4b17023SJohn Marino // FNV is provided primarily for backward compatibility. 34*e4b17023SJohn Marino 35*e4b17023SJohn Marino #include <bits/hash_bytes.h> 36*e4b17023SJohn Marino 37*e4b17023SJohn Marino namespace 38*e4b17023SJohn Marino { 39*e4b17023SJohn Marino inline std::size_t unaligned_load(const char * p)40*e4b17023SJohn Marino unaligned_load(const char* p) 41*e4b17023SJohn Marino { 42*e4b17023SJohn Marino std::size_t result; 43*e4b17023SJohn Marino __builtin_memcpy(&result, p, sizeof(result)); 44*e4b17023SJohn Marino return result; 45*e4b17023SJohn Marino } 46*e4b17023SJohn Marino 47*e4b17023SJohn Marino #if __SIZEOF_SIZE_T__ == 8 48*e4b17023SJohn Marino // Loads n bytes, where 1 <= n < 8. 49*e4b17023SJohn Marino inline std::size_t load_bytes(const char * p,int n)50*e4b17023SJohn Marino load_bytes(const char* p, int n) 51*e4b17023SJohn Marino { 52*e4b17023SJohn Marino std::size_t result = 0; 53*e4b17023SJohn Marino --n; 54*e4b17023SJohn Marino do 55*e4b17023SJohn Marino result = (result << 8) + static_cast<unsigned char>(p[n]); 56*e4b17023SJohn Marino while (--n >= 0); 57*e4b17023SJohn Marino return result; 58*e4b17023SJohn Marino } 59*e4b17023SJohn Marino 60*e4b17023SJohn Marino inline std::size_t shift_mix(std::size_t v)61*e4b17023SJohn Marino shift_mix(std::size_t v) 62*e4b17023SJohn Marino { return v ^ (v >> 47);} 63*e4b17023SJohn Marino #endif 64*e4b17023SJohn Marino } 65*e4b17023SJohn Marino 66*e4b17023SJohn Marino namespace std 67*e4b17023SJohn Marino { 68*e4b17023SJohn Marino _GLIBCXX_BEGIN_NAMESPACE_VERSION 69*e4b17023SJohn Marino 70*e4b17023SJohn Marino #if __SIZEOF_SIZE_T__ == 4 71*e4b17023SJohn Marino 72*e4b17023SJohn Marino // Implementation of Murmur hash for 32-bit size_t. 73*e4b17023SJohn Marino size_t _Hash_bytes(const void * ptr,size_t len,size_t seed)74*e4b17023SJohn Marino _Hash_bytes(const void* ptr, size_t len, size_t seed) 75*e4b17023SJohn Marino { 76*e4b17023SJohn Marino const size_t m = 0x5bd1e995; 77*e4b17023SJohn Marino size_t hash = seed ^ len; 78*e4b17023SJohn Marino const char* buf = static_cast<const char*>(ptr); 79*e4b17023SJohn Marino 80*e4b17023SJohn Marino // Mix 4 bytes at a time into the hash. 81*e4b17023SJohn Marino while(len >= 4) 82*e4b17023SJohn Marino { 83*e4b17023SJohn Marino size_t k = unaligned_load(buf); 84*e4b17023SJohn Marino k *= m; 85*e4b17023SJohn Marino k ^= k >> 24; 86*e4b17023SJohn Marino k *= m; 87*e4b17023SJohn Marino hash *= m; 88*e4b17023SJohn Marino hash ^= k; 89*e4b17023SJohn Marino buf += 4; 90*e4b17023SJohn Marino len -= 4; 91*e4b17023SJohn Marino } 92*e4b17023SJohn Marino 93*e4b17023SJohn Marino // Handle the last few bytes of the input array. 94*e4b17023SJohn Marino switch(len) 95*e4b17023SJohn Marino { 96*e4b17023SJohn Marino case 3: 97*e4b17023SJohn Marino hash ^= static_cast<unsigned char>(buf[2]) << 16; 98*e4b17023SJohn Marino case 2: 99*e4b17023SJohn Marino hash ^= static_cast<unsigned char>(buf[1]) << 8; 100*e4b17023SJohn Marino case 1: 101*e4b17023SJohn Marino hash ^= static_cast<unsigned char>(buf[0]); 102*e4b17023SJohn Marino hash *= m; 103*e4b17023SJohn Marino }; 104*e4b17023SJohn Marino 105*e4b17023SJohn Marino // Do a few final mixes of the hash. 106*e4b17023SJohn Marino hash ^= hash >> 13; 107*e4b17023SJohn Marino hash *= m; 108*e4b17023SJohn Marino hash ^= hash >> 15; 109*e4b17023SJohn Marino return hash; 110*e4b17023SJohn Marino } 111*e4b17023SJohn Marino 112*e4b17023SJohn Marino // Implementation of FNV hash for 32-bit size_t. 113*e4b17023SJohn Marino size_t _Fnv_hash_bytes(const void * ptr,size_t len,size_t hash)114*e4b17023SJohn Marino _Fnv_hash_bytes(const void* ptr, size_t len, size_t hash) 115*e4b17023SJohn Marino { 116*e4b17023SJohn Marino const char* cptr = static_cast<const char*>(ptr); 117*e4b17023SJohn Marino for (; len; --len) 118*e4b17023SJohn Marino { 119*e4b17023SJohn Marino hash ^= static_cast<size_t>(*cptr++); 120*e4b17023SJohn Marino hash *= static_cast<size_t>(16777619UL); 121*e4b17023SJohn Marino } 122*e4b17023SJohn Marino return hash; 123*e4b17023SJohn Marino } 124*e4b17023SJohn Marino 125*e4b17023SJohn Marino #elif __SIZEOF_SIZE_T__ == 8 126*e4b17023SJohn Marino 127*e4b17023SJohn Marino // Implementation of Murmur hash for 64-bit size_t. 128*e4b17023SJohn Marino size_t 129*e4b17023SJohn Marino _Hash_bytes(const void* ptr, size_t len, size_t seed) 130*e4b17023SJohn Marino { 131*e4b17023SJohn Marino static const size_t mul = (0xc6a4a793UL << 32UL) + 0x5bd1e995UL; 132*e4b17023SJohn Marino const char* const buf = static_cast<const char*>(ptr); 133*e4b17023SJohn Marino 134*e4b17023SJohn Marino // Remove the bytes not divisible by the sizeof(size_t). This 135*e4b17023SJohn Marino // allows the main loop to process the data as 64-bit integers. 136*e4b17023SJohn Marino const int len_aligned = len & ~0x7; 137*e4b17023SJohn Marino const char* const end = buf + len_aligned; 138*e4b17023SJohn Marino size_t hash = seed ^ (len * mul); 139*e4b17023SJohn Marino for (const char* p = buf; p != end; p += 8) 140*e4b17023SJohn Marino { 141*e4b17023SJohn Marino const size_t data = shift_mix(unaligned_load(p) * mul) * mul; 142*e4b17023SJohn Marino hash ^= data; 143*e4b17023SJohn Marino hash *= mul; 144*e4b17023SJohn Marino } 145*e4b17023SJohn Marino if ((len & 0x7) != 0) 146*e4b17023SJohn Marino { 147*e4b17023SJohn Marino const size_t data = load_bytes(end, len & 0x7); 148*e4b17023SJohn Marino hash ^= data; 149*e4b17023SJohn Marino hash *= mul; 150*e4b17023SJohn Marino } 151*e4b17023SJohn Marino hash = shift_mix(hash) * mul; 152*e4b17023SJohn Marino hash = shift_mix(hash); 153*e4b17023SJohn Marino return hash; 154*e4b17023SJohn Marino } 155*e4b17023SJohn Marino 156*e4b17023SJohn Marino // Implementation of FNV hash for 64-bit size_t. 157*e4b17023SJohn Marino size_t 158*e4b17023SJohn Marino _Fnv_hash_bytes(const void* ptr, size_t len, size_t hash) 159*e4b17023SJohn Marino { 160*e4b17023SJohn Marino const char* cptr = static_cast<const char*>(ptr); 161*e4b17023SJohn Marino for (; len; --len) 162*e4b17023SJohn Marino { 163*e4b17023SJohn Marino hash ^= static_cast<size_t>(*cptr++); 164*e4b17023SJohn Marino hash *= static_cast<size_t>(1099511628211ULL); 165*e4b17023SJohn Marino } 166*e4b17023SJohn Marino return hash; 167*e4b17023SJohn Marino } 168*e4b17023SJohn Marino 169*e4b17023SJohn Marino #else 170*e4b17023SJohn Marino 171*e4b17023SJohn Marino // Dummy hash implementation for unusual sizeof(size_t). 172*e4b17023SJohn Marino size_t 173*e4b17023SJohn Marino _Hash_bytes(const void* ptr, size_t len, size_t seed) 174*e4b17023SJohn Marino { 175*e4b17023SJohn Marino size_t hash = seed; 176*e4b17023SJohn Marino const char* cptr = reinterpret_cast<const char*>(ptr); 177*e4b17023SJohn Marino for (; len; --len) 178*e4b17023SJohn Marino hash = (hash * 131) + *cptr++; 179*e4b17023SJohn Marino return hash; 180*e4b17023SJohn Marino } 181*e4b17023SJohn Marino 182*e4b17023SJohn Marino size_t 183*e4b17023SJohn Marino _Fnv_hash_bytes(const void* ptr, size_t len, size_t seed) 184*e4b17023SJohn Marino { return _Hash_bytes(ptr, len, seed); } 185*e4b17023SJohn Marino 186*e4b17023SJohn Marino #endif /* __SIZEOF_SIZE_T__ */ 187*e4b17023SJohn Marino 188*e4b17023SJohn Marino _GLIBCXX_END_NAMESPACE_VERSION 189*e4b17023SJohn Marino } // namespace 190