1 //===- llvm/Support/HashBuilder.h - Convenient hashing interface-*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements an interface allowing to conveniently build hashes of 10 // various data types, without relying on the underlying hasher type to know 11 // about hashed data types. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #ifndef LLVM_SUPPORT_HASHBUILDER_H 16 #define LLVM_SUPPORT_HASHBUILDER_H 17 18 #include "llvm/ADT/ArrayRef.h" 19 #include "llvm/ADT/Hashing.h" 20 #include "llvm/ADT/STLExtras.h" 21 #include "llvm/ADT/StringRef.h" 22 #include "llvm/Support/Endian.h" 23 #include "llvm/Support/type_traits.h" 24 25 #include <iterator> 26 #include <utility> 27 28 namespace llvm { 29 30 namespace hashbuilder_detail { 31 /// Trait to indicate whether a type's bits can be hashed directly (after 32 /// endianness correction). 33 template <typename U> 34 struct IsHashableData 35 : std::integral_constant<bool, is_integral_or_enum<U>::value> {}; 36 37 } // namespace hashbuilder_detail 38 39 /// Declares the hasher member, and functions forwarding directly to the hasher. 40 template <typename HasherT> class HashBuilderBase { 41 public: 42 HasherT &getHasher() { return Hasher; } 43 44 /// Forward to `HasherT::update(ArrayRef<uint8_t>)`. 45 /// 46 /// This may not take the size of `Data` into account. 47 /// Users of this function should pay attention to respect endianness 48 /// contraints. 49 void update(ArrayRef<uint8_t> Data) { this->getHasher().update(Data); } 50 51 /// Forward to `HasherT::update(ArrayRef<uint8_t>)`. 52 /// 53 /// This may not take the size of `Data` into account. 54 /// Users of this function should pay attention to respect endianness 55 /// contraints. 56 void update(StringRef Data) { 57 update(makeArrayRef(reinterpret_cast<const uint8_t *>(Data.data()), 58 Data.size())); 59 } 60 61 /// Forward to `HasherT::final()` if available. 62 template <typename HasherT_ = HasherT> StringRef final() { 63 return this->getHasher().final(); 64 } 65 66 /// Forward to `HasherT::result()` if available. 67 template <typename HasherT_ = HasherT> StringRef result() { 68 return this->getHasher().result(); 69 } 70 71 protected: 72 explicit HashBuilderBase(HasherT &Hasher) : Hasher(Hasher) {} 73 74 template <typename... ArgTypes> 75 explicit HashBuilderBase(ArgTypes &&...Args) 76 : OptionalHasher(in_place, std::forward<ArgTypes>(Args)...), 77 Hasher(*OptionalHasher) {} 78 79 private: 80 Optional<HasherT> OptionalHasher; 81 HasherT &Hasher; 82 }; 83 84 /// Implementation of the `HashBuilder` interface. 85 /// 86 /// `support::endianness::native` is not supported. `HashBuilder` is 87 /// expected to canonicalize `support::endianness::native` to one of 88 /// `support::endianness::big` or `support::endianness::little`. 89 template <typename HasherT, support::endianness Endianness> 90 class HashBuilderImpl : public HashBuilderBase<HasherT> { 91 static_assert(Endianness != support::endianness::native, 92 "HashBuilder should canonicalize endianness"); 93 94 public: 95 explicit HashBuilderImpl(HasherT &Hasher) 96 : HashBuilderBase<HasherT>(Hasher) {} 97 template <typename... ArgTypes> 98 explicit HashBuilderImpl(ArgTypes &&...Args) 99 : HashBuilderBase<HasherT>(Args...) {} 100 101 /// Implement hashing for hashable data types, e.g. integral or enum values. 102 template <typename T> 103 std::enable_if_t<hashbuilder_detail::IsHashableData<T>::value, 104 HashBuilderImpl &> 105 add(T Value) { 106 return adjustForEndiannessAndAdd(Value); 107 } 108 109 /// Support hashing `ArrayRef`. 110 /// 111 /// `Value.size()` is taken into account to ensure cases like 112 /// ``` 113 /// builder.add({1}); 114 /// builder.add({2, 3}); 115 /// ``` 116 /// and 117 /// ``` 118 /// builder.add({1, 2}); 119 /// builder.add({3}); 120 /// ``` 121 /// do not collide. 122 template <typename T> HashBuilderImpl &add(ArrayRef<T> Value) { 123 // As of implementation time, simply calling `addRange(Value)` would also go 124 // through the `update` fast path. But that would rely on the implementation 125 // details of `ArrayRef::begin()` and `ArrayRef::end()`. Explicitly call 126 // `update` to guarantee the fast path. 127 add(Value.size()); 128 if (hashbuilder_detail::IsHashableData<T>::value && 129 Endianness == support::endian::system_endianness()) { 130 this->update( 131 makeArrayRef(reinterpret_cast<const uint8_t *>(Value.begin()), 132 Value.size() * sizeof(T))); 133 } else { 134 for (auto &V : Value) 135 add(V); 136 } 137 return *this; 138 } 139 140 /// Support hashing `StringRef`. 141 /// 142 /// `Value.size()` is taken into account to ensure cases like 143 /// ``` 144 /// builder.add("a"); 145 /// builder.add("bc"); 146 /// ``` 147 /// and 148 /// ``` 149 /// builder.add("ab"); 150 /// builder.add("c"); 151 /// ``` 152 /// do not collide. 153 HashBuilderImpl &add(StringRef Value) { 154 // As of implementation time, simply calling `addRange(Value)` would also go 155 // through `update`. But that would rely on the implementation of 156 // `StringRef::begin()` and `StringRef::end()`. Explicitly call `update` to 157 // guarantee the fast path. 158 add(Value.size()); 159 this->update(makeArrayRef(reinterpret_cast<const uint8_t *>(Value.begin()), 160 Value.size())); 161 return *this; 162 } 163 164 template <typename T> 165 using HasAddHashT = 166 decltype(addHash(std::declval<HashBuilderImpl &>(), std::declval<T &>())); 167 /// Implement hashing for user-defined `struct`s. 168 /// 169 /// Any user-define `struct` can participate in hashing via `HashBuilder` by 170 /// providing a `addHash` templated function. 171 /// 172 /// ``` 173 /// template <typename HasherT, support::endianness Endianness> 174 /// void addHash(HashBuilder<HasherT, Endianness> &HBuilder, 175 /// const UserDefinedStruct &Value); 176 /// ``` 177 /// 178 /// For example: 179 /// ``` 180 /// struct SimpleStruct { 181 /// char c; 182 /// int i; 183 /// }; 184 /// 185 /// template <typename HasherT, support::endianness Endianness> 186 /// void addHash(HashBuilderImpl<HasherT, Endianness> &HBuilder, 187 /// const SimpleStruct &Value) { 188 /// HBuilder.add(Value.c); 189 /// HBuilder.add(Value.i); 190 /// } 191 /// ``` 192 /// 193 /// To avoid endianness issues, specializations of `addHash` should 194 /// generally rely on exising `add`, `addRange`, and `addRangeElements` 195 /// functions. If directly using `update`, an implementation must correctly 196 /// handle endianness. 197 /// 198 /// ``` 199 /// struct __attribute__ ((packed)) StructWithFastHash { 200 /// int I; 201 /// char C; 202 /// 203 /// // If possible, we want to hash both `I` and `C` in a single 204 /// // `update` call for performance concerns. 205 /// template <typename HasherT, support::endianness Endianness> 206 /// friend void addHash(HashBuilderImpl<HasherT, Endianness> &HBuilder, 207 /// const StructWithFastHash &Value) { 208 /// if (Endianness == support::endian::system_endianness()) { 209 /// HBuilder.update(makeArrayRef( 210 /// reinterpret_cast<const uint8_t *>(&Value), sizeof(Value))); 211 /// } else { 212 /// // Rely on existing `add` methods to handle endianness. 213 /// HBuilder.add(Value.I); 214 /// HBuilder.add(Value.C); 215 /// } 216 /// } 217 /// }; 218 /// ``` 219 /// 220 /// To avoid collisions, specialization of `addHash` for variable-size 221 /// types must take the size into account. 222 /// 223 /// For example: 224 /// ``` 225 /// struct CustomContainer { 226 /// private: 227 /// size_t Size; 228 /// int Elements[100]; 229 /// 230 /// public: 231 /// CustomContainer(size_t Size) : Size(Size) { 232 /// for (size_t I = 0; I != Size; ++I) 233 /// Elements[I] = I; 234 /// } 235 /// template <typename HasherT, support::endianness Endianness> 236 /// friend void addHash(HashBuilderImpl<HasherT, Endianness> &HBuilder, 237 /// const CustomContainer &Value) { 238 /// if (Endianness == support::endian::system_endianness()) { 239 /// HBuilder.update(makeArrayRef( 240 /// reinterpret_cast<const uint8_t *>(&Value.Size), 241 /// sizeof(Value.Size) + Value.Size * sizeof(Value.Elements[0]))); 242 /// } else { 243 /// // `addRange` will take care of encoding the size. 244 /// HBuilder.addRange(&Value.Elements[0], &Value.Elements[0] + 245 /// Value.Size); 246 /// } 247 /// } 248 /// }; 249 /// ``` 250 template <typename T> 251 std::enable_if_t<is_detected<HasAddHashT, T>::value && 252 !hashbuilder_detail::IsHashableData<T>::value, 253 HashBuilderImpl &> 254 add(const T &Value) { 255 addHash(*this, Value); 256 return *this; 257 } 258 259 template <typename T1, typename T2> 260 HashBuilderImpl &add(const std::pair<T1, T2> &Value) { 261 add(Value.first); 262 add(Value.second); 263 return *this; 264 } 265 266 template <typename... Ts> HashBuilderImpl &add(const std::tuple<Ts...> &Arg) { 267 return addTupleHelper(Arg, typename std::index_sequence_for<Ts...>()); 268 } 269 270 /// A convenenience variadic helper. 271 /// It simply iterates over its arguments, in order. 272 /// ``` 273 /// add(Arg1, Arg2); 274 /// ``` 275 /// is equivalent to 276 /// ``` 277 /// add(Arg1) 278 /// add(Arg2) 279 /// ``` 280 template <typename T, typename... Ts> 281 typename std::enable_if<(sizeof...(Ts) >= 1), HashBuilderImpl &>::type 282 add(const T &FirstArg, const Ts &...Args) { 283 add(FirstArg); 284 add(Args...); 285 return *this; 286 } 287 288 template <typename ForwardIteratorT> 289 HashBuilderImpl &addRange(ForwardIteratorT First, ForwardIteratorT Last) { 290 add(std::distance(First, Last)); 291 return addRangeElements(First, Last); 292 } 293 294 template <typename RangeT> HashBuilderImpl &addRange(const RangeT &Range) { 295 return addRange(adl_begin(Range), adl_end(Range)); 296 } 297 298 template <typename ForwardIteratorT> 299 HashBuilderImpl &addRangeElements(ForwardIteratorT First, 300 ForwardIteratorT Last) { 301 return addRangeElementsImpl( 302 First, Last, 303 typename std::iterator_traits<ForwardIteratorT>::iterator_category()); 304 } 305 306 template <typename RangeT> 307 HashBuilderImpl &addRangeElements(const RangeT &Range) { 308 return addRangeElements(adl_begin(Range), adl_end(Range)); 309 } 310 311 template <typename T> 312 using HasByteSwapT = decltype(support::endian::byte_swap( 313 std::declval<T &>(), support::endianness::little)); 314 /// Adjust `Value` for the target endianness and add it to the hash. 315 template <typename T> 316 std::enable_if_t<is_detected<HasByteSwapT, T>::value, HashBuilderImpl &> 317 adjustForEndiannessAndAdd(const T &Value) { 318 T SwappedValue = support::endian::byte_swap(Value, Endianness); 319 this->update(makeArrayRef(reinterpret_cast<const uint8_t *>(&SwappedValue), 320 sizeof(SwappedValue))); 321 return *this; 322 } 323 324 private: 325 template <typename... Ts, std::size_t... Indices> 326 HashBuilderImpl &addTupleHelper(const std::tuple<Ts...> &Arg, 327 std::index_sequence<Indices...>) { 328 add(std::get<Indices>(Arg)...); 329 return *this; 330 } 331 332 // FIXME: Once available, specialize this function for `contiguous_iterator`s, 333 // and use it for `ArrayRef` and `StringRef`. 334 template <typename ForwardIteratorT> 335 HashBuilderImpl &addRangeElementsImpl(ForwardIteratorT First, 336 ForwardIteratorT Last, 337 std::forward_iterator_tag) { 338 for (auto It = First; It != Last; ++It) 339 add(*It); 340 return *this; 341 } 342 343 template <typename T> 344 std::enable_if_t<hashbuilder_detail::IsHashableData<T>::value && 345 Endianness == support::endian::system_endianness(), 346 HashBuilderImpl &> 347 addRangeElementsImpl(T *First, T *Last, std::forward_iterator_tag) { 348 this->update(makeArrayRef(reinterpret_cast<const uint8_t *>(First), 349 (Last - First) * sizeof(T))); 350 return *this; 351 } 352 }; 353 354 /// Interface to help hash various types through a hasher type. 355 /// 356 /// Via provided specializations of `add`, `addRange`, and `addRangeElements` 357 /// functions, various types (e.g. `ArrayRef`, `StringRef`, etc.) can be hashed 358 /// without requiring any knowledge of hashed types from the hasher type. 359 /// 360 /// The only method expected from the templated hasher type `HasherT` is: 361 /// * void update(ArrayRef<uint8_t> Data) 362 /// 363 /// Additionally, the following methods will be forwarded to the hasher type: 364 /// * decltype(std::declval<HasherT &>().final()) final() 365 /// * decltype(std::declval<HasherT &>().result()) result() 366 /// 367 /// From a user point of view, the interface provides the following: 368 /// * `template<typename T> add(const T &Value)` 369 /// The `add` function implements hashing of various types. 370 /// * `template <typename ItT> void addRange(ItT First, ItT Last)` 371 /// The `addRange` function is designed to aid hashing a range of values. 372 /// It explicitly adds the size of the range in the hash. 373 /// * `template <typename ItT> void addRangeElements(ItT First, ItT Last)` 374 /// The `addRangeElements` function is also designed to aid hashing a range of 375 /// values. In contrast to `addRange`, it **ignores** the size of the range, 376 /// behaving as if elements were added one at a time with `add`. 377 /// 378 /// User-defined `struct` types can participate in this interface by providing 379 /// an `addHash` templated function. See the associated template specialization 380 /// for details. 381 /// 382 /// This interface does not impose requirements on the hasher 383 /// `update(ArrayRef<uint8_t> Data)` method. We want to avoid collisions for 384 /// variable-size types; for example for 385 /// ``` 386 /// builder.add({1}); 387 /// builder.add({2, 3}); 388 /// ``` 389 /// and 390 /// ``` 391 /// builder.add({1, 2}); 392 /// builder.add({3}); 393 /// ``` 394 /// . Thus, specializations of `add` and `addHash` for variable-size types must 395 /// not assume that the hasher type considers the size as part of the hash; they 396 /// must explicitly add the size to the hash. See for example specializations 397 /// for `ArrayRef` and `StringRef`. 398 /// 399 /// Additionally, since types are eventually forwarded to the hasher's 400 /// `void update(ArrayRef<uint8_t>)` method, endianness plays a role in the hash 401 /// computation (for example when computing `add((int)123)`). 402 /// Specifiying a non-`native` `Endianness` template parameter allows to compute 403 /// stable hash across platforms with different endianness. 404 template <class HasherT, support::endianness Endianness> 405 using HashBuilder = 406 HashBuilderImpl<HasherT, (Endianness == support::endianness::native 407 ? support::endian::system_endianness() 408 : Endianness)>; 409 410 namespace hashbuilder_detail { 411 class HashCodeHasher { 412 public: 413 HashCodeHasher() : Code(0) {} 414 void update(ArrayRef<uint8_t> Data) { 415 hash_code DataCode = hash_value(Data); 416 Code = hash_combine(Code, DataCode); 417 } 418 hash_code Code; 419 }; 420 421 using HashCodeHashBuilder = HashBuilder<hashbuilder_detail::HashCodeHasher, 422 support::endianness::native>; 423 } // namespace hashbuilder_detail 424 425 /// Provide a default implementation of `hash_value` when `addHash(const T &)` 426 /// is supported. 427 template <typename T> 428 std::enable_if_t< 429 is_detected<hashbuilder_detail::HashCodeHashBuilder::HasAddHashT, T>::value, 430 hash_code> 431 hash_value(const T &Value) { 432 hashbuilder_detail::HashCodeHashBuilder HBuilder; 433 HBuilder.add(Value); 434 return HBuilder.getHasher().Code; 435 } 436 } // end namespace llvm 437 438 #endif // LLVM_SUPPORT_HASHBUILDER_H 439