xref: /freebsd-src/contrib/llvm-project/llvm/include/llvm/Support/HashBuilder.h (revision 8cc087a1eee9ec1ca9f7ac1e63ad51bdb5a682eb)
1 //===- llvm/Support/HashBuilder.h - Convenient hashing interface-*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements an interface allowing to conveniently build hashes of
10 // various data types, without relying on the underlying hasher type to know
11 // about hashed data types.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_SUPPORT_HASHBUILDER_H
16 #define LLVM_SUPPORT_HASHBUILDER_H
17 
18 #include "llvm/ADT/ArrayRef.h"
19 #include "llvm/ADT/Hashing.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/ADT/StringRef.h"
22 #include "llvm/Support/Endian.h"
23 #include "llvm/Support/type_traits.h"
24 
25 #include <iterator>
26 #include <utility>
27 
28 namespace llvm {
29 
30 namespace hashbuilder_detail {
31 /// Trait to indicate whether a type's bits can be hashed directly (after
32 /// endianness correction).
33 template <typename U>
34 struct IsHashableData
35     : std::integral_constant<bool, is_integral_or_enum<U>::value> {};
36 
37 } // namespace hashbuilder_detail
38 
39 /// Declares the hasher member, and functions forwarding directly to the hasher.
40 template <typename HasherT> class HashBuilderBase {
41 public:
42   HasherT &getHasher() { return Hasher; }
43 
44   /// Forward to `HasherT::update(ArrayRef<uint8_t>)`.
45   ///
46   /// This may not take the size of `Data` into account.
47   /// Users of this function should pay attention to respect endianness
48   /// contraints.
49   void update(ArrayRef<uint8_t> Data) { this->getHasher().update(Data); }
50 
51   /// Forward to `HasherT::update(ArrayRef<uint8_t>)`.
52   ///
53   /// This may not take the size of `Data` into account.
54   /// Users of this function should pay attention to respect endianness
55   /// contraints.
56   void update(StringRef Data) {
57     update(makeArrayRef(reinterpret_cast<const uint8_t *>(Data.data()),
58                         Data.size()));
59   }
60 
61   /// Forward to `HasherT::final()` if available.
62   template <typename HasherT_ = HasherT> StringRef final() {
63     return this->getHasher().final();
64   }
65 
66   /// Forward to `HasherT::result()` if available.
67   template <typename HasherT_ = HasherT> StringRef result() {
68     return this->getHasher().result();
69   }
70 
71 protected:
72   explicit HashBuilderBase(HasherT &Hasher) : Hasher(Hasher) {}
73 
74   template <typename... ArgTypes>
75   explicit HashBuilderBase(ArgTypes &&...Args)
76       : OptionalHasher(in_place, std::forward<ArgTypes>(Args)...),
77         Hasher(*OptionalHasher) {}
78 
79 private:
80   Optional<HasherT> OptionalHasher;
81   HasherT &Hasher;
82 };
83 
84 /// Implementation of the `HashBuilder` interface.
85 ///
86 /// `support::endianness::native` is not supported. `HashBuilder` is
87 /// expected to canonicalize `support::endianness::native` to one of
88 /// `support::endianness::big` or `support::endianness::little`.
89 template <typename HasherT, support::endianness Endianness>
90 class HashBuilderImpl : public HashBuilderBase<HasherT> {
91   static_assert(Endianness != support::endianness::native,
92                 "HashBuilder should canonicalize endianness");
93 
94 public:
95   explicit HashBuilderImpl(HasherT &Hasher)
96       : HashBuilderBase<HasherT>(Hasher) {}
97   template <typename... ArgTypes>
98   explicit HashBuilderImpl(ArgTypes &&...Args)
99       : HashBuilderBase<HasherT>(Args...) {}
100 
101   /// Implement hashing for hashable data types, e.g. integral or enum values.
102   template <typename T>
103   std::enable_if_t<hashbuilder_detail::IsHashableData<T>::value,
104                    HashBuilderImpl &>
105   add(T Value) {
106     return adjustForEndiannessAndAdd(Value);
107   }
108 
109   /// Support hashing `ArrayRef`.
110   ///
111   /// `Value.size()` is taken into account to ensure cases like
112   /// ```
113   /// builder.add({1});
114   /// builder.add({2, 3});
115   /// ```
116   /// and
117   /// ```
118   /// builder.add({1, 2});
119   /// builder.add({3});
120   /// ```
121   /// do not collide.
122   template <typename T> HashBuilderImpl &add(ArrayRef<T> Value) {
123     // As of implementation time, simply calling `addRange(Value)` would also go
124     // through the `update` fast path. But that would rely on the implementation
125     // details of `ArrayRef::begin()` and `ArrayRef::end()`. Explicitly call
126     // `update` to guarantee the fast path.
127     add(Value.size());
128     if (hashbuilder_detail::IsHashableData<T>::value &&
129         Endianness == support::endian::system_endianness()) {
130       this->update(
131           makeArrayRef(reinterpret_cast<const uint8_t *>(Value.begin()),
132                        Value.size() * sizeof(T)));
133     } else {
134       for (auto &V : Value)
135         add(V);
136     }
137     return *this;
138   }
139 
140   /// Support hashing `StringRef`.
141   ///
142   /// `Value.size()` is taken into account to ensure cases like
143   /// ```
144   /// builder.add("a");
145   /// builder.add("bc");
146   /// ```
147   /// and
148   /// ```
149   /// builder.add("ab");
150   /// builder.add("c");
151   /// ```
152   /// do not collide.
153   HashBuilderImpl &add(StringRef Value) {
154     // As of implementation time, simply calling `addRange(Value)` would also go
155     // through `update`. But that would rely on the implementation of
156     // `StringRef::begin()` and `StringRef::end()`. Explicitly call `update` to
157     // guarantee the fast path.
158     add(Value.size());
159     this->update(makeArrayRef(reinterpret_cast<const uint8_t *>(Value.begin()),
160                               Value.size()));
161     return *this;
162   }
163 
164   template <typename T>
165   using HasAddHashT =
166       decltype(addHash(std::declval<HashBuilderImpl &>(), std::declval<T &>()));
167   /// Implement hashing for user-defined `struct`s.
168   ///
169   /// Any user-define `struct` can participate in hashing via `HashBuilder` by
170   /// providing a `addHash` templated function.
171   ///
172   /// ```
173   /// template <typename HasherT, support::endianness Endianness>
174   /// void addHash(HashBuilder<HasherT, Endianness> &HBuilder,
175   ///              const UserDefinedStruct &Value);
176   /// ```
177   ///
178   /// For example:
179   /// ```
180   /// struct SimpleStruct {
181   ///   char c;
182   ///   int i;
183   /// };
184   ///
185   /// template <typename HasherT, support::endianness Endianness>
186   /// void addHash(HashBuilderImpl<HasherT, Endianness> &HBuilder,
187   ///              const SimpleStruct &Value) {
188   ///   HBuilder.add(Value.c);
189   ///   HBuilder.add(Value.i);
190   /// }
191   /// ```
192   ///
193   /// To avoid endianness issues, specializations of `addHash` should
194   /// generally rely on exising `add`, `addRange`, and `addRangeElements`
195   /// functions. If directly using `update`, an implementation must correctly
196   /// handle endianness.
197   ///
198   /// ```
199   /// struct __attribute__ ((packed)) StructWithFastHash {
200   ///   int I;
201   ///   char C;
202   ///
203   ///   // If possible, we want to hash both `I` and `C` in a single
204   ///   // `update` call for performance concerns.
205   ///   template <typename HasherT, support::endianness Endianness>
206   ///   friend void addHash(HashBuilderImpl<HasherT, Endianness> &HBuilder,
207   ///                       const StructWithFastHash &Value) {
208   ///     if (Endianness == support::endian::system_endianness()) {
209   ///       HBuilder.update(makeArrayRef(
210   ///           reinterpret_cast<const uint8_t *>(&Value), sizeof(Value)));
211   ///     } else {
212   ///       // Rely on existing `add` methods to handle endianness.
213   ///       HBuilder.add(Value.I);
214   ///       HBuilder.add(Value.C);
215   ///     }
216   ///   }
217   /// };
218   /// ```
219   ///
220   /// To avoid collisions, specialization of `addHash` for variable-size
221   /// types must take the size into account.
222   ///
223   /// For example:
224   /// ```
225   /// struct CustomContainer {
226   /// private:
227   ///   size_t Size;
228   ///   int Elements[100];
229   ///
230   /// public:
231   ///   CustomContainer(size_t Size) : Size(Size) {
232   ///     for (size_t I = 0; I != Size; ++I)
233   ///       Elements[I] = I;
234   ///   }
235   ///   template <typename HasherT, support::endianness Endianness>
236   ///   friend void addHash(HashBuilderImpl<HasherT, Endianness> &HBuilder,
237   ///                       const CustomContainer &Value) {
238   ///     if (Endianness == support::endian::system_endianness()) {
239   ///       HBuilder.update(makeArrayRef(
240   ///           reinterpret_cast<const uint8_t *>(&Value.Size),
241   ///           sizeof(Value.Size) + Value.Size * sizeof(Value.Elements[0])));
242   ///     } else {
243   ///       // `addRange` will take care of encoding the size.
244   ///       HBuilder.addRange(&Value.Elements[0], &Value.Elements[0] +
245   ///       Value.Size);
246   ///     }
247   ///   }
248   /// };
249   /// ```
250   template <typename T>
251   std::enable_if_t<is_detected<HasAddHashT, T>::value &&
252                        !hashbuilder_detail::IsHashableData<T>::value,
253                    HashBuilderImpl &>
254   add(const T &Value) {
255     addHash(*this, Value);
256     return *this;
257   }
258 
259   template <typename T1, typename T2>
260   HashBuilderImpl &add(const std::pair<T1, T2> &Value) {
261     add(Value.first);
262     add(Value.second);
263     return *this;
264   }
265 
266   template <typename... Ts> HashBuilderImpl &add(const std::tuple<Ts...> &Arg) {
267     return addTupleHelper(Arg, typename std::index_sequence_for<Ts...>());
268   }
269 
270   /// A convenenience variadic helper.
271   /// It simply iterates over its arguments, in order.
272   /// ```
273   /// add(Arg1, Arg2);
274   /// ```
275   /// is equivalent to
276   /// ```
277   /// add(Arg1)
278   /// add(Arg2)
279   /// ```
280   template <typename T, typename... Ts>
281   typename std::enable_if<(sizeof...(Ts) >= 1), HashBuilderImpl &>::type
282   add(const T &FirstArg, const Ts &...Args) {
283     add(FirstArg);
284     add(Args...);
285     return *this;
286   }
287 
288   template <typename ForwardIteratorT>
289   HashBuilderImpl &addRange(ForwardIteratorT First, ForwardIteratorT Last) {
290     add(std::distance(First, Last));
291     return addRangeElements(First, Last);
292   }
293 
294   template <typename RangeT> HashBuilderImpl &addRange(const RangeT &Range) {
295     return addRange(adl_begin(Range), adl_end(Range));
296   }
297 
298   template <typename ForwardIteratorT>
299   HashBuilderImpl &addRangeElements(ForwardIteratorT First,
300                                     ForwardIteratorT Last) {
301     return addRangeElementsImpl(
302         First, Last,
303         typename std::iterator_traits<ForwardIteratorT>::iterator_category());
304   }
305 
306   template <typename RangeT>
307   HashBuilderImpl &addRangeElements(const RangeT &Range) {
308     return addRangeElements(adl_begin(Range), adl_end(Range));
309   }
310 
311   template <typename T>
312   using HasByteSwapT = decltype(support::endian::byte_swap(
313       std::declval<T &>(), support::endianness::little));
314   /// Adjust `Value` for the target endianness and add it to the hash.
315   template <typename T>
316   std::enable_if_t<is_detected<HasByteSwapT, T>::value, HashBuilderImpl &>
317   adjustForEndiannessAndAdd(const T &Value) {
318     T SwappedValue = support::endian::byte_swap(Value, Endianness);
319     this->update(makeArrayRef(reinterpret_cast<const uint8_t *>(&SwappedValue),
320                               sizeof(SwappedValue)));
321     return *this;
322   }
323 
324 private:
325   template <typename... Ts, std::size_t... Indices>
326   HashBuilderImpl &addTupleHelper(const std::tuple<Ts...> &Arg,
327                                   std::index_sequence<Indices...>) {
328     add(std::get<Indices>(Arg)...);
329     return *this;
330   }
331 
332   // FIXME: Once available, specialize this function for `contiguous_iterator`s,
333   // and use it for `ArrayRef` and `StringRef`.
334   template <typename ForwardIteratorT>
335   HashBuilderImpl &addRangeElementsImpl(ForwardIteratorT First,
336                                         ForwardIteratorT Last,
337                                         std::forward_iterator_tag) {
338     for (auto It = First; It != Last; ++It)
339       add(*It);
340     return *this;
341   }
342 
343   template <typename T>
344   std::enable_if_t<hashbuilder_detail::IsHashableData<T>::value &&
345                        Endianness == support::endian::system_endianness(),
346                    HashBuilderImpl &>
347   addRangeElementsImpl(T *First, T *Last, std::forward_iterator_tag) {
348     this->update(makeArrayRef(reinterpret_cast<const uint8_t *>(First),
349                               (Last - First) * sizeof(T)));
350     return *this;
351   }
352 };
353 
354 /// Interface to help hash various types through a hasher type.
355 ///
356 /// Via provided specializations of `add`, `addRange`, and `addRangeElements`
357 /// functions, various types (e.g. `ArrayRef`, `StringRef`, etc.) can be hashed
358 /// without requiring any knowledge of hashed types from the hasher type.
359 ///
360 /// The only method expected from the templated hasher type `HasherT` is:
361 /// * void update(ArrayRef<uint8_t> Data)
362 ///
363 /// Additionally, the following methods will be forwarded to the hasher type:
364 /// * decltype(std::declval<HasherT &>().final()) final()
365 /// * decltype(std::declval<HasherT &>().result()) result()
366 ///
367 /// From a user point of view, the interface provides the following:
368 /// * `template<typename T> add(const T &Value)`
369 ///   The `add` function implements hashing of various types.
370 /// * `template <typename ItT> void addRange(ItT First, ItT Last)`
371 ///   The `addRange` function is designed to aid hashing a range of values.
372 ///   It explicitly adds the size of the range in the hash.
373 /// * `template <typename ItT> void addRangeElements(ItT First, ItT Last)`
374 ///   The `addRangeElements` function is also designed to aid hashing a range of
375 ///   values. In contrast to `addRange`, it **ignores** the size of the range,
376 ///   behaving as if elements were added one at a time with `add`.
377 ///
378 /// User-defined `struct` types can participate in this interface by providing
379 /// an `addHash` templated function. See the associated template specialization
380 /// for details.
381 ///
382 /// This interface does not impose requirements on the hasher
383 /// `update(ArrayRef<uint8_t> Data)` method. We want to avoid collisions for
384 /// variable-size types; for example for
385 /// ```
386 /// builder.add({1});
387 /// builder.add({2, 3});
388 /// ```
389 /// and
390 /// ```
391 /// builder.add({1, 2});
392 /// builder.add({3});
393 /// ```
394 /// . Thus, specializations of `add` and `addHash` for variable-size types must
395 /// not assume that the hasher type considers the size as part of the hash; they
396 /// must explicitly add the size to the hash. See for example specializations
397 /// for `ArrayRef` and `StringRef`.
398 ///
399 /// Additionally, since types are eventually forwarded to the hasher's
400 /// `void update(ArrayRef<uint8_t>)` method, endianness plays a role in the hash
401 /// computation (for example when computing `add((int)123)`).
402 /// Specifiying a non-`native` `Endianness` template parameter allows to compute
403 /// stable hash across platforms with different endianness.
404 template <class HasherT, support::endianness Endianness>
405 using HashBuilder =
406     HashBuilderImpl<HasherT, (Endianness == support::endianness::native
407                                   ? support::endian::system_endianness()
408                                   : Endianness)>;
409 
410 namespace hashbuilder_detail {
411 class HashCodeHasher {
412 public:
413   HashCodeHasher() : Code(0) {}
414   void update(ArrayRef<uint8_t> Data) {
415     hash_code DataCode = hash_value(Data);
416     Code = hash_combine(Code, DataCode);
417   }
418   hash_code Code;
419 };
420 
421 using HashCodeHashBuilder = HashBuilder<hashbuilder_detail::HashCodeHasher,
422                                         support::endianness::native>;
423 } // namespace hashbuilder_detail
424 
425 /// Provide a default implementation of `hash_value` when `addHash(const T &)`
426 /// is supported.
427 template <typename T>
428 std::enable_if_t<
429     is_detected<hashbuilder_detail::HashCodeHashBuilder::HasAddHashT, T>::value,
430     hash_code>
431 hash_value(const T &Value) {
432   hashbuilder_detail::HashCodeHashBuilder HBuilder;
433   HBuilder.add(Value);
434   return HBuilder.getHasher().Code;
435 }
436 } // end namespace llvm
437 
438 #endif // LLVM_SUPPORT_HASHBUILDER_H
439