xref: /netbsd-src/external/apache2/llvm/dist/llvm/include/llvm/DebugInfo/CodeView/TypeHashing.h (revision 82d56013d7b633d116a93943de88e08335357a7c)
1 //===- TypeHashing.h ---------------------------------------------*- C++-*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_DEBUGINFO_CODEVIEW_TYPEHASHING_H
10 #define LLVM_DEBUGINFO_CODEVIEW_TYPEHASHING_H
11 
12 #include "llvm/ADT/DenseMapInfo.h"
13 #include "llvm/ADT/Hashing.h"
14 
15 #include "llvm/DebugInfo/CodeView/CodeView.h"
16 #include "llvm/DebugInfo/CodeView/TypeCollection.h"
17 #include "llvm/DebugInfo/CodeView/TypeIndex.h"
18 
19 #include "llvm/Support/FormatProviders.h"
20 
21 #include <type_traits>
22 
23 namespace llvm {
24 namespace codeview {
25 
26 /// A locally hashed type represents a straightforward hash code of a serialized
27 /// record.  The record is simply serialized, and then the bytes are hashed by
28 /// a standard algorithm.  This is sufficient for the case of de-duplicating
29 /// records within a single sequence of types, because if two records both have
30 /// a back-reference to the same type in the same stream, they will both have
31 /// the same numeric value for the TypeIndex of the back reference.
32 struct LocallyHashedType {
33   hash_code Hash;
34   ArrayRef<uint8_t> RecordData;
35 
36   /// Given a type, compute its local hash.
37   static LocallyHashedType hashType(ArrayRef<uint8_t> RecordData);
38 
39   /// Given a sequence of types, compute all of the local hashes.
40   template <typename Range>
hashTypesLocallyHashedType41   static std::vector<LocallyHashedType> hashTypes(Range &&Records) {
42     std::vector<LocallyHashedType> Hashes;
43     Hashes.reserve(std::distance(std::begin(Records), std::end(Records)));
44     for (const auto &R : Records)
45       Hashes.push_back(hashType(R));
46 
47     return Hashes;
48   }
49 
50   static std::vector<LocallyHashedType>
hashTypeCollectionLocallyHashedType51   hashTypeCollection(TypeCollection &Types) {
52     std::vector<LocallyHashedType> Hashes;
53     Types.ForEachRecord([&Hashes](TypeIndex TI, const CVType &Type) {
54       Hashes.push_back(hashType(Type.RecordData));
55     });
56     return Hashes;
57   }
58 };
59 
60 enum class GlobalTypeHashAlg : uint16_t {
61   SHA1 = 0, // standard 20-byte SHA1 hash
62   SHA1_8    // last 8-bytes of standard SHA1 hash
63 };
64 
65 /// A globally hashed type represents a hash value that is sufficient to
66 /// uniquely identify a record across multiple type streams or type sequences.
67 /// This works by, for any given record A which references B, replacing the
68 /// TypeIndex that refers to B with a previously-computed global hash for B.  As
69 /// this is a recursive algorithm (e.g. the global hash of B also depends on the
70 /// global hashes of the types that B refers to), a global hash can uniquely
71 /// identify identify that A occurs in another stream that has a completely
72 /// different graph structure.  Although the hash itself is slower to compute,
73 /// probing is much faster with a globally hashed type, because the hash itself
74 /// is considered "as good as" the original type.  Since type records can be
75 /// quite large, this makes the equality comparison of the hash much faster than
76 /// equality comparison of a full record.
77 struct GloballyHashedType {
78   GloballyHashedType() = default;
GloballyHashedTypeGloballyHashedType79   GloballyHashedType(StringRef H)
80       : GloballyHashedType(ArrayRef<uint8_t>(H.bytes_begin(), H.bytes_end())) {}
GloballyHashedTypeGloballyHashedType81   GloballyHashedType(ArrayRef<uint8_t> H) {
82     assert(H.size() == 8);
83     ::memcpy(Hash.data(), H.data(), 8);
84   }
85   std::array<uint8_t, 8> Hash;
86 
emptyGloballyHashedType87   bool empty() const { return *(const uint64_t*)Hash.data() == 0; }
88 
89   friend inline bool operator==(const GloballyHashedType &L,
90                                 const GloballyHashedType &R) {
91     return L.Hash == R.Hash;
92   }
93 
94   friend inline bool operator!=(const GloballyHashedType &L,
95                                 const GloballyHashedType &R) {
96     return !(L.Hash == R.Hash);
97   }
98 
99   /// Given a sequence of bytes representing a record, compute a global hash for
100   /// this record.  Due to the nature of global hashes incorporating the hashes
101   /// of referenced records, this function requires a list of types and ids
102   /// that RecordData might reference, indexable by TypeIndex.
103   static GloballyHashedType hashType(ArrayRef<uint8_t> RecordData,
104                                      ArrayRef<GloballyHashedType> PreviousTypes,
105                                      ArrayRef<GloballyHashedType> PreviousIds);
106 
107   /// Given a sequence of bytes representing a record, compute a global hash for
108   /// this record.  Due to the nature of global hashes incorporating the hashes
109   /// of referenced records, this function requires a list of types and ids
110   /// that RecordData might reference, indexable by TypeIndex.
hashTypeGloballyHashedType111   static GloballyHashedType hashType(CVType Type,
112                                      ArrayRef<GloballyHashedType> PreviousTypes,
113                                      ArrayRef<GloballyHashedType> PreviousIds) {
114     return hashType(Type.RecordData, PreviousTypes, PreviousIds);
115   }
116 
117   /// Given a sequence of combined type and ID records, compute global hashes
118   /// for each of them, returning the results in a vector of hashed types.
119   template <typename Range>
hashTypesGloballyHashedType120   static std::vector<GloballyHashedType> hashTypes(Range &&Records) {
121     std::vector<GloballyHashedType> Hashes;
122     bool UnresolvedRecords = false;
123     for (const auto &R : Records) {
124       GloballyHashedType H = hashType(R, Hashes, Hashes);
125       if (H.empty())
126         UnresolvedRecords = true;
127       Hashes.push_back(H);
128     }
129 
130     // In some rare cases, there might be records with forward references in the
131     // stream. Several passes might be needed to fully hash each record in the
132     // Type stream. However this occurs on very small OBJs generated by MASM,
133     // with a dozen records at most. Therefore this codepath isn't
134     // time-critical, as it isn't taken in 99% of cases.
135     while (UnresolvedRecords) {
136       UnresolvedRecords = false;
137       auto HashIt = Hashes.begin();
138       for (const auto &R : Records) {
139         if (HashIt->empty()) {
140           GloballyHashedType H = hashType(R, Hashes, Hashes);
141           if (H.empty())
142             UnresolvedRecords = true;
143           else
144             *HashIt = H;
145         }
146         ++HashIt;
147       }
148     }
149 
150     return Hashes;
151   }
152 
153   /// Given a sequence of combined type and ID records, compute global hashes
154   /// for each of them, returning the results in a vector of hashed types.
155   template <typename Range>
156   static std::vector<GloballyHashedType>
hashIdsGloballyHashedType157   hashIds(Range &&Records, ArrayRef<GloballyHashedType> TypeHashes) {
158     std::vector<GloballyHashedType> IdHashes;
159     for (const auto &R : Records)
160       IdHashes.push_back(hashType(R, TypeHashes, IdHashes));
161 
162     return IdHashes;
163   }
164 
165   static std::vector<GloballyHashedType>
hashTypeCollectionGloballyHashedType166   hashTypeCollection(TypeCollection &Types) {
167     std::vector<GloballyHashedType> Hashes;
168     Types.ForEachRecord([&Hashes](TypeIndex TI, const CVType &Type) {
169       Hashes.push_back(hashType(Type.RecordData, Hashes, Hashes));
170     });
171     return Hashes;
172   }
173 };
174 static_assert(std::is_trivially_copyable<GloballyHashedType>::value,
175               "GloballyHashedType must be trivially copyable so that we can "
176               "reinterpret_cast arrays of hash data to arrays of "
177               "GloballyHashedType");
178 } // namespace codeview
179 
180 template <> struct DenseMapInfo<codeview::LocallyHashedType> {
181   static codeview::LocallyHashedType Empty;
182   static codeview::LocallyHashedType Tombstone;
183 
184   static codeview::LocallyHashedType getEmptyKey() { return Empty; }
185 
186   static codeview::LocallyHashedType getTombstoneKey() { return Tombstone; }
187 
188   static unsigned getHashValue(codeview::LocallyHashedType Val) {
189     return Val.Hash;
190   }
191 
192   static bool isEqual(codeview::LocallyHashedType LHS,
193                       codeview::LocallyHashedType RHS) {
194     if (LHS.Hash != RHS.Hash)
195       return false;
196     return LHS.RecordData == RHS.RecordData;
197   }
198 };
199 
200 template <> struct DenseMapInfo<codeview::GloballyHashedType> {
201   static codeview::GloballyHashedType Empty;
202   static codeview::GloballyHashedType Tombstone;
203 
204   static codeview::GloballyHashedType getEmptyKey() { return Empty; }
205 
206   static codeview::GloballyHashedType getTombstoneKey() { return Tombstone; }
207 
208   static unsigned getHashValue(codeview::GloballyHashedType Val) {
209     return *reinterpret_cast<const unsigned *>(Val.Hash.data());
210   }
211 
212   static bool isEqual(codeview::GloballyHashedType LHS,
213                       codeview::GloballyHashedType RHS) {
214     return LHS == RHS;
215   }
216 };
217 
218 template <> struct format_provider<codeview::LocallyHashedType> {
219 public:
220   static void format(const codeview::LocallyHashedType &V,
221                      llvm::raw_ostream &Stream, StringRef Style) {
222     write_hex(Stream, V.Hash, HexPrintStyle::Upper, 8);
223   }
224 };
225 
226 template <> struct format_provider<codeview::GloballyHashedType> {
227 public:
228   static void format(const codeview::GloballyHashedType &V,
229                      llvm::raw_ostream &Stream, StringRef Style) {
230     for (uint8_t B : V.Hash) {
231       write_hex(Stream, B, HexPrintStyle::Upper, 2);
232     }
233   }
234 };
235 
236 } // namespace llvm
237 
238 #endif
239