1 //===- TypeHashing.h ---------------------------------------------*- C++-*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_DEBUGINFO_CODEVIEW_TYPEHASHING_H 10 #define LLVM_DEBUGINFO_CODEVIEW_TYPEHASHING_H 11 12 #include "llvm/ADT/DenseMapInfo.h" 13 #include "llvm/ADT/Hashing.h" 14 15 #include "llvm/DebugInfo/CodeView/CodeView.h" 16 #include "llvm/DebugInfo/CodeView/TypeCollection.h" 17 #include "llvm/DebugInfo/CodeView/TypeIndex.h" 18 19 #include "llvm/Support/FormatProviders.h" 20 21 #include <type_traits> 22 23 namespace llvm { 24 namespace codeview { 25 26 /// A locally hashed type represents a straightforward hash code of a serialized 27 /// record. The record is simply serialized, and then the bytes are hashed by 28 /// a standard algorithm. This is sufficient for the case of de-duplicating 29 /// records within a single sequence of types, because if two records both have 30 /// a back-reference to the same type in the same stream, they will both have 31 /// the same numeric value for the TypeIndex of the back reference. 32 struct LocallyHashedType { 33 hash_code Hash; 34 ArrayRef<uint8_t> RecordData; 35 36 /// Given a type, compute its local hash. 37 static LocallyHashedType hashType(ArrayRef<uint8_t> RecordData); 38 39 /// Given a sequence of types, compute all of the local hashes. 40 template <typename Range> hashTypesLocallyHashedType41 static std::vector<LocallyHashedType> hashTypes(Range &&Records) { 42 std::vector<LocallyHashedType> Hashes; 43 Hashes.reserve(std::distance(std::begin(Records), std::end(Records))); 44 for (const auto &R : Records) 45 Hashes.push_back(hashType(R)); 46 47 return Hashes; 48 } 49 50 static std::vector<LocallyHashedType> hashTypeCollectionLocallyHashedType51 hashTypeCollection(TypeCollection &Types) { 52 std::vector<LocallyHashedType> Hashes; 53 Types.ForEachRecord([&Hashes](TypeIndex TI, const CVType &Type) { 54 Hashes.push_back(hashType(Type.RecordData)); 55 }); 56 return Hashes; 57 } 58 }; 59 60 enum class GlobalTypeHashAlg : uint16_t { 61 SHA1 = 0, // standard 20-byte SHA1 hash 62 SHA1_8 // last 8-bytes of standard SHA1 hash 63 }; 64 65 /// A globally hashed type represents a hash value that is sufficient to 66 /// uniquely identify a record across multiple type streams or type sequences. 67 /// This works by, for any given record A which references B, replacing the 68 /// TypeIndex that refers to B with a previously-computed global hash for B. As 69 /// this is a recursive algorithm (e.g. the global hash of B also depends on the 70 /// global hashes of the types that B refers to), a global hash can uniquely 71 /// identify identify that A occurs in another stream that has a completely 72 /// different graph structure. Although the hash itself is slower to compute, 73 /// probing is much faster with a globally hashed type, because the hash itself 74 /// is considered "as good as" the original type. Since type records can be 75 /// quite large, this makes the equality comparison of the hash much faster than 76 /// equality comparison of a full record. 77 struct GloballyHashedType { 78 GloballyHashedType() = default; GloballyHashedTypeGloballyHashedType79 GloballyHashedType(StringRef H) 80 : GloballyHashedType(ArrayRef<uint8_t>(H.bytes_begin(), H.bytes_end())) {} GloballyHashedTypeGloballyHashedType81 GloballyHashedType(ArrayRef<uint8_t> H) { 82 assert(H.size() == 8); 83 ::memcpy(Hash.data(), H.data(), 8); 84 } 85 std::array<uint8_t, 8> Hash; 86 emptyGloballyHashedType87 bool empty() const { return *(const uint64_t*)Hash.data() == 0; } 88 89 friend inline bool operator==(const GloballyHashedType &L, 90 const GloballyHashedType &R) { 91 return L.Hash == R.Hash; 92 } 93 94 friend inline bool operator!=(const GloballyHashedType &L, 95 const GloballyHashedType &R) { 96 return !(L.Hash == R.Hash); 97 } 98 99 /// Given a sequence of bytes representing a record, compute a global hash for 100 /// this record. Due to the nature of global hashes incorporating the hashes 101 /// of referenced records, this function requires a list of types and ids 102 /// that RecordData might reference, indexable by TypeIndex. 103 static GloballyHashedType hashType(ArrayRef<uint8_t> RecordData, 104 ArrayRef<GloballyHashedType> PreviousTypes, 105 ArrayRef<GloballyHashedType> PreviousIds); 106 107 /// Given a sequence of bytes representing a record, compute a global hash for 108 /// this record. Due to the nature of global hashes incorporating the hashes 109 /// of referenced records, this function requires a list of types and ids 110 /// that RecordData might reference, indexable by TypeIndex. hashTypeGloballyHashedType111 static GloballyHashedType hashType(CVType Type, 112 ArrayRef<GloballyHashedType> PreviousTypes, 113 ArrayRef<GloballyHashedType> PreviousIds) { 114 return hashType(Type.RecordData, PreviousTypes, PreviousIds); 115 } 116 117 /// Given a sequence of combined type and ID records, compute global hashes 118 /// for each of them, returning the results in a vector of hashed types. 119 template <typename Range> hashTypesGloballyHashedType120 static std::vector<GloballyHashedType> hashTypes(Range &&Records) { 121 std::vector<GloballyHashedType> Hashes; 122 bool UnresolvedRecords = false; 123 for (const auto &R : Records) { 124 GloballyHashedType H = hashType(R, Hashes, Hashes); 125 if (H.empty()) 126 UnresolvedRecords = true; 127 Hashes.push_back(H); 128 } 129 130 // In some rare cases, there might be records with forward references in the 131 // stream. Several passes might be needed to fully hash each record in the 132 // Type stream. However this occurs on very small OBJs generated by MASM, 133 // with a dozen records at most. Therefore this codepath isn't 134 // time-critical, as it isn't taken in 99% of cases. 135 while (UnresolvedRecords) { 136 UnresolvedRecords = false; 137 auto HashIt = Hashes.begin(); 138 for (const auto &R : Records) { 139 if (HashIt->empty()) { 140 GloballyHashedType H = hashType(R, Hashes, Hashes); 141 if (H.empty()) 142 UnresolvedRecords = true; 143 else 144 *HashIt = H; 145 } 146 ++HashIt; 147 } 148 } 149 150 return Hashes; 151 } 152 153 /// Given a sequence of combined type and ID records, compute global hashes 154 /// for each of them, returning the results in a vector of hashed types. 155 template <typename Range> 156 static std::vector<GloballyHashedType> hashIdsGloballyHashedType157 hashIds(Range &&Records, ArrayRef<GloballyHashedType> TypeHashes) { 158 std::vector<GloballyHashedType> IdHashes; 159 for (const auto &R : Records) 160 IdHashes.push_back(hashType(R, TypeHashes, IdHashes)); 161 162 return IdHashes; 163 } 164 165 static std::vector<GloballyHashedType> hashTypeCollectionGloballyHashedType166 hashTypeCollection(TypeCollection &Types) { 167 std::vector<GloballyHashedType> Hashes; 168 Types.ForEachRecord([&Hashes](TypeIndex TI, const CVType &Type) { 169 Hashes.push_back(hashType(Type.RecordData, Hashes, Hashes)); 170 }); 171 return Hashes; 172 } 173 }; 174 static_assert(std::is_trivially_copyable<GloballyHashedType>::value, 175 "GloballyHashedType must be trivially copyable so that we can " 176 "reinterpret_cast arrays of hash data to arrays of " 177 "GloballyHashedType"); 178 } // namespace codeview 179 180 template <> struct DenseMapInfo<codeview::LocallyHashedType> { 181 static codeview::LocallyHashedType Empty; 182 static codeview::LocallyHashedType Tombstone; 183 184 static codeview::LocallyHashedType getEmptyKey() { return Empty; } 185 186 static codeview::LocallyHashedType getTombstoneKey() { return Tombstone; } 187 188 static unsigned getHashValue(codeview::LocallyHashedType Val) { 189 return Val.Hash; 190 } 191 192 static bool isEqual(codeview::LocallyHashedType LHS, 193 codeview::LocallyHashedType RHS) { 194 if (LHS.Hash != RHS.Hash) 195 return false; 196 return LHS.RecordData == RHS.RecordData; 197 } 198 }; 199 200 template <> struct DenseMapInfo<codeview::GloballyHashedType> { 201 static codeview::GloballyHashedType Empty; 202 static codeview::GloballyHashedType Tombstone; 203 204 static codeview::GloballyHashedType getEmptyKey() { return Empty; } 205 206 static codeview::GloballyHashedType getTombstoneKey() { return Tombstone; } 207 208 static unsigned getHashValue(codeview::GloballyHashedType Val) { 209 return *reinterpret_cast<const unsigned *>(Val.Hash.data()); 210 } 211 212 static bool isEqual(codeview::GloballyHashedType LHS, 213 codeview::GloballyHashedType RHS) { 214 return LHS == RHS; 215 } 216 }; 217 218 template <> struct format_provider<codeview::LocallyHashedType> { 219 public: 220 static void format(const codeview::LocallyHashedType &V, 221 llvm::raw_ostream &Stream, StringRef Style) { 222 write_hex(Stream, V.Hash, HexPrintStyle::Upper, 8); 223 } 224 }; 225 226 template <> struct format_provider<codeview::GloballyHashedType> { 227 public: 228 static void format(const codeview::GloballyHashedType &V, 229 llvm::raw_ostream &Stream, StringRef Style) { 230 for (uint8_t B : V.Hash) { 231 write_hex(Stream, B, HexPrintStyle::Upper, 2); 232 } 233 } 234 }; 235 236 } // namespace llvm 237 238 #endif 239