1 //==- include/llvm/CodeGen/AccelTable.h - Accelerator Tables -----*- C++ -*-==// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// \file 9 /// This file contains support for writing accelerator tables. 10 /// 11 //===----------------------------------------------------------------------===// 12 13 #ifndef LLVM_CODEGEN_ACCELTABLE_H 14 #define LLVM_CODEGEN_ACCELTABLE_H 15 16 #include "llvm/ADT/ArrayRef.h" 17 #include "llvm/ADT/MapVector.h" 18 #include "llvm/ADT/STLFunctionalExtras.h" 19 #include "llvm/ADT/StringRef.h" 20 #include "llvm/BinaryFormat/Dwarf.h" 21 #include "llvm/CodeGen/DIE.h" 22 #include "llvm/CodeGen/DwarfStringPoolEntry.h" 23 #include "llvm/Support/Allocator.h" 24 #include "llvm/Support/DJB.h" 25 #include "llvm/Support/Debug.h" 26 #include <cstdint> 27 #include <variant> 28 #include <vector> 29 30 /// \file 31 /// The DWARF and Apple accelerator tables are an indirect hash table optimized 32 /// for null lookup rather than access to known data. The Apple accelerator 33 /// tables are a precursor of the newer DWARF v5 accelerator tables. Both 34 /// formats share common design ideas. 35 /// 36 /// The Apple accelerator table are output into an on-disk format that looks 37 /// like this: 38 /// 39 /// .------------------. 40 /// | HEADER | 41 /// |------------------| 42 /// | BUCKETS | 43 /// |------------------| 44 /// | HASHES | 45 /// |------------------| 46 /// | OFFSETS | 47 /// |------------------| 48 /// | DATA | 49 /// `------------------' 50 /// 51 /// The header contains a magic number, version, type of hash function, 52 /// the number of buckets, total number of hashes, and room for a special struct 53 /// of data and the length of that struct. 54 /// 55 /// The buckets contain an index (e.g. 6) into the hashes array. The hashes 56 /// section contains all of the 32-bit hash values in contiguous memory, and the 57 /// offsets contain the offset into the data area for the particular hash. 58 /// 59 /// For a lookup example, we could hash a function name and take it modulo the 60 /// number of buckets giving us our bucket. From there we take the bucket value 61 /// as an index into the hashes table and look at each successive hash as long 62 /// as the hash value is still the same modulo result (bucket value) as earlier. 63 /// If we have a match we look at that same entry in the offsets table and grab 64 /// the offset in the data for our final match. 65 /// 66 /// The DWARF v5 accelerator table consists of zero or more name indices that 67 /// are output into an on-disk format that looks like this: 68 /// 69 /// .------------------. 70 /// | HEADER | 71 /// |------------------| 72 /// | CU LIST | 73 /// |------------------| 74 /// | LOCAL TU LIST | 75 /// |------------------| 76 /// | FOREIGN TU LIST | 77 /// |------------------| 78 /// | HASH TABLE | 79 /// |------------------| 80 /// | NAME TABLE | 81 /// |------------------| 82 /// | ABBREV TABLE | 83 /// |------------------| 84 /// | ENTRY POOL | 85 /// `------------------' 86 /// 87 /// For the full documentation please refer to the DWARF 5 standard. 88 /// 89 /// 90 /// This file defines the class template AccelTable, which is represents an 91 /// abstract view of an Accelerator table, without any notion of an on-disk 92 /// layout. This class is parameterized by an entry type, which should derive 93 /// from AccelTableData. This is the type of individual entries in the table, 94 /// and it should store the data necessary to emit them. AppleAccelTableData is 95 /// the base class for Apple Accelerator Table entries, which have a uniform 96 /// structure based on a sequence of Atoms. There are different sub-classes 97 /// derived from AppleAccelTable, which differ in the set of Atoms and how they 98 /// obtain their values. 99 /// 100 /// An Apple Accelerator Table can be serialized by calling emitAppleAccelTable 101 /// function. 102 103 namespace llvm { 104 105 class AsmPrinter; 106 class DwarfDebug; 107 class DwarfTypeUnit; 108 class MCSymbol; 109 class raw_ostream; 110 111 /// Interface which the different types of accelerator table data have to 112 /// conform. It serves as a base class for different values of the template 113 /// argument of the AccelTable class template. 114 class AccelTableData { 115 public: 116 virtual ~AccelTableData() = default; 117 118 bool operator<(const AccelTableData &Other) const { 119 return order() < Other.order(); 120 } 121 122 // Subclasses should implement: 123 // static uint32_t hash(StringRef Name); 124 125 #ifndef NDEBUG 126 virtual void print(raw_ostream &OS) const = 0; 127 #endif 128 protected: 129 virtual uint64_t order() const = 0; 130 }; 131 132 /// A base class holding non-template-dependant functionality of the AccelTable 133 /// class. Clients should not use this class directly but rather instantiate 134 /// AccelTable with a type derived from AccelTableData. 135 class AccelTableBase { 136 public: 137 using HashFn = uint32_t(StringRef); 138 139 /// Represents a group of entries with identical name (and hence, hash value). 140 struct HashData { 141 DwarfStringPoolEntryRef Name; 142 uint32_t HashValue; 143 std::vector<AccelTableData *> Values; 144 MCSymbol *Sym; 145 146 /// Get all AccelTableData cast as a `T`. 147 template <typename T = AccelTableData *> auto getValues() const { 148 static_assert(std::is_pointer<T>()); 149 static_assert( 150 std::is_base_of<AccelTableData, std::remove_pointer_t<T>>()); 151 return map_range( 152 Values, [](AccelTableData *Data) { return static_cast<T>(Data); }); 153 } 154 155 #ifndef NDEBUG 156 void print(raw_ostream &OS) const; 157 void dump() const { print(dbgs()); } 158 #endif 159 }; 160 using HashList = std::vector<HashData *>; 161 using BucketList = std::vector<HashList>; 162 163 protected: 164 /// Allocator for HashData and Values. 165 BumpPtrAllocator Allocator; 166 167 using StringEntries = MapVector<StringRef, HashData>; 168 StringEntries Entries; 169 170 HashFn *Hash; 171 uint32_t BucketCount = 0; 172 uint32_t UniqueHashCount = 0; 173 174 HashList Hashes; 175 BucketList Buckets; 176 177 void computeBucketCount(); 178 179 AccelTableBase(HashFn *Hash) : Hash(Hash) {} 180 181 public: 182 void finalize(AsmPrinter *Asm, StringRef Prefix); 183 ArrayRef<HashList> getBuckets() const { return Buckets; } 184 uint32_t getBucketCount() const { return BucketCount; } 185 uint32_t getUniqueHashCount() const { return UniqueHashCount; } 186 uint32_t getUniqueNameCount() const { return Entries.size(); } 187 188 #ifndef NDEBUG 189 void print(raw_ostream &OS) const; 190 void dump() const { print(dbgs()); } 191 #endif 192 193 AccelTableBase(const AccelTableBase &) = delete; 194 void operator=(const AccelTableBase &) = delete; 195 }; 196 197 /// This class holds an abstract representation of an Accelerator Table, 198 /// consisting of a sequence of buckets, each bucket containint a sequence of 199 /// HashData entries. The class is parameterized by the type of entries it 200 /// holds. The type template parameter also defines the hash function to use for 201 /// hashing names. 202 template <typename DataT> class AccelTable : public AccelTableBase { 203 public: 204 AccelTable() : AccelTableBase(DataT::hash) {} 205 206 template <typename... Types> 207 void addName(DwarfStringPoolEntryRef Name, Types &&... Args); 208 void clear() { Entries.clear(); } 209 void addEntries(AccelTable<DataT> &Table); 210 const StringEntries getEntries() const { return Entries; } 211 }; 212 213 template <typename AccelTableDataT> 214 template <typename... Types> 215 void AccelTable<AccelTableDataT>::addName(DwarfStringPoolEntryRef Name, 216 Types &&... Args) { 217 assert(Buckets.empty() && "Already finalized!"); 218 // If the string is in the list already then add this die to the list 219 // otherwise add a new one. 220 auto &It = Entries[Name.getString()]; 221 if (It.Values.empty()) { 222 It.Name = Name; 223 It.HashValue = Hash(Name.getString()); 224 } 225 It.Values.push_back(new (Allocator) 226 AccelTableDataT(std::forward<Types>(Args)...)); 227 } 228 229 /// A base class for different implementations of Data classes for Apple 230 /// Accelerator Tables. The columns in the table are defined by the static Atoms 231 /// variable defined on the subclasses. 232 class AppleAccelTableData : public AccelTableData { 233 public: 234 /// An Atom defines the form of the data in an Apple accelerator table. 235 /// Conceptually it is a column in the accelerator consisting of a type and a 236 /// specification of the form of its data. 237 struct Atom { 238 /// Atom Type. 239 const uint16_t Type; 240 /// DWARF Form. 241 const uint16_t Form; 242 243 constexpr Atom(uint16_t Type, uint16_t Form) : Type(Type), Form(Form) {} 244 245 #ifndef NDEBUG 246 void print(raw_ostream &OS) const; 247 void dump() const { print(dbgs()); } 248 #endif 249 }; 250 // Subclasses should define: 251 // static constexpr Atom Atoms[]; 252 253 virtual void emit(AsmPrinter *Asm) const = 0; 254 255 static uint32_t hash(StringRef Buffer) { return djbHash(Buffer); } 256 }; 257 258 /// Helper class to identify an entry in DWARF5AccelTable based on their DIE 259 /// offset and UnitID. 260 struct OffsetAndUnitID { 261 uint64_t Offset = 0; 262 uint32_t UnitID = 0; 263 bool IsTU = false; 264 OffsetAndUnitID() = delete; 265 OffsetAndUnitID(uint64_t Offset, uint32_t UnitID, bool IsTU) 266 : Offset(Offset), UnitID(UnitID), IsTU(IsTU) {} 267 uint64_t offset() const { return Offset; }; 268 uint32_t unitID() const { return UnitID; }; 269 bool isTU() const { return IsTU; } 270 }; 271 272 template <> struct DenseMapInfo<OffsetAndUnitID> { 273 static inline OffsetAndUnitID getEmptyKey() { 274 return OffsetAndUnitID(-1, -1, false); 275 } 276 static inline OffsetAndUnitID getTombstoneKey() { 277 return OffsetAndUnitID(-2, -2, false); 278 } 279 static unsigned getHashValue(const OffsetAndUnitID &Val) { 280 return (unsigned)llvm::hash_combine(Val.offset(), Val.unitID(), Val.IsTU); 281 } 282 static bool isEqual(const OffsetAndUnitID &LHS, const OffsetAndUnitID &RHS) { 283 return LHS.offset() == RHS.offset() && LHS.unitID() == RHS.unitID() && 284 LHS.IsTU == RHS.isTU(); 285 } 286 }; 287 288 /// The Data class implementation for DWARF v5 accelerator table. Unlike the 289 /// Apple Data classes, this class is just a DIE wrapper, and does not know to 290 /// serialize itself. The complete serialization logic is in the 291 /// emitDWARF5AccelTable function. 292 class DWARF5AccelTableData : public AccelTableData { 293 public: 294 static uint32_t hash(StringRef Name) { return caseFoldingDjbHash(Name); } 295 296 DWARF5AccelTableData(const DIE &Die, const uint32_t UnitID, const bool IsTU); 297 DWARF5AccelTableData(const uint64_t DieOffset, 298 const std::optional<uint64_t> DefiningParentOffset, 299 const unsigned DieTag, const unsigned UnitID, 300 const bool IsTU) 301 : OffsetVal(DieOffset), ParentOffset(DefiningParentOffset), 302 DieTag(DieTag), AbbrevNumber(0), IsTU(IsTU), UnitID(UnitID) {} 303 304 #ifndef NDEBUG 305 void print(raw_ostream &OS) const override; 306 #endif 307 308 uint64_t getDieOffset() const { 309 assert(isNormalized() && "Accessing DIE Offset before normalizing."); 310 return std::get<uint64_t>(OffsetVal); 311 } 312 313 OffsetAndUnitID getDieOffsetAndUnitID() const { 314 return {getDieOffset(), getUnitID(), isTU()}; 315 } 316 317 unsigned getDieTag() const { return DieTag; } 318 unsigned getUnitID() const { return UnitID; } 319 bool isTU() const { return IsTU; } 320 void normalizeDIEToOffset() { 321 assert(!isNormalized() && "Accessing offset after normalizing."); 322 const DIE *Entry = std::get<const DIE *>(OffsetVal); 323 ParentOffset = getDefiningParentDieOffset(*Entry); 324 OffsetVal = Entry->getOffset(); 325 } 326 bool isNormalized() const { 327 return std::holds_alternative<uint64_t>(OffsetVal); 328 } 329 330 std::optional<uint64_t> getParentDieOffset() const { 331 if (auto OffsetAndId = getParentDieOffsetAndUnitID()) 332 return OffsetAndId->offset(); 333 return {}; 334 } 335 336 std::optional<OffsetAndUnitID> getParentDieOffsetAndUnitID() const { 337 assert(isNormalized() && "Accessing DIE Offset before normalizing."); 338 if (!ParentOffset) 339 return std::nullopt; 340 return OffsetAndUnitID(*ParentOffset, getUnitID(), isTU()); 341 } 342 343 /// Sets AbbrevIndex for an Entry. 344 void setAbbrevNumber(uint16_t AbbrevNum) { AbbrevNumber = AbbrevNum; } 345 346 /// Returns AbbrevIndex for an Entry. 347 uint16_t getAbbrevNumber() const { return AbbrevNumber; } 348 349 /// If `Die` has a non-null parent and the parent is not a declaration, 350 /// return its offset. 351 static std::optional<uint64_t> getDefiningParentDieOffset(const DIE &Die); 352 353 protected: 354 std::variant<const DIE *, uint64_t> OffsetVal; 355 std::optional<uint64_t> ParentOffset; 356 uint32_t DieTag : 16; 357 uint32_t AbbrevNumber : 15; 358 uint32_t IsTU : 1; 359 uint32_t UnitID; 360 uint64_t order() const override { return getDieOffset(); } 361 }; 362 363 class DebugNamesAbbrev : public FoldingSetNode { 364 public: 365 uint32_t DieTag; 366 uint32_t Number; 367 struct AttributeEncoding { 368 dwarf::Index Index; 369 dwarf::Form Form; 370 }; 371 DebugNamesAbbrev(uint32_t DieTag) : DieTag(DieTag), Number(0) {} 372 /// Add attribute encoding to an abbreviation. 373 void addAttribute(const DebugNamesAbbrev::AttributeEncoding &Attr) { 374 AttrVect.push_back(Attr); 375 } 376 /// Set abbreviation tag index. 377 void setNumber(uint32_t AbbrevNumber) { Number = AbbrevNumber; } 378 /// Get abbreviation tag index. 379 uint32_t getNumber() const { return Number; } 380 /// Get DIE Tag. 381 uint32_t getDieTag() const { return DieTag; } 382 /// Used to gather unique data for the abbreviation folding set. 383 void Profile(FoldingSetNodeID &ID) const; 384 /// Returns attributes for an abbreviation. 385 const SmallVector<AttributeEncoding, 1> &getAttributes() const { 386 return AttrVect; 387 } 388 389 private: 390 SmallVector<AttributeEncoding, 1> AttrVect; 391 }; 392 393 struct TypeUnitMetaInfo { 394 // Symbol for start of the TU section or signature if this is SplitDwarf. 395 std::variant<MCSymbol *, uint64_t> LabelOrSignature; 396 // Unique ID of Type Unit. 397 unsigned UniqueID; 398 }; 399 using TUVectorTy = SmallVector<TypeUnitMetaInfo, 1>; 400 class DWARF5AccelTable : public AccelTable<DWARF5AccelTableData> { 401 // Symbols to start of all the TU sections that were generated. 402 TUVectorTy TUSymbolsOrHashes; 403 404 public: 405 struct UnitIndexAndEncoding { 406 unsigned Index; 407 DebugNamesAbbrev::AttributeEncoding Encoding; 408 }; 409 /// Returns type units that were constructed. 410 const TUVectorTy &getTypeUnitsSymbols() { return TUSymbolsOrHashes; } 411 /// Add a type unit start symbol. 412 void addTypeUnitSymbol(DwarfTypeUnit &U); 413 /// Add a type unit Signature. 414 void addTypeUnitSignature(DwarfTypeUnit &U); 415 /// Convert DIE entries to explicit offset. 416 /// Needs to be called after DIE offsets are computed. 417 void convertDieToOffset() { 418 for (auto &Entry : Entries) { 419 for (auto *Data : Entry.second.getValues<DWARF5AccelTableData *>()) { 420 // For TU we normalize as each Unit is emitted. 421 // So when this is invoked after CU construction we will be in mixed 422 // state. 423 if (!Data->isNormalized()) 424 Data->normalizeDIEToOffset(); 425 } 426 } 427 } 428 429 void addTypeEntries(DWARF5AccelTable &Table) { 430 for (auto &Entry : Table.getEntries()) { 431 for (auto *Data : Entry.second.getValues<DWARF5AccelTableData *>()) { 432 addName(Entry.second.Name, Data->getDieOffset(), 433 Data->getParentDieOffset(), Data->getDieTag(), 434 Data->getUnitID(), Data->isTU()); 435 } 436 } 437 } 438 }; 439 440 void emitAppleAccelTableImpl(AsmPrinter *Asm, AccelTableBase &Contents, 441 StringRef Prefix, const MCSymbol *SecBegin, 442 ArrayRef<AppleAccelTableData::Atom> Atoms); 443 444 /// Emit an Apple Accelerator Table consisting of entries in the specified 445 /// AccelTable. The DataT template parameter should be derived from 446 /// AppleAccelTableData. 447 template <typename DataT> 448 void emitAppleAccelTable(AsmPrinter *Asm, AccelTable<DataT> &Contents, 449 StringRef Prefix, const MCSymbol *SecBegin) { 450 static_assert(std::is_convertible<DataT *, AppleAccelTableData *>::value); 451 emitAppleAccelTableImpl(Asm, Contents, Prefix, SecBegin, DataT::Atoms); 452 } 453 454 void emitDWARF5AccelTable(AsmPrinter *Asm, DWARF5AccelTable &Contents, 455 const DwarfDebug &DD, 456 ArrayRef<std::unique_ptr<DwarfCompileUnit>> CUs); 457 458 /// Emit a DWARFv5 Accelerator Table consisting of entries in the specified 459 /// AccelTable. The \p CUs contains either symbols keeping offsets to the 460 /// start of compilation unit, either offsets to the start of compilation 461 /// unit themselves. 462 void emitDWARF5AccelTable( 463 AsmPrinter *Asm, DWARF5AccelTable &Contents, 464 ArrayRef<std::variant<MCSymbol *, uint64_t>> CUs, 465 llvm::function_ref<std::optional<DWARF5AccelTable::UnitIndexAndEncoding>( 466 const DWARF5AccelTableData &)> 467 getIndexForEntry); 468 469 /// Accelerator table data implementation for simple Apple accelerator tables 470 /// with just a DIE reference. 471 class AppleAccelTableOffsetData : public AppleAccelTableData { 472 public: 473 AppleAccelTableOffsetData(const DIE &D) : Die(D) {} 474 475 void emit(AsmPrinter *Asm) const override; 476 477 static constexpr Atom Atoms[] = { 478 Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4)}; 479 480 #ifndef NDEBUG 481 void print(raw_ostream &OS) const override; 482 #endif 483 protected: 484 uint64_t order() const override { return Die.getOffset(); } 485 486 const DIE &Die; 487 }; 488 489 /// Accelerator table data implementation for Apple type accelerator tables. 490 class AppleAccelTableTypeData : public AppleAccelTableOffsetData { 491 public: 492 AppleAccelTableTypeData(const DIE &D) : AppleAccelTableOffsetData(D) {} 493 494 void emit(AsmPrinter *Asm) const override; 495 496 static constexpr Atom Atoms[] = { 497 Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4), 498 Atom(dwarf::DW_ATOM_die_tag, dwarf::DW_FORM_data2), 499 Atom(dwarf::DW_ATOM_type_flags, dwarf::DW_FORM_data1)}; 500 501 #ifndef NDEBUG 502 void print(raw_ostream &OS) const override; 503 #endif 504 }; 505 506 /// Accelerator table data implementation for simple Apple accelerator tables 507 /// with a DIE offset but no actual DIE pointer. 508 class AppleAccelTableStaticOffsetData : public AppleAccelTableData { 509 public: 510 AppleAccelTableStaticOffsetData(uint32_t Offset) : Offset(Offset) {} 511 512 void emit(AsmPrinter *Asm) const override; 513 514 static constexpr Atom Atoms[] = { 515 Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4)}; 516 517 #ifndef NDEBUG 518 void print(raw_ostream &OS) const override; 519 #endif 520 protected: 521 uint64_t order() const override { return Offset; } 522 523 uint32_t Offset; 524 }; 525 526 /// Accelerator table data implementation for type accelerator tables with 527 /// a DIE offset but no actual DIE pointer. 528 class AppleAccelTableStaticTypeData : public AppleAccelTableStaticOffsetData { 529 public: 530 AppleAccelTableStaticTypeData(uint32_t Offset, uint16_t Tag, 531 bool ObjCClassIsImplementation, 532 uint32_t QualifiedNameHash) 533 : AppleAccelTableStaticOffsetData(Offset), 534 QualifiedNameHash(QualifiedNameHash), Tag(Tag), 535 ObjCClassIsImplementation(ObjCClassIsImplementation) {} 536 537 void emit(AsmPrinter *Asm) const override; 538 539 static constexpr Atom Atoms[] = { 540 Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4), 541 Atom(dwarf::DW_ATOM_die_tag, dwarf::DW_FORM_data2), 542 Atom(5, dwarf::DW_FORM_data1), Atom(6, dwarf::DW_FORM_data4)}; 543 544 #ifndef NDEBUG 545 void print(raw_ostream &OS) const override; 546 #endif 547 protected: 548 uint64_t order() const override { return Offset; } 549 550 uint32_t QualifiedNameHash; 551 uint16_t Tag; 552 bool ObjCClassIsImplementation; 553 }; 554 555 } // end namespace llvm 556 557 #endif // LLVM_CODEGEN_ACCELTABLE_H 558