//===- DebugTypes.cpp -----------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "DebugTypes.h"
#include "COFFLinkerContext.h"
#include "Chunks.h"
#include "Driver.h"
#include "InputFiles.h"
#include "PDB.h"
#include "TypeMerger.h"
#include "lld/Common/ErrorHandler.h"
#include "lld/Common/Memory.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h"
#include "llvm/DebugInfo/CodeView/TypeRecord.h"
#include "llvm/DebugInfo/CodeView/TypeRecordHelpers.h"
#include "llvm/DebugInfo/CodeView/TypeStreamMerger.h"
#include "llvm/DebugInfo/PDB/GenericError.h"
#include "llvm/DebugInfo/PDB/Native/InfoStream.h"
#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
#include "llvm/DebugInfo/PDB/Native/TpiHashing.h"
#include "llvm/DebugInfo/PDB/Native/TpiStream.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/Parallel.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/TimeProfiler.h"

using namespace llvm;
using namespace llvm::codeview;
using namespace lld;
using namespace lld::coff;

namespace {
class TypeServerIpiSource;

// The TypeServerSource class represents a PDB type server, a file referenced
// by OBJ files compiled with MSVC /Zi. A single PDB can be shared by several
// OBJ files, therefore there must be only one instance per PDB. The file path
// is discovered from the dependent OBJ's debug type stream. The
// TypeServerSource object is then queued and loaded by the COFF Driver. The
// debug type stream for such PDB files will be merged first in the final PDB,
// before any dependent OBJ.
class TypeServerSource : public TpiSource {
public:
  explicit TypeServerSource(COFFLinkerContext &ctx, PDBInputFile *f)
      : TpiSource(ctx, PDB, nullptr), pdbInputFile(f) {
    if (f->loadErrorStr)
      return;
    pdb::PDBFile &file = f->session->getPDBFile();
    auto expectedInfo = file.getPDBInfoStream();
    if (!expectedInfo)
      return;
    Guid = expectedInfo->getGuid();
    auto it = ctx.typeServerSourceMappings.emplace(Guid, this);
    if (!it.second) {
      // If we hit this, we have a GUID collision between two PDB files. This
      // can happen if the PDB GUID is invalid or if we are really unlucky. In
      // that case we fall back to a straight file-system lookup.
      it.first->second = nullptr;
    }
  }

  Error mergeDebugT(TypeMerger *m) override;

  void loadGHashes() override;
  void remapTpiWithGHashes(GHashState *g) override;

  bool isDependency() const override { return true; }

  PDBInputFile *pdbInputFile = nullptr;

  // TpiSource for IPI stream.
  TypeServerIpiSource *ipiSrc = nullptr;

  // The PDB signature GUID.
  codeview::GUID Guid;
};
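
// For orientation: a /Zi OBJ refers to its type server through an
// LF_TYPESERVER2 record in its .debug$T section, which carries the PDB's GUID
// and path. UseTypeServerSource (below) resolves that record against the GUID
// registered by TypeServerSource here.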

// Companion to TypeServerSource. Stores the index map for the IPI stream in
// the PDB. Modeling PDBs with two sources for TPI and IPI helps establish the
// invariant of one type index space per source.
class TypeServerIpiSource : public TpiSource {
public:
  explicit TypeServerIpiSource(COFFLinkerContext &ctx)
      : TpiSource(ctx, PDBIpi, nullptr) {}

  friend class TypeServerSource;

  // All of the TpiSource methods are no-ops. The parent TypeServerSource
  // handles both TPI and IPI.
  Error mergeDebugT(TypeMerger *m) override { return Error::success(); }
  void loadGHashes() override {}
  void remapTpiWithGHashes(GHashState *g) override {}
  bool isDependency() const override { return true; }
};

// This class represents the debug type stream of an OBJ file that depends on a
// PDB type server (see TypeServerSource).
class UseTypeServerSource : public TpiSource {
  Expected<TypeServerSource *> getTypeServerSource();

public:
  UseTypeServerSource(COFFLinkerContext &ctx, ObjFile *f, TypeServer2Record ts)
      : TpiSource(ctx, UsingPDB, f), typeServerDependency(ts) {}

  Error mergeDebugT(TypeMerger *m) override;

  // No need to load ghashes from /Zi objects.
  void loadGHashes() override {}
  void remapTpiWithGHashes(GHashState *g) override;

  // Information about the PDB type server dependency, which needs to be loaded
  // before merging this OBJ.
  TypeServer2Record typeServerDependency;
};

// This class represents the debug type stream of a Microsoft precompiled
// headers OBJ (PCH OBJ). This OBJ kind needs to be merged first in the output
// PDB, before any other OBJs that depend on it. Note that only MSVC generates
// such files; clang does not.
class PrecompSource : public TpiSource {
public:
  PrecompSource(COFFLinkerContext &ctx, ObjFile *f) : TpiSource(ctx, PCH, f) {
    // If the S_OBJNAME record contains the PCH signature, we'll register this
    // source file right away.
    registerMapping();
  }

  Error mergeDebugT(TypeMerger *m) override;

  void loadGHashes() override;

  bool isDependency() const override { return true; }

private:
  void registerMapping();

  // Whether this precomp OBJ was recorded in the precompSourceMappings map.
  // Only happens if file->pchSignature is valid.
  bool registered = false;
};

// This class represents the debug type stream of an OBJ file that depends on a
// Microsoft precompiled headers OBJ (see PrecompSource).
class UsePrecompSource : public TpiSource {
public:
  UsePrecompSource(COFFLinkerContext &ctx, ObjFile *f, PrecompRecord precomp)
      : TpiSource(ctx, UsingPCH, f), precompDependency(precomp) {}

  Error mergeDebugT(TypeMerger *m) override;

  void loadGHashes() override;
  void remapTpiWithGHashes(GHashState *g) override;

private:
  Error mergeInPrecompHeaderObj();

  PrecompSource *findObjByName(StringRef fileNameOnly);
  PrecompSource *findPrecompSource(ObjFile *file, PrecompRecord &pr);
  Expected<PrecompSource *> findPrecompMap(ObjFile *file, PrecompRecord &pr);

public:
  // Information about the Precomp OBJ dependency, which needs to be loaded
  // before merging this OBJ.
  PrecompRecord precompDependency;
};
} // namespace

TpiSource::TpiSource(COFFLinkerContext &ctx, TpiKind k, ObjFile *f)
    : ctx(ctx), kind(k), tpiSrcIdx(ctx.tpiSourceList.size()), file(f) {
  ctx.addTpiSource(this);
}

// Vtable key method.
TpiSource::~TpiSource() {
  // Silence any assertions about unchecked errors.
  consumeError(std::move(typeMergingError));
}

TpiSource *lld::coff::makeTpiSource(COFFLinkerContext &ctx, ObjFile *file) {
  return make<TpiSource>(ctx, TpiSource::Regular, file);
}

TpiSource *lld::coff::makeTypeServerSource(COFFLinkerContext &ctx,
                                           PDBInputFile *pdbInputFile) {
  // Type server sources come in pairs: the TPI stream, and the IPI stream.
  auto *tpiSource = make<TypeServerSource>(ctx, pdbInputFile);
  if (pdbInputFile->session->getPDBFile().hasPDBIpiStream())
    tpiSource->ipiSrc = make<TypeServerIpiSource>(ctx);
  return tpiSource;
}

TpiSource *lld::coff::makeUseTypeServerSource(COFFLinkerContext &ctx,
                                              ObjFile *file,
                                              TypeServer2Record ts) {
  return make<UseTypeServerSource>(ctx, file, ts);
}

TpiSource *lld::coff::makePrecompSource(COFFLinkerContext &ctx, ObjFile *file) {
  return make<PrecompSource>(ctx, file);
}

TpiSource *lld::coff::makeUsePrecompSource(COFFLinkerContext &ctx,
                                           ObjFile *file,
                                           PrecompRecord precomp) {
  return make<UsePrecompSource>(ctx, file, precomp);
}

bool TpiSource::remapTypeIndex(TypeIndex &ti, TiRefKind refKind) const {
  if (ti.isSimple())
    return true;

  // This can be an item index or a type index. Choose the appropriate map.
  ArrayRef<TypeIndex> tpiOrIpiMap =
      (refKind == TiRefKind::IndexRef) ? ipiMap : tpiMap;
  if (ti.toArrayIndex() >= tpiOrIpiMap.size())
    return false;
  ti = tpiOrIpiMap[ti.toArrayIndex()];
  return true;
}

void TpiSource::remapRecord(MutableArrayRef<uint8_t> rec,
                            ArrayRef<TiReference> typeRefs) {
  MutableArrayRef<uint8_t> contents = rec.drop_front(sizeof(RecordPrefix));
  for (const TiReference &ref : typeRefs) {
    unsigned byteSize = ref.Count * sizeof(TypeIndex);
    if (contents.size() < ref.Offset + byteSize)
      fatal("symbol record too short");

    MutableArrayRef<TypeIndex> indices(
        reinterpret_cast<TypeIndex *>(contents.data() + ref.Offset), ref.Count);
    for (TypeIndex &ti : indices) {
      if (!remapTypeIndex(ti, ref.Kind)) {
        if (ctx.config.verbose) {
          uint16_t kind =
              reinterpret_cast<const RecordPrefix *>(rec.data())->RecordKind;
          StringRef fname = file ? file->getName() : "<unknown PDB>";
          log("failed to remap type index in record of kind 0x" +
              utohexstr(kind) + " in " + fname + " with bad " +
              (ref.Kind == TiRefKind::IndexRef ? "item" : "type") +
              " index 0x" + utohexstr(ti.getIndex()));
        }
        ti = TypeIndex(SimpleTypeKind::NotTranslated);
        continue;
      }
    }
  }
}

void TpiSource::remapTypesInTypeRecord(MutableArrayRef<uint8_t> rec) {
  // TODO: Handle errors similar to symbols.
  SmallVector<TiReference, 32> typeRefs;
  discoverTypeIndices(CVType(rec), typeRefs);
  remapRecord(rec, typeRefs);
}

bool TpiSource::remapTypesInSymbolRecord(MutableArrayRef<uint8_t> rec) {
  // Discover type index references in the record. Skip it if we don't
  // know where they are.
  SmallVector<TiReference, 32> typeRefs;
  if (!discoverTypeIndicesInSymbol(rec, typeRefs))
    return false;
  remapRecord(rec, typeRefs);
  return true;
}
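
// For reference, a sketch of the .debug$H payload that canUseDebugH() below
// validates (the header layout follows object::debug_h_header in
// llvm/include/llvm/Object/COFF.h):
//
//   uint32_t Magic;         // COFF::DEBUG_HASHES_SECTION_MAGIC
//   uint16_t Version;       // expected to be 0
//   uint16_t HashAlgorithm; // expected to be GlobalTypeHashAlg::BLAKE3
//   ...followed by one 8-byte truncated hash per type record in .debug$T.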

// A COFF .debug$H section is currently a clang extension. This function checks
// if a .debug$H section is in a format that we expect / understand, so that we
// can ignore any sections which are coincidentally also named .debug$H but do
// not contain a format we recognize.
static bool canUseDebugH(ArrayRef<uint8_t> debugH) {
  if (debugH.size() < sizeof(object::debug_h_header))
    return false;
  auto *header =
      reinterpret_cast<const object::debug_h_header *>(debugH.data());
  debugH = debugH.drop_front(sizeof(object::debug_h_header));
  return header->Magic == COFF::DEBUG_HASHES_SECTION_MAGIC &&
         header->Version == 0 &&
         header->HashAlgorithm == uint16_t(GlobalTypeHashAlg::BLAKE3) &&
         (debugH.size() % 8 == 0);
}

static std::optional<ArrayRef<uint8_t>> getDebugH(ObjFile *file) {
  SectionChunk *sec =
      SectionChunk::findByName(file->getDebugChunks(), ".debug$H");
  if (!sec)
    return std::nullopt;
  ArrayRef<uint8_t> contents = sec->getContents();
  if (!canUseDebugH(contents))
    return std::nullopt;
  return contents;
}

static ArrayRef<GloballyHashedType>
getHashesFromDebugH(ArrayRef<uint8_t> debugH) {
  assert(canUseDebugH(debugH));
  debugH = debugH.drop_front(sizeof(object::debug_h_header));
  uint32_t count = debugH.size() / sizeof(GloballyHashedType);
  return {reinterpret_cast<const GloballyHashedType *>(debugH.data()), count};
}

// Merge .debug$T for a generic object file.
Error TpiSource::mergeDebugT(TypeMerger *m) {
  assert(!ctx.config.debugGHashes &&
         "use remapTpiWithGHashes when ghash is enabled");

  CVTypeArray types;
  BinaryStreamReader reader(file->debugTypes, llvm::endianness::little);
  cantFail(reader.readArray(types, reader.getLength()));

  // When dealing with PCH.OBJ, some indices were already merged.
  unsigned nbHeadIndices = indexMapStorage.size();

  std::optional<PCHMergerInfo> pchInfo;
  if (auto err = mergeTypeAndIdRecords(m->idTable, m->typeTable,
                                       indexMapStorage, types, pchInfo))
    fatal("codeview::mergeTypeAndIdRecords failed: " +
          toString(std::move(err)));
  if (pchInfo) {
    file->pchSignature = pchInfo->PCHSignature;
    endPrecompIdx = pchInfo->EndPrecompIndex;
  }

  // In an object, there is only one mapping for both types and items.
  tpiMap = indexMapStorage;
  ipiMap = indexMapStorage;

  if (ctx.config.showSummary) {
    nbTypeRecords = indexMapStorage.size() - nbHeadIndices;
    nbTypeRecordsBytes = reader.getLength();
    // Count how many times we saw each type record in our input. This
    // calculation requires a second pass over the type records to classify
    // each record as a type or index. This is slow, but this code executes
    // only when collecting statistics.
    m->tpiCounts.resize(m->getTypeTable().size());
    m->ipiCounts.resize(m->getIDTable().size());
    uint32_t srcIdx = nbHeadIndices;
    for (const CVType &ty : types) {
      TypeIndex dstIdx = tpiMap[srcIdx++];
      // Type merging may fail, so a complex source type may become the simple
      // NotTranslated type, which cannot be used as an array index.
      if (dstIdx.isSimple())
        continue;
      SmallVectorImpl<uint32_t> &counts =
          isIdRecord(ty.kind()) ? m->ipiCounts : m->tpiCounts;
      ++counts[dstIdx.toArrayIndex()];
    }
  }

  return Error::success();
}

// Merge types from a type server PDB.
Error TypeServerSource::mergeDebugT(TypeMerger *m) {
  assert(!ctx.config.debugGHashes &&
         "use remapTpiWithGHashes when ghash is enabled");

  pdb::PDBFile &pdbFile = pdbInputFile->session->getPDBFile();
  Expected<pdb::TpiStream &> expectedTpi = pdbFile.getPDBTpiStream();
  if (auto e = expectedTpi.takeError())
    fatal("Type server does not have TPI stream: " + toString(std::move(e)));
  pdb::TpiStream *maybeIpi = nullptr;
  if (pdbFile.hasPDBIpiStream()) {
    Expected<pdb::TpiStream &> expectedIpi = pdbFile.getPDBIpiStream();
    if (auto e = expectedIpi.takeError())
      fatal("Error getting type server IPI stream: " + toString(std::move(e)));
    maybeIpi = &*expectedIpi;
  }

  // Merge TPI first, because the IPI stream will reference type indices.
  if (auto err = mergeTypeRecords(m->typeTable, indexMapStorage,
                                  expectedTpi->typeArray()))
    fatal("codeview::mergeTypeRecords failed: " + toString(std::move(err)));
  tpiMap = indexMapStorage;

  // Merge IPI.
  if (maybeIpi) {
    if (auto err = mergeIdRecords(m->idTable, tpiMap, ipiSrc->indexMapStorage,
                                  maybeIpi->typeArray()))
      fatal("codeview::mergeIdRecords failed: " + toString(std::move(err)));
    ipiMap = ipiSrc->indexMapStorage;
  }

  if (ctx.config.showSummary) {
    nbTypeRecords = tpiMap.size() + ipiMap.size();
    nbTypeRecordsBytes =
        expectedTpi->typeArray().getUnderlyingStream().getLength() +
        (maybeIpi ? maybeIpi->typeArray().getUnderlyingStream().getLength()
                  : 0);

    // Count how many times we saw each type record in our input. If a
    // destination type index is present in the source-to-destination type
    // index map, that means we saw it once in the input. Add it to our
    // histogram.
    m->tpiCounts.resize(m->getTypeTable().size());
    m->ipiCounts.resize(m->getIDTable().size());
    for (TypeIndex ti : tpiMap)
      if (!ti.isSimple())
        ++m->tpiCounts[ti.toArrayIndex()];
    for (TypeIndex ti : ipiMap)
      if (!ti.isSimple())
        ++m->ipiCounts[ti.toArrayIndex()];
  }

  return Error::success();
}

Expected<TypeServerSource *> UseTypeServerSource::getTypeServerSource() {
  const codeview::GUID &tsId = typeServerDependency.getGuid();
  StringRef tsPath = typeServerDependency.getName();

  TypeServerSource *tsSrc = nullptr;
  auto it = ctx.typeServerSourceMappings.find(tsId);
  if (it != ctx.typeServerSourceMappings.end()) {
    tsSrc = (TypeServerSource *)it->second;
  }
  if (tsSrc == nullptr) {
    // The file failed to load; look it up by name.
    PDBInputFile *pdb = PDBInputFile::findFromRecordPath(ctx, tsPath, file);
    if (!pdb)
      return createFileError(tsPath, errorCodeToError(std::error_code(
                                         ENOENT, std::generic_category())));
    // If an error occurred during loading, return it now.
    if (pdb->loadErrorStr)
      return createFileError(
          tsPath, make_error<StringError>(*pdb->loadErrorStr,
                                          llvm::inconvertibleErrorCode()));

    tsSrc = (TypeServerSource *)pdb->debugTypesObj;

    // Just because a file with a matching name was found and it was an actual
    // PDB file doesn't mean it matches. For it to match, the InfoStream's GUID
    // must match the GUID specified in the TypeServer2 record.
    if (tsSrc->Guid != tsId) {
      return createFileError(tsPath,
                             make_error<pdb::PDBError>(
                                 pdb::pdb_error_code::signature_out_of_date));
    }
  }
  return tsSrc;
}

Error UseTypeServerSource::mergeDebugT(TypeMerger *m) {
  Expected<TypeServerSource *> tsSrc = getTypeServerSource();
  if (!tsSrc)
    return tsSrc.takeError();

  pdb::PDBFile &pdbSession = (*tsSrc)->pdbInputFile->session->getPDBFile();
  auto expectedInfo = pdbSession.getPDBInfoStream();
  if (!expectedInfo)
    return expectedInfo.takeError();

  // Reuse the type index map of the type server.
  tpiMap = (*tsSrc)->tpiMap;
  ipiMap = (*tsSrc)->ipiMap;
  return Error::success();
}

static bool equalsPath(StringRef path1, StringRef path2) {
#if defined(_WIN32)
  return path1.equals_insensitive(path2);
#else
  return path1 == path2;
#endif
}

// Find an OBJ provided on the command line, by file name.
PrecompSource *UsePrecompSource::findObjByName(StringRef fileNameOnly) {
  SmallString<128> currentPath;
  for (auto kv : ctx.precompSourceMappings) {
    StringRef currentFileName = sys::path::filename(kv.second->file->getName(),
                                                    sys::path::Style::windows);

    // Compare based solely on the file name (link.exe behavior).
    if (equalsPath(currentFileName, fileNameOnly))
      return (PrecompSource *)kv.second;
  }
  return nullptr;
}

PrecompSource *UsePrecompSource::findPrecompSource(ObjFile *file,
                                                   PrecompRecord &pr) {
  // Cross-compile warning: given that Clang doesn't generate LF_PRECOMP
  // records, we assume the OBJ comes from a Windows build of cl.exe. Thus, the
  // paths embedded in the OBJs are in the Windows format.
  SmallString<128> prFileName =
      sys::path::filename(pr.getPrecompFilePath(), sys::path::Style::windows);

  auto it = ctx.precompSourceMappings.find(pr.getSignature());
  if (it != ctx.precompSourceMappings.end()) {
    return (PrecompSource *)it->second;
  }
  // Lookup by name.
  return findObjByName(prFileName);
}

Expected<PrecompSource *> UsePrecompSource::findPrecompMap(ObjFile *file,
                                                           PrecompRecord &pr) {
  PrecompSource *precomp = findPrecompSource(file, pr);

  if (!precomp)
    return createFileError(
        pr.getPrecompFilePath(),
        make_error<pdb::PDBError>(pdb::pdb_error_code::no_matching_pch));

  // We don't rely on the PCH signature to validate the concordance between the
  // PCH and the OBJ that uses it. However, we do validate here that the
  // LF_ENDPRECOMP record index lines up with the number of type records
  // LF_PRECOMP is expecting.
  if (precomp->endPrecompIdx != pr.getTypesCount())
    return createFileError(
        toString(file),
        make_error<pdb::PDBError>(pdb::pdb_error_code::no_matching_pch));

  return precomp;
}

/// Merges a precompiled headers TPI map into the current TPI map. The
/// precompiled headers object will also be loaded and remapped in the
/// process.
Error UsePrecompSource::mergeInPrecompHeaderObj() {
  auto e = findPrecompMap(file, precompDependency);
  if (!e)
    return e.takeError();

  PrecompSource *precompSrc = *e;
  if (precompSrc->tpiMap.empty())
    return Error::success();

  assert(precompDependency.getStartTypeIndex() ==
         TypeIndex::FirstNonSimpleIndex);
  assert(precompDependency.getTypesCount() <= precompSrc->tpiMap.size());
  // Use the previously remapped index map from the precompiled headers.
  indexMapStorage.insert(indexMapStorage.begin(), precompSrc->tpiMap.begin(),
                         precompSrc->tpiMap.begin() +
                             precompDependency.getTypesCount());

  return Error::success();
}

Error UsePrecompSource::mergeDebugT(TypeMerger *m) {
  // This object was compiled with /Yu, so process the corresponding
  // precompiled headers object (/Yc) first. Some type indices in the current
  // object are referencing data in the precompiled headers object, so we need
  // both to be loaded.
  if (Error e = mergeInPrecompHeaderObj())
    return e;

  return TpiSource::mergeDebugT(m);
}

Error PrecompSource::mergeDebugT(TypeMerger *m) {
  // In some cases, the S_OBJNAME record doesn't contain the PCH signature.
  // The signature comes later with the LF_ENDPRECOMP record, so we first need
  // to merge in all the .PCH.OBJ file type records, before registering below.
  if (Error e = TpiSource::mergeDebugT(m))
    return e;

  registerMapping();

  return Error::success();
}

void PrecompSource::registerMapping() {
  if (registered)
    return;
  if (file->pchSignature && *file->pchSignature) {
    auto it = ctx.precompSourceMappings.emplace(*file->pchSignature, this);
    if (!it.second)
      fatal("a PCH object with the same signature has already been provided (" +
            toString(it.first->second->file) + " and " + toString(file) + ")");
    registered = true;
  }
}

//===----------------------------------------------------------------------===//
// Parallel GHash type merging implementation.
//===----------------------------------------------------------------------===//

void TpiSource::loadGHashes() {
  if (std::optional<ArrayRef<uint8_t>> debugH = getDebugH(file)) {
    ghashes = getHashesFromDebugH(*debugH);
    ownedGHashes = false;
  } else {
    CVTypeArray types;
    BinaryStreamReader reader(file->debugTypes, llvm::endianness::little);
    cantFail(reader.readArray(types, reader.getLength()));
    assignGHashesFromVector(GloballyHashedType::hashTypes(types));
  }

  fillIsItemIndexFromDebugT();
}

// Copies ghashes from a vector into an array. These are long lived, so it's
// worth the time to copy these into an appropriately sized array to reduce
// memory usage.
void TpiSource::assignGHashesFromVector(
    std::vector<GloballyHashedType> &&hashVec) {
  if (hashVec.empty())
    return;
  GloballyHashedType *hashes = new GloballyHashedType[hashVec.size()];
  memcpy(hashes, hashVec.data(), hashVec.size() * sizeof(GloballyHashedType));
  ghashes = ArrayRef(hashes, hashVec.size());
  ownedGHashes = true;
}

// Faster way to iterate type records. forEachTypeChecked is faster than
// iterating CVTypeArray. It avoids virtual readBytes calls in inner loops.
static void forEachTypeChecked(ArrayRef<uint8_t> types,
                               function_ref<void(const CVType &)> fn) {
  checkError(
      forEachCodeViewRecord<CVType>(types, [fn](const CVType &ty) -> Error {
        fn(ty);
        return Error::success();
      }));
}

// Walk over file->debugTypes and fill in the isItemIndex bit vector.
// TODO: Store this information in .debug$H so that we don't have to recompute
// it. This is the main bottleneck slowing down parallel ghashing with one
// thread over single-threaded ghashing.
void TpiSource::fillIsItemIndexFromDebugT() {
  uint32_t index = 0;
  isItemIndex.resize(ghashes.size());
  forEachTypeChecked(file->debugTypes, [&](const CVType &ty) {
    if (isIdRecord(ty.kind()))
      isItemIndex.set(index);
    ++index;
  });
}

void TpiSource::mergeTypeRecord(TypeIndex curIndex, CVType ty) {
  // Decide if the merged type goes into TPI or IPI.
  bool isItem = isIdRecord(ty.kind());
  MergedInfo &merged = isItem ? mergedIpi : mergedTpi;

  // Copy the type into our mutable buffer.
  assert(ty.length() <= codeview::MaxRecordLength);
  size_t offset = merged.recs.size();
  size_t newSize = alignTo(ty.length(), 4);
  merged.recs.resize(offset + newSize);
  auto newRec = MutableArrayRef(&merged.recs[offset], newSize);
  memcpy(newRec.data(), ty.data().data(), newSize);

  // Fix up the record prefix and padding bytes if it required resizing.
  if (newSize != ty.length()) {
    reinterpret_cast<RecordPrefix *>(newRec.data())->RecordLen = newSize - 2;
    for (size_t i = ty.length(); i < newSize; ++i)
      newRec[i] = LF_PAD0 + (newSize - i);
  }

  // Remap the type indices in the new record.
  remapTypesInTypeRecord(newRec);
  uint32_t pdbHash = check(pdb::hashTypeRecord(CVType(newRec)));
  merged.recSizes.push_back(static_cast<uint16_t>(newSize));
  merged.recHashes.push_back(pdbHash);

  // Retain a mapping from PDB function id to PDB function type. This mapping
  // is used during symbol processing to rewrite S_GPROC32_ID symbols to
  // S_GPROC32 symbols.
  if (ty.kind() == LF_FUNC_ID || ty.kind() == LF_MFUNC_ID) {
    bool success = ty.length() >= 12;
    TypeIndex funcId = curIndex;
    if (success)
      success &= remapTypeIndex(funcId, TiRefKind::IndexRef);
    TypeIndex funcType =
        *reinterpret_cast<const TypeIndex *>(&newRec.data()[8]);
    if (success) {
      funcIdToType.push_back({funcId, funcType});
    } else {
      StringRef fname = file ? file->getName() : "<unknown PDB>";
      warn("corrupt LF_[M]FUNC_ID record 0x" + utohexstr(curIndex.getIndex()) +
           " in " + fname);
    }
  }
}

void TpiSource::mergeUniqueTypeRecords(ArrayRef<uint8_t> typeRecords,
                                       TypeIndex beginIndex) {
  // Re-sort the list of unique types by index.
  if (kind == PDB)
    assert(llvm::is_sorted(uniqueTypes));
  else
    llvm::sort(uniqueTypes);

  // Accumulate all the unique types into one buffer in mergedTypes.
  uint32_t ghashIndex = 0;
  auto nextUniqueIndex = uniqueTypes.begin();
  assert(mergedTpi.recs.empty());
  assert(mergedIpi.recs.empty());

  // Pre-compute the number of elements in advance to avoid std::vector
  // resizes.
  unsigned nbTpiRecs = 0;
  unsigned nbIpiRecs = 0;
  forEachTypeChecked(typeRecords, [&](const CVType &ty) {
    if (nextUniqueIndex != uniqueTypes.end() &&
        *nextUniqueIndex == ghashIndex) {
      assert(ty.length() <= codeview::MaxRecordLength);
      size_t newSize = alignTo(ty.length(), 4);
      (isIdRecord(ty.kind()) ? nbIpiRecs : nbTpiRecs) += newSize;
      ++nextUniqueIndex;
    }
    ++ghashIndex;
  });
  mergedTpi.recs.reserve(nbTpiRecs);
  mergedIpi.recs.reserve(nbIpiRecs);

  // Do the actual type merge.
  ghashIndex = 0;
  nextUniqueIndex = uniqueTypes.begin();
  forEachTypeChecked(typeRecords, [&](const CVType &ty) {
    if (nextUniqueIndex != uniqueTypes.end() &&
        *nextUniqueIndex == ghashIndex) {
      mergeTypeRecord(beginIndex + ghashIndex, ty);
      ++nextUniqueIndex;
    }
    ++ghashIndex;
  });
  assert(nextUniqueIndex == uniqueTypes.end() &&
         "failed to merge all desired records");
  assert(uniqueTypes.size() ==
             mergedTpi.recSizes.size() + mergedIpi.recSizes.size() &&
         "missing desired record");
}

void TpiSource::remapTpiWithGHashes(GHashState *g) {
  assert(ctx.config.debugGHashes && "ghashes must be enabled");
  fillMapFromGHashes(g);
  tpiMap = indexMapStorage;
  ipiMap = indexMapStorage;
  mergeUniqueTypeRecords(file->debugTypes);
  // TODO: Free all unneeded ghash resources now that we have a full index map.

  if (ctx.config.showSummary) {
    nbTypeRecords = ghashes.size();
    nbTypeRecordsBytes = file->debugTypes.size();
  }
}

// PDBs do not actually store global hashes, so when merging a type server
// PDB we have to synthesize global hashes. To do this, we first synthesize
// global hashes for the TPI stream, since it is independent, then we
// synthesize hashes for the IPI stream, using the hashes for the TPI stream
// as inputs.
void TypeServerSource::loadGHashes() {
  // Don't hash twice.
  if (!ghashes.empty())
    return;
  pdb::PDBFile &pdbFile = pdbInputFile->session->getPDBFile();

  // Hash TPI stream.
  Expected<pdb::TpiStream &> expectedTpi = pdbFile.getPDBTpiStream();
  if (auto e = expectedTpi.takeError())
    fatal("Type server does not have TPI stream: " + toString(std::move(e)));
  assignGHashesFromVector(
      GloballyHashedType::hashTypes(expectedTpi->typeArray()));
  isItemIndex.resize(ghashes.size());

  // Hash IPI stream, which depends on TPI ghashes.
  if (!pdbFile.hasPDBIpiStream())
    return;
  Expected<pdb::TpiStream &> expectedIpi = pdbFile.getPDBIpiStream();
  if (auto e = expectedIpi.takeError())
    fatal("error retrieving IPI stream: " + toString(std::move(e)));
  ipiSrc->assignGHashesFromVector(
      GloballyHashedType::hashIds(expectedIpi->typeArray(), ghashes));

  // The IPI stream isItemIndex bitvector should be all ones.
  ipiSrc->isItemIndex.resize(ipiSrc->ghashes.size());
  ipiSrc->isItemIndex.set(0, ipiSrc->ghashes.size());
}

// Flatten discontiguous PDB type arrays to bytes so that we can use
// forEachTypeChecked instead of CVTypeArray iteration. Copying all types from
// type servers is faster than iterating all object files compiled with /Z7
// with CVTypeArray, which has high overheads due to the virtual interface of
// BinaryStream::readBytes.
static ArrayRef<uint8_t> typeArrayToBytes(const CVTypeArray &types) {
  BinaryStreamRef stream = types.getUnderlyingStream();
  ArrayRef<uint8_t> debugTypes;
  checkError(stream.readBytes(0, stream.getLength(), debugTypes));
  return debugTypes;
}

// Merge types from a type server PDB.
void TypeServerSource::remapTpiWithGHashes(GHashState *g) {
  assert(ctx.config.debugGHashes && "ghashes must be enabled");

  // IPI merging depends on TPI, so do TPI first, then do IPI. No need to
  // propagate errors, those should've been handled during ghash loading.
  pdb::PDBFile &pdbFile = pdbInputFile->session->getPDBFile();
  pdb::TpiStream &tpi = check(pdbFile.getPDBTpiStream());
  fillMapFromGHashes(g);
  tpiMap = indexMapStorage;
  mergeUniqueTypeRecords(typeArrayToBytes(tpi.typeArray()));
  if (pdbFile.hasPDBIpiStream()) {
    pdb::TpiStream &ipi = check(pdbFile.getPDBIpiStream());
    ipiSrc->indexMapStorage.resize(ipiSrc->ghashes.size());
    ipiSrc->fillMapFromGHashes(g);
    ipiMap = ipiSrc->indexMapStorage;
    ipiSrc->tpiMap = tpiMap;
    ipiSrc->ipiMap = ipiMap;
    ipiSrc->mergeUniqueTypeRecords(typeArrayToBytes(ipi.typeArray()));

    if (ctx.config.showSummary) {
      nbTypeRecords = ipiSrc->ghashes.size();
      nbTypeRecordsBytes = ipi.typeArray().getUnderlyingStream().getLength();
    }
  }

  if (ctx.config.showSummary) {
    nbTypeRecords += ghashes.size();
    nbTypeRecordsBytes += tpi.typeArray().getUnderlyingStream().getLength();
  }
}

void UseTypeServerSource::remapTpiWithGHashes(GHashState *g) {
  // No remapping to do with /Zi objects. Simply use the index map from the
  // type server. Errors should have been reported earlier. Symbols from this
  // object will be ignored.
  Expected<TypeServerSource *> maybeTsSrc = getTypeServerSource();
  if (!maybeTsSrc) {
    typeMergingError =
        joinErrors(std::move(typeMergingError), maybeTsSrc.takeError());
    return;
  }
  TypeServerSource *tsSrc = *maybeTsSrc;
  tpiMap = tsSrc->tpiMap;
  ipiMap = tsSrc->ipiMap;
}

void PrecompSource::loadGHashes() {
  if (getDebugH(file)) {
    warn("ignoring .debug$H section; pch with ghash is not implemented");
  }

  uint32_t ghashIdx = 0;
  std::vector<GloballyHashedType> hashVec;
  forEachTypeChecked(file->debugTypes, [&](const CVType &ty) {
    // Remember the index of the LF_ENDPRECOMP record so it can be excluded
    // from the PDB. There must be an entry in the list of ghashes so that the
    // type indexes of the following records in the /Yc PCH object line up.
    if (ty.kind() == LF_ENDPRECOMP) {
      EndPrecompRecord endPrecomp;
      cantFail(TypeDeserializer::deserializeAs<EndPrecompRecord>(
          const_cast<CVType &>(ty), endPrecomp));
      file->pchSignature = endPrecomp.getSignature();
      registerMapping();
      endPrecompIdx = ghashIdx;
    }

    hashVec.push_back(GloballyHashedType::hashType(ty, hashVec, hashVec));
    isItemIndex.push_back(isIdRecord(ty.kind()));
    ++ghashIdx;
  });
  assignGHashesFromVector(std::move(hashVec));
}

void UsePrecompSource::loadGHashes() {
  auto e = findPrecompMap(file, precompDependency);
  if (!e) {
    warn(toString(e.takeError()));
    return;
  }

  PrecompSource *pchSrc = *e;

  // To compute ghashes of a /Yu object file, we need to build on the ghashes
  // of the /Yc PCH object. After we are done hashing, discard the ghashes from
  // the PCH source so we don't unnecessarily try to deduplicate them.
  std::vector<GloballyHashedType> hashVec =
      pchSrc->ghashes.take_front(precompDependency.getTypesCount());
  forEachTypeChecked(file->debugTypes, [&](const CVType &ty) {
    hashVec.push_back(GloballyHashedType::hashType(ty, hashVec, hashVec));
    isItemIndex.push_back(isIdRecord(ty.kind()));
  });
  hashVec.erase(hashVec.begin(),
                hashVec.begin() + precompDependency.getTypesCount());
  assignGHashesFromVector(std::move(hashVec));
}

void UsePrecompSource::remapTpiWithGHashes(GHashState *g) {
  fillMapFromGHashes(g);
  // This object was compiled with /Yu, so process the corresponding
  // precompiled headers object (/Yc) first. Some type indices in the current
  // object are referencing data in the precompiled headers object, so we need
  // both to be loaded.
  if (Error e = mergeInPrecompHeaderObj()) {
    typeMergingError = joinErrors(std::move(typeMergingError), std::move(e));
    return;
  }

  tpiMap = indexMapStorage;
  ipiMap = indexMapStorage;
  mergeUniqueTypeRecords(file->debugTypes,
                         TypeIndex(precompDependency.getStartTypeIndex() +
                                   precompDependency.getTypesCount()));
  if (ctx.config.showSummary) {
    nbTypeRecords = ghashes.size();
    nbTypeRecordsBytes = file->debugTypes.size();
  }
}

namespace {
/// A concurrent hash table for global type hashing. It is based on this paper:
/// Concurrent Hash Tables: Fast and General(?)!
/// https://dl.acm.org/doi/10.1145/3309206
///
/// This hash table is meant to be used in two phases:
/// 1. concurrent insertions
/// 2. concurrent reads
/// It does not support lookup, deletion, or rehashing. It uses linear probing.
///
/// The paper describes storing a key-value pair in two machine words.
/// Generally, the values stored in this map are type indices, and we can use
/// those values to recover the ghash key from a side table. This allows us to
/// shrink the table entries further at the cost of some loads, and sidesteps
/// the need for a 128 bit atomic compare-and-swap operation.
///
/// During insertion, a priority function is used to decide which insertion
/// should be preferred. This ensures that the output is deterministic. For
/// ghashing, lower tpiSrcIdx values (earlier inputs) are preferred.
///
class GHashCell;
struct GHashTable {
  GHashCell *table = nullptr;
  uint32_t tableSize = 0;

  GHashTable() = default;
  ~GHashTable();

  /// Initialize the table with the given size. Because the table cannot be
  /// resized, the initial size of the table must be large enough to contain
  /// all inputs, or insertion may not be able to find an empty cell.
  void init(uint32_t newTableSize);

  /// Insert the cell with the given ghash into the table. Return the insertion
  /// position in the table. It is safe for the caller to store the insertion
  /// position because the table cannot be resized.
  uint32_t insert(COFFLinkerContext &ctx, GloballyHashedType ghash,
                  GHashCell newCell);
};
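
// Sketch of the 64-bit GHashCell packing implemented below, from most to
// least significant bits:
//
//   bit  63     : isItem (IPI record vs. TPI record)
//   bits 62..32 : tpiSrcIdx + 1 (so that an empty cell is all zeros)
//   bits 31..0  : ghashIdx (index into the source's ghash array)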

/// A ghash table cell for deduplicating types from TpiSources.
class GHashCell {
  // Force "data" to be 64-bit aligned; otherwise, some versions of clang
  // will generate calls to libatomic when using some versions of libstdc++
  // on 32-bit targets. (Also, in theory, there could be a target where
  // new[] doesn't always return an 8-byte-aligned allocation.)
  alignas(sizeof(uint64_t)) uint64_t data = 0;

public:
  GHashCell() = default;

  // Construct data most to least significant so that sorting works well:
  // - isItem
  // - tpiSrcIdx
  // - ghashIdx
  // Add one to the tpiSrcIdx so that the 0th record from the 0th source has a
  // non-zero representation.
  GHashCell(bool isItem, uint32_t tpiSrcIdx, uint32_t ghashIdx)
      : data((uint64_t(isItem) << 63U) | (uint64_t(tpiSrcIdx + 1) << 32ULL) |
             ghashIdx) {
    assert(tpiSrcIdx == getTpiSrcIdx() && "round trip failure");
    assert(ghashIdx == getGHashIdx() && "round trip failure");
  }

  explicit GHashCell(uint64_t data) : data(data) {}

  // The empty cell is all zeros.
  bool isEmpty() const { return data == 0ULL; }

  /// Extract the tpiSrcIdx.
  uint32_t getTpiSrcIdx() const {
    return ((uint32_t)(data >> 32U) & 0x7FFFFFFF) - 1;
  }

  /// Extract the index into the ghash array of the TpiSource.
  uint32_t getGHashIdx() const { return (uint32_t)data; }

  bool isItem() const { return data & (1ULL << 63U); }

  /// Get the ghash key for this cell.
  GloballyHashedType getGHash(const COFFLinkerContext &ctx) const {
    return ctx.tpiSourceList[getTpiSrcIdx()]->ghashes[getGHashIdx()];
  }

  /// The priority function for the cell. The data is stored such that lower
  /// tpiSrcIdx and ghashIdx values are preferred, which means that type
  /// records from earlier sources are more likely to prevail.
  friend inline bool operator<(const GHashCell &l, const GHashCell &r) {
    return l.data < r.data;
  }
};
} // namespace

namespace lld::coff {
/// This type is just a wrapper around GHashTable with external linkage so it
/// can be used from a header.
struct GHashState {
  GHashTable table;
};
} // namespace lld::coff

GHashTable::~GHashTable() { delete[] table; }

void GHashTable::init(uint32_t newTableSize) {
  table = new GHashCell[newTableSize];
  memset(table, 0, newTableSize * sizeof(GHashCell));
  tableSize = newTableSize;
}

uint32_t GHashTable::insert(COFFLinkerContext &ctx, GloballyHashedType ghash,
                            GHashCell newCell) {
  assert(!newCell.isEmpty() && "cannot insert empty cell value");

  // FIXME: The low bytes of SHA1 have low entropy for short records, which
  // type records are. Swap the byte order for better entropy. A better ghash
  // won't need this.
  uint32_t startIdx =
      llvm::byteswap<uint64_t>(*reinterpret_cast<uint64_t *>(&ghash)) %
      tableSize;

  // Do a linear probe starting at startIdx.
  uint32_t idx = startIdx;
  while (true) {
    // Run a compare and swap loop. There are four cases:
    // - cell is empty: CAS into place and return
    // - cell has matching key, earlier priority: do nothing, return
    // - cell has matching key, later priority: CAS into place and return
    // - cell has non-matching key: hash collision, probe next cell
    auto *cellPtr = reinterpret_cast<std::atomic<GHashCell> *>(&table[idx]);
    GHashCell oldCell(cellPtr->load());
    while (oldCell.isEmpty() || oldCell.getGHash(ctx) == ghash) {
      // Check if there is an existing ghash entry with a higher priority
      // (earlier ordering). If so, this is a duplicate, we are done.
      if (!oldCell.isEmpty() && oldCell < newCell)
        return idx;
      // Either the cell is empty, or our value is higher priority. Try to
      // compare and swap.
      // If it succeeds, we are done.
      if (cellPtr->compare_exchange_weak(oldCell, newCell))
        return idx;
      // If the CAS failed, check this cell again.
    }

    // Advance the probe. Wrap around to the beginning if we run off the end.
    ++idx;
    idx = idx == tableSize ? 0 : idx;
    if (idx == startIdx) {
      // If this becomes an issue, we could mark failure and rehash from the
      // beginning with a bigger table. There is no difference between
      // rehashing internally and starting over.
      report_fatal_error("ghash table is full");
    }
  }
  llvm_unreachable("left infloop");
}

TypeMerger::TypeMerger(COFFLinkerContext &c, llvm::BumpPtrAllocator &alloc)
    : typeTable(alloc), idTable(alloc), ctx(c) {}

TypeMerger::~TypeMerger() = default;

void TypeMerger::mergeTypesWithGHash() {
  // Load ghashes. Do type servers and PCH objects first.
  {
    llvm::TimeTraceScope timeScope("Load GHASHes");
    ScopedTimer t1(ctx.loadGHashTimer);
    parallelForEach(dependencySources,
                    [&](TpiSource *source) { source->loadGHashes(); });
    parallelForEach(objectSources,
                    [&](TpiSource *source) { source->loadGHashes(); });
  }

  llvm::TimeTraceScope timeScope("Merge types (GHASH)");
  ScopedTimer t2(ctx.mergeGHashTimer);
  GHashState ghashState;

  // Estimate the size of hash table needed to deduplicate ghashes. This *must*
  // be larger than the number of unique types, or hash table insertion may not
  // be able to find a vacant slot. Summing the input types guarantees this,
  // but it is a gross overestimate. The table size could be reduced to save
  // memory, but it would require implementing rehashing, and this table is
  // generally small compared to total memory usage, at eight bytes per input
  // type record, and most input type records are larger than eight bytes.
  size_t tableSize = 0;
  for (TpiSource *source : ctx.tpiSourceList)
    tableSize += source->ghashes.size();

  // Cap the table size so that we can use 32-bit cell indices. Type indices
  // are also 32-bit, so this is an inherent PDB file format limit anyway.
  tableSize =
      std::min(size_t(INT32_MAX) - TypeIndex::FirstNonSimpleIndex, tableSize);
  ghashState.table.init(static_cast<uint32_t>(tableSize));

  // Insert ghashes in parallel. During concurrent insertion, we cannot observe
  // the contents of the hash table cell, but we can remember the insertion
  // position. Because the table does not rehash, the position will not change
  // under insertion. After insertion is done, the value of the cell can be
  // read to retrieve the final PDB type index.
  parallelFor(0, ctx.tpiSourceList.size(), [&](size_t tpiSrcIdx) {
    TpiSource *source = ctx.tpiSourceList[tpiSrcIdx];
    source->indexMapStorage.resize(source->ghashes.size());
    for (uint32_t i = 0, e = source->ghashes.size(); i < e; i++) {
      if (source->shouldOmitFromPdb(i)) {
        source->indexMapStorage[i] = TypeIndex(SimpleTypeKind::NotTranslated);
        continue;
      }
      GloballyHashedType ghash = source->ghashes[i];
      bool isItem = source->isItemIndex.test(i);
      uint32_t cellIdx =
          ghashState.table.insert(ctx, ghash, GHashCell(isItem, tpiSrcIdx, i));

      // Store the ghash cell index as a type index in indexMapStorage. Later
      // we will replace it with the PDB type index.
      source->indexMapStorage[i] = TypeIndex::fromArrayIndex(cellIdx);
    }
  });

  // Collect all non-empty cells and sort them. This will implicitly assign
  // destination type indices, and partition the entries into type records and
  // item records. It arranges types in this order:
  // - type records
  //   - source 0, type 0...
  //   - source 1, type 1...
  // - item records
  //   - source 0, type 1...
  //   - source 1, type 0...
  std::vector<GHashCell> entries;
  for (const GHashCell &cell : ArrayRef(ghashState.table.table, tableSize)) {
    if (!cell.isEmpty())
      entries.push_back(cell);
  }
  parallelSort(entries, std::less<GHashCell>());
  log(formatv("ghash table load factor: {0:p} (size {1} / capacity {2})\n",
              tableSize ? double(entries.size()) / tableSize : 0,
              entries.size(), tableSize));

  // Find out how many type and item indices there are.
  auto mid = llvm::lower_bound(entries, GHashCell(true, 0, 0));
  assert((mid == entries.end() || mid->isItem()) &&
         (mid == entries.begin() || !std::prev(mid)->isItem()) &&
         "midpoint is not midpoint");
  uint32_t numTypes = std::distance(entries.begin(), mid);
  uint32_t numItems = std::distance(mid, entries.end());
  log("Tpi record count: " + Twine(numTypes));
  log("Ipi record count: " + Twine(numItems));

  // Make a list of the "unique" type records to merge for each tpi source.
  // Type merging will skip indices not on this list. Store the destination PDB
  // type index for these unique types in the tpiMap for each source. The
  // entries for non-unique types will be filled in prior to type merging.
  for (uint32_t i = 0, e = entries.size(); i < e; ++i) {
    auto &cell = entries[i];
    uint32_t tpiSrcIdx = cell.getTpiSrcIdx();
    TpiSource *source = ctx.tpiSourceList[tpiSrcIdx];
    source->uniqueTypes.push_back(cell.getGHashIdx());

    // Update the ghash table to store the destination PDB type index in the
    // table.
    uint32_t pdbTypeIndex = i < numTypes ? i : i - numTypes;
    uint32_t ghashCellIndex =
        source->indexMapStorage[cell.getGHashIdx()].toArrayIndex();
    ghashState.table.table[ghashCellIndex] =
        GHashCell(cell.isItem(), cell.getTpiSrcIdx(), pdbTypeIndex);
  }

  // In parallel, remap all types.
  for (TpiSource *source : dependencySources)
    source->remapTpiWithGHashes(&ghashState);
  parallelForEach(objectSources, [&](TpiSource *source) {
    source->remapTpiWithGHashes(&ghashState);
  });

  // Build a global map from function ID to function type.
  for (TpiSource *source : ctx.tpiSourceList) {
    for (auto idToType : source->funcIdToType)
      funcIdToType.insert(idToType);
    source->funcIdToType.clear();
  }

  clearGHashes();
}

void TypeMerger::sortDependencies() {
  // Order dependencies first, but preserve the existing order.
  std::vector<TpiSource *> deps;
  std::vector<TpiSource *> objs;
  for (TpiSource *s : ctx.tpiSourceList)
    (s->isDependency() ? deps : objs).push_back(s);
  uint32_t numDeps = deps.size();
  uint32_t numObjs = objs.size();
  ctx.tpiSourceList = std::move(deps);
  ctx.tpiSourceList.insert(ctx.tpiSourceList.end(), objs.begin(), objs.end());
  for (uint32_t i = 0, e = ctx.tpiSourceList.size(); i < e; ++i)
    ctx.tpiSourceList[i]->tpiSrcIdx = i;
  dependencySources = ArrayRef(ctx.tpiSourceList.data(), numDeps);
  objectSources = ArrayRef(ctx.tpiSourceList.data() + numDeps, numObjs);
}

/// Given the index into the ghash table for a particular type, return the type
/// index for that type in the output PDB.
static TypeIndex loadPdbTypeIndexFromCell(GHashState *g,
                                          uint32_t ghashCellIdx) {
  GHashCell cell = g->table.table[ghashCellIdx];
  return TypeIndex::fromArrayIndex(cell.getGHashIdx());
}

/// Free heap allocated ghashes.
void TypeMerger::clearGHashes() {
  for (TpiSource *src : ctx.tpiSourceList) {
    if (src->ownedGHashes)
      delete[] src->ghashes.data();
    src->ghashes = {};
    src->isItemIndex.clear();
    src->uniqueTypes.clear();
  }
}

// Fill in a TPI or IPI index map using ghashes. For each source type, use its
// ghash to lookup its final type index in the PDB, and store that in the map.
void TpiSource::fillMapFromGHashes(GHashState *g) {
  for (size_t i = 0, e = ghashes.size(); i < e; ++i) {
    TypeIndex fakeCellIndex = indexMapStorage[i];
    if (fakeCellIndex.isSimple())
      indexMapStorage[i] = fakeCellIndex;
    else
      indexMapStorage[i] =
          loadPdbTypeIndexFromCell(g, fakeCellIndex.toArrayIndex());
  }
}