//===- DebugTypes.cpp -----------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "DebugTypes.h"
#include "COFFLinkerContext.h"
#include "Chunks.h"
#include "Driver.h"
#include "InputFiles.h"
#include "PDB.h"
#include "TypeMerger.h"
#include "lld/Common/ErrorHandler.h"
#include "lld/Common/Memory.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h"
#include "llvm/DebugInfo/CodeView/TypeRecord.h"
#include "llvm/DebugInfo/CodeView/TypeRecordHelpers.h"
#include "llvm/DebugInfo/CodeView/TypeStreamMerger.h"
#include "llvm/DebugInfo/PDB/GenericError.h"
#include "llvm/DebugInfo/PDB/Native/InfoStream.h"
#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
#include "llvm/DebugInfo/PDB/Native/TpiHashing.h"
#include "llvm/DebugInfo/PDB/Native/TpiStream.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/Parallel.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/TimeProfiler.h"

using namespace llvm;
using namespace llvm::codeview;
using namespace lld;
using namespace lld::coff;

namespace {
class TypeServerIpiSource;

// The TypeServerSource class represents a PDB type server, a file referenced by
// OBJ files compiled with MSVC /Zi. A single PDB can be shared by several OBJ
// files, therefore there must be only one instance shared by all those OBJs.
// The file path is discovered from the dependent OBJ's debug type stream. The
// TypeServerSource object is then queued and loaded by the COFF Driver. The
// debug type stream for such PDB files will be merged first in the final PDB,
// before any dependent OBJ.
class TypeServerSource : public TpiSource {
public:
  explicit TypeServerSource(COFFLinkerContext &ctx, PDBInputFile *f)
      : TpiSource(ctx, PDB, nullptr), pdbInputFile(f) {
    // If the PDB failed to load, leave Guid unset; lookups will fall back to
    // the file-system path in the TypeServer2 record.
    if (f->loadErrorStr)
      return;
    pdb::PDBFile &file = f->session->getPDBFile();
    auto expectedInfo = file.getPDBInfoStream();
    if (!expectedInfo)
      return;
    Guid = expectedInfo->getGuid();
    auto it = ctx.typeServerSourceMappings.emplace(Guid, this);
    if (!it.second) {
      // If we hit here we have collision on Guid's in two PDB files.
      // This can happen if the PDB Guid is invalid or if we are really
      // unlucky. This should fall back on straight file-system lookup.
      it.first->second = nullptr;
    }
  }

  Error mergeDebugT(TypeMerger *m) override;

  void loadGHashes() override;
  void remapTpiWithGHashes(GHashState *g) override;

  // Type servers are merged before any dependent OBJ.
  bool isDependency() const override { return true; }

  PDBInputFile *pdbInputFile = nullptr;

  // TpiSource for IPI stream.
  TypeServerIpiSource *ipiSrc = nullptr;

  // The PDB signature GUID, used to match dependent OBJs to this server.
  codeview::GUID Guid;
};

// Companion to TypeServerSource. Stores the index map for the IPI stream in the
// PDB. Modeling PDBs with two sources for TPI and IPI helps establish the
// invariant of one type index space per source.
class TypeServerIpiSource : public TpiSource {
public:
  explicit TypeServerIpiSource(COFFLinkerContext &ctx)
      : TpiSource(ctx, PDBIpi, nullptr) {}

  friend class TypeServerSource;

  // All of the TpiSource methods are no-ops. The parent TypeServerSource
  // handles both TPI and IPI.
  Error mergeDebugT(TypeMerger *m) override { return Error::success(); }
  void loadGHashes() override {}
  void remapTpiWithGHashes(GHashState *g) override {}
  bool isDependency() const override { return true; }
};

// This class represents the debug type stream of an OBJ file that depends on a
// PDB type server (see TypeServerSource).
class UseTypeServerSource : public TpiSource {
  Expected<TypeServerSource *> getTypeServerSource();

public:
  UseTypeServerSource(COFFLinkerContext &ctx, ObjFile *f, TypeServer2Record ts)
      : TpiSource(ctx, UsingPDB, f), typeServerDependency(ts) {}

  Error mergeDebugT(TypeMerger *m) override;

  // No need to load ghashes from /Zi objects.
  void loadGHashes() override {}
  void remapTpiWithGHashes(GHashState *g) override;

  // Information about the PDB type server dependency, that needs to be loaded
  // in before merging this OBJ.
  TypeServer2Record typeServerDependency;
};

// This class represents the debug type stream of a Microsoft precompiled
// headers OBJ (PCH OBJ). This OBJ kind needs to be merged first in the output
// PDB, before any other OBJs that depend on this. Note that only MSVC
// generates such files, clang does not.
class PrecompSource : public TpiSource {
public:
  PrecompSource(COFFLinkerContext &ctx, ObjFile *f) : TpiSource(ctx, PCH, f) {
    // If the S_OBJNAME record contains the PCH signature, we'll register this
    // source file right away.
    registerMapping();
  }

  Error mergeDebugT(TypeMerger *m) override;

  void loadGHashes() override;

  bool isDependency() const override { return true; }

private:
  // Insert this source into ctx.precompSourceMappings, keyed by PCH signature.
  void registerMapping();

  // Whether this precomp OBJ was recorded in the precompSourceMappings map.
  // Only happens if the file->pchSignature is valid.
  bool registered = false;
};

// This class represents the debug type stream of an OBJ file that depends on a
// Microsoft precompiled headers OBJ (see PrecompSource).
class UsePrecompSource : public TpiSource {
public:
  UsePrecompSource(COFFLinkerContext &ctx, ObjFile *f, PrecompRecord precomp)
      : TpiSource(ctx, UsingPCH, f), precompDependency(precomp) {}

  Error mergeDebugT(TypeMerger *m) override;

  void loadGHashes() override;
  void remapTpiWithGHashes(GHashState *g) override;

private:
  // Prepend the already-remapped PCH index map to this source's map.
  Error mergeInPrecompHeaderObj();

  PrecompSource *findObjByName(StringRef fileNameOnly);
  PrecompSource *findPrecompSource(ObjFile *file, PrecompRecord &pr);
  Expected<PrecompSource *> findPrecompMap(ObjFile *file, PrecompRecord &pr);

public:
  // Information about the Precomp OBJ dependency, that needs to be loaded in
  // before merging this OBJ.
  PrecompRecord precompDependency;
};
} // namespace

TpiSource::TpiSource(COFFLinkerContext &ctx, TpiKind k, ObjFile *f)
    : ctx(ctx), kind(k), tpiSrcIdx(ctx.tpiSourceList.size()), file(f) {
  ctx.addTpiSource(this);
}

// Vtable key method.
TpiSource::~TpiSource() {
  // Silence any assertions about unchecked errors.
  consumeError(std::move(typeMergingError));
}

TpiSource *lld::coff::makeTpiSource(COFFLinkerContext &ctx, ObjFile *file) {
  return make<TpiSource>(ctx, TpiSource::Regular, file);
}

TpiSource *lld::coff::makeTypeServerSource(COFFLinkerContext &ctx,
                                           PDBInputFile *pdbInputFile) {
  // Type server sources come in pairs: the TPI stream, and the IPI stream.
  auto *tpiSource = make<TypeServerSource>(ctx, pdbInputFile);
  if (pdbInputFile->session->getPDBFile().hasPDBIpiStream())
    tpiSource->ipiSrc = make<TypeServerIpiSource>(ctx);
  return tpiSource;
}

TpiSource *lld::coff::makeUseTypeServerSource(COFFLinkerContext &ctx,
                                              ObjFile *file,
                                              TypeServer2Record ts) {
  return make<UseTypeServerSource>(ctx, file, ts);
}

TpiSource *lld::coff::makePrecompSource(COFFLinkerContext &ctx, ObjFile *file) {
  return make<PrecompSource>(ctx, file);
}

TpiSource *lld::coff::makeUsePrecompSource(COFFLinkerContext &ctx,
                                           ObjFile *file,
                                           PrecompRecord precomp) {
  return make<UsePrecompSource>(ctx, file, precomp);
}

// Rewrite a source-space type index `ti` to its destination (merged PDB)
// index. Returns false if the index is out of range of the map, in which case
// the caller substitutes NotTranslated.
bool TpiSource::remapTypeIndex(TypeIndex &ti, TiRefKind refKind) const {
  if (ti.isSimple())
    return true;

  // This can be an item index or a type index. Choose the appropriate map.
  ArrayRef<TypeIndex> tpiOrIpiMap =
      (refKind == TiRefKind::IndexRef) ? ipiMap : tpiMap;
  if (ti.toArrayIndex() >= tpiOrIpiMap.size())
    return false;
  ti = tpiOrIpiMap[ti.toArrayIndex()];
  return true;
}

// Remap every type index referenced by `rec` in place, using the discovered
// reference locations in `typeRefs`. Unmappable indices are replaced with the
// simple NotTranslated type rather than aborting the link.
void TpiSource::remapRecord(MutableArrayRef<uint8_t> rec,
                            ArrayRef<TiReference> typeRefs) {
  MutableArrayRef<uint8_t> contents = rec.drop_front(sizeof(RecordPrefix));
  for (const TiReference &ref : typeRefs) {
    unsigned byteSize = ref.Count * sizeof(TypeIndex);
    if (contents.size() < ref.Offset + byteSize)
      Fatal(ctx) << "symbol record too short";

    MutableArrayRef<TypeIndex> indices(
        reinterpret_cast<TypeIndex *>(contents.data() + ref.Offset), ref.Count);
    for (TypeIndex &ti : indices) {
      if (!remapTypeIndex(ti, ref.Kind)) {
        if (ctx.config.verbose) {
          uint16_t kind =
              reinterpret_cast<const RecordPrefix *>(rec.data())->RecordKind;
          StringRef fname = file ? file->getName() : "<unknown PDB>";
          Log(ctx) << "failed to remap type index in record of kind 0x"
                   << utohexstr(kind) << " in " << fname << " with bad "
                   << (ref.Kind == TiRefKind::IndexRef ? "item" : "type")
                   << " index 0x" << utohexstr(ti.getIndex());
        }
        ti = TypeIndex(SimpleTypeKind::NotTranslated);
        continue;
      }
    }
  }
}

// Remap all type indices contained in a single type record.
void TpiSource::remapTypesInTypeRecord(MutableArrayRef<uint8_t> rec) {
  // TODO: Handle errors similar to symbols.
  SmallVector<TiReference, 32> typeRefs;
  discoverTypeIndices(CVType(rec), typeRefs);
  remapRecord(rec, typeRefs);
}

// Remap all type indices contained in a single symbol record. Returns false if
// the record kind is unknown so the caller can drop it.
bool TpiSource::remapTypesInSymbolRecord(MutableArrayRef<uint8_t> rec) {
  // Discover type index references in the record. Skip it if we don't
  // know where they are.
  SmallVector<TiReference, 32> typeRefs;
  if (!discoverTypeIndicesInSymbol(rec, typeRefs))
    return false;
  remapRecord(rec, typeRefs);
  return true;
}

// A COFF .debug$H section is currently a clang extension. This function checks
// if a .debug$H section is in a format that we expect / understand, so that we
// can ignore any sections which are coincidentally also named .debug$H but do
// not contain a format we recognize.
static bool canUseDebugH(ArrayRef<uint8_t> debugH) {
  if (debugH.size() < sizeof(object::debug_h_header))
    return false;
  auto *header =
      reinterpret_cast<const object::debug_h_header *>(debugH.data());
  debugH = debugH.drop_front(sizeof(object::debug_h_header));
  // Only version 0 BLAKE3 hashes with a whole number of 8-byte entries are
  // understood.
  return header->Magic == COFF::DEBUG_HASHES_SECTION_MAGIC &&
         header->Version == 0 &&
         header->HashAlgorithm == uint16_t(GlobalTypeHashAlg::BLAKE3) &&
         (debugH.size() % 8 == 0);
}

// Return the contents of the .debug$H section if `file` has one in a format we
// recognize, otherwise std::nullopt.
static std::optional<ArrayRef<uint8_t>> getDebugH(ObjFile *file) {
  SectionChunk *sec =
      SectionChunk::findByName(file->getDebugChunks(), ".debug$H");
  if (!sec)
    return std::nullopt;
  ArrayRef<uint8_t> contents = sec->getContents();
  if (!canUseDebugH(contents))
    return std::nullopt;
  return contents;
}

// View the payload of a validated .debug$H section as an array of precomputed
// global type hashes (no copy).
static ArrayRef<GloballyHashedType>
getHashesFromDebugH(ArrayRef<uint8_t> debugH) {
  assert(canUseDebugH(debugH));
  debugH = debugH.drop_front(sizeof(object::debug_h_header));
  uint32_t count = debugH.size() / sizeof(GloballyHashedType);
  return {reinterpret_cast<const GloballyHashedType *>(debugH.data()), count};
}

// Merge .debug$T for a generic object file.
Error TpiSource::mergeDebugT(TypeMerger *m) {
  assert(!ctx.config.debugGHashes &&
         "use remapTpiWithGHashes when ghash is enabled");

  CVTypeArray types;
  BinaryStreamReader reader(file->debugTypes, llvm::endianness::little);
  cantFail(reader.readArray(types, reader.getLength()));

  // When dealing with PCH.OBJ, some indices were already merged.
  unsigned nbHeadIndices = indexMapStorage.size();

  std::optional<PCHMergerInfo> pchInfo;
  if (auto err = mergeTypeAndIdRecords(m->idTable, m->typeTable,
                                       indexMapStorage, types, pchInfo))
    Fatal(ctx) << "codeview::mergeTypeAndIdRecords failed: "
               << toString(std::move(err));
  // If this was a PCH OBJ, remember the signature and the index of the
  // LF_ENDPRECOMP record for later validation by dependent /Yu OBJs.
  if (pchInfo) {
    file->pchSignature = pchInfo->PCHSignature;
    endPrecompIdx = pchInfo->EndPrecompIndex;
  }

  // In an object, there is only one mapping for both types and items.
  tpiMap = indexMapStorage;
  ipiMap = indexMapStorage;

  if (ctx.config.showSummary) {
    nbTypeRecords = indexMapStorage.size() - nbHeadIndices;
    nbTypeRecordsBytes = reader.getLength();
    // Count how many times we saw each type record in our input. This
    // calculation requires a second pass over the type records to classify each
    // record as a type or index. This is slow, but this code executes when
    // collecting statistics.
    m->tpiCounts.resize(m->getTypeTable().size());
    m->ipiCounts.resize(m->getIDTable().size());
    uint32_t srcIdx = nbHeadIndices;
    for (const CVType &ty : types) {
      TypeIndex dstIdx = tpiMap[srcIdx++];
      // Type merging may fail, so a complex source type may become the simple
      // NotTranslated type, which cannot be used as an array index.
      if (dstIdx.isSimple())
        continue;
      SmallVectorImpl<uint32_t> &counts =
          isIdRecord(ty.kind()) ? m->ipiCounts : m->tpiCounts;
      ++counts[dstIdx.toArrayIndex()];
    }
  }

  return Error::success();
}

// Merge types from a type server PDB.
Error TypeServerSource::mergeDebugT(TypeMerger *m) {
  assert(!ctx.config.debugGHashes &&
         "use remapTpiWithGHashes when ghash is enabled");

  pdb::PDBFile &pdbFile = pdbInputFile->session->getPDBFile();
  Expected<pdb::TpiStream &> expectedTpi = pdbFile.getPDBTpiStream();
  if (auto e = expectedTpi.takeError())
    Fatal(ctx) << "Type server does not have TPI stream: "
               << toString(std::move(e));
  pdb::TpiStream *maybeIpi = nullptr;
  if (pdbFile.hasPDBIpiStream()) {
    Expected<pdb::TpiStream &> expectedIpi = pdbFile.getPDBIpiStream();
    if (auto e = expectedIpi.takeError())
      Fatal(ctx) << "Error getting type server IPI stream: "
                 << toString(std::move(e));
    maybeIpi = &*expectedIpi;
  }

  // Merge TPI first, because the IPI stream will reference type indices.
  if (auto err = mergeTypeRecords(m->typeTable, indexMapStorage,
                                  expectedTpi->typeArray()))
    Fatal(ctx) << "codeview::mergeTypeRecords failed: "
               << toString(std::move(err));
  tpiMap = indexMapStorage;

  // Merge IPI. Its index map lives on the companion TypeServerIpiSource.
  if (maybeIpi) {
    if (auto err = mergeIdRecords(m->idTable, tpiMap, ipiSrc->indexMapStorage,
                                  maybeIpi->typeArray()))
      Fatal(ctx) << "codeview::mergeIdRecords failed: "
                 << toString(std::move(err));
    ipiMap = ipiSrc->indexMapStorage;
  }

  if (ctx.config.showSummary) {
    nbTypeRecords = tpiMap.size() + ipiMap.size();
    nbTypeRecordsBytes =
        expectedTpi->typeArray().getUnderlyingStream().getLength() +
        (maybeIpi ? maybeIpi->typeArray().getUnderlyingStream().getLength()
                  : 0);

    // Count how many times we saw each type record in our input. If a
    // destination type index is present in the source to destination type index
    // map, that means we saw it once in the input. Add it to our histogram.
    m->tpiCounts.resize(m->getTypeTable().size());
    m->ipiCounts.resize(m->getIDTable().size());
    for (TypeIndex ti : tpiMap)
      if (!ti.isSimple())
        ++m->tpiCounts[ti.toArrayIndex()];
    for (TypeIndex ti : ipiMap)
      if (!ti.isSimple())
        ++m->ipiCounts[ti.toArrayIndex()];
  }

  return Error::success();
}

// Locate the TypeServerSource this /Zi OBJ depends on: first by GUID, then by
// falling back to the file path recorded in the TypeServer2 record. Returns an
// error if the PDB is missing, failed to load, or its GUID does not match.
Expected<TypeServerSource *> UseTypeServerSource::getTypeServerSource() {
  const codeview::GUID &tsId = typeServerDependency.getGuid();
  StringRef tsPath = typeServerDependency.getName();

  TypeServerSource *tsSrc = nullptr;
  auto it = ctx.typeServerSourceMappings.find(tsId);
  if (it != ctx.typeServerSourceMappings.end()) {
    tsSrc = (TypeServerSource *)it->second;
  }
  if (tsSrc == nullptr) {
    // The file failed to load, lookup by name
    PDBInputFile *pdb = PDBInputFile::findFromRecordPath(ctx, tsPath, file);
    if (!pdb)
      return createFileError(tsPath, errorCodeToError(std::error_code(
                                         ENOENT, std::generic_category())));
    // If an error occurred during loading, throw it now
    if (pdb->loadErrorStr)
      return createFileError(
          tsPath, make_error<StringError>(*pdb->loadErrorStr,
                                          llvm::inconvertibleErrorCode()));

    tsSrc = (TypeServerSource *)pdb->debugTypesObj;

    // Just because a file with a matching name was found and it was an actual
    // PDB file doesn't mean it matches. For it to match the InfoStream's GUID
    // must match the GUID specified in the TypeServer2 record.
    if (tsSrc->Guid != tsId) {
      return createFileError(tsPath,
                             make_error<pdb::PDBError>(
                                 pdb::pdb_error_code::signature_out_of_date));
    }
  }
  return tsSrc;
}

Error UseTypeServerSource::mergeDebugT(TypeMerger *m) {
  Expected<TypeServerSource *> tsSrc = getTypeServerSource();
  if (!tsSrc)
    return tsSrc.takeError();

  pdb::PDBFile &pdbSession = (*tsSrc)->pdbInputFile->session->getPDBFile();
  auto expectedInfo = pdbSession.getPDBInfoStream();
  if (!expectedInfo)
    return expectedInfo.takeError();

  // Reuse the type index map of the type server.
  tpiMap = (*tsSrc)->tpiMap;
  ipiMap = (*tsSrc)->ipiMap;
  return Error::success();
}

// Path equality: case-insensitive on Windows hosts, exact elsewhere.
static bool equalsPath(StringRef path1, StringRef path2) {
#if defined(_WIN32)
  return path1.equals_insensitive(path2);
#else
  return path1 == path2;
#endif
}

// Find by name an OBJ provided on the command line
PrecompSource *UsePrecompSource::findObjByName(StringRef fileNameOnly) {
  SmallString<128> currentPath;
  for (auto kv : ctx.precompSourceMappings) {
    StringRef currentFileName = sys::path::filename(kv.second->file->getName(),
                                                    sys::path::Style::windows);

    // Compare based solely on the file name (link.exe behavior)
    if (equalsPath(currentFileName, fileNameOnly))
      return (PrecompSource *)kv.second;
  }
  return nullptr;
}

PrecompSource *UsePrecompSource::findPrecompSource(ObjFile *file,
                                                   PrecompRecord &pr) {
  // Cross-compile warning: given that Clang doesn't generate LF_PRECOMP
  // records, we assume the OBJ comes from a Windows build of cl.exe. Thus,
  // the paths embedded in the OBJs are in the Windows format.
  SmallString<128> prFileName =
      sys::path::filename(pr.getPrecompFilePath(), sys::path::Style::windows);

  // Prefer an exact PCH-signature match over a name match.
  auto it = ctx.precompSourceMappings.find(pr.getSignature());
  if (it != ctx.precompSourceMappings.end()) {
    return (PrecompSource *)it->second;
  }
  // Lookup by name
  return findObjByName(prFileName);
}

Expected<PrecompSource *> UsePrecompSource::findPrecompMap(ObjFile *file,
                                                           PrecompRecord &pr) {
  PrecompSource *precomp = findPrecompSource(file, pr);

  if (!precomp)
    return createFileError(
        pr.getPrecompFilePath(),
        make_error<pdb::PDBError>(pdb::pdb_error_code::no_matching_pch));

  // Don't rely on the PCH signature to validate the concordance between the PCH
  // and the OBJ that uses it. However we do validate here that the
  // LF_ENDPRECOMP record index lines up with the number of type records
  // LF_PRECOMP is expecting.
  if (precomp->endPrecompIdx != pr.getTypesCount())
    return createFileError(
        toString(file),
        make_error<pdb::PDBError>(pdb::pdb_error_code::no_matching_pch));

  return precomp;
}

/// Merges a precompiled headers TPI map into the current TPI map. The
/// precompiled headers object will also be loaded and remapped in the
/// process.
Error UsePrecompSource::mergeInPrecompHeaderObj() {
  auto e = findPrecompMap(file, precompDependency);
  if (!e)
    return e.takeError();

  PrecompSource *precompSrc = *e;
  if (precompSrc->tpiMap.empty())
    return Error::success();

  // The PCH types always occupy the head of the index space in a /Yu OBJ.
  assert(precompDependency.getStartTypeIndex() ==
         TypeIndex::FirstNonSimpleIndex);
  assert(precompDependency.getTypesCount() <= precompSrc->tpiMap.size());
  // Use the previously remapped index map from the precompiled headers.
  indexMapStorage.insert(indexMapStorage.begin(), precompSrc->tpiMap.begin(),
                         precompSrc->tpiMap.begin() +
                             precompDependency.getTypesCount());

  return Error::success();
}

Error UsePrecompSource::mergeDebugT(TypeMerger *m) {
  // This object was compiled with /Yu, so process the corresponding
  // precompiled headers object (/Yc) first. Some type indices in the current
  // object are referencing data in the precompiled headers object, so we need
  // both to be loaded.
  if (Error e = mergeInPrecompHeaderObj())
    return e;

  return TpiSource::mergeDebugT(m);
}

Error PrecompSource::mergeDebugT(TypeMerger *m) {
  // In some cases, the S_OBJNAME record doesn't contain the PCH signature.
  // The signature comes later with the LF_ENDPRECOMP record, so we first need
  // to merge in all the .PCH.OBJ file type records, before registering below.
  if (Error e = TpiSource::mergeDebugT(m))
    return e;

  registerMapping();

  return Error::success();
}

void PrecompSource::registerMapping() {
  // Idempotent: may be called from the constructor and again after merging.
  if (registered)
    return;
  if (file->pchSignature && *file->pchSignature) {
    auto it = ctx.precompSourceMappings.emplace(*file->pchSignature, this);
    if (!it.second)
      Fatal(ctx)
          << "a PCH object with the same signature has already been provided ("
          << toString(it.first->second->file) << " and " << toString(file)
          << ")";
    registered = true;
  }
}

//===----------------------------------------------------------------------===//
// Parallel GHash type merging implementation.
//===----------------------------------------------------------------------===//

void TpiSource::loadGHashes() {
  // Prefer precomputed hashes from a valid .debug$H section (clang emits
  // these); otherwise hash the .debug$T records ourselves.
  if (std::optional<ArrayRef<uint8_t>> debugH = getDebugH(file)) {
    ghashes = getHashesFromDebugH(*debugH);
    ownedGHashes = false;
  } else {
    CVTypeArray types;
    BinaryStreamReader reader(file->debugTypes, llvm::endianness::little);
    cantFail(reader.readArray(types, reader.getLength()));
    assignGHashesFromVector(GloballyHashedType::hashTypes(types));
  }

  fillIsItemIndexFromDebugT();
}

// Copies ghashes from a vector into an array. These are long lived, so it's
// worth the time to copy these into an appropriately sized vector to reduce
// memory usage.
void TpiSource::assignGHashesFromVector(
    std::vector<GloballyHashedType> &&hashVec) {
  if (hashVec.empty())
    return;
  // The raw allocation is owned by this source (ownedGHashes) and sized
  // exactly, unlike the vector which may have slack capacity.
  GloballyHashedType *hashes = new GloballyHashedType[hashVec.size()];
  memcpy(hashes, hashVec.data(), hashVec.size() * sizeof(GloballyHashedType));
  ghashes = ArrayRef(hashes, hashVec.size());
  ownedGHashes = true;
}

// Faster way to iterate type records. forEachTypeChecked is faster than
// iterating CVTypeArray. It avoids virtual readBytes calls in inner loops.
static void forEachTypeChecked(ArrayRef<uint8_t> types,
                               function_ref<void(const CVType &)> fn) {
  checkError(
      forEachCodeViewRecord<CVType>(types, [fn](const CVType &ty) -> Error {
        fn(ty);
        return Error::success();
      }));
}

// Walk over file->debugTypes and fill in the isItemIndex bit vector.
// TODO: Store this information in .debug$H so that we don't have to recompute
// it. This is the main bottleneck slowing down parallel ghashing with one
// thread over single-threaded ghashing.
633 void TpiSource::fillIsItemIndexFromDebugT() { 634 uint32_t index = 0; 635 isItemIndex.resize(ghashes.size()); 636 forEachTypeChecked(file->debugTypes, [&](const CVType &ty) { 637 if (isIdRecord(ty.kind())) 638 isItemIndex.set(index); 639 ++index; 640 }); 641 } 642 643 void TpiSource::mergeTypeRecord(TypeIndex curIndex, CVType ty) { 644 // Decide if the merged type goes into TPI or IPI. 645 bool isItem = isIdRecord(ty.kind()); 646 MergedInfo &merged = isItem ? mergedIpi : mergedTpi; 647 648 // Copy the type into our mutable buffer. 649 assert(ty.length() <= codeview::MaxRecordLength); 650 size_t offset = merged.recs.size(); 651 size_t newSize = alignTo(ty.length(), 4); 652 merged.recs.resize(offset + newSize); 653 auto newRec = MutableArrayRef(&merged.recs[offset], newSize); 654 memcpy(newRec.data(), ty.data().data(), newSize); 655 656 // Fix up the record prefix and padding bytes if it required resizing. 657 if (newSize != ty.length()) { 658 reinterpret_cast<RecordPrefix *>(newRec.data())->RecordLen = newSize - 2; 659 for (size_t i = ty.length(); i < newSize; ++i) 660 newRec[i] = LF_PAD0 + (newSize - i); 661 } 662 663 // Remap the type indices in the new record. 664 remapTypesInTypeRecord(newRec); 665 uint32_t pdbHash = check(pdb::hashTypeRecord(CVType(newRec))); 666 merged.recSizes.push_back(static_cast<uint16_t>(newSize)); 667 merged.recHashes.push_back(pdbHash); 668 669 // Retain a mapping from PDB function id to PDB function type. This mapping is 670 // used during symbol processing to rewrite S_GPROC32_ID symbols to S_GPROC32 671 // symbols. 672 if (ty.kind() == LF_FUNC_ID || ty.kind() == LF_MFUNC_ID) { 673 bool success = ty.length() >= 12; 674 TypeIndex funcId = curIndex; 675 if (success) 676 success &= remapTypeIndex(funcId, TiRefKind::IndexRef); 677 TypeIndex funcType = 678 *reinterpret_cast<const TypeIndex *>(&newRec.data()[8]); 679 if (success) { 680 funcIdToType.push_back({funcId, funcType}); 681 } else { 682 StringRef fname = file ? 
file->getName() : "<unknown PDB>"; 683 Warn(ctx) << "corrupt LF_[M]FUNC_ID record 0x" 684 << utohexstr(curIndex.getIndex()) << " in " << fname; 685 } 686 } 687 } 688 689 void TpiSource::mergeUniqueTypeRecords(ArrayRef<uint8_t> typeRecords, 690 TypeIndex beginIndex) { 691 // Re-sort the list of unique types by index. 692 if (kind == PDB) 693 assert(llvm::is_sorted(uniqueTypes)); 694 else 695 llvm::sort(uniqueTypes); 696 697 // Accumulate all the unique types into one buffer in mergedTypes. 698 uint32_t ghashIndex = 0; 699 auto nextUniqueIndex = uniqueTypes.begin(); 700 assert(mergedTpi.recs.empty()); 701 assert(mergedIpi.recs.empty()); 702 703 // Pre-compute the number of elements in advance to avoid std::vector resizes. 704 unsigned nbTpiRecs = 0; 705 unsigned nbIpiRecs = 0; 706 forEachTypeChecked(typeRecords, [&](const CVType &ty) { 707 if (nextUniqueIndex != uniqueTypes.end() && 708 *nextUniqueIndex == ghashIndex) { 709 assert(ty.length() <= codeview::MaxRecordLength); 710 size_t newSize = alignTo(ty.length(), 4); 711 (isIdRecord(ty.kind()) ? nbIpiRecs : nbTpiRecs) += newSize; 712 ++nextUniqueIndex; 713 } 714 ++ghashIndex; 715 }); 716 mergedTpi.recs.reserve(nbTpiRecs); 717 mergedIpi.recs.reserve(nbIpiRecs); 718 719 // Do the actual type merge. 
720 ghashIndex = 0; 721 nextUniqueIndex = uniqueTypes.begin(); 722 forEachTypeChecked(typeRecords, [&](const CVType &ty) { 723 if (nextUniqueIndex != uniqueTypes.end() && 724 *nextUniqueIndex == ghashIndex) { 725 mergeTypeRecord(beginIndex + ghashIndex, ty); 726 ++nextUniqueIndex; 727 } 728 ++ghashIndex; 729 }); 730 assert(nextUniqueIndex == uniqueTypes.end() && 731 "failed to merge all desired records"); 732 assert(uniqueTypes.size() == 733 mergedTpi.recSizes.size() + mergedIpi.recSizes.size() && 734 "missing desired record"); 735 } 736 737 void TpiSource::remapTpiWithGHashes(GHashState *g) { 738 assert(ctx.config.debugGHashes && "ghashes must be enabled"); 739 fillMapFromGHashes(g); 740 tpiMap = indexMapStorage; 741 ipiMap = indexMapStorage; 742 mergeUniqueTypeRecords(file->debugTypes); 743 // TODO: Free all unneeded ghash resources now that we have a full index map. 744 745 if (ctx.config.showSummary) { 746 nbTypeRecords = ghashes.size(); 747 nbTypeRecordsBytes = file->debugTypes.size(); 748 } 749 } 750 751 // PDBs do not actually store global hashes, so when merging a type server 752 // PDB we have to synthesize global hashes. To do this, we first synthesize 753 // global hashes for the TPI stream, since it is independent, then we 754 // synthesize hashes for the IPI stream, using the hashes for the TPI stream 755 // as inputs. 756 void TypeServerSource::loadGHashes() { 757 // Don't hash twice. 758 if (!ghashes.empty()) 759 return; 760 pdb::PDBFile &pdbFile = pdbInputFile->session->getPDBFile(); 761 762 // Hash TPI stream. 763 Expected<pdb::TpiStream &> expectedTpi = pdbFile.getPDBTpiStream(); 764 if (auto e = expectedTpi.takeError()) 765 Fatal(ctx) << "Type server does not have TPI stream: " 766 << toString(std::move(e)); 767 assignGHashesFromVector( 768 GloballyHashedType::hashTypes(expectedTpi->typeArray())); 769 isItemIndex.resize(ghashes.size()); 770 771 // Hash IPI stream, which depends on TPI ghashes. 
772 if (!pdbFile.hasPDBIpiStream()) 773 return; 774 Expected<pdb::TpiStream &> expectedIpi = pdbFile.getPDBIpiStream(); 775 if (auto e = expectedIpi.takeError()) 776 Fatal(ctx) << "error retrieving IPI stream: " << toString(std::move(e)); 777 ipiSrc->assignGHashesFromVector( 778 GloballyHashedType::hashIds(expectedIpi->typeArray(), ghashes)); 779 780 // The IPI stream isItemIndex bitvector should be all ones. 781 ipiSrc->isItemIndex.resize(ipiSrc->ghashes.size()); 782 ipiSrc->isItemIndex.set(0, ipiSrc->ghashes.size()); 783 } 784 785 // Flatten discontiguous PDB type arrays to bytes so that we can use 786 // forEachTypeChecked instead of CVTypeArray iteration. Copying all types from 787 // type servers is faster than iterating all object files compiled with /Z7 with 788 // CVTypeArray, which has high overheads due to the virtual interface of 789 // BinaryStream::readBytes. 790 static ArrayRef<uint8_t> typeArrayToBytes(const CVTypeArray &types) { 791 BinaryStreamRef stream = types.getUnderlyingStream(); 792 ArrayRef<uint8_t> debugTypes; 793 checkError(stream.readBytes(0, stream.getLength(), debugTypes)); 794 return debugTypes; 795 } 796 797 // Merge types from a type server PDB. 798 void TypeServerSource::remapTpiWithGHashes(GHashState *g) { 799 assert(ctx.config.debugGHashes && "ghashes must be enabled"); 800 801 // IPI merging depends on TPI, so do TPI first, then do IPI. No need to 802 // propagate errors, those should've been handled during ghash loading. 
  pdb::PDBFile &pdbFile = pdbInputFile->session->getPDBFile();
  pdb::TpiStream &tpi = check(pdbFile.getPDBTpiStream());
  // Resolve the placeholder ghash-cell indices stored in indexMapStorage into
  // final PDB type indices, then publish the TPI map and merge this source's
  // unique records.
  fillMapFromGHashes(g);
  tpiMap = indexMapStorage;
  mergeUniqueTypeRecords(typeArrayToBytes(tpi.typeArray()));
  if (pdbFile.hasPDBIpiStream()) {
    pdb::TpiStream &ipi = check(pdbFile.getPDBIpiStream());
    ipiSrc->indexMapStorage.resize(ipiSrc->ghashes.size());
    ipiSrc->fillMapFromGHashes(g);
    ipiMap = ipiSrc->indexMapStorage;
    // The IPI companion source gets both maps, since its records may refer to
    // records in either stream.
    ipiSrc->tpiMap = tpiMap;
    ipiSrc->ipiMap = ipiMap;
    ipiSrc->mergeUniqueTypeRecords(typeArrayToBytes(ipi.typeArray()));

    if (ctx.config.showSummary) {
      nbTypeRecords = ipiSrc->ghashes.size();
      nbTypeRecordsBytes = ipi.typeArray().getUnderlyingStream().getLength();
    }
  }

  if (ctx.config.showSummary) {
    // Accumulate the TPI stream's contribution on top of the IPI stats set
    // above (if any).
    nbTypeRecords += ghashes.size();
    nbTypeRecordsBytes += tpi.typeArray().getUnderlyingStream().getLength();
  }
}

void UseTypeServerSource::remapTpiWithGHashes(GHashState *g) {
  // No remapping to do with /Zi objects. Simply use the index map from the
  // type server. Errors should have been reported earlier. Symbols from this
  // object will be ignored.
  Expected<TypeServerSource *> maybeTsSrc = getTypeServerSource();
  if (!maybeTsSrc) {
    typeMergingError =
        joinErrors(std::move(typeMergingError), maybeTsSrc.takeError());
    return;
  }
  TypeServerSource *tsSrc = *maybeTsSrc;
  tpiMap = tsSrc->tpiMap;
  ipiMap = tsSrc->ipiMap;
}

void PrecompSource::loadGHashes() {
  // A .debug$H section would let us reuse precomputed hashes, but combining
  // that with PCH objects is not supported; fall through to hashing manually.
  if (getDebugH(file)) {
    Warn(ctx) << "ignoring .debug$H section; pch with ghash is not implemented";
  }

  uint32_t ghashIdx = 0;
  std::vector<GloballyHashedType> hashVec;
  forEachTypeChecked(file->debugTypes, [&](const CVType &ty) {
    // Remember the index of the LF_ENDPRECOMP record so it can be excluded
    // from the PDB. There must be an entry in the list of ghashes so that the
    // type indexes of the following records in the /Yc PCH object line up.
    if (ty.kind() == LF_ENDPRECOMP) {
      EndPrecompRecord endPrecomp;
      cantFail(TypeDeserializer::deserializeAs<EndPrecompRecord>(
          const_cast<CVType &>(ty), endPrecomp));
      file->pchSignature = endPrecomp.getSignature();
      registerMapping();
      endPrecompIdx = ghashIdx;
    }

    hashVec.push_back(GloballyHashedType::hashType(ty, hashVec, hashVec));
    isItemIndex.push_back(isIdRecord(ty.kind()));
    ++ghashIdx;
  });
  assignGHashesFromVector(std::move(hashVec));
}

void UsePrecompSource::loadGHashes() {
  auto e = findPrecompMap(file, precompDependency);
  if (!e) {
    Warn(ctx) << e.takeError();
    return;
  }

  PrecompSource *pchSrc = *e;

  // To compute ghashes of a /Yu object file, we need to build on the ghashes
  // of the /Yc PCH object. After we are done hashing, discard the ghashes
  // from the PCH source so we don't unnecessarily try to deduplicate them.
  std::vector<GloballyHashedType> hashVec =
      pchSrc->ghashes.take_front(precompDependency.getTypesCount());
  forEachTypeChecked(file->debugTypes, [&](const CVType &ty) {
    hashVec.push_back(GloballyHashedType::hashType(ty, hashVec, hashVec));
    isItemIndex.push_back(isIdRecord(ty.kind()));
  });
  // Drop the borrowed PCH prefix; only this object's own records should be
  // attributed to this source.
  hashVec.erase(hashVec.begin(),
                hashVec.begin() + precompDependency.getTypesCount());
  assignGHashesFromVector(std::move(hashVec));
}

void UsePrecompSource::remapTpiWithGHashes(GHashState *g) {
  fillMapFromGHashes(g);
  // This object was compiled with /Yu, so process the corresponding
  // precompiled headers object (/Yc) first. Some type indices in the current
  // object are referencing data in the precompiled headers object, so we need
  // both to be loaded.
  if (Error e = mergeInPrecompHeaderObj()) {
    typeMergingError = joinErrors(std::move(typeMergingError), std::move(e));
    return;
  }

  tpiMap = indexMapStorage;
  ipiMap = indexMapStorage;
  // Start merging at the first record that belongs to this object; the
  // indices before startTypeIndex + typesCount come from the PCH object.
  mergeUniqueTypeRecords(file->debugTypes,
                         TypeIndex(precompDependency.getStartTypeIndex() +
                                   precompDependency.getTypesCount()));
  if (ctx.config.showSummary) {
    nbTypeRecords = ghashes.size();
    nbTypeRecordsBytes = file->debugTypes.size();
  }
}

namespace {
/// A concurrent hash table for global type hashing. It is based on this paper:
/// Concurrent Hash Tables: Fast and General(?)!
/// https://dl.acm.org/doi/10.1145/3309206
///
/// This hash table is meant to be used in two phases:
/// 1. concurrent insertions
/// 2. concurrent reads
/// It does not support lookup, deletion, or rehashing. It uses linear probing.
///
/// The paper describes storing a key-value pair in two machine words.
/// Generally, the values stored in this map are type indices, and we can use
/// those values to recover the ghash key from a side table. This allows us to
/// shrink the table entries further at the cost of some loads, and sidesteps
/// the need for a 128 bit atomic compare-and-swap operation.
///
/// During insertion, a priority function is used to decide which insertion
/// should be preferred. This ensures that the output is deterministic. For
/// ghashing, lower tpiSrcIdx values (earlier inputs) are preferred.
///
class GHashCell;
struct GHashTable {
  GHashCell *table = nullptr;
  uint32_t tableSize = 0;

  GHashTable() = default;
  ~GHashTable();

  /// Initialize the table with the given size. Because the table cannot be
  /// resized, the initial size of the table must be large enough to contain
  /// all inputs, or insertion may not be able to find an empty cell.
  void init(uint32_t newTableSize);

  /// Insert the cell with the given ghash into the table. Return the insertion
  /// position in the table. It is safe for the caller to store the insertion
  /// position because the table cannot be resized.
  uint32_t insert(COFFLinkerContext &ctx, GloballyHashedType ghash,
                  GHashCell newCell);
};

/// A ghash table cell for deduplicating types from TpiSources.
class GHashCell {
  // Force "data" to be 64-bit aligned; otherwise, some versions of clang
  // will generate calls to libatomic when using some versions of libstdc++
  // on 32-bit targets. (Also, in theory, there could be a target where
  // new[] doesn't always return an 8-byte-aligned allocation.)
  alignas(sizeof(uint64_t)) uint64_t data = 0;

public:
  GHashCell() = default;

  // Construct data most to least significant so that sorting works well:
  // - isItem
  // - tpiSrcIdx
  // - ghashIdx
  // Add one to the tpiSrcIdx so that the 0th record from the 0th source has a
  // non-zero representation.
  GHashCell(bool isItem, uint32_t tpiSrcIdx, uint32_t ghashIdx)
      : data((uint64_t(isItem) << 63U) | (uint64_t(tpiSrcIdx + 1) << 32ULL) |
             ghashIdx) {
    assert(tpiSrcIdx == getTpiSrcIdx() && "round trip failure");
    assert(ghashIdx == getGHashIdx() && "round trip failure");
  }

  explicit GHashCell(uint64_t data) : data(data) {}

  // The empty cell is all zeros.
  bool isEmpty() const { return data == 0ULL; }

  /// Extract the tpiSrcIdx: mask off the isItem bit and undo the +1 bias
  /// applied by the constructor.
  uint32_t getTpiSrcIdx() const {
    return ((uint32_t)(data >> 32U) & 0x7FFFFFFF) - 1;
  }

  /// Extract the index into the ghash array of the TpiSource.
  uint32_t getGHashIdx() const { return (uint32_t)data; }

  bool isItem() const { return data & (1ULL << 63U); }

  /// Get the ghash key for this cell, recovered from the per-source ghash
  /// side table rather than stored in the cell itself.
  GloballyHashedType getGHash(const COFFLinkerContext &ctx) const {
    return ctx.tpiSourceList[getTpiSrcIdx()]->ghashes[getGHashIdx()];
  }

  /// The priority function for the cell. The data is stored such that lower
  /// tpiSrcIdx and ghashIdx values are preferred, which means that type
  /// records from earlier sources are more likely to prevail.
  friend inline bool operator<(const GHashCell &l, const GHashCell &r) {
    return l.data < r.data;
  }
};
} // namespace

namespace lld::coff {
/// This type is just a wrapper around GHashTable with external linkage so it
/// can be used from a header.
struct GHashState {
  GHashTable table;
};
} // namespace lld::coff

GHashTable::~GHashTable() { delete[] table; }

void GHashTable::init(uint32_t newTableSize) {
  // All-zero cells are "empty"; the memset makes that precondition explicit
  // (GHashCell's default member initializer also zeroes new cells).
  table = new GHashCell[newTableSize];
  memset(table, 0, newTableSize * sizeof(GHashCell));
  tableSize = newTableSize;
}

uint32_t GHashTable::insert(COFFLinkerContext &ctx, GloballyHashedType ghash,
                            GHashCell newCell) {
  assert(!newCell.isEmpty() && "cannot insert empty cell value");

  // FIXME: The low bytes of SHA1 have low entropy for short records, which
  // type records are. Swap the byte order for better entropy. A better ghash
  // won't need this.
  uint32_t startIdx =
      llvm::byteswap<uint64_t>(*reinterpret_cast<uint64_t *>(&ghash)) %
      tableSize;

  // Do a linear probe starting at startIdx.
  uint32_t idx = startIdx;
  while (true) {
    // Run a compare and swap loop.
    // There are four cases:
    // - cell is empty: CAS into place and return
    // - cell has matching key, earlier priority: do nothing, return
    // - cell has matching key, later priority: CAS into place and return
    // - cell has non-matching key: hash collision, probe next cell
    auto *cellPtr = reinterpret_cast<std::atomic<GHashCell> *>(&table[idx]);
    GHashCell oldCell(cellPtr->load());
    while (oldCell.isEmpty() || oldCell.getGHash(ctx) == ghash) {
      // Check if there is an existing ghash entry with a higher priority
      // (earlier ordering). If so, this is a duplicate, we are done.
      if (!oldCell.isEmpty() && oldCell < newCell)
        return idx;
      // Either the cell is empty, or our value is higher priority. Try to
      // compare and swap. If it succeeds, we are done.
      if (cellPtr->compare_exchange_weak(oldCell, newCell))
        return idx;
      // If the CAS failed, check this cell again; compare_exchange_weak has
      // reloaded oldCell with the cell's current value.
    }

    // Advance the probe. Wrap around to the beginning if we run off the end.
    ++idx;
    idx = idx == tableSize ? 0 : idx;
    if (idx == startIdx) {
      // If this becomes an issue, we could mark failure and rehash from the
      // beginning with a bigger table. There is no difference between
      // rehashing internally and starting over.
      report_fatal_error("ghash table is full");
    }
  }
  llvm_unreachable("left infloop");
}

TypeMerger::TypeMerger(COFFLinkerContext &c, llvm::BumpPtrAllocator &alloc)
    : typeTable(alloc), idTable(alloc), ctx(c) {}

TypeMerger::~TypeMerger() = default;

void TypeMerger::mergeTypesWithGHash() {
  // Load ghashes. Do type servers and PCH objects first.
  {
    llvm::TimeTraceScope timeScope("Load GHASHes");
    ScopedTimer t1(ctx.loadGHashTimer);
    // Dependency sources (type server PDBs, /Yc PCH objects) are hashed before
    // object sources, because object hashing reads the dependency ghashes
    // (see UsePrecompSource::loadGHashes).
    parallelForEach(dependencySources,
                    [&](TpiSource *source) { source->loadGHashes(); });
    parallelForEach(objectSources,
                    [&](TpiSource *source) { source->loadGHashes(); });
  }

  llvm::TimeTraceScope timeScope("Merge types (GHASH)");
  ScopedTimer t2(ctx.mergeGHashTimer);
  GHashState ghashState;

  // Estimate the size of hash table needed to deduplicate ghashes. This *must*
  // be larger than the number of unique types, or hash table insertion may not
  // be able to find a vacant slot. Summing the input types guarantees this,
  // but it is a gross overestimate. The table size could be reduced to save
  // memory, but it would require implementing rehashing, and this table is
  // generally small compared to total memory usage, at eight bytes per input
  // type record, and most input type records are larger than eight bytes.
  size_t tableSize = 0;
  for (TpiSource *source : ctx.tpiSourceList)
    tableSize += source->ghashes.size();

  // Cap the table size so that we can use 32-bit cell indices. Type indices
  // are also 32-bit, so this is an inherent PDB file format limit anyway.
  tableSize =
      std::min(size_t(INT32_MAX) - TypeIndex::FirstNonSimpleIndex, tableSize);
  ghashState.table.init(static_cast<uint32_t>(tableSize));

  // Insert ghashes in parallel. During concurrent insertion, we cannot observe
  // the contents of the hash table cell, but we can remember the insertion
  // position. Because the table does not rehash, the position will not change
  // under insertion. After insertion is done, the value of the cell can be
  // read to retrieve the final PDB type index.
  parallelFor(0, ctx.tpiSourceList.size(), [&](size_t tpiSrcIdx) {
    TpiSource *source = ctx.tpiSourceList[tpiSrcIdx];
    source->indexMapStorage.resize(source->ghashes.size());
    for (uint32_t i = 0, e = source->ghashes.size(); i < e; i++) {
      // Records excluded from the PDB (e.g. LF_ENDPRECOMP) get a simple
      // "NotTranslated" index so later passes can recognize and skip them.
      if (source->shouldOmitFromPdb(i)) {
        source->indexMapStorage[i] = TypeIndex(SimpleTypeKind::NotTranslated);
        continue;
      }
      GloballyHashedType ghash = source->ghashes[i];
      bool isItem = source->isItemIndex.test(i);
      uint32_t cellIdx =
          ghashState.table.insert(ctx, ghash, GHashCell(isItem, tpiSrcIdx, i));

      // Store the ghash cell index as a type index in indexMapStorage. Later
      // we will replace it with the PDB type index.
      source->indexMapStorage[i] = TypeIndex::fromArrayIndex(cellIdx);
    }
  });

  // Collect all non-empty cells and sort them. This will implicitly assign
  // destination type indices, and partition the entries into type records and
  // item records. It arranges types in this order:
  // - type records
  //   - source 0, type 0...
  //   - source 1, type 1...
  // - item records
  //   - source 0, type 1...
  //   - source 1, type 0...
  std::vector<GHashCell> entries;
  for (const GHashCell &cell : ArrayRef(ghashState.table.table, tableSize)) {
    if (!cell.isEmpty())
      entries.push_back(cell);
  }
  parallelSort(entries, std::less<GHashCell>());
  Log(ctx) << formatv(
      "ghash table load factor: {0:p} (size {1} / capacity {2})\n",
      tableSize ? double(entries.size()) / tableSize : 0, entries.size(),
      tableSize);

  // Find out how many type and item indices there are. The sort above placed
  // all item cells (isItem bit set, the most significant bit) after all type
  // cells, so lower_bound on the smallest possible item cell finds the split.
  auto mid = llvm::lower_bound(entries, GHashCell(true, 0, 0));
  assert((mid == entries.end() || mid->isItem()) &&
         (mid == entries.begin() || !std::prev(mid)->isItem()) &&
         "midpoint is not midpoint");
  uint32_t numTypes = std::distance(entries.begin(), mid);
  uint32_t numItems = std::distance(mid, entries.end());
  Log(ctx) << "Tpi record count: " << numTypes;
  Log(ctx) << "Ipi record count: " << numItems;

  // Make a list of the "unique" type records to merge for each tpi source.
  // Type merging will skip indices not on this list. Store the destination PDB
  // type index for these unique types in the tpiMap for each source. The
  // entries for non-unique types will be filled in prior to type merging.
  for (uint32_t i = 0, e = entries.size(); i < e; ++i) {
    auto &cell = entries[i];
    uint32_t tpiSrcIdx = cell.getTpiSrcIdx();
    TpiSource *source = ctx.tpiSourceList[tpiSrcIdx];
    source->uniqueTypes.push_back(cell.getGHashIdx());

    // Update the ghash table to store the destination PDB type index in the
    // table. TPI and IPI indices are numbered independently, hence the
    // subtraction for item records.
    uint32_t pdbTypeIndex = i < numTypes ? i : i - numTypes;
    uint32_t ghashCellIndex =
        source->indexMapStorage[cell.getGHashIdx()].toArrayIndex();
    ghashState.table.table[ghashCellIndex] =
        GHashCell(cell.isItem(), cell.getTpiSrcIdx(), pdbTypeIndex);
  }

  // In parallel, remap all types. Dependencies are remapped serially first,
  // since objects consult the maps their dependencies publish.
  for (TpiSource *source : dependencySources)
    source->remapTpiWithGHashes(&ghashState);
  parallelForEach(objectSources, [&](TpiSource *source) {
    source->remapTpiWithGHashes(&ghashState);
  });

  // Build a global map from function ID to function type.
  for (TpiSource *source : ctx.tpiSourceList) {
    // Fold each source's local funcIdToType map into the global one, then
    // release the per-source storage.
    for (auto idToType : source->funcIdToType)
      funcIdToType.insert(idToType);
    source->funcIdToType.clear();
  }

  clearGHashes();
}

void TypeMerger::sortDependencies() {
  // Order dependencies first, but preserve the existing order.
  std::vector<TpiSource *> deps;
  std::vector<TpiSource *> objs;
  for (TpiSource *s : ctx.tpiSourceList)
    (s->isDependency() ? deps : objs).push_back(s);
  uint32_t numDeps = deps.size();
  uint32_t numObjs = objs.size();
  ctx.tpiSourceList = std::move(deps);
  ctx.tpiSourceList.insert(ctx.tpiSourceList.end(), objs.begin(), objs.end());
  // Renumber the sources to match their new list positions; tpiSrcIdx is what
  // GHashCell stores and what getGHash uses to index tpiSourceList, so the
  // two must agree.
  for (uint32_t i = 0, e = ctx.tpiSourceList.size(); i < e; ++i)
    ctx.tpiSourceList[i]->tpiSrcIdx = i;
  dependencySources = ArrayRef(ctx.tpiSourceList.data(), numDeps);
  objectSources = ArrayRef(ctx.tpiSourceList.data() + numDeps, numObjs);
}

/// Given the index into the ghash table for a particular type, return the type
/// index for that type in the output PDB.
static TypeIndex loadPdbTypeIndexFromCell(GHashState *g,
                                          uint32_t ghashCellIdx) {
  GHashCell cell = g->table.table[ghashCellIdx];
  // After mergeTypesWithGHash rewrites the winning cells, the ghashIdx field
  // holds the destination PDB type index rather than a ghash array index.
  return TypeIndex::fromArrayIndex(cell.getGHashIdx());
}

/// Free heap allocated ghashes.
void TypeMerger::clearGHashes() {
  for (TpiSource *src : ctx.tpiSourceList) {
    // Only ghash arrays allocated by the source itself are owned; hashes
    // taken from a .debug$H section, for example, are borrowed.
    if (src->ownedGHashes)
      delete[] src->ghashes.data();
    src->ghashes = {};
    src->isItemIndex.clear();
    src->uniqueTypes.clear();
  }
}

// Fill in a TPI or IPI index map using ghashes. For each source type, use its
// ghash to lookup its final type index in the PDB, and store that in the map.
1235 void TpiSource::fillMapFromGHashes(GHashState *g) { 1236 for (size_t i = 0, e = ghashes.size(); i < e; ++i) { 1237 TypeIndex fakeCellIndex = indexMapStorage[i]; 1238 if (fakeCellIndex.isSimple()) 1239 indexMapStorage[i] = fakeCellIndex; 1240 else 1241 indexMapStorage[i] = 1242 loadPdbTypeIndexFromCell(g, fakeCellIndex.toArrayIndex()); 1243 } 1244 } 1245