1 //===- PDBFile.cpp - Low level interface to a PDB file ----------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "llvm/DebugInfo/PDB/Native/PDBFile.h" 10 #include "llvm/ADT/ArrayRef.h" 11 #include "llvm/ADT/STLExtras.h" 12 #include "llvm/DebugInfo/MSF/MSFCommon.h" 13 #include "llvm/DebugInfo/MSF/MappedBlockStream.h" 14 #include "llvm/DebugInfo/PDB/Native/DbiStream.h" 15 #include "llvm/DebugInfo/PDB/Native/GlobalsStream.h" 16 #include "llvm/DebugInfo/PDB/Native/InfoStream.h" 17 #include "llvm/DebugInfo/PDB/Native/PDBStringTable.h" 18 #include "llvm/DebugInfo/PDB/Native/PublicsStream.h" 19 #include "llvm/DebugInfo/PDB/Native/RawError.h" 20 #include "llvm/DebugInfo/PDB/Native/SymbolStream.h" 21 #include "llvm/DebugInfo/PDB/Native/TpiStream.h" 22 #include "llvm/Support/BinaryStream.h" 23 #include "llvm/Support/BinaryStreamArray.h" 24 #include "llvm/Support/BinaryStreamReader.h" 25 #include "llvm/Support/Endian.h" 26 #include "llvm/Support/Error.h" 27 #include "llvm/Support/Path.h" 28 #include <algorithm> 29 #include <cassert> 30 #include <cstdint> 31 32 using namespace llvm; 33 using namespace llvm::codeview; 34 using namespace llvm::msf; 35 using namespace llvm::pdb; 36 37 namespace { 38 typedef FixedStreamArray<support::ulittle32_t> ulittle_array; 39 } // end anonymous namespace 40 41 PDBFile::PDBFile(StringRef Path, std::unique_ptr<BinaryStream> PdbFileBuffer, 42 BumpPtrAllocator &Allocator) 43 : FilePath(Path), Allocator(Allocator), Buffer(std::move(PdbFileBuffer)) {} 44 45 PDBFile::~PDBFile() = default; 46 47 StringRef PDBFile::getFilePath() const { return FilePath; } 48 49 StringRef PDBFile::getFileDirectory() const { 50 return sys::path::parent_path(FilePath); 51 } 52 53 uint32_t PDBFile::getBlockSize() const { return ContainerLayout.SB->BlockSize; } 54 55 uint32_t PDBFile::getFreeBlockMapBlock() const { 56 return ContainerLayout.SB->FreeBlockMapBlock; 57 } 58 59 uint32_t PDBFile::getBlockCount() const { 60 return ContainerLayout.SB->NumBlocks; 61 } 62 63 uint32_t PDBFile::getNumDirectoryBytes() const { 64 return ContainerLayout.SB->NumDirectoryBytes; 65 } 66 67 uint32_t PDBFile::getBlockMapIndex() const { 68 return ContainerLayout.SB->BlockMapAddr; 69 } 70 71 uint32_t PDBFile::getUnknown1() const { return ContainerLayout.SB->Unknown1; } 72 73 uint32_t PDBFile::getNumDirectoryBlocks() const { 74 return msf::bytesToBlocks(ContainerLayout.SB->NumDirectoryBytes, 75 ContainerLayout.SB->BlockSize); 76 } 77 78 uint64_t PDBFile::getBlockMapOffset() const { 79 return (uint64_t)ContainerLayout.SB->BlockMapAddr * 80 ContainerLayout.SB->BlockSize; 81 } 82 83 uint32_t PDBFile::getNumStreams() const { 84 return ContainerLayout.StreamSizes.size(); 85 } 86 87 uint32_t PDBFile::getMaxStreamSize() const { 88 return *std::max_element(ContainerLayout.StreamSizes.begin(), 89 ContainerLayout.StreamSizes.end()); 90 } 91 92 uint32_t PDBFile::getStreamByteSize(uint32_t StreamIndex) const { 93 return ContainerLayout.StreamSizes[StreamIndex]; 94 } 95 96 ArrayRef<support::ulittle32_t> 97 PDBFile::getStreamBlockList(uint32_t StreamIndex) const { 98 return ContainerLayout.StreamMap[StreamIndex]; 99 } 100 101 uint32_t PDBFile::getFileSize() const { return Buffer->getLength(); } 102 103 Expected<ArrayRef<uint8_t>> PDBFile::getBlockData(uint32_t BlockIndex, 104 uint32_t NumBytes) const { 105 uint64_t StreamBlockOffset = msf::blockToOffset(BlockIndex, getBlockSize()); 106 107 ArrayRef<uint8_t> Result; 108 if (auto EC = Buffer->readBytes(StreamBlockOffset, NumBytes, Result)) 109 return std::move(EC); 110 return Result; 111 } 112 113 Error PDBFile::setBlockData(uint32_t BlockIndex, uint32_t Offset, 114 ArrayRef<uint8_t> Data) const { 115 return make_error<RawError>(raw_error_code::not_writable, 116 "PDBFile is immutable"); 117 } 118 119 Error PDBFile::parseFileHeaders() { 120 BinaryStreamReader Reader(*Buffer); 121 122 // Initialize SB. 123 const msf::SuperBlock *SB = nullptr; 124 if (auto EC = Reader.readObject(SB)) { 125 consumeError(std::move(EC)); 126 return make_error<RawError>(raw_error_code::corrupt_file, 127 "MSF superblock is missing"); 128 } 129 130 if (auto EC = msf::validateSuperBlock(*SB)) 131 return EC; 132 133 if (Buffer->getLength() % SB->BlockSize != 0) 134 return make_error<RawError>(raw_error_code::corrupt_file, 135 "File size is not a multiple of block size"); 136 ContainerLayout.SB = SB; 137 138 // Initialize Free Page Map. 139 ContainerLayout.FreePageMap.resize(SB->NumBlocks); 140 // The Fpm exists either at block 1 or block 2 of the MSF. However, this 141 // allows for a maximum of getBlockSize() * 8 blocks bits in the Fpm, and 142 // thusly an equal number of total blocks in the file. For a block size 143 // of 4KiB (very common), this would yield 32KiB total blocks in file, for a 144 // maximum file size of 32KiB * 4KiB = 128MiB. Obviously this won't do, so 145 // the Fpm is split across the file at `getBlockSize()` intervals. As a 146 // result, every block whose index is of the form |{1,2} + getBlockSize() * k| 147 // for any non-negative integer k is an Fpm block. In theory, we only really 148 // need to reserve blocks of the form |{1,2} + getBlockSize() * 8 * k|, but 149 // current versions of the MSF format already expect the Fpm to be arranged 150 // at getBlockSize() intervals, so we have to be compatible. 151 // See the function fpmPn() for more information: 152 // https://github.com/Microsoft/microsoft-pdb/blob/master/PDB/msf/msf.cpp#L489 153 auto FpmStream = 154 MappedBlockStream::createFpmStream(ContainerLayout, *Buffer, Allocator); 155 BinaryStreamReader FpmReader(*FpmStream); 156 ArrayRef<uint8_t> FpmBytes; 157 if (auto EC = FpmReader.readBytes(FpmBytes, FpmReader.bytesRemaining())) 158 return EC; 159 uint32_t BlocksRemaining = getBlockCount(); 160 uint32_t BI = 0; 161 for (auto Byte : FpmBytes) { 162 uint32_t BlocksThisByte = std::min(BlocksRemaining, 8U); 163 for (uint32_t I = 0; I < BlocksThisByte; ++I) { 164 if (Byte & (1 << I)) 165 ContainerLayout.FreePageMap[BI] = true; 166 --BlocksRemaining; 167 ++BI; 168 } 169 } 170 171 Reader.setOffset(getBlockMapOffset()); 172 if (auto EC = Reader.readArray(ContainerLayout.DirectoryBlocks, 173 getNumDirectoryBlocks())) 174 return EC; 175 176 return Error::success(); 177 } 178 179 Error PDBFile::parseStreamData() { 180 assert(ContainerLayout.SB); 181 if (DirectoryStream) 182 return Error::success(); 183 184 uint32_t NumStreams = 0; 185 186 // Normally you can't use a MappedBlockStream without having fully parsed the 187 // PDB file, because it accesses the directory and various other things, which 188 // is exactly what we are attempting to parse. By specifying a custom 189 // subclass of IPDBStreamData which only accesses the fields that have already 190 // been parsed, we can avoid this and reuse MappedBlockStream. 191 auto DS = MappedBlockStream::createDirectoryStream(ContainerLayout, *Buffer, 192 Allocator); 193 BinaryStreamReader Reader(*DS); 194 if (auto EC = Reader.readInteger(NumStreams)) 195 return EC; 196 197 if (auto EC = Reader.readArray(ContainerLayout.StreamSizes, NumStreams)) 198 return EC; 199 for (uint32_t I = 0; I < NumStreams; ++I) { 200 uint32_t StreamSize = getStreamByteSize(I); 201 // FIXME: What does StreamSize ~0U mean? 202 uint64_t NumExpectedStreamBlocks = 203 StreamSize == UINT32_MAX 204 ? 0 205 : msf::bytesToBlocks(StreamSize, ContainerLayout.SB->BlockSize); 206 207 // For convenience, we store the block array contiguously. This is because 208 // if someone calls setStreamMap(), it is more convenient to be able to call 209 // it with an ArrayRef instead of setting up a StreamRef. Since the 210 // DirectoryStream is cached in the class and thus lives for the life of the 211 // class, we can be guaranteed that readArray() will return a stable 212 // reference, even if it has to allocate from its internal pool. 213 ArrayRef<support::ulittle32_t> Blocks; 214 if (auto EC = Reader.readArray(Blocks, NumExpectedStreamBlocks)) 215 return EC; 216 for (uint32_t Block : Blocks) { 217 uint64_t BlockEndOffset = 218 (uint64_t)(Block + 1) * ContainerLayout.SB->BlockSize; 219 if (BlockEndOffset > getFileSize()) 220 return make_error<RawError>(raw_error_code::corrupt_file, 221 "Stream block map is corrupt."); 222 } 223 ContainerLayout.StreamMap.push_back(Blocks); 224 } 225 226 // We should have read exactly SB->NumDirectoryBytes bytes. 227 assert(Reader.bytesRemaining() == 0); 228 DirectoryStream = std::move(DS); 229 return Error::success(); 230 } 231 232 ArrayRef<support::ulittle32_t> PDBFile::getDirectoryBlockArray() const { 233 return ContainerLayout.DirectoryBlocks; 234 } 235 236 std::unique_ptr<MappedBlockStream> PDBFile::createIndexedStream(uint16_t SN) { 237 if (SN == kInvalidStreamIndex) 238 return nullptr; 239 return MappedBlockStream::createIndexedStream(ContainerLayout, *Buffer, SN, 240 Allocator); 241 } 242 243 MSFStreamLayout PDBFile::getStreamLayout(uint32_t StreamIdx) const { 244 MSFStreamLayout Result; 245 auto Blocks = getStreamBlockList(StreamIdx); 246 Result.Blocks.assign(Blocks.begin(), Blocks.end()); 247 Result.Length = getStreamByteSize(StreamIdx); 248 return Result; 249 } 250 251 msf::MSFStreamLayout PDBFile::getFpmStreamLayout() const { 252 return msf::getFpmStreamLayout(ContainerLayout); 253 } 254 255 Expected<GlobalsStream &> PDBFile::getPDBGlobalsStream() { 256 if (!Globals) { 257 auto DbiS = getPDBDbiStream(); 258 if (!DbiS) 259 return DbiS.takeError(); 260 261 auto GlobalS = safelyCreateIndexedStream( 262 ContainerLayout, *Buffer, DbiS->getGlobalSymbolStreamIndex()); 263 if (!GlobalS) 264 return GlobalS.takeError(); 265 auto TempGlobals = llvm::make_unique<GlobalsStream>(std::move(*GlobalS)); 266 if (auto EC = TempGlobals->reload()) 267 return std::move(EC); 268 Globals = std::move(TempGlobals); 269 } 270 return *Globals; 271 } 272 273 Expected<InfoStream &> PDBFile::getPDBInfoStream() { 274 if (!Info) { 275 auto InfoS = safelyCreateIndexedStream(ContainerLayout, *Buffer, StreamPDB); 276 if (!InfoS) 277 return InfoS.takeError(); 278 auto TempInfo = llvm::make_unique<InfoStream>(std::move(*InfoS)); 279 if (auto EC = TempInfo->reload()) 280 return std::move(EC); 281 Info = std::move(TempInfo); 282 } 283 return *Info; 284 } 285 286 Expected<DbiStream &> PDBFile::getPDBDbiStream() { 287 if (!Dbi) { 288 auto DbiS = safelyCreateIndexedStream(ContainerLayout, *Buffer, StreamDBI); 289 if (!DbiS) 290 return DbiS.takeError(); 291 auto TempDbi = llvm::make_unique<DbiStream>(std::move(*DbiS)); 292 if (auto EC = TempDbi->reload(this)) 293 return std::move(EC); 294 Dbi = std::move(TempDbi); 295 } 296 return *Dbi; 297 } 298 299 Expected<TpiStream &> PDBFile::getPDBTpiStream() { 300 if (!Tpi) { 301 auto TpiS = safelyCreateIndexedStream(ContainerLayout, *Buffer, StreamTPI); 302 if (!TpiS) 303 return TpiS.takeError(); 304 auto TempTpi = llvm::make_unique<TpiStream>(*this, std::move(*TpiS)); 305 if (auto EC = TempTpi->reload()) 306 return std::move(EC); 307 Tpi = std::move(TempTpi); 308 } 309 return *Tpi; 310 } 311 312 Expected<TpiStream &> PDBFile::getPDBIpiStream() { 313 if (!Ipi) { 314 if (!hasPDBIpiStream()) 315 return make_error<RawError>(raw_error_code::no_stream); 316 317 auto IpiS = safelyCreateIndexedStream(ContainerLayout, *Buffer, StreamIPI); 318 if (!IpiS) 319 return IpiS.takeError(); 320 auto TempIpi = llvm::make_unique<TpiStream>(*this, std::move(*IpiS)); 321 if (auto EC = TempIpi->reload()) 322 return std::move(EC); 323 Ipi = std::move(TempIpi); 324 } 325 return *Ipi; 326 } 327 328 Expected<PublicsStream &> PDBFile::getPDBPublicsStream() { 329 if (!Publics) { 330 auto DbiS = getPDBDbiStream(); 331 if (!DbiS) 332 return DbiS.takeError(); 333 334 auto PublicS = safelyCreateIndexedStream( 335 ContainerLayout, *Buffer, DbiS->getPublicSymbolStreamIndex()); 336 if (!PublicS) 337 return PublicS.takeError(); 338 auto TempPublics = llvm::make_unique<PublicsStream>(std::move(*PublicS)); 339 if (auto EC = TempPublics->reload()) 340 return std::move(EC); 341 Publics = std::move(TempPublics); 342 } 343 return *Publics; 344 } 345 346 Expected<SymbolStream &> PDBFile::getPDBSymbolStream() { 347 if (!Symbols) { 348 auto DbiS = getPDBDbiStream(); 349 if (!DbiS) 350 return DbiS.takeError(); 351 352 uint32_t SymbolStreamNum = DbiS->getSymRecordStreamIndex(); 353 auto SymbolS = 354 safelyCreateIndexedStream(ContainerLayout, *Buffer, SymbolStreamNum); 355 if (!SymbolS) 356 return SymbolS.takeError(); 357 358 auto TempSymbols = llvm::make_unique<SymbolStream>(std::move(*SymbolS)); 359 if (auto EC = TempSymbols->reload()) 360 return std::move(EC); 361 Symbols = std::move(TempSymbols); 362 } 363 return *Symbols; 364 } 365 366 Expected<PDBStringTable &> PDBFile::getStringTable() { 367 if (!Strings) { 368 auto IS = getPDBInfoStream(); 369 if (!IS) 370 return IS.takeError(); 371 372 Expected<uint32_t> ExpectedNSI = IS->getNamedStreamIndex("/names"); 373 if (!ExpectedNSI) 374 return ExpectedNSI.takeError(); 375 uint32_t NameStreamIndex = *ExpectedNSI; 376 377 auto NS = 378 safelyCreateIndexedStream(ContainerLayout, *Buffer, NameStreamIndex); 379 if (!NS) 380 return NS.takeError(); 381 382 auto N = llvm::make_unique<PDBStringTable>(); 383 BinaryStreamReader Reader(**NS); 384 if (auto EC = N->reload(Reader)) 385 return std::move(EC); 386 assert(Reader.bytesRemaining() == 0); 387 StringTableStream = std::move(*NS); 388 Strings = std::move(N); 389 } 390 return *Strings; 391 } 392 393 uint32_t PDBFile::getPointerSize() { 394 auto DbiS = getPDBDbiStream(); 395 if (!DbiS) 396 return 0; 397 PDB_Machine Machine = DbiS->getMachineType(); 398 if (Machine == PDB_Machine::Amd64) 399 return 8; 400 return 4; 401 } 402 403 bool PDBFile::hasPDBDbiStream() const { 404 return StreamDBI < getNumStreams() && getStreamByteSize(StreamDBI) > 0; 405 } 406 407 bool PDBFile::hasPDBGlobalsStream() { 408 auto DbiS = getPDBDbiStream(); 409 if (!DbiS) { 410 consumeError(DbiS.takeError()); 411 return false; 412 } 413 414 return DbiS->getGlobalSymbolStreamIndex() < getNumStreams(); 415 } 416 417 bool PDBFile::hasPDBInfoStream() const { return StreamPDB < getNumStreams(); } 418 419 bool PDBFile::hasPDBIpiStream() const { 420 if (!hasPDBInfoStream()) 421 return false; 422 423 if (StreamIPI >= getNumStreams()) 424 return false; 425 426 auto &InfoStream = cantFail(const_cast<PDBFile *>(this)->getPDBInfoStream()); 427 return InfoStream.containsIdStream(); 428 } 429 430 bool PDBFile::hasPDBPublicsStream() { 431 auto DbiS = getPDBDbiStream(); 432 if (!DbiS) { 433 consumeError(DbiS.takeError()); 434 return false; 435 } 436 return DbiS->getPublicSymbolStreamIndex() < getNumStreams(); 437 } 438 439 bool PDBFile::hasPDBSymbolStream() { 440 auto DbiS = getPDBDbiStream(); 441 if (!DbiS) 442 return false; 443 return DbiS->getSymRecordStreamIndex() < getNumStreams(); 444 } 445 446 bool PDBFile::hasPDBTpiStream() const { return StreamTPI < getNumStreams(); } 447 448 bool PDBFile::hasPDBStringTable() { 449 auto IS = getPDBInfoStream(); 450 if (!IS) 451 return false; 452 Expected<uint32_t> ExpectedNSI = IS->getNamedStreamIndex("/names"); 453 if (!ExpectedNSI) { 454 consumeError(ExpectedNSI.takeError()); 455 return false; 456 } 457 assert(*ExpectedNSI < getNumStreams()); 458 return true; 459 } 460 461 /// Wrapper around MappedBlockStream::createIndexedStream() that checks if a 462 /// stream with that index actually exists. If it does not, the return value 463 /// will have an MSFError with code msf_error_code::no_stream. Else, the return 464 /// value will contain the stream returned by createIndexedStream(). 465 Expected<std::unique_ptr<MappedBlockStream>> 466 PDBFile::safelyCreateIndexedStream(const MSFLayout &Layout, 467 BinaryStreamRef MsfData, 468 uint32_t StreamIndex) const { 469 if (StreamIndex >= getNumStreams()) 470 return make_error<RawError>(raw_error_code::no_stream); 471 return MappedBlockStream::createIndexedStream(Layout, MsfData, StreamIndex, 472 Allocator); 473 } 474