1 //===- PDBFile.cpp - Low level interface to a PDB file ----------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #include "llvm/DebugInfo/PDB/Native/PDBFile.h" 11 #include "llvm/ADT/ArrayRef.h" 12 #include "llvm/ADT/STLExtras.h" 13 #include "llvm/DebugInfo/MSF/MSFCommon.h" 14 #include "llvm/DebugInfo/MSF/MappedBlockStream.h" 15 #include "llvm/DebugInfo/PDB/Native/DbiStream.h" 16 #include "llvm/DebugInfo/PDB/Native/GlobalsStream.h" 17 #include "llvm/DebugInfo/PDB/Native/InfoStream.h" 18 #include "llvm/DebugInfo/PDB/Native/PDBStringTable.h" 19 #include "llvm/DebugInfo/PDB/Native/PublicsStream.h" 20 #include "llvm/DebugInfo/PDB/Native/RawError.h" 21 #include "llvm/DebugInfo/PDB/Native/SymbolStream.h" 22 #include "llvm/DebugInfo/PDB/Native/TpiStream.h" 23 #include "llvm/Support/BinaryStream.h" 24 #include "llvm/Support/BinaryStreamArray.h" 25 #include "llvm/Support/BinaryStreamReader.h" 26 #include "llvm/Support/Endian.h" 27 #include "llvm/Support/Error.h" 28 #include "llvm/Support/Path.h" 29 #include <algorithm> 30 #include <cassert> 31 #include <cstdint> 32 33 using namespace llvm; 34 using namespace llvm::codeview; 35 using namespace llvm::msf; 36 using namespace llvm::pdb; 37 38 namespace { 39 typedef FixedStreamArray<support::ulittle32_t> ulittle_array; 40 } // end anonymous namespace 41 42 PDBFile::PDBFile(StringRef Path, std::unique_ptr<BinaryStream> PdbFileBuffer, 43 BumpPtrAllocator &Allocator) 44 : FilePath(Path), Allocator(Allocator), Buffer(std::move(PdbFileBuffer)) {} 45 46 PDBFile::~PDBFile() = default; 47 48 StringRef PDBFile::getFilePath() const { return FilePath; } 49 50 StringRef PDBFile::getFileDirectory() const { 51 return sys::path::parent_path(FilePath); 52 } 53 54 uint32_t PDBFile::getBlockSize() const { return ContainerLayout.SB->BlockSize; } 55 56 uint32_t PDBFile::getFreeBlockMapBlock() const { 57 return ContainerLayout.SB->FreeBlockMapBlock; 58 } 59 60 uint32_t PDBFile::getBlockCount() const { 61 return ContainerLayout.SB->NumBlocks; 62 } 63 64 uint32_t PDBFile::getNumDirectoryBytes() const { 65 return ContainerLayout.SB->NumDirectoryBytes; 66 } 67 68 uint32_t PDBFile::getBlockMapIndex() const { 69 return ContainerLayout.SB->BlockMapAddr; 70 } 71 72 uint32_t PDBFile::getUnknown1() const { return ContainerLayout.SB->Unknown1; } 73 74 uint32_t PDBFile::getNumDirectoryBlocks() const { 75 return msf::bytesToBlocks(ContainerLayout.SB->NumDirectoryBytes, 76 ContainerLayout.SB->BlockSize); 77 } 78 79 uint64_t PDBFile::getBlockMapOffset() const { 80 return (uint64_t)ContainerLayout.SB->BlockMapAddr * 81 ContainerLayout.SB->BlockSize; 82 } 83 84 uint32_t PDBFile::getNumStreams() const { 85 return ContainerLayout.StreamSizes.size(); 86 } 87 88 uint32_t PDBFile::getStreamByteSize(uint32_t StreamIndex) const { 89 return ContainerLayout.StreamSizes[StreamIndex]; 90 } 91 92 ArrayRef<support::ulittle32_t> 93 PDBFile::getStreamBlockList(uint32_t StreamIndex) const { 94 return ContainerLayout.StreamMap[StreamIndex]; 95 } 96 97 uint32_t PDBFile::getFileSize() const { return Buffer->getLength(); } 98 99 Expected<ArrayRef<uint8_t>> PDBFile::getBlockData(uint32_t BlockIndex, 100 uint32_t NumBytes) const { 101 uint64_t StreamBlockOffset = msf::blockToOffset(BlockIndex, getBlockSize()); 102 103 ArrayRef<uint8_t> Result; 104 if (auto EC = Buffer->readBytes(StreamBlockOffset, NumBytes, Result)) 105 return std::move(EC); 106 return Result; 107 } 108 109 Error PDBFile::setBlockData(uint32_t BlockIndex, uint32_t Offset, 110 ArrayRef<uint8_t> Data) const { 111 return make_error<RawError>(raw_error_code::not_writable, 112 "PDBFile is immutable"); 113 } 114 115 Error PDBFile::parseFileHeaders() { 116 BinaryStreamReader Reader(*Buffer); 117 118 // Initialize SB. 119 const msf::SuperBlock *SB = nullptr; 120 if (auto EC = Reader.readObject(SB)) { 121 consumeError(std::move(EC)); 122 return make_error<RawError>(raw_error_code::corrupt_file, 123 "Does not contain superblock"); 124 } 125 126 if (auto EC = msf::validateSuperBlock(*SB)) 127 return EC; 128 129 if (Buffer->getLength() % SB->BlockSize != 0) 130 return make_error<RawError>(raw_error_code::corrupt_file, 131 "File size is not a multiple of block size"); 132 ContainerLayout.SB = SB; 133 134 // Initialize Free Page Map. 135 ContainerLayout.FreePageMap.resize(SB->NumBlocks); 136 // The Fpm exists either at block 1 or block 2 of the MSF. However, this 137 // allows for a maximum of getBlockSize() * 8 blocks bits in the Fpm, and 138 // thusly an equal number of total blocks in the file. For a block size 139 // of 4KiB (very common), this would yield 32KiB total blocks in file, for a 140 // maximum file size of 32KiB * 4KiB = 128MiB. Obviously this won't do, so 141 // the Fpm is split across the file at `getBlockSize()` intervals. As a 142 // result, every block whose index is of the form |{1,2} + getBlockSize() * k| 143 // for any non-negative integer k is an Fpm block. In theory, we only really 144 // need to reserve blocks of the form |{1,2} + getBlockSize() * 8 * k|, but 145 // current versions of the MSF format already expect the Fpm to be arranged 146 // at getBlockSize() intervals, so we have to be compatible. 147 // See the function fpmPn() for more information: 148 // https://github.com/Microsoft/microsoft-pdb/blob/master/PDB/msf/msf.cpp#L489 149 auto FpmStream = 150 MappedBlockStream::createFpmStream(ContainerLayout, *Buffer, Allocator); 151 BinaryStreamReader FpmReader(*FpmStream); 152 ArrayRef<uint8_t> FpmBytes; 153 if (auto EC = FpmReader.readBytes(FpmBytes, 154 msf::getFullFpmByteSize(ContainerLayout))) 155 return EC; 156 uint32_t BlocksRemaining = getBlockCount(); 157 uint32_t BI = 0; 158 for (auto Byte : FpmBytes) { 159 uint32_t BlocksThisByte = std::min(BlocksRemaining, 8U); 160 for (uint32_t I = 0; I < BlocksThisByte; ++I) { 161 if (Byte & (1 << I)) 162 ContainerLayout.FreePageMap[BI] = true; 163 --BlocksRemaining; 164 ++BI; 165 } 166 } 167 168 Reader.setOffset(getBlockMapOffset()); 169 if (auto EC = Reader.readArray(ContainerLayout.DirectoryBlocks, 170 getNumDirectoryBlocks())) 171 return EC; 172 173 return Error::success(); 174 } 175 176 Error PDBFile::parseStreamData() { 177 assert(ContainerLayout.SB); 178 if (DirectoryStream) 179 return Error::success(); 180 181 uint32_t NumStreams = 0; 182 183 // Normally you can't use a MappedBlockStream without having fully parsed the 184 // PDB file, because it accesses the directory and various other things, which 185 // is exactly what we are attempting to parse. By specifying a custom 186 // subclass of IPDBStreamData which only accesses the fields that have already 187 // been parsed, we can avoid this and reuse MappedBlockStream. 188 auto DS = MappedBlockStream::createDirectoryStream(ContainerLayout, *Buffer, 189 Allocator); 190 BinaryStreamReader Reader(*DS); 191 if (auto EC = Reader.readInteger(NumStreams)) 192 return EC; 193 194 if (auto EC = Reader.readArray(ContainerLayout.StreamSizes, NumStreams)) 195 return EC; 196 for (uint32_t I = 0; I < NumStreams; ++I) { 197 uint32_t StreamSize = getStreamByteSize(I); 198 // FIXME: What does StreamSize ~0U mean? 199 uint64_t NumExpectedStreamBlocks = 200 StreamSize == UINT32_MAX 201 ? 0 202 : msf::bytesToBlocks(StreamSize, ContainerLayout.SB->BlockSize); 203 204 // For convenience, we store the block array contiguously. This is because 205 // if someone calls setStreamMap(), it is more convenient to be able to call 206 // it with an ArrayRef instead of setting up a StreamRef. Since the 207 // DirectoryStream is cached in the class and thus lives for the life of the 208 // class, we can be guaranteed that readArray() will return a stable 209 // reference, even if it has to allocate from its internal pool. 210 ArrayRef<support::ulittle32_t> Blocks; 211 if (auto EC = Reader.readArray(Blocks, NumExpectedStreamBlocks)) 212 return EC; 213 for (uint32_t Block : Blocks) { 214 uint64_t BlockEndOffset = 215 (uint64_t)(Block + 1) * ContainerLayout.SB->BlockSize; 216 if (BlockEndOffset > getFileSize()) 217 return make_error<RawError>(raw_error_code::corrupt_file, 218 "Stream block map is corrupt."); 219 } 220 ContainerLayout.StreamMap.push_back(Blocks); 221 } 222 223 // We should have read exactly SB->NumDirectoryBytes bytes. 224 assert(Reader.bytesRemaining() == 0); 225 DirectoryStream = std::move(DS); 226 return Error::success(); 227 } 228 229 ArrayRef<support::ulittle32_t> PDBFile::getDirectoryBlockArray() const { 230 return ContainerLayout.DirectoryBlocks; 231 } 232 233 MSFStreamLayout PDBFile::getStreamLayout(uint32_t StreamIdx) const { 234 MSFStreamLayout Result; 235 auto Blocks = getStreamBlockList(StreamIdx); 236 Result.Blocks.assign(Blocks.begin(), Blocks.end()); 237 Result.Length = getStreamByteSize(StreamIdx); 238 return Result; 239 } 240 241 Expected<GlobalsStream &> PDBFile::getPDBGlobalsStream() { 242 if (!Globals) { 243 auto DbiS = getPDBDbiStream(); 244 if (!DbiS) 245 return DbiS.takeError(); 246 247 auto GlobalS = safelyCreateIndexedStream( 248 ContainerLayout, *Buffer, DbiS->getGlobalSymbolStreamIndex()); 249 if (!GlobalS) 250 return GlobalS.takeError(); 251 auto TempGlobals = llvm::make_unique<GlobalsStream>(std::move(*GlobalS)); 252 if (auto EC = TempGlobals->reload()) 253 return std::move(EC); 254 Globals = std::move(TempGlobals); 255 } 256 return *Globals; 257 } 258 259 Expected<InfoStream &> PDBFile::getPDBInfoStream() { 260 if (!Info) { 261 auto InfoS = safelyCreateIndexedStream(ContainerLayout, *Buffer, StreamPDB); 262 if (!InfoS) 263 return InfoS.takeError(); 264 auto TempInfo = llvm::make_unique<InfoStream>(std::move(*InfoS)); 265 if (auto EC = TempInfo->reload()) 266 return std::move(EC); 267 Info = std::move(TempInfo); 268 } 269 return *Info; 270 } 271 272 Expected<DbiStream &> PDBFile::getPDBDbiStream() { 273 if (!Dbi) { 274 auto DbiS = safelyCreateIndexedStream(ContainerLayout, *Buffer, StreamDBI); 275 if (!DbiS) 276 return DbiS.takeError(); 277 auto TempDbi = llvm::make_unique<DbiStream>(*this, std::move(*DbiS)); 278 if (auto EC = TempDbi->reload()) 279 return std::move(EC); 280 Dbi = std::move(TempDbi); 281 } 282 return *Dbi; 283 } 284 285 Expected<TpiStream &> PDBFile::getPDBTpiStream() { 286 if (!Tpi) { 287 auto TpiS = safelyCreateIndexedStream(ContainerLayout, *Buffer, StreamTPI); 288 if (!TpiS) 289 return TpiS.takeError(); 290 auto TempTpi = llvm::make_unique<TpiStream>(*this, std::move(*TpiS)); 291 if (auto EC = TempTpi->reload()) 292 return std::move(EC); 293 Tpi = std::move(TempTpi); 294 } 295 return *Tpi; 296 } 297 298 Expected<TpiStream &> PDBFile::getPDBIpiStream() { 299 if (!Ipi) { 300 auto IpiS = safelyCreateIndexedStream(ContainerLayout, *Buffer, StreamIPI); 301 if (!IpiS) 302 return IpiS.takeError(); 303 auto TempIpi = llvm::make_unique<TpiStream>(*this, std::move(*IpiS)); 304 if (auto EC = TempIpi->reload()) 305 return std::move(EC); 306 Ipi = std::move(TempIpi); 307 } 308 return *Ipi; 309 } 310 311 Expected<PublicsStream &> PDBFile::getPDBPublicsStream() { 312 if (!Publics) { 313 auto DbiS = getPDBDbiStream(); 314 if (!DbiS) 315 return DbiS.takeError(); 316 317 auto PublicS = safelyCreateIndexedStream( 318 ContainerLayout, *Buffer, DbiS->getPublicSymbolStreamIndex()); 319 if (!PublicS) 320 return PublicS.takeError(); 321 auto TempPublics = 322 llvm::make_unique<PublicsStream>(*this, std::move(*PublicS)); 323 if (auto EC = TempPublics->reload()) 324 return std::move(EC); 325 Publics = std::move(TempPublics); 326 } 327 return *Publics; 328 } 329 330 Expected<SymbolStream &> PDBFile::getPDBSymbolStream() { 331 if (!Symbols) { 332 auto DbiS = getPDBDbiStream(); 333 if (!DbiS) 334 return DbiS.takeError(); 335 336 uint32_t SymbolStreamNum = DbiS->getSymRecordStreamIndex(); 337 auto SymbolS = 338 safelyCreateIndexedStream(ContainerLayout, *Buffer, SymbolStreamNum); 339 if (!SymbolS) 340 return SymbolS.takeError(); 341 342 auto TempSymbols = llvm::make_unique<SymbolStream>(std::move(*SymbolS)); 343 if (auto EC = TempSymbols->reload()) 344 return std::move(EC); 345 Symbols = std::move(TempSymbols); 346 } 347 return *Symbols; 348 } 349 350 Expected<PDBStringTable &> PDBFile::getStringTable() { 351 if (!Strings) { 352 auto IS = getPDBInfoStream(); 353 if (!IS) 354 return IS.takeError(); 355 356 uint32_t NameStreamIndex = IS->getNamedStreamIndex("/names"); 357 358 auto NS = 359 safelyCreateIndexedStream(ContainerLayout, *Buffer, NameStreamIndex); 360 if (!NS) 361 return NS.takeError(); 362 363 auto N = llvm::make_unique<PDBStringTable>(); 364 BinaryStreamReader Reader(**NS); 365 if (auto EC = N->reload(Reader)) 366 return std::move(EC); 367 assert(Reader.bytesRemaining() == 0); 368 StringTableStream = std::move(*NS); 369 Strings = std::move(N); 370 } 371 return *Strings; 372 } 373 374 uint32_t PDBFile::getPointerSize() { 375 auto DbiS = getPDBDbiStream(); 376 if (!DbiS) 377 return 0; 378 PDB_Machine Machine = DbiS->getMachineType(); 379 if (Machine == PDB_Machine::Amd64) 380 return 8; 381 return 4; 382 } 383 384 bool PDBFile::hasPDBDbiStream() const { return StreamDBI < getNumStreams(); } 385 386 bool PDBFile::hasPDBGlobalsStream() { 387 auto DbiS = getPDBDbiStream(); 388 if (!DbiS) { 389 consumeError(DbiS.takeError()); 390 return false; 391 } 392 393 return DbiS->getGlobalSymbolStreamIndex() < getNumStreams(); 394 } 395 396 bool PDBFile::hasPDBInfoStream() { return StreamPDB < getNumStreams(); } 397 398 bool PDBFile::hasPDBIpiStream() const { return StreamIPI < getNumStreams(); } 399 400 bool PDBFile::hasPDBPublicsStream() { 401 auto DbiS = getPDBDbiStream(); 402 if (!DbiS) { 403 consumeError(DbiS.takeError()); 404 return false; 405 } 406 return DbiS->getPublicSymbolStreamIndex() < getNumStreams(); 407 } 408 409 bool PDBFile::hasPDBSymbolStream() { 410 auto DbiS = getPDBDbiStream(); 411 if (!DbiS) 412 return false; 413 return DbiS->getSymRecordStreamIndex() < getNumStreams(); 414 } 415 416 bool PDBFile::hasPDBTpiStream() const { return StreamTPI < getNumStreams(); } 417 418 bool PDBFile::hasPDBStringTable() { 419 auto IS = getPDBInfoStream(); 420 if (!IS) 421 return false; 422 return IS->getNamedStreamIndex("/names") < getNumStreams(); 423 } 424 425 /// Wrapper around MappedBlockStream::createIndexedStream() that checks if a 426 /// stream with that index actually exists. If it does not, the return value 427 /// will have an MSFError with code msf_error_code::no_stream. Else, the return 428 /// value will contain the stream returned by createIndexedStream(). 429 Expected<std::unique_ptr<MappedBlockStream>> 430 PDBFile::safelyCreateIndexedStream(const MSFLayout &Layout, 431 BinaryStreamRef MsfData, 432 uint32_t StreamIndex) const { 433 if (StreamIndex >= getNumStreams()) 434 return make_error<RawError>(raw_error_code::no_stream); 435 return MappedBlockStream::createIndexedStream(Layout, MsfData, StreamIndex, 436 Allocator); 437 } 438