xref: /llvm-project/llvm/lib/DebugInfo/PDB/Native/PDBFile.cpp (revision 2946cd701067404b99c39fb29dc9c74bd7193eb3)
1 //===- PDBFile.cpp - Low level interface to a PDB file ----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "llvm/DebugInfo/PDB/Native/PDBFile.h"
10 #include "llvm/ADT/ArrayRef.h"
11 #include "llvm/ADT/STLExtras.h"
12 #include "llvm/DebugInfo/MSF/MSFCommon.h"
13 #include "llvm/DebugInfo/MSF/MappedBlockStream.h"
14 #include "llvm/DebugInfo/PDB/Native/DbiStream.h"
15 #include "llvm/DebugInfo/PDB/Native/GlobalsStream.h"
16 #include "llvm/DebugInfo/PDB/Native/InfoStream.h"
17 #include "llvm/DebugInfo/PDB/Native/PDBStringTable.h"
18 #include "llvm/DebugInfo/PDB/Native/PublicsStream.h"
19 #include "llvm/DebugInfo/PDB/Native/RawError.h"
20 #include "llvm/DebugInfo/PDB/Native/SymbolStream.h"
21 #include "llvm/DebugInfo/PDB/Native/TpiStream.h"
22 #include "llvm/Support/BinaryStream.h"
23 #include "llvm/Support/BinaryStreamArray.h"
24 #include "llvm/Support/BinaryStreamReader.h"
25 #include "llvm/Support/Endian.h"
26 #include "llvm/Support/Error.h"
27 #include "llvm/Support/Path.h"
28 #include <algorithm>
29 #include <cassert>
30 #include <cstdint>
31 
32 using namespace llvm;
33 using namespace llvm::codeview;
34 using namespace llvm::msf;
35 using namespace llvm::pdb;
36 
37 namespace {
38 typedef FixedStreamArray<support::ulittle32_t> ulittle_array;
39 } // end anonymous namespace
40 
/// Constructs a PDBFile over an already-opened stream.
///
/// \param Path path stored in FilePath and later reported by getFilePath().
/// \param PdbFileBuffer stream holding the file contents; ownership is moved
///        into this object.
/// \param Allocator allocator stored by reference and handed to the
///        MappedBlockStream factories used elsewhere in this file.
///
/// No parsing happens here; see parseFileHeaders() and parseStreamData().
PDBFile::PDBFile(StringRef Path, std::unique_ptr<BinaryStream> PdbFileBuffer,
                 BumpPtrAllocator &Allocator)
    : FilePath(Path), Allocator(Allocator), Buffer(std::move(PdbFileBuffer)) {}
44 
// The destructor is defaulted here, out of line, rather than in the header.
// NOTE(review): presumably so that members holding stream types need only be
// complete in this translation unit -- confirm against PDBFile.h.
PDBFile::~PDBFile() = default;
46 
47 StringRef PDBFile::getFilePath() const { return FilePath; }
48 
49 StringRef PDBFile::getFileDirectory() const {
50   return sys::path::parent_path(FilePath);
51 }
52 
53 uint32_t PDBFile::getBlockSize() const { return ContainerLayout.SB->BlockSize; }
54 
55 uint32_t PDBFile::getFreeBlockMapBlock() const {
56   return ContainerLayout.SB->FreeBlockMapBlock;
57 }
58 
59 uint32_t PDBFile::getBlockCount() const {
60   return ContainerLayout.SB->NumBlocks;
61 }
62 
63 uint32_t PDBFile::getNumDirectoryBytes() const {
64   return ContainerLayout.SB->NumDirectoryBytes;
65 }
66 
67 uint32_t PDBFile::getBlockMapIndex() const {
68   return ContainerLayout.SB->BlockMapAddr;
69 }
70 
71 uint32_t PDBFile::getUnknown1() const { return ContainerLayout.SB->Unknown1; }
72 
73 uint32_t PDBFile::getNumDirectoryBlocks() const {
74   return msf::bytesToBlocks(ContainerLayout.SB->NumDirectoryBytes,
75                             ContainerLayout.SB->BlockSize);
76 }
77 
78 uint64_t PDBFile::getBlockMapOffset() const {
79   return (uint64_t)ContainerLayout.SB->BlockMapAddr *
80          ContainerLayout.SB->BlockSize;
81 }
82 
83 uint32_t PDBFile::getNumStreams() const {
84   return ContainerLayout.StreamSizes.size();
85 }
86 
87 uint32_t PDBFile::getMaxStreamSize() const {
88   return *std::max_element(ContainerLayout.StreamSizes.begin(),
89                            ContainerLayout.StreamSizes.end());
90 }
91 
92 uint32_t PDBFile::getStreamByteSize(uint32_t StreamIndex) const {
93   return ContainerLayout.StreamSizes[StreamIndex];
94 }
95 
96 ArrayRef<support::ulittle32_t>
97 PDBFile::getStreamBlockList(uint32_t StreamIndex) const {
98   return ContainerLayout.StreamMap[StreamIndex];
99 }
100 
101 uint32_t PDBFile::getFileSize() const { return Buffer->getLength(); }
102 
103 Expected<ArrayRef<uint8_t>> PDBFile::getBlockData(uint32_t BlockIndex,
104                                                   uint32_t NumBytes) const {
105   uint64_t StreamBlockOffset = msf::blockToOffset(BlockIndex, getBlockSize());
106 
107   ArrayRef<uint8_t> Result;
108   if (auto EC = Buffer->readBytes(StreamBlockOffset, NumBytes, Result))
109     return std::move(EC);
110   return Result;
111 }
112 
113 Error PDBFile::setBlockData(uint32_t BlockIndex, uint32_t Offset,
114                             ArrayRef<uint8_t> Data) const {
115   return make_error<RawError>(raw_error_code::not_writable,
116                               "PDBFile is immutable");
117 }
118 
119 Error PDBFile::parseFileHeaders() {
120   BinaryStreamReader Reader(*Buffer);
121 
122   // Initialize SB.
123   const msf::SuperBlock *SB = nullptr;
124   if (auto EC = Reader.readObject(SB)) {
125     consumeError(std::move(EC));
126     return make_error<RawError>(raw_error_code::corrupt_file,
127                                 "MSF superblock is missing");
128   }
129 
130   if (auto EC = msf::validateSuperBlock(*SB))
131     return EC;
132 
133   if (Buffer->getLength() % SB->BlockSize != 0)
134     return make_error<RawError>(raw_error_code::corrupt_file,
135                                 "File size is not a multiple of block size");
136   ContainerLayout.SB = SB;
137 
138   // Initialize Free Page Map.
139   ContainerLayout.FreePageMap.resize(SB->NumBlocks);
140   // The Fpm exists either at block 1 or block 2 of the MSF.  However, this
141   // allows for a maximum of getBlockSize() * 8 blocks bits in the Fpm, and
142   // thusly an equal number of total blocks in the file.  For a block size
143   // of 4KiB (very common), this would yield 32KiB total blocks in file, for a
144   // maximum file size of 32KiB * 4KiB = 128MiB.  Obviously this won't do, so
145   // the Fpm is split across the file at `getBlockSize()` intervals.  As a
146   // result, every block whose index is of the form |{1,2} + getBlockSize() * k|
147   // for any non-negative integer k is an Fpm block.  In theory, we only really
148   // need to reserve blocks of the form |{1,2} + getBlockSize() * 8 * k|, but
149   // current versions of the MSF format already expect the Fpm to be arranged
150   // at getBlockSize() intervals, so we have to be compatible.
151   // See the function fpmPn() for more information:
152   // https://github.com/Microsoft/microsoft-pdb/blob/master/PDB/msf/msf.cpp#L489
153   auto FpmStream =
154       MappedBlockStream::createFpmStream(ContainerLayout, *Buffer, Allocator);
155   BinaryStreamReader FpmReader(*FpmStream);
156   ArrayRef<uint8_t> FpmBytes;
157   if (auto EC = FpmReader.readBytes(FpmBytes, FpmReader.bytesRemaining()))
158     return EC;
159   uint32_t BlocksRemaining = getBlockCount();
160   uint32_t BI = 0;
161   for (auto Byte : FpmBytes) {
162     uint32_t BlocksThisByte = std::min(BlocksRemaining, 8U);
163     for (uint32_t I = 0; I < BlocksThisByte; ++I) {
164       if (Byte & (1 << I))
165         ContainerLayout.FreePageMap[BI] = true;
166       --BlocksRemaining;
167       ++BI;
168     }
169   }
170 
171   Reader.setOffset(getBlockMapOffset());
172   if (auto EC = Reader.readArray(ContainerLayout.DirectoryBlocks,
173                                  getNumDirectoryBlocks()))
174     return EC;
175 
176   return Error::success();
177 }
178 
179 Error PDBFile::parseStreamData() {
180   assert(ContainerLayout.SB);
181   if (DirectoryStream)
182     return Error::success();
183 
184   uint32_t NumStreams = 0;
185 
186   // Normally you can't use a MappedBlockStream without having fully parsed the
187   // PDB file, because it accesses the directory and various other things, which
188   // is exactly what we are attempting to parse.  By specifying a custom
189   // subclass of IPDBStreamData which only accesses the fields that have already
190   // been parsed, we can avoid this and reuse MappedBlockStream.
191   auto DS = MappedBlockStream::createDirectoryStream(ContainerLayout, *Buffer,
192                                                      Allocator);
193   BinaryStreamReader Reader(*DS);
194   if (auto EC = Reader.readInteger(NumStreams))
195     return EC;
196 
197   if (auto EC = Reader.readArray(ContainerLayout.StreamSizes, NumStreams))
198     return EC;
199   for (uint32_t I = 0; I < NumStreams; ++I) {
200     uint32_t StreamSize = getStreamByteSize(I);
201     // FIXME: What does StreamSize ~0U mean?
202     uint64_t NumExpectedStreamBlocks =
203         StreamSize == UINT32_MAX
204             ? 0
205             : msf::bytesToBlocks(StreamSize, ContainerLayout.SB->BlockSize);
206 
207     // For convenience, we store the block array contiguously.  This is because
208     // if someone calls setStreamMap(), it is more convenient to be able to call
209     // it with an ArrayRef instead of setting up a StreamRef.  Since the
210     // DirectoryStream is cached in the class and thus lives for the life of the
211     // class, we can be guaranteed that readArray() will return a stable
212     // reference, even if it has to allocate from its internal pool.
213     ArrayRef<support::ulittle32_t> Blocks;
214     if (auto EC = Reader.readArray(Blocks, NumExpectedStreamBlocks))
215       return EC;
216     for (uint32_t Block : Blocks) {
217       uint64_t BlockEndOffset =
218           (uint64_t)(Block + 1) * ContainerLayout.SB->BlockSize;
219       if (BlockEndOffset > getFileSize())
220         return make_error<RawError>(raw_error_code::corrupt_file,
221                                     "Stream block map is corrupt.");
222     }
223     ContainerLayout.StreamMap.push_back(Blocks);
224   }
225 
226   // We should have read exactly SB->NumDirectoryBytes bytes.
227   assert(Reader.bytesRemaining() == 0);
228   DirectoryStream = std::move(DS);
229   return Error::success();
230 }
231 
232 ArrayRef<support::ulittle32_t> PDBFile::getDirectoryBlockArray() const {
233   return ContainerLayout.DirectoryBlocks;
234 }
235 
236 std::unique_ptr<MappedBlockStream> PDBFile::createIndexedStream(uint16_t SN) {
237   if (SN == kInvalidStreamIndex)
238     return nullptr;
239   return MappedBlockStream::createIndexedStream(ContainerLayout, *Buffer, SN,
240                                                 Allocator);
241 }
242 
243 MSFStreamLayout PDBFile::getStreamLayout(uint32_t StreamIdx) const {
244   MSFStreamLayout Result;
245   auto Blocks = getStreamBlockList(StreamIdx);
246   Result.Blocks.assign(Blocks.begin(), Blocks.end());
247   Result.Length = getStreamByteSize(StreamIdx);
248   return Result;
249 }
250 
251 msf::MSFStreamLayout PDBFile::getFpmStreamLayout() const {
252   return msf::getFpmStreamLayout(ContainerLayout);
253 }
254 
255 Expected<GlobalsStream &> PDBFile::getPDBGlobalsStream() {
256   if (!Globals) {
257     auto DbiS = getPDBDbiStream();
258     if (!DbiS)
259       return DbiS.takeError();
260 
261     auto GlobalS = safelyCreateIndexedStream(
262         ContainerLayout, *Buffer, DbiS->getGlobalSymbolStreamIndex());
263     if (!GlobalS)
264       return GlobalS.takeError();
265     auto TempGlobals = llvm::make_unique<GlobalsStream>(std::move(*GlobalS));
266     if (auto EC = TempGlobals->reload())
267       return std::move(EC);
268     Globals = std::move(TempGlobals);
269   }
270   return *Globals;
271 }
272 
273 Expected<InfoStream &> PDBFile::getPDBInfoStream() {
274   if (!Info) {
275     auto InfoS = safelyCreateIndexedStream(ContainerLayout, *Buffer, StreamPDB);
276     if (!InfoS)
277       return InfoS.takeError();
278     auto TempInfo = llvm::make_unique<InfoStream>(std::move(*InfoS));
279     if (auto EC = TempInfo->reload())
280       return std::move(EC);
281     Info = std::move(TempInfo);
282   }
283   return *Info;
284 }
285 
286 Expected<DbiStream &> PDBFile::getPDBDbiStream() {
287   if (!Dbi) {
288     auto DbiS = safelyCreateIndexedStream(ContainerLayout, *Buffer, StreamDBI);
289     if (!DbiS)
290       return DbiS.takeError();
291     auto TempDbi = llvm::make_unique<DbiStream>(std::move(*DbiS));
292     if (auto EC = TempDbi->reload(this))
293       return std::move(EC);
294     Dbi = std::move(TempDbi);
295   }
296   return *Dbi;
297 }
298 
299 Expected<TpiStream &> PDBFile::getPDBTpiStream() {
300   if (!Tpi) {
301     auto TpiS = safelyCreateIndexedStream(ContainerLayout, *Buffer, StreamTPI);
302     if (!TpiS)
303       return TpiS.takeError();
304     auto TempTpi = llvm::make_unique<TpiStream>(*this, std::move(*TpiS));
305     if (auto EC = TempTpi->reload())
306       return std::move(EC);
307     Tpi = std::move(TempTpi);
308   }
309   return *Tpi;
310 }
311 
312 Expected<TpiStream &> PDBFile::getPDBIpiStream() {
313   if (!Ipi) {
314     if (!hasPDBIpiStream())
315       return make_error<RawError>(raw_error_code::no_stream);
316 
317     auto IpiS = safelyCreateIndexedStream(ContainerLayout, *Buffer, StreamIPI);
318     if (!IpiS)
319       return IpiS.takeError();
320     auto TempIpi = llvm::make_unique<TpiStream>(*this, std::move(*IpiS));
321     if (auto EC = TempIpi->reload())
322       return std::move(EC);
323     Ipi = std::move(TempIpi);
324   }
325   return *Ipi;
326 }
327 
328 Expected<PublicsStream &> PDBFile::getPDBPublicsStream() {
329   if (!Publics) {
330     auto DbiS = getPDBDbiStream();
331     if (!DbiS)
332       return DbiS.takeError();
333 
334     auto PublicS = safelyCreateIndexedStream(
335         ContainerLayout, *Buffer, DbiS->getPublicSymbolStreamIndex());
336     if (!PublicS)
337       return PublicS.takeError();
338     auto TempPublics = llvm::make_unique<PublicsStream>(std::move(*PublicS));
339     if (auto EC = TempPublics->reload())
340       return std::move(EC);
341     Publics = std::move(TempPublics);
342   }
343   return *Publics;
344 }
345 
346 Expected<SymbolStream &> PDBFile::getPDBSymbolStream() {
347   if (!Symbols) {
348     auto DbiS = getPDBDbiStream();
349     if (!DbiS)
350       return DbiS.takeError();
351 
352     uint32_t SymbolStreamNum = DbiS->getSymRecordStreamIndex();
353     auto SymbolS =
354         safelyCreateIndexedStream(ContainerLayout, *Buffer, SymbolStreamNum);
355     if (!SymbolS)
356       return SymbolS.takeError();
357 
358     auto TempSymbols = llvm::make_unique<SymbolStream>(std::move(*SymbolS));
359     if (auto EC = TempSymbols->reload())
360       return std::move(EC);
361     Symbols = std::move(TempSymbols);
362   }
363   return *Symbols;
364 }
365 
366 Expected<PDBStringTable &> PDBFile::getStringTable() {
367   if (!Strings) {
368     auto IS = getPDBInfoStream();
369     if (!IS)
370       return IS.takeError();
371 
372     Expected<uint32_t> ExpectedNSI = IS->getNamedStreamIndex("/names");
373     if (!ExpectedNSI)
374       return ExpectedNSI.takeError();
375     uint32_t NameStreamIndex = *ExpectedNSI;
376 
377     auto NS =
378         safelyCreateIndexedStream(ContainerLayout, *Buffer, NameStreamIndex);
379     if (!NS)
380       return NS.takeError();
381 
382     auto N = llvm::make_unique<PDBStringTable>();
383     BinaryStreamReader Reader(**NS);
384     if (auto EC = N->reload(Reader))
385       return std::move(EC);
386     assert(Reader.bytesRemaining() == 0);
387     StringTableStream = std::move(*NS);
388     Strings = std::move(N);
389   }
390   return *Strings;
391 }
392 
393 uint32_t PDBFile::getPointerSize() {
394   auto DbiS = getPDBDbiStream();
395   if (!DbiS)
396     return 0;
397   PDB_Machine Machine = DbiS->getMachineType();
398   if (Machine == PDB_Machine::Amd64)
399     return 8;
400   return 4;
401 }
402 
403 bool PDBFile::hasPDBDbiStream() const {
404   return StreamDBI < getNumStreams() && getStreamByteSize(StreamDBI) > 0;
405 }
406 
407 bool PDBFile::hasPDBGlobalsStream() {
408   auto DbiS = getPDBDbiStream();
409   if (!DbiS) {
410     consumeError(DbiS.takeError());
411     return false;
412   }
413 
414   return DbiS->getGlobalSymbolStreamIndex() < getNumStreams();
415 }
416 
417 bool PDBFile::hasPDBInfoStream() const { return StreamPDB < getNumStreams(); }
418 
419 bool PDBFile::hasPDBIpiStream() const {
420   if (!hasPDBInfoStream())
421     return false;
422 
423   if (StreamIPI >= getNumStreams())
424     return false;
425 
426   auto &InfoStream = cantFail(const_cast<PDBFile *>(this)->getPDBInfoStream());
427   return InfoStream.containsIdStream();
428 }
429 
430 bool PDBFile::hasPDBPublicsStream() {
431   auto DbiS = getPDBDbiStream();
432   if (!DbiS) {
433     consumeError(DbiS.takeError());
434     return false;
435   }
436   return DbiS->getPublicSymbolStreamIndex() < getNumStreams();
437 }
438 
439 bool PDBFile::hasPDBSymbolStream() {
440   auto DbiS = getPDBDbiStream();
441   if (!DbiS)
442     return false;
443   return DbiS->getSymRecordStreamIndex() < getNumStreams();
444 }
445 
446 bool PDBFile::hasPDBTpiStream() const { return StreamTPI < getNumStreams(); }
447 
448 bool PDBFile::hasPDBStringTable() {
449   auto IS = getPDBInfoStream();
450   if (!IS)
451     return false;
452   Expected<uint32_t> ExpectedNSI = IS->getNamedStreamIndex("/names");
453   if (!ExpectedNSI) {
454     consumeError(ExpectedNSI.takeError());
455     return false;
456   }
457   assert(*ExpectedNSI < getNumStreams());
458   return true;
459 }
460 
461 /// Wrapper around MappedBlockStream::createIndexedStream() that checks if a
462 /// stream with that index actually exists.  If it does not, the return value
463 /// will have an MSFError with code msf_error_code::no_stream.  Else, the return
464 /// value will contain the stream returned by createIndexedStream().
465 Expected<std::unique_ptr<MappedBlockStream>>
466 PDBFile::safelyCreateIndexedStream(const MSFLayout &Layout,
467                                    BinaryStreamRef MsfData,
468                                    uint32_t StreamIndex) const {
469   if (StreamIndex >= getNumStreams())
470     return make_error<RawError>(raw_error_code::no_stream);
471   return MappedBlockStream::createIndexedStream(Layout, MsfData, StreamIndex,
472                                                 Allocator);
473 }
474