xref: /llvm-project/llvm/lib/DebugInfo/PDB/Native/PDBFile.cpp (revision c3d8eec9e9a96be8ab87dea895f5ee452347192d)
1 //===- PDBFile.cpp - Low level interface to a PDB file ----------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "llvm/DebugInfo/PDB/Native/PDBFile.h"
11 #include "llvm/ADT/ArrayRef.h"
12 #include "llvm/ADT/STLExtras.h"
13 #include "llvm/DebugInfo/MSF/MSFCommon.h"
14 #include "llvm/DebugInfo/MSF/MappedBlockStream.h"
15 #include "llvm/DebugInfo/PDB/Native/DbiStream.h"
16 #include "llvm/DebugInfo/PDB/Native/GlobalsStream.h"
17 #include "llvm/DebugInfo/PDB/Native/InfoStream.h"
18 #include "llvm/DebugInfo/PDB/Native/PDBStringTable.h"
19 #include "llvm/DebugInfo/PDB/Native/PublicsStream.h"
20 #include "llvm/DebugInfo/PDB/Native/RawError.h"
21 #include "llvm/DebugInfo/PDB/Native/SymbolStream.h"
22 #include "llvm/DebugInfo/PDB/Native/TpiStream.h"
23 #include "llvm/Support/BinaryStream.h"
24 #include "llvm/Support/BinaryStreamArray.h"
25 #include "llvm/Support/BinaryStreamReader.h"
26 #include "llvm/Support/Endian.h"
27 #include "llvm/Support/Error.h"
28 #include "llvm/Support/Path.h"
29 #include <algorithm>
30 #include <cassert>
31 #include <cstdint>
32 
33 using namespace llvm;
34 using namespace llvm::codeview;
35 using namespace llvm::msf;
36 using namespace llvm::pdb;
37 
38 namespace {
39 typedef FixedStreamArray<support::ulittle32_t> ulittle_array;
40 } // end anonymous namespace
41 
42 PDBFile::PDBFile(StringRef Path, std::unique_ptr<BinaryStream> PdbFileBuffer,
43                  BumpPtrAllocator &Allocator)
44     : FilePath(Path), Allocator(Allocator), Buffer(std::move(PdbFileBuffer)) {}
45 
// Defaulted destructor, defined in this translation unit rather than in the
// header.  NOTE(review): presumably so that the owning pointers to the stream
// classes included above are destroyed where those types are complete --
// confirm against PDBFile.h.
PDBFile::~PDBFile() = default;
47 
48 StringRef PDBFile::getFilePath() const { return FilePath; }
49 
50 StringRef PDBFile::getFileDirectory() const {
51   return sys::path::parent_path(FilePath);
52 }
53 
54 uint32_t PDBFile::getBlockSize() const { return ContainerLayout.SB->BlockSize; }
55 
56 uint32_t PDBFile::getFreeBlockMapBlock() const {
57   return ContainerLayout.SB->FreeBlockMapBlock;
58 }
59 
60 uint32_t PDBFile::getBlockCount() const {
61   return ContainerLayout.SB->NumBlocks;
62 }
63 
64 uint32_t PDBFile::getNumDirectoryBytes() const {
65   return ContainerLayout.SB->NumDirectoryBytes;
66 }
67 
68 uint32_t PDBFile::getBlockMapIndex() const {
69   return ContainerLayout.SB->BlockMapAddr;
70 }
71 
72 uint32_t PDBFile::getUnknown1() const { return ContainerLayout.SB->Unknown1; }
73 
74 uint32_t PDBFile::getNumDirectoryBlocks() const {
75   return msf::bytesToBlocks(ContainerLayout.SB->NumDirectoryBytes,
76                             ContainerLayout.SB->BlockSize);
77 }
78 
79 uint64_t PDBFile::getBlockMapOffset() const {
80   return (uint64_t)ContainerLayout.SB->BlockMapAddr *
81          ContainerLayout.SB->BlockSize;
82 }
83 
84 uint32_t PDBFile::getNumStreams() const {
85   return ContainerLayout.StreamSizes.size();
86 }
87 
88 uint32_t PDBFile::getStreamByteSize(uint32_t StreamIndex) const {
89   return ContainerLayout.StreamSizes[StreamIndex];
90 }
91 
92 ArrayRef<support::ulittle32_t>
93 PDBFile::getStreamBlockList(uint32_t StreamIndex) const {
94   return ContainerLayout.StreamMap[StreamIndex];
95 }
96 
97 uint32_t PDBFile::getFileSize() const { return Buffer->getLength(); }
98 
99 Expected<ArrayRef<uint8_t>> PDBFile::getBlockData(uint32_t BlockIndex,
100                                                   uint32_t NumBytes) const {
101   uint64_t StreamBlockOffset = msf::blockToOffset(BlockIndex, getBlockSize());
102 
103   ArrayRef<uint8_t> Result;
104   if (auto EC = Buffer->readBytes(StreamBlockOffset, NumBytes, Result))
105     return std::move(EC);
106   return Result;
107 }
108 
109 Error PDBFile::setBlockData(uint32_t BlockIndex, uint32_t Offset,
110                             ArrayRef<uint8_t> Data) const {
111   return make_error<RawError>(raw_error_code::not_writable,
112                               "PDBFile is immutable");
113 }
114 
115 Error PDBFile::parseFileHeaders() {
116   BinaryStreamReader Reader(*Buffer);
117 
118   // Initialize SB.
119   const msf::SuperBlock *SB = nullptr;
120   if (auto EC = Reader.readObject(SB)) {
121     consumeError(std::move(EC));
122     return make_error<RawError>(raw_error_code::corrupt_file,
123                                 "Does not contain superblock");
124   }
125 
126   if (auto EC = msf::validateSuperBlock(*SB))
127     return EC;
128 
129   if (Buffer->getLength() % SB->BlockSize != 0)
130     return make_error<RawError>(raw_error_code::corrupt_file,
131                                 "File size is not a multiple of block size");
132   ContainerLayout.SB = SB;
133 
134   // Initialize Free Page Map.
135   ContainerLayout.FreePageMap.resize(SB->NumBlocks);
136   // The Fpm exists either at block 1 or block 2 of the MSF.  However, this
137   // allows for a maximum of getBlockSize() * 8 blocks bits in the Fpm, and
138   // thusly an equal number of total blocks in the file.  For a block size
139   // of 4KiB (very common), this would yield 32KiB total blocks in file, for a
140   // maximum file size of 32KiB * 4KiB = 128MiB.  Obviously this won't do, so
141   // the Fpm is split across the file at `getBlockSize()` intervals.  As a
142   // result, every block whose index is of the form |{1,2} + getBlockSize() * k|
143   // for any non-negative integer k is an Fpm block.  In theory, we only really
144   // need to reserve blocks of the form |{1,2} + getBlockSize() * 8 * k|, but
145   // current versions of the MSF format already expect the Fpm to be arranged
146   // at getBlockSize() intervals, so we have to be compatible.
147   // See the function fpmPn() for more information:
148   // https://github.com/Microsoft/microsoft-pdb/blob/master/PDB/msf/msf.cpp#L489
149   auto FpmStream =
150       MappedBlockStream::createFpmStream(ContainerLayout, *Buffer, Allocator);
151   BinaryStreamReader FpmReader(*FpmStream);
152   ArrayRef<uint8_t> FpmBytes;
153   if (auto EC = FpmReader.readBytes(FpmBytes,
154                                     msf::getFullFpmByteSize(ContainerLayout)))
155     return EC;
156   uint32_t BlocksRemaining = getBlockCount();
157   uint32_t BI = 0;
158   for (auto Byte : FpmBytes) {
159     uint32_t BlocksThisByte = std::min(BlocksRemaining, 8U);
160     for (uint32_t I = 0; I < BlocksThisByte; ++I) {
161       if (Byte & (1 << I))
162         ContainerLayout.FreePageMap[BI] = true;
163       --BlocksRemaining;
164       ++BI;
165     }
166   }
167 
168   Reader.setOffset(getBlockMapOffset());
169   if (auto EC = Reader.readArray(ContainerLayout.DirectoryBlocks,
170                                  getNumDirectoryBlocks()))
171     return EC;
172 
173   return Error::success();
174 }
175 
176 Error PDBFile::parseStreamData() {
177   assert(ContainerLayout.SB);
178   if (DirectoryStream)
179     return Error::success();
180 
181   uint32_t NumStreams = 0;
182 
183   // Normally you can't use a MappedBlockStream without having fully parsed the
184   // PDB file, because it accesses the directory and various other things, which
185   // is exactly what we are attempting to parse.  By specifying a custom
186   // subclass of IPDBStreamData which only accesses the fields that have already
187   // been parsed, we can avoid this and reuse MappedBlockStream.
188   auto DS = MappedBlockStream::createDirectoryStream(ContainerLayout, *Buffer,
189                                                      Allocator);
190   BinaryStreamReader Reader(*DS);
191   if (auto EC = Reader.readInteger(NumStreams))
192     return EC;
193 
194   if (auto EC = Reader.readArray(ContainerLayout.StreamSizes, NumStreams))
195     return EC;
196   for (uint32_t I = 0; I < NumStreams; ++I) {
197     uint32_t StreamSize = getStreamByteSize(I);
198     // FIXME: What does StreamSize ~0U mean?
199     uint64_t NumExpectedStreamBlocks =
200         StreamSize == UINT32_MAX
201             ? 0
202             : msf::bytesToBlocks(StreamSize, ContainerLayout.SB->BlockSize);
203 
204     // For convenience, we store the block array contiguously.  This is because
205     // if someone calls setStreamMap(), it is more convenient to be able to call
206     // it with an ArrayRef instead of setting up a StreamRef.  Since the
207     // DirectoryStream is cached in the class and thus lives for the life of the
208     // class, we can be guaranteed that readArray() will return a stable
209     // reference, even if it has to allocate from its internal pool.
210     ArrayRef<support::ulittle32_t> Blocks;
211     if (auto EC = Reader.readArray(Blocks, NumExpectedStreamBlocks))
212       return EC;
213     for (uint32_t Block : Blocks) {
214       uint64_t BlockEndOffset =
215           (uint64_t)(Block + 1) * ContainerLayout.SB->BlockSize;
216       if (BlockEndOffset > getFileSize())
217         return make_error<RawError>(raw_error_code::corrupt_file,
218                                     "Stream block map is corrupt.");
219     }
220     ContainerLayout.StreamMap.push_back(Blocks);
221   }
222 
223   // We should have read exactly SB->NumDirectoryBytes bytes.
224   assert(Reader.bytesRemaining() == 0);
225   DirectoryStream = std::move(DS);
226   return Error::success();
227 }
228 
229 ArrayRef<support::ulittle32_t> PDBFile::getDirectoryBlockArray() const {
230   return ContainerLayout.DirectoryBlocks;
231 }
232 
233 MSFStreamLayout PDBFile::getStreamLayout(uint32_t StreamIdx) const {
234   MSFStreamLayout Result;
235   auto Blocks = getStreamBlockList(StreamIdx);
236   Result.Blocks.assign(Blocks.begin(), Blocks.end());
237   Result.Length = getStreamByteSize(StreamIdx);
238   return Result;
239 }
240 
241 msf::MSFStreamLayout PDBFile::getFpmStreamLayout() const {
242   return msf::getFpmStreamLayout(ContainerLayout);
243 }
244 
245 Expected<GlobalsStream &> PDBFile::getPDBGlobalsStream() {
246   if (!Globals) {
247     auto DbiS = getPDBDbiStream();
248     if (!DbiS)
249       return DbiS.takeError();
250 
251     auto GlobalS = safelyCreateIndexedStream(
252         ContainerLayout, *Buffer, DbiS->getGlobalSymbolStreamIndex());
253     if (!GlobalS)
254       return GlobalS.takeError();
255     auto TempGlobals = llvm::make_unique<GlobalsStream>(std::move(*GlobalS));
256     if (auto EC = TempGlobals->reload())
257       return std::move(EC);
258     Globals = std::move(TempGlobals);
259   }
260   return *Globals;
261 }
262 
263 Expected<InfoStream &> PDBFile::getPDBInfoStream() {
264   if (!Info) {
265     auto InfoS = safelyCreateIndexedStream(ContainerLayout, *Buffer, StreamPDB);
266     if (!InfoS)
267       return InfoS.takeError();
268     auto TempInfo = llvm::make_unique<InfoStream>(std::move(*InfoS));
269     if (auto EC = TempInfo->reload())
270       return std::move(EC);
271     Info = std::move(TempInfo);
272   }
273   return *Info;
274 }
275 
276 Expected<DbiStream &> PDBFile::getPDBDbiStream() {
277   if (!Dbi) {
278     auto DbiS = safelyCreateIndexedStream(ContainerLayout, *Buffer, StreamDBI);
279     if (!DbiS)
280       return DbiS.takeError();
281     auto TempDbi = llvm::make_unique<DbiStream>(*this, std::move(*DbiS));
282     if (auto EC = TempDbi->reload())
283       return std::move(EC);
284     Dbi = std::move(TempDbi);
285   }
286   return *Dbi;
287 }
288 
289 Expected<TpiStream &> PDBFile::getPDBTpiStream() {
290   if (!Tpi) {
291     auto TpiS = safelyCreateIndexedStream(ContainerLayout, *Buffer, StreamTPI);
292     if (!TpiS)
293       return TpiS.takeError();
294     auto TempTpi = llvm::make_unique<TpiStream>(*this, std::move(*TpiS));
295     if (auto EC = TempTpi->reload())
296       return std::move(EC);
297     Tpi = std::move(TempTpi);
298   }
299   return *Tpi;
300 }
301 
302 Expected<TpiStream &> PDBFile::getPDBIpiStream() {
303   if (!Ipi) {
304     auto IpiS = safelyCreateIndexedStream(ContainerLayout, *Buffer, StreamIPI);
305     if (!IpiS)
306       return IpiS.takeError();
307     auto TempIpi = llvm::make_unique<TpiStream>(*this, std::move(*IpiS));
308     if (auto EC = TempIpi->reload())
309       return std::move(EC);
310     Ipi = std::move(TempIpi);
311   }
312   return *Ipi;
313 }
314 
315 Expected<PublicsStream &> PDBFile::getPDBPublicsStream() {
316   if (!Publics) {
317     auto DbiS = getPDBDbiStream();
318     if (!DbiS)
319       return DbiS.takeError();
320 
321     auto PublicS = safelyCreateIndexedStream(
322         ContainerLayout, *Buffer, DbiS->getPublicSymbolStreamIndex());
323     if (!PublicS)
324       return PublicS.takeError();
325     auto TempPublics = llvm::make_unique<PublicsStream>(std::move(*PublicS));
326     if (auto EC = TempPublics->reload())
327       return std::move(EC);
328     Publics = std::move(TempPublics);
329   }
330   return *Publics;
331 }
332 
333 Expected<SymbolStream &> PDBFile::getPDBSymbolStream() {
334   if (!Symbols) {
335     auto DbiS = getPDBDbiStream();
336     if (!DbiS)
337       return DbiS.takeError();
338 
339     uint32_t SymbolStreamNum = DbiS->getSymRecordStreamIndex();
340     auto SymbolS =
341         safelyCreateIndexedStream(ContainerLayout, *Buffer, SymbolStreamNum);
342     if (!SymbolS)
343       return SymbolS.takeError();
344 
345     auto TempSymbols = llvm::make_unique<SymbolStream>(std::move(*SymbolS));
346     if (auto EC = TempSymbols->reload())
347       return std::move(EC);
348     Symbols = std::move(TempSymbols);
349   }
350   return *Symbols;
351 }
352 
353 Expected<PDBStringTable &> PDBFile::getStringTable() {
354   if (!Strings) {
355     auto IS = getPDBInfoStream();
356     if (!IS)
357       return IS.takeError();
358 
359     uint32_t NameStreamIndex = IS->getNamedStreamIndex("/names");
360 
361     auto NS =
362         safelyCreateIndexedStream(ContainerLayout, *Buffer, NameStreamIndex);
363     if (!NS)
364       return NS.takeError();
365 
366     auto N = llvm::make_unique<PDBStringTable>();
367     BinaryStreamReader Reader(**NS);
368     if (auto EC = N->reload(Reader))
369       return std::move(EC);
370     assert(Reader.bytesRemaining() == 0);
371     StringTableStream = std::move(*NS);
372     Strings = std::move(N);
373   }
374   return *Strings;
375 }
376 
377 uint32_t PDBFile::getPointerSize() {
378   auto DbiS = getPDBDbiStream();
379   if (!DbiS)
380     return 0;
381   PDB_Machine Machine = DbiS->getMachineType();
382   if (Machine == PDB_Machine::Amd64)
383     return 8;
384   return 4;
385 }
386 
387 bool PDBFile::hasPDBDbiStream() const { return StreamDBI < getNumStreams(); }
388 
389 bool PDBFile::hasPDBGlobalsStream() {
390   auto DbiS = getPDBDbiStream();
391   if (!DbiS) {
392     consumeError(DbiS.takeError());
393     return false;
394   }
395 
396   return DbiS->getGlobalSymbolStreamIndex() < getNumStreams();
397 }
398 
399 bool PDBFile::hasPDBInfoStream() { return StreamPDB < getNumStreams(); }
400 
401 bool PDBFile::hasPDBIpiStream() const { return StreamIPI < getNumStreams(); }
402 
403 bool PDBFile::hasPDBPublicsStream() {
404   auto DbiS = getPDBDbiStream();
405   if (!DbiS) {
406     consumeError(DbiS.takeError());
407     return false;
408   }
409   return DbiS->getPublicSymbolStreamIndex() < getNumStreams();
410 }
411 
412 bool PDBFile::hasPDBSymbolStream() {
413   auto DbiS = getPDBDbiStream();
414   if (!DbiS)
415     return false;
416   return DbiS->getSymRecordStreamIndex() < getNumStreams();
417 }
418 
419 bool PDBFile::hasPDBTpiStream() const { return StreamTPI < getNumStreams(); }
420 
421 bool PDBFile::hasPDBStringTable() {
422   auto IS = getPDBInfoStream();
423   if (!IS)
424     return false;
425   return IS->getNamedStreamIndex("/names") < getNumStreams();
426 }
427 
428 /// Wrapper around MappedBlockStream::createIndexedStream() that checks if a
429 /// stream with that index actually exists.  If it does not, the return value
430 /// will have an MSFError with code msf_error_code::no_stream.  Else, the return
431 /// value will contain the stream returned by createIndexedStream().
432 Expected<std::unique_ptr<MappedBlockStream>>
433 PDBFile::safelyCreateIndexedStream(const MSFLayout &Layout,
434                                    BinaryStreamRef MsfData,
435                                    uint32_t StreamIndex) const {
436   if (StreamIndex >= getNumStreams())
437     return make_error<RawError>(raw_error_code::no_stream);
438   return MappedBlockStream::createIndexedStream(Layout, MsfData, StreamIndex,
439                                                 Allocator);
440 }
441