xref: /freebsd-src/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp (revision 5f757f3ff9144b609b3c433dfd370cc6bdc191ad)
10b57cec5SDimitry Andric //===- PDBStringTableBuilder.cpp - PDB String Table -------------*- C++ -*-===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric 
90b57cec5SDimitry Andric #include "llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h"
100b57cec5SDimitry Andric 
110b57cec5SDimitry Andric #include "llvm/ADT/ArrayRef.h"
120b57cec5SDimitry Andric #include "llvm/DebugInfo/PDB/Native/Hash.h"
130b57cec5SDimitry Andric #include "llvm/DebugInfo/PDB/Native/RawTypes.h"
140b57cec5SDimitry Andric #include "llvm/Support/BinaryStreamWriter.h"
150b57cec5SDimitry Andric #include "llvm/Support/Endian.h"
16*5f757f3fSDimitry Andric #include "llvm/Support/TimeProfiler.h"
170b57cec5SDimitry Andric 
180b57cec5SDimitry Andric #include <map>
190b57cec5SDimitry Andric 
200b57cec5SDimitry Andric using namespace llvm;
210b57cec5SDimitry Andric using namespace llvm::msf;
220b57cec5SDimitry Andric using namespace llvm::support;
230b57cec5SDimitry Andric using namespace llvm::support::endian;
240b57cec5SDimitry Andric using namespace llvm::pdb;
250b57cec5SDimitry Andric 
StringTableHashTraits(PDBStringTableBuilder & Table)260b57cec5SDimitry Andric StringTableHashTraits::StringTableHashTraits(PDBStringTableBuilder &Table)
270b57cec5SDimitry Andric     : Table(&Table) {}
280b57cec5SDimitry Andric 
hashLookupKey(StringRef S) const290b57cec5SDimitry Andric uint32_t StringTableHashTraits::hashLookupKey(StringRef S) const {
300b57cec5SDimitry Andric   // The reference implementation doesn't include code for /src/headerblock
310b57cec5SDimitry Andric   // handling, but it can only read natvis entries lld's PDB files if
320b57cec5SDimitry Andric   // this hash function truncates the hash to 16 bit.
330b57cec5SDimitry Andric   // PDB/include/misc.h in the reference implementation has a hashSz() function
340b57cec5SDimitry Andric   // that returns an unsigned short, that seems what's being used for
350b57cec5SDimitry Andric   // /src/headerblock.
360b57cec5SDimitry Andric   return static_cast<uint16_t>(Table->getIdForString(S));
370b57cec5SDimitry Andric }
380b57cec5SDimitry Andric 
storageKeyToLookupKey(uint32_t Offset) const390b57cec5SDimitry Andric StringRef StringTableHashTraits::storageKeyToLookupKey(uint32_t Offset) const {
400b57cec5SDimitry Andric   return Table->getStringForId(Offset);
410b57cec5SDimitry Andric }
420b57cec5SDimitry Andric 
lookupKeyToStorageKey(StringRef S)430b57cec5SDimitry Andric uint32_t StringTableHashTraits::lookupKeyToStorageKey(StringRef S) {
440b57cec5SDimitry Andric   return Table->insert(S);
450b57cec5SDimitry Andric }
460b57cec5SDimitry Andric 
insert(StringRef S)470b57cec5SDimitry Andric uint32_t PDBStringTableBuilder::insert(StringRef S) {
480b57cec5SDimitry Andric   return Strings.insert(S);
490b57cec5SDimitry Andric }
500b57cec5SDimitry Andric 
getIdForString(StringRef S) const510b57cec5SDimitry Andric uint32_t PDBStringTableBuilder::getIdForString(StringRef S) const {
520b57cec5SDimitry Andric   return Strings.getIdForString(S);
530b57cec5SDimitry Andric }
540b57cec5SDimitry Andric 
getStringForId(uint32_t Id) const550b57cec5SDimitry Andric StringRef PDBStringTableBuilder::getStringForId(uint32_t Id) const {
560b57cec5SDimitry Andric   return Strings.getStringForId(Id);
570b57cec5SDimitry Andric }
580b57cec5SDimitry Andric 
computeBucketCount(uint32_t NumStrings)590b57cec5SDimitry Andric static uint32_t computeBucketCount(uint32_t NumStrings) {
600b57cec5SDimitry Andric   // This is a precomputed list of Buckets given the specified number of
610b57cec5SDimitry Andric   // strings.  Matching the reference algorithm exactly is not strictly
620b57cec5SDimitry Andric   // necessary for correctness, but it helps when comparing LLD's PDBs with
630b57cec5SDimitry Andric   // Microsoft's PDBs so as to eliminate superfluous differences.
640b57cec5SDimitry Andric   // The reference implementation does (in nmt.h, NMT::grow()):
650b57cec5SDimitry Andric   //   unsigned StringCount = 0;
660b57cec5SDimitry Andric   //   unsigned BucketCount = 1;
670b57cec5SDimitry Andric   //   fn insert() {
680b57cec5SDimitry Andric   //     ++StringCount;
690b57cec5SDimitry Andric   //     if (BucketCount * 3 / 4 < StringCount)
700b57cec5SDimitry Andric   //       BucketCount = BucketCount * 3 / 2 + 1;
710b57cec5SDimitry Andric   //   }
720b57cec5SDimitry Andric   // This list contains all StringCount, BucketCount pairs where BucketCount was
730b57cec5SDimitry Andric   // just incremented.  It ends before the first BucketCount entry where
740b57cec5SDimitry Andric   // BucketCount * 3 would overflow a 32-bit unsigned int.
7581ad6265SDimitry Andric   static const std::pair<uint32_t, uint32_t> StringsToBuckets[] = {
760b57cec5SDimitry Andric       {0, 1},
770b57cec5SDimitry Andric       {1, 2},
780b57cec5SDimitry Andric       {2, 4},
790b57cec5SDimitry Andric       {4, 7},
800b57cec5SDimitry Andric       {6, 11},
810b57cec5SDimitry Andric       {9, 17},
820b57cec5SDimitry Andric       {13, 26},
830b57cec5SDimitry Andric       {20, 40},
840b57cec5SDimitry Andric       {31, 61},
850b57cec5SDimitry Andric       {46, 92},
860b57cec5SDimitry Andric       {70, 139},
870b57cec5SDimitry Andric       {105, 209},
880b57cec5SDimitry Andric       {157, 314},
890b57cec5SDimitry Andric       {236, 472},
900b57cec5SDimitry Andric       {355, 709},
910b57cec5SDimitry Andric       {532, 1064},
920b57cec5SDimitry Andric       {799, 1597},
930b57cec5SDimitry Andric       {1198, 2396},
940b57cec5SDimitry Andric       {1798, 3595},
950b57cec5SDimitry Andric       {2697, 5393},
960b57cec5SDimitry Andric       {4045, 8090},
970b57cec5SDimitry Andric       {6068, 12136},
980b57cec5SDimitry Andric       {9103, 18205},
990b57cec5SDimitry Andric       {13654, 27308},
1000b57cec5SDimitry Andric       {20482, 40963},
1010b57cec5SDimitry Andric       {30723, 61445},
1020b57cec5SDimitry Andric       {46084, 92168},
1030b57cec5SDimitry Andric       {69127, 138253},
1040b57cec5SDimitry Andric       {103690, 207380},
1050b57cec5SDimitry Andric       {155536, 311071},
1060b57cec5SDimitry Andric       {233304, 466607},
1070b57cec5SDimitry Andric       {349956, 699911},
1080b57cec5SDimitry Andric       {524934, 1049867},
1090b57cec5SDimitry Andric       {787401, 1574801},
1100b57cec5SDimitry Andric       {1181101, 2362202},
1110b57cec5SDimitry Andric       {1771652, 3543304},
1120b57cec5SDimitry Andric       {2657479, 5314957},
1130b57cec5SDimitry Andric       {3986218, 7972436},
1140b57cec5SDimitry Andric       {5979328, 11958655},
1150b57cec5SDimitry Andric       {8968992, 17937983},
1160b57cec5SDimitry Andric       {13453488, 26906975},
1170b57cec5SDimitry Andric       {20180232, 40360463},
1180b57cec5SDimitry Andric       {30270348, 60540695},
1190b57cec5SDimitry Andric       {45405522, 90811043},
1200b57cec5SDimitry Andric       {68108283, 136216565},
1210b57cec5SDimitry Andric       {102162424, 204324848},
1220b57cec5SDimitry Andric       {153243637, 306487273},
1230b57cec5SDimitry Andric       {229865455, 459730910},
1240b57cec5SDimitry Andric       {344798183, 689596366},
1250b57cec5SDimitry Andric       {517197275, 1034394550},
1260b57cec5SDimitry Andric       {775795913, 1551591826},
1270b57cec5SDimitry Andric       {1163693870, 2327387740}};
12881ad6265SDimitry Andric   const auto *Entry = llvm::lower_bound(
12981ad6265SDimitry Andric       StringsToBuckets, std::make_pair(NumStrings, 0U), llvm::less_first());
13081ad6265SDimitry Andric   assert(Entry != std::end(StringsToBuckets));
1310b57cec5SDimitry Andric   return Entry->second;
1320b57cec5SDimitry Andric }
1330b57cec5SDimitry Andric 
calculateHashTableSize() const1340b57cec5SDimitry Andric uint32_t PDBStringTableBuilder::calculateHashTableSize() const {
1350b57cec5SDimitry Andric   uint32_t Size = sizeof(uint32_t); // Hash table begins with 4-byte size field.
1360b57cec5SDimitry Andric   Size += sizeof(uint32_t) * computeBucketCount(Strings.size());
1370b57cec5SDimitry Andric 
1380b57cec5SDimitry Andric   return Size;
1390b57cec5SDimitry Andric }
1400b57cec5SDimitry Andric 
calculateSerializedSize() const1410b57cec5SDimitry Andric uint32_t PDBStringTableBuilder::calculateSerializedSize() const {
1420b57cec5SDimitry Andric   uint32_t Size = 0;
1430b57cec5SDimitry Andric   Size += sizeof(PDBStringTableHeader);
1440b57cec5SDimitry Andric   Size += Strings.calculateSerializedSize();
1450b57cec5SDimitry Andric   Size += calculateHashTableSize();
1460b57cec5SDimitry Andric   Size += sizeof(uint32_t); // The /names stream ends with the string count.
1470b57cec5SDimitry Andric   return Size;
1480b57cec5SDimitry Andric }
1490b57cec5SDimitry Andric 
setStrings(const codeview::DebugStringTableSubsection & Strings)1500b57cec5SDimitry Andric void PDBStringTableBuilder::setStrings(
1510b57cec5SDimitry Andric     const codeview::DebugStringTableSubsection &Strings) {
1520b57cec5SDimitry Andric   this->Strings = Strings;
1530b57cec5SDimitry Andric }
1540b57cec5SDimitry Andric 
writeHeader(BinaryStreamWriter & Writer) const1550b57cec5SDimitry Andric Error PDBStringTableBuilder::writeHeader(BinaryStreamWriter &Writer) const {
1560b57cec5SDimitry Andric   // Write a header
1570b57cec5SDimitry Andric   PDBStringTableHeader H;
1580b57cec5SDimitry Andric   H.Signature = PDBStringTableSignature;
1590b57cec5SDimitry Andric   H.HashVersion = 1;
1600b57cec5SDimitry Andric   H.ByteSize = Strings.calculateSerializedSize();
1610b57cec5SDimitry Andric   if (auto EC = Writer.writeObject(H))
1620b57cec5SDimitry Andric     return EC;
1630b57cec5SDimitry Andric   assert(Writer.bytesRemaining() == 0);
1640b57cec5SDimitry Andric   return Error::success();
1650b57cec5SDimitry Andric }
1660b57cec5SDimitry Andric 
writeStrings(BinaryStreamWriter & Writer) const1670b57cec5SDimitry Andric Error PDBStringTableBuilder::writeStrings(BinaryStreamWriter &Writer) const {
1680b57cec5SDimitry Andric   if (auto EC = Strings.commit(Writer))
1690b57cec5SDimitry Andric     return EC;
1700b57cec5SDimitry Andric 
1710b57cec5SDimitry Andric   assert(Writer.bytesRemaining() == 0);
1720b57cec5SDimitry Andric   return Error::success();
1730b57cec5SDimitry Andric }
1740b57cec5SDimitry Andric 
writeHashTable(BinaryStreamWriter & Writer) const1750b57cec5SDimitry Andric Error PDBStringTableBuilder::writeHashTable(BinaryStreamWriter &Writer) const {
1760b57cec5SDimitry Andric   // Write a hash table.
1770b57cec5SDimitry Andric   uint32_t BucketCount = computeBucketCount(Strings.size());
1780b57cec5SDimitry Andric   if (auto EC = Writer.writeInteger(BucketCount))
1790b57cec5SDimitry Andric     return EC;
1800b57cec5SDimitry Andric   std::vector<ulittle32_t> Buckets(BucketCount);
1810b57cec5SDimitry Andric 
182bdd1243dSDimitry Andric   for (const auto &Pair : Strings) {
1830b57cec5SDimitry Andric     StringRef S = Pair.getKey();
1840b57cec5SDimitry Andric     uint32_t Offset = Pair.getValue();
1850b57cec5SDimitry Andric     uint32_t Hash = hashStringV1(S);
1860b57cec5SDimitry Andric 
1870b57cec5SDimitry Andric     for (uint32_t I = 0; I != BucketCount; ++I) {
1880b57cec5SDimitry Andric       uint32_t Slot = (Hash + I) % BucketCount;
1890b57cec5SDimitry Andric       if (Buckets[Slot] != 0)
1900b57cec5SDimitry Andric         continue;
1910b57cec5SDimitry Andric       Buckets[Slot] = Offset;
1920b57cec5SDimitry Andric       break;
1930b57cec5SDimitry Andric     }
1940b57cec5SDimitry Andric   }
1950b57cec5SDimitry Andric 
1960b57cec5SDimitry Andric   if (auto EC = Writer.writeArray(ArrayRef<ulittle32_t>(Buckets)))
1970b57cec5SDimitry Andric     return EC;
1980b57cec5SDimitry Andric 
1990b57cec5SDimitry Andric   assert(Writer.bytesRemaining() == 0);
2000b57cec5SDimitry Andric   return Error::success();
2010b57cec5SDimitry Andric }
2020b57cec5SDimitry Andric 
writeEpilogue(BinaryStreamWriter & Writer) const2030b57cec5SDimitry Andric Error PDBStringTableBuilder::writeEpilogue(BinaryStreamWriter &Writer) const {
2040b57cec5SDimitry Andric   if (auto EC = Writer.writeInteger<uint32_t>(Strings.size()))
2050b57cec5SDimitry Andric     return EC;
2060b57cec5SDimitry Andric   assert(Writer.bytesRemaining() == 0);
2070b57cec5SDimitry Andric   return Error::success();
2080b57cec5SDimitry Andric }
2090b57cec5SDimitry Andric 
commit(BinaryStreamWriter & Writer) const2100b57cec5SDimitry Andric Error PDBStringTableBuilder::commit(BinaryStreamWriter &Writer) const {
211*5f757f3fSDimitry Andric   llvm::TimeTraceScope timeScope("Commit strings table");
2120b57cec5SDimitry Andric   BinaryStreamWriter SectionWriter;
2130b57cec5SDimitry Andric 
2140b57cec5SDimitry Andric   std::tie(SectionWriter, Writer) = Writer.split(sizeof(PDBStringTableHeader));
2150b57cec5SDimitry Andric   if (auto EC = writeHeader(SectionWriter))
2160b57cec5SDimitry Andric     return EC;
2170b57cec5SDimitry Andric 
2180b57cec5SDimitry Andric   std::tie(SectionWriter, Writer) =
2190b57cec5SDimitry Andric       Writer.split(Strings.calculateSerializedSize());
2200b57cec5SDimitry Andric   if (auto EC = writeStrings(SectionWriter))
2210b57cec5SDimitry Andric     return EC;
2220b57cec5SDimitry Andric 
2230b57cec5SDimitry Andric   std::tie(SectionWriter, Writer) = Writer.split(calculateHashTableSize());
2240b57cec5SDimitry Andric   if (auto EC = writeHashTable(SectionWriter))
2250b57cec5SDimitry Andric     return EC;
2260b57cec5SDimitry Andric 
2270b57cec5SDimitry Andric   std::tie(SectionWriter, Writer) = Writer.split(sizeof(uint32_t));
2280b57cec5SDimitry Andric   if (auto EC = writeEpilogue(SectionWriter))
2290b57cec5SDimitry Andric     return EC;
2300b57cec5SDimitry Andric 
2310b57cec5SDimitry Andric   return Error::success();
2320b57cec5SDimitry Andric }
233