xref: /openbsd-src/gnu/llvm/llvm/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp (revision d415bd752c734aee168c4ee86ff32e8cc249eb16)
109467b48Spatrick //===- PDBStringTableBuilder.cpp - PDB String Table -------------*- C++ -*-===//
209467b48Spatrick //
309467b48Spatrick // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
409467b48Spatrick // See https://llvm.org/LICENSE.txt for license information.
509467b48Spatrick // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
609467b48Spatrick //
709467b48Spatrick //===----------------------------------------------------------------------===//
809467b48Spatrick 
#include "llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h"

#include "llvm/ADT/ArrayRef.h"
#include "llvm/DebugInfo/PDB/Native/Hash.h"
#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
#include "llvm/Support/BinaryStreamWriter.h"
#include "llvm/Support/Endian.h"

#include <algorithm>
#include <map>
1809467b48Spatrick 
1909467b48Spatrick using namespace llvm;
2009467b48Spatrick using namespace llvm::msf;
2109467b48Spatrick using namespace llvm::support;
2209467b48Spatrick using namespace llvm::support::endian;
2309467b48Spatrick using namespace llvm::pdb;
2409467b48Spatrick 
StringTableHashTraits(PDBStringTableBuilder & Table)2509467b48Spatrick StringTableHashTraits::StringTableHashTraits(PDBStringTableBuilder &Table)
2609467b48Spatrick     : Table(&Table) {}
2709467b48Spatrick 
hashLookupKey(StringRef S) const2809467b48Spatrick uint32_t StringTableHashTraits::hashLookupKey(StringRef S) const {
2909467b48Spatrick   // The reference implementation doesn't include code for /src/headerblock
3009467b48Spatrick   // handling, but it can only read natvis entries lld's PDB files if
3109467b48Spatrick   // this hash function truncates the hash to 16 bit.
3209467b48Spatrick   // PDB/include/misc.h in the reference implementation has a hashSz() function
3309467b48Spatrick   // that returns an unsigned short, that seems what's being used for
3409467b48Spatrick   // /src/headerblock.
3509467b48Spatrick   return static_cast<uint16_t>(Table->getIdForString(S));
3609467b48Spatrick }
3709467b48Spatrick 
storageKeyToLookupKey(uint32_t Offset) const3809467b48Spatrick StringRef StringTableHashTraits::storageKeyToLookupKey(uint32_t Offset) const {
3909467b48Spatrick   return Table->getStringForId(Offset);
4009467b48Spatrick }
4109467b48Spatrick 
lookupKeyToStorageKey(StringRef S)4209467b48Spatrick uint32_t StringTableHashTraits::lookupKeyToStorageKey(StringRef S) {
4309467b48Spatrick   return Table->insert(S);
4409467b48Spatrick }
4509467b48Spatrick 
insert(StringRef S)4609467b48Spatrick uint32_t PDBStringTableBuilder::insert(StringRef S) {
4709467b48Spatrick   return Strings.insert(S);
4809467b48Spatrick }
4909467b48Spatrick 
getIdForString(StringRef S) const5009467b48Spatrick uint32_t PDBStringTableBuilder::getIdForString(StringRef S) const {
5109467b48Spatrick   return Strings.getIdForString(S);
5209467b48Spatrick }
5309467b48Spatrick 
getStringForId(uint32_t Id) const5409467b48Spatrick StringRef PDBStringTableBuilder::getStringForId(uint32_t Id) const {
5509467b48Spatrick   return Strings.getStringForId(Id);
5609467b48Spatrick }
5709467b48Spatrick 
// Returns the number of hash buckets to emit for a table of NumStrings
// strings: the bucket count of the first table entry below whose string count
// is greater than or equal to NumStrings.
//
// If NumStrings is larger than the last string count in the table, this
// asserts in builds with assertions enabled; in builds without assertions it
// falls back to the largest bucket count in the table instead of reading past
// the end of the array.
static uint32_t computeBucketCount(uint32_t NumStrings) {
  // This is a precomputed list of Buckets given the specified number of
  // strings.  Matching the reference algorithm exactly is not strictly
  // necessary for correctness, but it helps when comparing LLD's PDBs with
  // Microsoft's PDBs so as to eliminate superfluous differences.
  // The reference implementation does (in nmt.h, NMT::grow()):
  //   unsigned StringCount = 0;
  //   unsigned BucketCount = 1;
  //   fn insert() {
  //     ++StringCount;
  //     if (BucketCount * 3 / 4 < StringCount)
  //       BucketCount = BucketCount * 3 / 2 + 1;
  //   }
  // This list contains all StringCount, BucketCount pairs where BucketCount was
  // just incremented.  It ends before the first BucketCount entry where
  // BucketCount * 3 would overflow a 32-bit unsigned int.
  static constexpr std::pair<uint32_t, uint32_t> StringsToBuckets[] = {
      {0, 1},
      {1, 2},
      {2, 4},
      {4, 7},
      {6, 11},
      {9, 17},
      {13, 26},
      {20, 40},
      {31, 61},
      {46, 92},
      {70, 139},
      {105, 209},
      {157, 314},
      {236, 472},
      {355, 709},
      {532, 1064},
      {799, 1597},
      {1198, 2396},
      {1798, 3595},
      {2697, 5393},
      {4045, 8090},
      {6068, 12136},
      {9103, 18205},
      {13654, 27308},
      {20482, 40963},
      {30723, 61445},
      {46084, 92168},
      {69127, 138253},
      {103690, 207380},
      {155536, 311071},
      {233304, 466607},
      {349956, 699911},
      {524934, 1049867},
      {787401, 1574801},
      {1181101, 2362202},
      {1771652, 3543304},
      {2657479, 5314957},
      {3986218, 7972436},
      {5979328, 11958655},
      {8968992, 17937983},
      {13453488, 26906975},
      {20180232, 40360463},
      {30270348, 60540695},
      {45405522, 90811043},
      {68108283, 136216565},
      {102162424, 204324848},
      {153243637, 306487273},
      {229865455, 459730910},
      {344798183, 689596366},
      {517197275, 1034394550},
      {775795913, 1551591826},
      {1163693870, 2327387740}};

  // The table is sorted by string count, so a binary search on the first
  // member finds the first entry whose string count is >= NumStrings.
  const auto *Entry = std::lower_bound(
      std::begin(StringsToBuckets), std::end(StringsToBuckets), NumStrings,
      [](const std::pair<uint32_t, uint32_t> &Candidate, uint32_t Count) {
        return Candidate.first < Count;
      });
  assert(Entry != std::end(StringsToBuckets));

  // With assertions compiled out, dereferencing the end pointer would read
  // past the table.  Use the last (largest) entry instead.
  if (Entry == std::end(StringsToBuckets))
    Entry = std::end(StringsToBuckets) - 1;
  return Entry->second;
}
13209467b48Spatrick 
calculateHashTableSize() const13309467b48Spatrick uint32_t PDBStringTableBuilder::calculateHashTableSize() const {
13409467b48Spatrick   uint32_t Size = sizeof(uint32_t); // Hash table begins with 4-byte size field.
13509467b48Spatrick   Size += sizeof(uint32_t) * computeBucketCount(Strings.size());
13609467b48Spatrick 
13709467b48Spatrick   return Size;
13809467b48Spatrick }
13909467b48Spatrick 
calculateSerializedSize() const14009467b48Spatrick uint32_t PDBStringTableBuilder::calculateSerializedSize() const {
14109467b48Spatrick   uint32_t Size = 0;
14209467b48Spatrick   Size += sizeof(PDBStringTableHeader);
14309467b48Spatrick   Size += Strings.calculateSerializedSize();
14409467b48Spatrick   Size += calculateHashTableSize();
14509467b48Spatrick   Size += sizeof(uint32_t); // The /names stream ends with the string count.
14609467b48Spatrick   return Size;
14709467b48Spatrick }
14809467b48Spatrick 
setStrings(const codeview::DebugStringTableSubsection & Strings)14909467b48Spatrick void PDBStringTableBuilder::setStrings(
15009467b48Spatrick     const codeview::DebugStringTableSubsection &Strings) {
15109467b48Spatrick   this->Strings = Strings;
15209467b48Spatrick }
15309467b48Spatrick 
writeHeader(BinaryStreamWriter & Writer) const15409467b48Spatrick Error PDBStringTableBuilder::writeHeader(BinaryStreamWriter &Writer) const {
15509467b48Spatrick   // Write a header
15609467b48Spatrick   PDBStringTableHeader H;
15709467b48Spatrick   H.Signature = PDBStringTableSignature;
15809467b48Spatrick   H.HashVersion = 1;
15909467b48Spatrick   H.ByteSize = Strings.calculateSerializedSize();
16009467b48Spatrick   if (auto EC = Writer.writeObject(H))
16109467b48Spatrick     return EC;
16209467b48Spatrick   assert(Writer.bytesRemaining() == 0);
16309467b48Spatrick   return Error::success();
16409467b48Spatrick }
16509467b48Spatrick 
writeStrings(BinaryStreamWriter & Writer) const16609467b48Spatrick Error PDBStringTableBuilder::writeStrings(BinaryStreamWriter &Writer) const {
16709467b48Spatrick   if (auto EC = Strings.commit(Writer))
16809467b48Spatrick     return EC;
16909467b48Spatrick 
17009467b48Spatrick   assert(Writer.bytesRemaining() == 0);
17109467b48Spatrick   return Error::success();
17209467b48Spatrick }
17309467b48Spatrick 
writeHashTable(BinaryStreamWriter & Writer) const17409467b48Spatrick Error PDBStringTableBuilder::writeHashTable(BinaryStreamWriter &Writer) const {
17509467b48Spatrick   // Write a hash table.
17609467b48Spatrick   uint32_t BucketCount = computeBucketCount(Strings.size());
17709467b48Spatrick   if (auto EC = Writer.writeInteger(BucketCount))
17809467b48Spatrick     return EC;
17909467b48Spatrick   std::vector<ulittle32_t> Buckets(BucketCount);
18009467b48Spatrick 
181*d415bd75Srobert   for (const auto &Pair : Strings) {
18209467b48Spatrick     StringRef S = Pair.getKey();
18309467b48Spatrick     uint32_t Offset = Pair.getValue();
18409467b48Spatrick     uint32_t Hash = hashStringV1(S);
18509467b48Spatrick 
18609467b48Spatrick     for (uint32_t I = 0; I != BucketCount; ++I) {
18709467b48Spatrick       uint32_t Slot = (Hash + I) % BucketCount;
18809467b48Spatrick       if (Buckets[Slot] != 0)
18909467b48Spatrick         continue;
19009467b48Spatrick       Buckets[Slot] = Offset;
19109467b48Spatrick       break;
19209467b48Spatrick     }
19309467b48Spatrick   }
19409467b48Spatrick 
19509467b48Spatrick   if (auto EC = Writer.writeArray(ArrayRef<ulittle32_t>(Buckets)))
19609467b48Spatrick     return EC;
19709467b48Spatrick 
19809467b48Spatrick   assert(Writer.bytesRemaining() == 0);
19909467b48Spatrick   return Error::success();
20009467b48Spatrick }
20109467b48Spatrick 
writeEpilogue(BinaryStreamWriter & Writer) const20209467b48Spatrick Error PDBStringTableBuilder::writeEpilogue(BinaryStreamWriter &Writer) const {
20309467b48Spatrick   if (auto EC = Writer.writeInteger<uint32_t>(Strings.size()))
20409467b48Spatrick     return EC;
20509467b48Spatrick   assert(Writer.bytesRemaining() == 0);
20609467b48Spatrick   return Error::success();
20709467b48Spatrick }
20809467b48Spatrick 
commit(BinaryStreamWriter & Writer) const20909467b48Spatrick Error PDBStringTableBuilder::commit(BinaryStreamWriter &Writer) const {
21009467b48Spatrick   BinaryStreamWriter SectionWriter;
21109467b48Spatrick 
21209467b48Spatrick   std::tie(SectionWriter, Writer) = Writer.split(sizeof(PDBStringTableHeader));
21309467b48Spatrick   if (auto EC = writeHeader(SectionWriter))
21409467b48Spatrick     return EC;
21509467b48Spatrick 
21609467b48Spatrick   std::tie(SectionWriter, Writer) =
21709467b48Spatrick       Writer.split(Strings.calculateSerializedSize());
21809467b48Spatrick   if (auto EC = writeStrings(SectionWriter))
21909467b48Spatrick     return EC;
22009467b48Spatrick 
22109467b48Spatrick   std::tie(SectionWriter, Writer) = Writer.split(calculateHashTableSize());
22209467b48Spatrick   if (auto EC = writeHashTable(SectionWriter))
22309467b48Spatrick     return EC;
22409467b48Spatrick 
22509467b48Spatrick   std::tie(SectionWriter, Writer) = Writer.split(sizeof(uint32_t));
22609467b48Spatrick   if (auto EC = writeEpilogue(SectionWriter))
22709467b48Spatrick     return EC;
22809467b48Spatrick 
22909467b48Spatrick   return Error::success();
23009467b48Spatrick }
231