xref: /freebsd-src/contrib/llvm-project/llvm/lib/MC/StringTableBuilder.cpp (revision 5f757f3ff9144b609b3c433dfd370cc6bdc191ad)
10b57cec5SDimitry Andric //===- StringTableBuilder.cpp - String table building utility -------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric 
90b57cec5SDimitry Andric #include "llvm/MC/StringTableBuilder.h"
10fe6060f1SDimitry Andric #include "llvm/ADT/ArrayRef.h"
110b57cec5SDimitry Andric #include "llvm/ADT/CachedHashString.h"
120b57cec5SDimitry Andric #include "llvm/ADT/SmallString.h"
130b57cec5SDimitry Andric #include "llvm/ADT/StringRef.h"
140b57cec5SDimitry Andric #include "llvm/BinaryFormat/COFF.h"
150b57cec5SDimitry Andric #include "llvm/Support/Endian.h"
160b57cec5SDimitry Andric #include "llvm/Support/MathExtras.h"
170b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h"
180b57cec5SDimitry Andric #include <cassert>
190b57cec5SDimitry Andric #include <cstddef>
200b57cec5SDimitry Andric #include <cstdint>
210b57cec5SDimitry Andric #include <cstring>
220b57cec5SDimitry Andric #include <utility>
230b57cec5SDimitry Andric #include <vector>
240b57cec5SDimitry Andric 
250b57cec5SDimitry Andric using namespace llvm;
260b57cec5SDimitry Andric 
270b57cec5SDimitry Andric StringTableBuilder::~StringTableBuilder() = default;
280b57cec5SDimitry Andric 
initSize()290b57cec5SDimitry Andric void StringTableBuilder::initSize() {
300b57cec5SDimitry Andric   // Account for leading bytes in table so that offsets returned from add are
310b57cec5SDimitry Andric   // correct.
320b57cec5SDimitry Andric   switch (K) {
330b57cec5SDimitry Andric   case RAW:
340b57cec5SDimitry Andric   case DWARF:
350b57cec5SDimitry Andric     Size = 0;
360b57cec5SDimitry Andric     break;
37e8d8bef9SDimitry Andric   case MachOLinked:
38e8d8bef9SDimitry Andric   case MachO64Linked:
39e8d8bef9SDimitry Andric     Size = 2;
40e8d8bef9SDimitry Andric     break;
410b57cec5SDimitry Andric   case MachO:
42e8d8bef9SDimitry Andric   case MachO64:
430b57cec5SDimitry Andric   case ELF:
44*5f757f3fSDimitry Andric   case DXContainer:
450b57cec5SDimitry Andric     // Start the table with a NUL byte.
460b57cec5SDimitry Andric     Size = 1;
470b57cec5SDimitry Andric     break;
488bcb0991SDimitry Andric   case XCOFF:
490b57cec5SDimitry Andric   case WinCOFF:
500b57cec5SDimitry Andric     // Make room to write the table size later.
510b57cec5SDimitry Andric     Size = 4;
520b57cec5SDimitry Andric     break;
530b57cec5SDimitry Andric   }
540b57cec5SDimitry Andric }
550b57cec5SDimitry Andric 
StringTableBuilder(Kind K,Align Alignment)56bdd1243dSDimitry Andric StringTableBuilder::StringTableBuilder(Kind K, Align Alignment)
570b57cec5SDimitry Andric     : K(K), Alignment(Alignment) {
580b57cec5SDimitry Andric   initSize();
590b57cec5SDimitry Andric }
600b57cec5SDimitry Andric 
write(raw_ostream & OS) const610b57cec5SDimitry Andric void StringTableBuilder::write(raw_ostream &OS) const {
620b57cec5SDimitry Andric   assert(isFinalized());
630b57cec5SDimitry Andric   SmallString<0> Data;
640b57cec5SDimitry Andric   Data.resize(getSize());
650b57cec5SDimitry Andric   write((uint8_t *)Data.data());
660b57cec5SDimitry Andric   OS << Data;
670b57cec5SDimitry Andric }
680b57cec5SDimitry Andric 
690b57cec5SDimitry Andric using StringPair = std::pair<CachedHashStringRef, size_t>;
700b57cec5SDimitry Andric 
write(uint8_t * Buf) const710b57cec5SDimitry Andric void StringTableBuilder::write(uint8_t *Buf) const {
720b57cec5SDimitry Andric   assert(isFinalized());
730b57cec5SDimitry Andric   for (const StringPair &P : StringIndexMap) {
740b57cec5SDimitry Andric     StringRef Data = P.first.val();
750b57cec5SDimitry Andric     if (!Data.empty())
760b57cec5SDimitry Andric       memcpy(Buf + P.second, Data.data(), Data.size());
770b57cec5SDimitry Andric   }
788bcb0991SDimitry Andric   // The COFF formats store the size of the string table in the first 4 bytes.
798bcb0991SDimitry Andric   // For Windows, the format is little-endian; for AIX, it is big-endian.
808bcb0991SDimitry Andric   if (K == WinCOFF)
810b57cec5SDimitry Andric     support::endian::write32le(Buf, Size);
828bcb0991SDimitry Andric   else if (K == XCOFF)
838bcb0991SDimitry Andric     support::endian::write32be(Buf, Size);
840b57cec5SDimitry Andric }
850b57cec5SDimitry Andric 
860b57cec5SDimitry Andric // Returns the character at Pos from end of a string.
charTailAt(StringPair * P,size_t Pos)870b57cec5SDimitry Andric static int charTailAt(StringPair *P, size_t Pos) {
880b57cec5SDimitry Andric   StringRef S = P->first.val();
890b57cec5SDimitry Andric   if (Pos >= S.size())
900b57cec5SDimitry Andric     return -1;
910b57cec5SDimitry Andric   return (unsigned char)S[S.size() - Pos - 1];
920b57cec5SDimitry Andric }
930b57cec5SDimitry Andric 
940b57cec5SDimitry Andric // Three-way radix quicksort. This is much faster than std::sort with strcmp
950b57cec5SDimitry Andric // because it does not compare characters that we already know the same.
multikeySort(MutableArrayRef<StringPair * > Vec,int Pos)960b57cec5SDimitry Andric static void multikeySort(MutableArrayRef<StringPair *> Vec, int Pos) {
970b57cec5SDimitry Andric tailcall:
980b57cec5SDimitry Andric   if (Vec.size() <= 1)
990b57cec5SDimitry Andric     return;
1000b57cec5SDimitry Andric 
1010b57cec5SDimitry Andric   // Partition items so that items in [0, I) are greater than the pivot,
1020b57cec5SDimitry Andric   // [I, J) are the same as the pivot, and [J, Vec.size()) are less than
1030b57cec5SDimitry Andric   // the pivot.
1040b57cec5SDimitry Andric   int Pivot = charTailAt(Vec[0], Pos);
1050b57cec5SDimitry Andric   size_t I = 0;
1060b57cec5SDimitry Andric   size_t J = Vec.size();
1070b57cec5SDimitry Andric   for (size_t K = 1; K < J;) {
1080b57cec5SDimitry Andric     int C = charTailAt(Vec[K], Pos);
1090b57cec5SDimitry Andric     if (C > Pivot)
1100b57cec5SDimitry Andric       std::swap(Vec[I++], Vec[K++]);
1110b57cec5SDimitry Andric     else if (C < Pivot)
1120b57cec5SDimitry Andric       std::swap(Vec[--J], Vec[K]);
1130b57cec5SDimitry Andric     else
1140b57cec5SDimitry Andric       K++;
1150b57cec5SDimitry Andric   }
1160b57cec5SDimitry Andric 
1170b57cec5SDimitry Andric   multikeySort(Vec.slice(0, I), Pos);
1180b57cec5SDimitry Andric   multikeySort(Vec.slice(J), Pos);
1190b57cec5SDimitry Andric 
1200b57cec5SDimitry Andric   // multikeySort(Vec.slice(I, J - I), Pos + 1), but with
1210b57cec5SDimitry Andric   // tail call optimization.
1220b57cec5SDimitry Andric   if (Pivot != -1) {
1230b57cec5SDimitry Andric     Vec = Vec.slice(I, J - I);
1240b57cec5SDimitry Andric     ++Pos;
1250b57cec5SDimitry Andric     goto tailcall;
1260b57cec5SDimitry Andric   }
1270b57cec5SDimitry Andric }
1280b57cec5SDimitry Andric 
finalize()1290b57cec5SDimitry Andric void StringTableBuilder::finalize() {
1300b57cec5SDimitry Andric   assert(K != DWARF);
1310b57cec5SDimitry Andric   finalizeStringTable(/*Optimize=*/true);
1320b57cec5SDimitry Andric }
1330b57cec5SDimitry Andric 
finalizeInOrder()1340b57cec5SDimitry Andric void StringTableBuilder::finalizeInOrder() {
1350b57cec5SDimitry Andric   finalizeStringTable(/*Optimize=*/false);
1360b57cec5SDimitry Andric }
1370b57cec5SDimitry Andric 
finalizeStringTable(bool Optimize)1380b57cec5SDimitry Andric void StringTableBuilder::finalizeStringTable(bool Optimize) {
1390b57cec5SDimitry Andric   Finalized = true;
1400b57cec5SDimitry Andric 
1410b57cec5SDimitry Andric   if (Optimize) {
1420b57cec5SDimitry Andric     std::vector<StringPair *> Strings;
1430b57cec5SDimitry Andric     Strings.reserve(StringIndexMap.size());
1440b57cec5SDimitry Andric     for (StringPair &P : StringIndexMap)
1450b57cec5SDimitry Andric       Strings.push_back(&P);
1460b57cec5SDimitry Andric 
1470b57cec5SDimitry Andric     multikeySort(Strings, 0);
1480b57cec5SDimitry Andric     initSize();
1490b57cec5SDimitry Andric 
1500b57cec5SDimitry Andric     StringRef Previous;
1510b57cec5SDimitry Andric     for (StringPair *P : Strings) {
1520b57cec5SDimitry Andric       StringRef S = P->first.val();
153*5f757f3fSDimitry Andric       if (Previous.ends_with(S)) {
1540b57cec5SDimitry Andric         size_t Pos = Size - S.size() - (K != RAW);
155bdd1243dSDimitry Andric         if (isAligned(Alignment, Pos)) {
1560b57cec5SDimitry Andric           P->second = Pos;
1570b57cec5SDimitry Andric           continue;
1580b57cec5SDimitry Andric         }
1590b57cec5SDimitry Andric       }
1600b57cec5SDimitry Andric 
1610b57cec5SDimitry Andric       Size = alignTo(Size, Alignment);
1620b57cec5SDimitry Andric       P->second = Size;
1630b57cec5SDimitry Andric 
1640b57cec5SDimitry Andric       Size += S.size();
1650b57cec5SDimitry Andric       if (K != RAW)
1660b57cec5SDimitry Andric         ++Size;
1670b57cec5SDimitry Andric       Previous = S;
1680b57cec5SDimitry Andric     }
1690b57cec5SDimitry Andric   }
1700b57cec5SDimitry Andric 
171*5f757f3fSDimitry Andric   if (K == MachO || K == MachOLinked || K == DXContainer)
1720b57cec5SDimitry Andric     Size = alignTo(Size, 4); // Pad to multiple of 4.
173e8d8bef9SDimitry Andric   if (K == MachO64 || K == MachO64Linked)
174e8d8bef9SDimitry Andric     Size = alignTo(Size, 8); // Pad to multiple of 8.
175e8d8bef9SDimitry Andric 
176e8d8bef9SDimitry Andric   // According to ld64 the string table of a final linked Mach-O binary starts
177e8d8bef9SDimitry Andric   // with " ", i.e. the first byte is ' ' and the second byte is zero. In
178e8d8bef9SDimitry Andric   // 'initSize()' we reserved the first two bytes for holding this string.
179e8d8bef9SDimitry Andric   if (K == MachOLinked || K == MachO64Linked)
180e8d8bef9SDimitry Andric     StringIndexMap[CachedHashStringRef(" ")] = 0;
1810b57cec5SDimitry Andric 
1820b57cec5SDimitry Andric   // The first byte in an ELF string table must be null, according to the ELF
1830b57cec5SDimitry Andric   // specification. In 'initSize()' we reserved the first byte to hold null for
1840b57cec5SDimitry Andric   // this purpose and here we actually add the string to allow 'getOffset()' to
1850b57cec5SDimitry Andric   // be called on an empty string.
1860b57cec5SDimitry Andric   if (K == ELF)
1870b57cec5SDimitry Andric     StringIndexMap[CachedHashStringRef("")] = 0;
1880b57cec5SDimitry Andric }
1890b57cec5SDimitry Andric 
clear()1900b57cec5SDimitry Andric void StringTableBuilder::clear() {
1910b57cec5SDimitry Andric   Finalized = false;
1920b57cec5SDimitry Andric   StringIndexMap.clear();
1930b57cec5SDimitry Andric }
1940b57cec5SDimitry Andric 
getOffset(CachedHashStringRef S) const1950b57cec5SDimitry Andric size_t StringTableBuilder::getOffset(CachedHashStringRef S) const {
1960b57cec5SDimitry Andric   assert(isFinalized());
1970b57cec5SDimitry Andric   auto I = StringIndexMap.find(S);
1980b57cec5SDimitry Andric   assert(I != StringIndexMap.end() && "String is not in table!");
1990b57cec5SDimitry Andric   return I->second;
2000b57cec5SDimitry Andric }
2010b57cec5SDimitry Andric 
add(CachedHashStringRef S)2020b57cec5SDimitry Andric size_t StringTableBuilder::add(CachedHashStringRef S) {
2030b57cec5SDimitry Andric   if (K == WinCOFF)
2040b57cec5SDimitry Andric     assert(S.size() > COFF::NameSize && "Short string in COFF string table!");
2050b57cec5SDimitry Andric 
2060b57cec5SDimitry Andric   assert(!isFinalized());
2070b57cec5SDimitry Andric   auto P = StringIndexMap.insert(std::make_pair(S, 0));
2080b57cec5SDimitry Andric   if (P.second) {
2090b57cec5SDimitry Andric     size_t Start = alignTo(Size, Alignment);
2100b57cec5SDimitry Andric     P.first->second = Start;
2110b57cec5SDimitry Andric     Size = Start + S.size() + (K != RAW);
2120b57cec5SDimitry Andric   }
2130b57cec5SDimitry Andric   return P.first->second;
2140b57cec5SDimitry Andric }
215