xref: /freebsd-src/contrib/llvm-project/llvm/lib/MC/StringTableBuilder.cpp (revision 0b57cec536236d46e3dba9bd041533462f33dbb7)
1*0b57cec5SDimitry Andric //===- StringTableBuilder.cpp - String table building utility -------------===//
2*0b57cec5SDimitry Andric //
3*0b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*0b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5*0b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*0b57cec5SDimitry Andric //
7*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
8*0b57cec5SDimitry Andric 
9*0b57cec5SDimitry Andric #include "llvm/MC/StringTableBuilder.h"
10*0b57cec5SDimitry Andric #include "llvm/ADT/CachedHashString.h"
11*0b57cec5SDimitry Andric #include "llvm/ADT/SmallString.h"
12*0b57cec5SDimitry Andric #include "llvm/ADT/StringRef.h"
13*0b57cec5SDimitry Andric #include "llvm/BinaryFormat/COFF.h"
14*0b57cec5SDimitry Andric #include "llvm/Support/Endian.h"
15*0b57cec5SDimitry Andric #include "llvm/Support/MathExtras.h"
16*0b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h"
17*0b57cec5SDimitry Andric #include <cassert>
18*0b57cec5SDimitry Andric #include <cstddef>
19*0b57cec5SDimitry Andric #include <cstdint>
20*0b57cec5SDimitry Andric #include <cstring>
21*0b57cec5SDimitry Andric #include <utility>
22*0b57cec5SDimitry Andric #include <vector>
23*0b57cec5SDimitry Andric 
24*0b57cec5SDimitry Andric using namespace llvm;
25*0b57cec5SDimitry Andric 
26*0b57cec5SDimitry Andric StringTableBuilder::~StringTableBuilder() = default;
27*0b57cec5SDimitry Andric 
28*0b57cec5SDimitry Andric void StringTableBuilder::initSize() {
29*0b57cec5SDimitry Andric   // Account for leading bytes in table so that offsets returned from add are
30*0b57cec5SDimitry Andric   // correct.
31*0b57cec5SDimitry Andric   switch (K) {
32*0b57cec5SDimitry Andric   case RAW:
33*0b57cec5SDimitry Andric   case DWARF:
34*0b57cec5SDimitry Andric     Size = 0;
35*0b57cec5SDimitry Andric     break;
36*0b57cec5SDimitry Andric   case MachO:
37*0b57cec5SDimitry Andric   case ELF:
38*0b57cec5SDimitry Andric     // Start the table with a NUL byte.
39*0b57cec5SDimitry Andric     Size = 1;
40*0b57cec5SDimitry Andric     break;
41*0b57cec5SDimitry Andric   case WinCOFF:
42*0b57cec5SDimitry Andric     // Make room to write the table size later.
43*0b57cec5SDimitry Andric     Size = 4;
44*0b57cec5SDimitry Andric     break;
45*0b57cec5SDimitry Andric   }
46*0b57cec5SDimitry Andric }
47*0b57cec5SDimitry Andric 
48*0b57cec5SDimitry Andric StringTableBuilder::StringTableBuilder(Kind K, unsigned Alignment)
49*0b57cec5SDimitry Andric     : K(K), Alignment(Alignment) {
50*0b57cec5SDimitry Andric   initSize();
51*0b57cec5SDimitry Andric }
52*0b57cec5SDimitry Andric 
53*0b57cec5SDimitry Andric void StringTableBuilder::write(raw_ostream &OS) const {
54*0b57cec5SDimitry Andric   assert(isFinalized());
55*0b57cec5SDimitry Andric   SmallString<0> Data;
56*0b57cec5SDimitry Andric   Data.resize(getSize());
57*0b57cec5SDimitry Andric   write((uint8_t *)Data.data());
58*0b57cec5SDimitry Andric   OS << Data;
59*0b57cec5SDimitry Andric }
60*0b57cec5SDimitry Andric 
61*0b57cec5SDimitry Andric using StringPair = std::pair<CachedHashStringRef, size_t>;
62*0b57cec5SDimitry Andric 
63*0b57cec5SDimitry Andric void StringTableBuilder::write(uint8_t *Buf) const {
64*0b57cec5SDimitry Andric   assert(isFinalized());
65*0b57cec5SDimitry Andric   for (const StringPair &P : StringIndexMap) {
66*0b57cec5SDimitry Andric     StringRef Data = P.first.val();
67*0b57cec5SDimitry Andric     if (!Data.empty())
68*0b57cec5SDimitry Andric       memcpy(Buf + P.second, Data.data(), Data.size());
69*0b57cec5SDimitry Andric   }
70*0b57cec5SDimitry Andric   if (K != WinCOFF)
71*0b57cec5SDimitry Andric     return;
72*0b57cec5SDimitry Andric   support::endian::write32le(Buf, Size);
73*0b57cec5SDimitry Andric }
74*0b57cec5SDimitry Andric 
75*0b57cec5SDimitry Andric // Returns the character at Pos from end of a string.
76*0b57cec5SDimitry Andric static int charTailAt(StringPair *P, size_t Pos) {
77*0b57cec5SDimitry Andric   StringRef S = P->first.val();
78*0b57cec5SDimitry Andric   if (Pos >= S.size())
79*0b57cec5SDimitry Andric     return -1;
80*0b57cec5SDimitry Andric   return (unsigned char)S[S.size() - Pos - 1];
81*0b57cec5SDimitry Andric }
82*0b57cec5SDimitry Andric 
83*0b57cec5SDimitry Andric // Three-way radix quicksort. This is much faster than std::sort with strcmp
84*0b57cec5SDimitry Andric // because it does not compare characters that we already know the same.
85*0b57cec5SDimitry Andric static void multikeySort(MutableArrayRef<StringPair *> Vec, int Pos) {
86*0b57cec5SDimitry Andric tailcall:
87*0b57cec5SDimitry Andric   if (Vec.size() <= 1)
88*0b57cec5SDimitry Andric     return;
89*0b57cec5SDimitry Andric 
90*0b57cec5SDimitry Andric   // Partition items so that items in [0, I) are greater than the pivot,
91*0b57cec5SDimitry Andric   // [I, J) are the same as the pivot, and [J, Vec.size()) are less than
92*0b57cec5SDimitry Andric   // the pivot.
93*0b57cec5SDimitry Andric   int Pivot = charTailAt(Vec[0], Pos);
94*0b57cec5SDimitry Andric   size_t I = 0;
95*0b57cec5SDimitry Andric   size_t J = Vec.size();
96*0b57cec5SDimitry Andric   for (size_t K = 1; K < J;) {
97*0b57cec5SDimitry Andric     int C = charTailAt(Vec[K], Pos);
98*0b57cec5SDimitry Andric     if (C > Pivot)
99*0b57cec5SDimitry Andric       std::swap(Vec[I++], Vec[K++]);
100*0b57cec5SDimitry Andric     else if (C < Pivot)
101*0b57cec5SDimitry Andric       std::swap(Vec[--J], Vec[K]);
102*0b57cec5SDimitry Andric     else
103*0b57cec5SDimitry Andric       K++;
104*0b57cec5SDimitry Andric   }
105*0b57cec5SDimitry Andric 
106*0b57cec5SDimitry Andric   multikeySort(Vec.slice(0, I), Pos);
107*0b57cec5SDimitry Andric   multikeySort(Vec.slice(J), Pos);
108*0b57cec5SDimitry Andric 
109*0b57cec5SDimitry Andric   // multikeySort(Vec.slice(I, J - I), Pos + 1), but with
110*0b57cec5SDimitry Andric   // tail call optimization.
111*0b57cec5SDimitry Andric   if (Pivot != -1) {
112*0b57cec5SDimitry Andric     Vec = Vec.slice(I, J - I);
113*0b57cec5SDimitry Andric     ++Pos;
114*0b57cec5SDimitry Andric     goto tailcall;
115*0b57cec5SDimitry Andric   }
116*0b57cec5SDimitry Andric }
117*0b57cec5SDimitry Andric 
118*0b57cec5SDimitry Andric void StringTableBuilder::finalize() {
119*0b57cec5SDimitry Andric   assert(K != DWARF);
120*0b57cec5SDimitry Andric   finalizeStringTable(/*Optimize=*/true);
121*0b57cec5SDimitry Andric }
122*0b57cec5SDimitry Andric 
123*0b57cec5SDimitry Andric void StringTableBuilder::finalizeInOrder() {
124*0b57cec5SDimitry Andric   finalizeStringTable(/*Optimize=*/false);
125*0b57cec5SDimitry Andric }
126*0b57cec5SDimitry Andric 
127*0b57cec5SDimitry Andric void StringTableBuilder::finalizeStringTable(bool Optimize) {
128*0b57cec5SDimitry Andric   Finalized = true;
129*0b57cec5SDimitry Andric 
130*0b57cec5SDimitry Andric   if (Optimize) {
131*0b57cec5SDimitry Andric     std::vector<StringPair *> Strings;
132*0b57cec5SDimitry Andric     Strings.reserve(StringIndexMap.size());
133*0b57cec5SDimitry Andric     for (StringPair &P : StringIndexMap)
134*0b57cec5SDimitry Andric       Strings.push_back(&P);
135*0b57cec5SDimitry Andric 
136*0b57cec5SDimitry Andric     multikeySort(Strings, 0);
137*0b57cec5SDimitry Andric     initSize();
138*0b57cec5SDimitry Andric 
139*0b57cec5SDimitry Andric     StringRef Previous;
140*0b57cec5SDimitry Andric     for (StringPair *P : Strings) {
141*0b57cec5SDimitry Andric       StringRef S = P->first.val();
142*0b57cec5SDimitry Andric       if (Previous.endswith(S)) {
143*0b57cec5SDimitry Andric         size_t Pos = Size - S.size() - (K != RAW);
144*0b57cec5SDimitry Andric         if (!(Pos & (Alignment - 1))) {
145*0b57cec5SDimitry Andric           P->second = Pos;
146*0b57cec5SDimitry Andric           continue;
147*0b57cec5SDimitry Andric         }
148*0b57cec5SDimitry Andric       }
149*0b57cec5SDimitry Andric 
150*0b57cec5SDimitry Andric       Size = alignTo(Size, Alignment);
151*0b57cec5SDimitry Andric       P->second = Size;
152*0b57cec5SDimitry Andric 
153*0b57cec5SDimitry Andric       Size += S.size();
154*0b57cec5SDimitry Andric       if (K != RAW)
155*0b57cec5SDimitry Andric         ++Size;
156*0b57cec5SDimitry Andric       Previous = S;
157*0b57cec5SDimitry Andric     }
158*0b57cec5SDimitry Andric   }
159*0b57cec5SDimitry Andric 
160*0b57cec5SDimitry Andric   if (K == MachO)
161*0b57cec5SDimitry Andric     Size = alignTo(Size, 4); // Pad to multiple of 4.
162*0b57cec5SDimitry Andric 
163*0b57cec5SDimitry Andric   // The first byte in an ELF string table must be null, according to the ELF
164*0b57cec5SDimitry Andric   // specification. In 'initSize()' we reserved the first byte to hold null for
165*0b57cec5SDimitry Andric   // this purpose and here we actually add the string to allow 'getOffset()' to
166*0b57cec5SDimitry Andric   // be called on an empty string.
167*0b57cec5SDimitry Andric   if (K == ELF)
168*0b57cec5SDimitry Andric     StringIndexMap[CachedHashStringRef("")] = 0;
169*0b57cec5SDimitry Andric }
170*0b57cec5SDimitry Andric 
171*0b57cec5SDimitry Andric void StringTableBuilder::clear() {
172*0b57cec5SDimitry Andric   Finalized = false;
173*0b57cec5SDimitry Andric   StringIndexMap.clear();
174*0b57cec5SDimitry Andric }
175*0b57cec5SDimitry Andric 
176*0b57cec5SDimitry Andric size_t StringTableBuilder::getOffset(CachedHashStringRef S) const {
177*0b57cec5SDimitry Andric   assert(isFinalized());
178*0b57cec5SDimitry Andric   auto I = StringIndexMap.find(S);
179*0b57cec5SDimitry Andric   assert(I != StringIndexMap.end() && "String is not in table!");
180*0b57cec5SDimitry Andric   return I->second;
181*0b57cec5SDimitry Andric }
182*0b57cec5SDimitry Andric 
183*0b57cec5SDimitry Andric size_t StringTableBuilder::add(CachedHashStringRef S) {
184*0b57cec5SDimitry Andric   if (K == WinCOFF)
185*0b57cec5SDimitry Andric     assert(S.size() > COFF::NameSize && "Short string in COFF string table!");
186*0b57cec5SDimitry Andric 
187*0b57cec5SDimitry Andric   assert(!isFinalized());
188*0b57cec5SDimitry Andric   auto P = StringIndexMap.insert(std::make_pair(S, 0));
189*0b57cec5SDimitry Andric   if (P.second) {
190*0b57cec5SDimitry Andric     size_t Start = alignTo(Size, Alignment);
191*0b57cec5SDimitry Andric     P.first->second = Start;
192*0b57cec5SDimitry Andric     Size = Start + S.size() + (K != RAW);
193*0b57cec5SDimitry Andric   }
194*0b57cec5SDimitry Andric   return P.first->second;
195*0b57cec5SDimitry Andric }
196