xref: /llvm-project/llvm/lib/MC/StringTableBuilder.cpp (revision fda3dc9266b59bc278ef2c3ef438b29f86e25912)
1 //===-- StringTableBuilder.cpp - String table building utility ------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "llvm/MC/StringTableBuilder.h"
11 #include "llvm/ADT/STLExtras.h"
12 #include "llvm/Support/COFF.h"
13 #include "llvm/Support/Endian.h"
14 
15 #include <vector>
16 
17 using namespace llvm;
18 
19 StringTableBuilder::StringTableBuilder(Kind K, unsigned Alignment)
20     : K(K), Alignment(Alignment) {
21   // Account for leading bytes in table so that offsets returned from add are
22   // correct.
23   switch (K) {
24   case RAW:
25     Size = 0;
26     break;
27   case MachO:
28   case ELF:
29     Size = 1;
30     break;
31   case WinCOFF:
32     Size = 4;
33     break;
34   }
35 }
36 
37 typedef std::pair<CachedHash<StringRef>, size_t> StringPair;
38 
39 // Returns the character at Pos from end of a string.
40 static int charTailAt(StringPair *P, size_t Pos) {
41   StringRef S = P->first.Val;
42   if (Pos >= S.size())
43     return -1;
44   return (unsigned char)S[S.size() - Pos - 1];
45 }
46 
47 // Three-way radix quicksort. This is much faster than std::sort with strcmp
48 // because it does not compare characters that we already know the same.
49 static void multikey_qsort(StringPair **Begin, StringPair **End, int Pos) {
50 tailcall:
51   if (End - Begin <= 1)
52     return;
53 
54   // Partition items. Items in [Begin, P) are greater than the pivot,
55   // [P, Q) are the same as the pivot, and [Q, End) are less than the pivot.
56   int Pivot = charTailAt(*Begin, Pos);
57   StringPair **P = Begin;
58   StringPair **Q = End;
59   for (StringPair **R = Begin + 1; R < Q;) {
60     int C = charTailAt(*R, Pos);
61     if (C > Pivot)
62       std::swap(*P++, *R++);
63     else if (C < Pivot)
64       std::swap(*--Q, *R);
65     else
66       R++;
67   }
68 
69   multikey_qsort(Begin, P, Pos);
70   multikey_qsort(Q, End, Pos);
71   if (Pivot != -1) {
72     // qsort(P, Q, Pos + 1), but with tail call optimization.
73     Begin = P;
74     End = Q;
75     ++Pos;
76     goto tailcall;
77   }
78 }
79 
80 void StringTableBuilder::finalize() {
81   finalizeStringTable(/*Optimize=*/true);
82 }
83 
84 void StringTableBuilder::finalizeInOrder() {
85   finalizeStringTable(/*Optimize=*/false);
86 }
87 
88 void StringTableBuilder::finalizeStringTable(bool Optimize) {
89   std::vector<StringPair *> Strings;
90   Strings.reserve(StringIndexMap.size());
91   for (StringPair &P : StringIndexMap)
92     Strings.push_back(&P);
93 
94   if (!Strings.empty()) {
95     // If we're optimizing, sort by name. If not, sort by previously assigned
96     // offset.
97     if (Optimize) {
98       multikey_qsort(&Strings[0], &Strings[0] + Strings.size(), 0);
99     } else {
100       std::sort(Strings.begin(), Strings.end(),
101                 [](const StringPair *LHS, const StringPair *RHS) {
102                   return LHS->second < RHS->second;
103                 });
104     }
105   }
106 
107   switch (K) {
108   case RAW:
109     break;
110   case ELF:
111   case MachO:
112     // Start the table with a NUL byte.
113     StringTable += '\x00';
114     break;
115   case WinCOFF:
116     // Make room to write the table size later.
117     StringTable.append(4, '\x00');
118     break;
119   }
120 
121   StringRef Previous;
122   for (StringPair *P : Strings) {
123     StringRef S = P->first.Val;
124     if (K == WinCOFF)
125       assert(S.size() > COFF::NameSize && "Short string in COFF string table!");
126 
127     if (Optimize && Previous.endswith(S)) {
128       size_t Pos = StringTable.size() - S.size() - (K != RAW);
129       if (!(Pos & (Alignment - 1))) {
130         P->second = Pos;
131         continue;
132       }
133     }
134 
135     if (Optimize) {
136       size_t Start = alignTo(StringTable.size(), Alignment);
137       P->second = Start;
138       StringTable.append(Start - StringTable.size(), '\0');
139     } else {
140       assert(P->second == StringTable.size() &&
141              "different strtab offset after finalization");
142     }
143 
144     StringTable += S;
145     if (K != RAW)
146       StringTable += '\x00';
147     Previous = S;
148   }
149 
150   switch (K) {
151   case RAW:
152   case ELF:
153     break;
154   case MachO:
155     // Pad to multiple of 4.
156     while (StringTable.size() % 4)
157       StringTable += '\x00';
158     break;
159   case WinCOFF:
160     // Write the table size in the first word.
161     assert(StringTable.size() <= std::numeric_limits<uint32_t>::max());
162     uint32_t Size = static_cast<uint32_t>(StringTable.size());
163     support::endian::write<uint32_t, support::little, support::unaligned>(
164         StringTable.data(), Size);
165     break;
166   }
167 
168   Size = StringTable.size();
169 }
170 
171 void StringTableBuilder::clear() {
172   StringTable.clear();
173   StringIndexMap.clear();
174 }
175 
176 size_t StringTableBuilder::getOffset(StringRef S) const {
177   assert(isFinalized());
178   auto I = StringIndexMap.find(S);
179   assert(I != StringIndexMap.end() && "String is not in table!");
180   return I->second;
181 }
182 
183 size_t StringTableBuilder::add(StringRef S) {
184   assert(!isFinalized());
185   size_t Start = alignTo(Size, Alignment);
186   auto P = StringIndexMap.insert(std::make_pair(S, Start));
187   if (P.second)
188     Size = Start + S.size() + (K != RAW);
189   return P.first->second;
190 }
191