xref: /llvm-project/llvm/lib/MC/StringTableBuilder.cpp (revision 2214ed8937a05b9457b167a84d6732a448fad27f)
1 //===-- StringTableBuilder.cpp - String table building utility ------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "llvm/MC/StringTableBuilder.h"
11 #include "llvm/ADT/STLExtras.h"
12 #include "llvm/Support/COFF.h"
13 #include "llvm/Support/Endian.h"
14 
15 #include <vector>
16 
17 using namespace llvm;
18 
19 StringTableBuilder::StringTableBuilder(Kind K) : K(K) {
20   // Account for leading bytes in table so that offsets returned from add are
21   // correct.
22   switch (K) {
23   case RAW:
24     Size = 0;
25     break;
26   case MachO:
27   case ELF:
28     Size = 1;
29     break;
30   case WinCOFF:
31     Size = 4;
32     break;
33   }
34 }
35 
36 typedef std::pair<StringRef, size_t> StringPair;
37 
38 // Returns the character at Pos from end of a string.
39 static int charTailAt(StringPair *P, size_t Pos) {
40   StringRef S = P->first;
41   if (Pos >= S.size())
42     return -1;
43   return (unsigned char)S[S.size() - Pos - 1];
44 }
45 
46 // Three-way radix quicksort. This is much faster than std::sort with strcmp
47 // because it does not compare characters that we already know the same.
48 static void multikey_qsort(StringPair **Begin, StringPair **End, int Pos) {
49 tailcall:
50   if (End - Begin <= 1)
51     return;
52 
53   // Partition items. Items in [Begin, P) are greater than the pivot,
54   // [P, Q) are the same as the pivot, and [Q, End) are less than the pivot.
55   int Pivot = charTailAt(*Begin, Pos);
56   StringPair **P = Begin;
57   StringPair **Q = End;
58   for (StringPair **R = Begin + 1; R < Q;) {
59     int C = charTailAt(*R, Pos);
60     if (C > Pivot)
61       std::swap(*P++, *R++);
62     else if (C < Pivot)
63       std::swap(*--Q, *R);
64     else
65       R++;
66   }
67 
68   multikey_qsort(Begin, P, Pos);
69   multikey_qsort(Q, End, Pos);
70   if (Pivot != -1) {
71     // qsort(P, Q, Pos + 1), but with tail call optimization.
72     Begin = P;
73     End = Q;
74     ++Pos;
75     goto tailcall;
76   }
77 }
78 
79 void StringTableBuilder::finalize() {
80   finalizeStringTable(/*Optimize=*/true);
81 }
82 
83 void StringTableBuilder::finalizeInOrder() {
84   finalizeStringTable(/*Optimize=*/false);
85 }
86 
87 void StringTableBuilder::finalizeStringTable(bool Optimize) {
88   typedef std::pair<StringRef, size_t> StringOffsetPair;
89   std::vector<StringOffsetPair *> Strings;
90   Strings.reserve(StringIndexMap.size());
91   for (StringOffsetPair &P : StringIndexMap)
92     Strings.push_back(&P);
93 
94   if (!Strings.empty()) {
95     // If we're optimizing, sort by name. If not, sort by previously assigned
96     // offset.
97     if (Optimize) {
98       multikey_qsort(&Strings[0], &Strings[0] + Strings.size(), 0);
99     } else {
100       std::sort(Strings.begin(), Strings.end(),
101                 [](const StringOffsetPair *LHS, const StringOffsetPair *RHS) {
102                   return LHS->second < RHS->second;
103                 });
104     }
105   }
106 
107   switch (K) {
108   case RAW:
109     break;
110   case ELF:
111   case MachO:
112     // Start the table with a NUL byte.
113     StringTable += '\x00';
114     break;
115   case WinCOFF:
116     // Make room to write the table size later.
117     StringTable.append(4, '\x00');
118     break;
119   }
120 
121   StringRef Previous;
122   for (StringOffsetPair *P : Strings) {
123     StringRef S = P->first;
124     if (K == WinCOFF)
125       assert(S.size() > COFF::NameSize && "Short string in COFF string table!");
126 
127     if (Optimize && Previous.endswith(S)) {
128       P->second = StringTable.size() - S.size() - (K != RAW);
129       continue;
130     }
131 
132     if (Optimize)
133       P->second = StringTable.size();
134     else
135       assert(P->second == StringTable.size() &&
136              "different strtab offset after finalization");
137 
138     StringTable += S;
139     if (K != RAW)
140       StringTable += '\x00';
141     Previous = S;
142   }
143 
144   switch (K) {
145   case RAW:
146   case ELF:
147     break;
148   case MachO:
149     // Pad to multiple of 4.
150     while (StringTable.size() % 4)
151       StringTable += '\x00';
152     break;
153   case WinCOFF:
154     // Write the table size in the first word.
155     assert(StringTable.size() <= std::numeric_limits<uint32_t>::max());
156     uint32_t Size = static_cast<uint32_t>(StringTable.size());
157     support::endian::write<uint32_t, support::little, support::unaligned>(
158         StringTable.data(), Size);
159     break;
160   }
161 
162   Size = StringTable.size();
163 }
164 
165 void StringTableBuilder::clear() {
166   StringTable.clear();
167   StringIndexMap.clear();
168 }
169 
170 size_t StringTableBuilder::getOffset(StringRef S) const {
171   assert(isFinalized());
172   auto I = StringIndexMap.find(S);
173   assert(I != StringIndexMap.end() && "String is not in table!");
174   return I->second;
175 }
176 
177 size_t StringTableBuilder::add(StringRef S) {
178   assert(!isFinalized());
179   auto P = StringIndexMap.insert(std::make_pair(S, Size));
180   if (P.second)
181     Size += S.size() + (K != RAW);
182   return P.first->second;
183 }
184