1*0b57cec5SDimitry Andric //===- StringTableBuilder.cpp - String table building utility -------------===// 2*0b57cec5SDimitry Andric // 3*0b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*0b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5*0b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*0b57cec5SDimitry Andric // 7*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 8*0b57cec5SDimitry Andric 9*0b57cec5SDimitry Andric #include "llvm/MC/StringTableBuilder.h" 10*0b57cec5SDimitry Andric #include "llvm/ADT/CachedHashString.h" 11*0b57cec5SDimitry Andric #include "llvm/ADT/SmallString.h" 12*0b57cec5SDimitry Andric #include "llvm/ADT/StringRef.h" 13*0b57cec5SDimitry Andric #include "llvm/BinaryFormat/COFF.h" 14*0b57cec5SDimitry Andric #include "llvm/Support/Endian.h" 15*0b57cec5SDimitry Andric #include "llvm/Support/MathExtras.h" 16*0b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h" 17*0b57cec5SDimitry Andric #include <cassert> 18*0b57cec5SDimitry Andric #include <cstddef> 19*0b57cec5SDimitry Andric #include <cstdint> 20*0b57cec5SDimitry Andric #include <cstring> 21*0b57cec5SDimitry Andric #include <utility> 22*0b57cec5SDimitry Andric #include <vector> 23*0b57cec5SDimitry Andric 24*0b57cec5SDimitry Andric using namespace llvm; 25*0b57cec5SDimitry Andric 26*0b57cec5SDimitry Andric StringTableBuilder::~StringTableBuilder() = default; 27*0b57cec5SDimitry Andric 28*0b57cec5SDimitry Andric void StringTableBuilder::initSize() { 29*0b57cec5SDimitry Andric // Account for leading bytes in table so that offsets returned from add are 30*0b57cec5SDimitry Andric // correct. 31*0b57cec5SDimitry Andric switch (K) { 32*0b57cec5SDimitry Andric case RAW: 33*0b57cec5SDimitry Andric case DWARF: 34*0b57cec5SDimitry Andric Size = 0; 35*0b57cec5SDimitry Andric break; 36*0b57cec5SDimitry Andric case MachO: 37*0b57cec5SDimitry Andric case ELF: 38*0b57cec5SDimitry Andric // Start the table with a NUL byte. 39*0b57cec5SDimitry Andric Size = 1; 40*0b57cec5SDimitry Andric break; 41*0b57cec5SDimitry Andric case WinCOFF: 42*0b57cec5SDimitry Andric // Make room to write the table size later. 43*0b57cec5SDimitry Andric Size = 4; 44*0b57cec5SDimitry Andric break; 45*0b57cec5SDimitry Andric } 46*0b57cec5SDimitry Andric } 47*0b57cec5SDimitry Andric 48*0b57cec5SDimitry Andric StringTableBuilder::StringTableBuilder(Kind K, unsigned Alignment) 49*0b57cec5SDimitry Andric : K(K), Alignment(Alignment) { 50*0b57cec5SDimitry Andric initSize(); 51*0b57cec5SDimitry Andric } 52*0b57cec5SDimitry Andric 53*0b57cec5SDimitry Andric void StringTableBuilder::write(raw_ostream &OS) const { 54*0b57cec5SDimitry Andric assert(isFinalized()); 55*0b57cec5SDimitry Andric SmallString<0> Data; 56*0b57cec5SDimitry Andric Data.resize(getSize()); 57*0b57cec5SDimitry Andric write((uint8_t *)Data.data()); 58*0b57cec5SDimitry Andric OS << Data; 59*0b57cec5SDimitry Andric } 60*0b57cec5SDimitry Andric 61*0b57cec5SDimitry Andric using StringPair = std::pair<CachedHashStringRef, size_t>; 62*0b57cec5SDimitry Andric 63*0b57cec5SDimitry Andric void StringTableBuilder::write(uint8_t *Buf) const { 64*0b57cec5SDimitry Andric assert(isFinalized()); 65*0b57cec5SDimitry Andric for (const StringPair &P : StringIndexMap) { 66*0b57cec5SDimitry Andric StringRef Data = P.first.val(); 67*0b57cec5SDimitry Andric if (!Data.empty()) 68*0b57cec5SDimitry Andric memcpy(Buf + P.second, Data.data(), Data.size()); 69*0b57cec5SDimitry Andric } 70*0b57cec5SDimitry Andric if (K != WinCOFF) 71*0b57cec5SDimitry Andric return; 72*0b57cec5SDimitry Andric support::endian::write32le(Buf, Size); 73*0b57cec5SDimitry Andric } 74*0b57cec5SDimitry Andric 75*0b57cec5SDimitry Andric // Returns the character at Pos from end of a string. 76*0b57cec5SDimitry Andric static int charTailAt(StringPair *P, size_t Pos) { 77*0b57cec5SDimitry Andric StringRef S = P->first.val(); 78*0b57cec5SDimitry Andric if (Pos >= S.size()) 79*0b57cec5SDimitry Andric return -1; 80*0b57cec5SDimitry Andric return (unsigned char)S[S.size() - Pos - 1]; 81*0b57cec5SDimitry Andric } 82*0b57cec5SDimitry Andric 83*0b57cec5SDimitry Andric // Three-way radix quicksort. This is much faster than std::sort with strcmp 84*0b57cec5SDimitry Andric // because it does not compare characters that we already know the same. 85*0b57cec5SDimitry Andric static void multikeySort(MutableArrayRef<StringPair *> Vec, int Pos) { 86*0b57cec5SDimitry Andric tailcall: 87*0b57cec5SDimitry Andric if (Vec.size() <= 1) 88*0b57cec5SDimitry Andric return; 89*0b57cec5SDimitry Andric 90*0b57cec5SDimitry Andric // Partition items so that items in [0, I) are greater than the pivot, 91*0b57cec5SDimitry Andric // [I, J) are the same as the pivot, and [J, Vec.size()) are less than 92*0b57cec5SDimitry Andric // the pivot. 93*0b57cec5SDimitry Andric int Pivot = charTailAt(Vec[0], Pos); 94*0b57cec5SDimitry Andric size_t I = 0; 95*0b57cec5SDimitry Andric size_t J = Vec.size(); 96*0b57cec5SDimitry Andric for (size_t K = 1; K < J;) { 97*0b57cec5SDimitry Andric int C = charTailAt(Vec[K], Pos); 98*0b57cec5SDimitry Andric if (C > Pivot) 99*0b57cec5SDimitry Andric std::swap(Vec[I++], Vec[K++]); 100*0b57cec5SDimitry Andric else if (C < Pivot) 101*0b57cec5SDimitry Andric std::swap(Vec[--J], Vec[K]); 102*0b57cec5SDimitry Andric else 103*0b57cec5SDimitry Andric K++; 104*0b57cec5SDimitry Andric } 105*0b57cec5SDimitry Andric 106*0b57cec5SDimitry Andric multikeySort(Vec.slice(0, I), Pos); 107*0b57cec5SDimitry Andric multikeySort(Vec.slice(J), Pos); 108*0b57cec5SDimitry Andric 109*0b57cec5SDimitry Andric // multikeySort(Vec.slice(I, J - I), Pos + 1), but with 110*0b57cec5SDimitry Andric // tail call optimization. 111*0b57cec5SDimitry Andric if (Pivot != -1) { 112*0b57cec5SDimitry Andric Vec = Vec.slice(I, J - I); 113*0b57cec5SDimitry Andric ++Pos; 114*0b57cec5SDimitry Andric goto tailcall; 115*0b57cec5SDimitry Andric } 116*0b57cec5SDimitry Andric } 117*0b57cec5SDimitry Andric 118*0b57cec5SDimitry Andric void StringTableBuilder::finalize() { 119*0b57cec5SDimitry Andric assert(K != DWARF); 120*0b57cec5SDimitry Andric finalizeStringTable(/*Optimize=*/true); 121*0b57cec5SDimitry Andric } 122*0b57cec5SDimitry Andric 123*0b57cec5SDimitry Andric void StringTableBuilder::finalizeInOrder() { 124*0b57cec5SDimitry Andric finalizeStringTable(/*Optimize=*/false); 125*0b57cec5SDimitry Andric } 126*0b57cec5SDimitry Andric 127*0b57cec5SDimitry Andric void StringTableBuilder::finalizeStringTable(bool Optimize) { 128*0b57cec5SDimitry Andric Finalized = true; 129*0b57cec5SDimitry Andric 130*0b57cec5SDimitry Andric if (Optimize) { 131*0b57cec5SDimitry Andric std::vector<StringPair *> Strings; 132*0b57cec5SDimitry Andric Strings.reserve(StringIndexMap.size()); 133*0b57cec5SDimitry Andric for (StringPair &P : StringIndexMap) 134*0b57cec5SDimitry Andric Strings.push_back(&P); 135*0b57cec5SDimitry Andric 136*0b57cec5SDimitry Andric multikeySort(Strings, 0); 137*0b57cec5SDimitry Andric initSize(); 138*0b57cec5SDimitry Andric 139*0b57cec5SDimitry Andric StringRef Previous; 140*0b57cec5SDimitry Andric for (StringPair *P : Strings) { 141*0b57cec5SDimitry Andric StringRef S = P->first.val(); 142*0b57cec5SDimitry Andric if (Previous.endswith(S)) { 143*0b57cec5SDimitry Andric size_t Pos = Size - S.size() - (K != RAW); 144*0b57cec5SDimitry Andric if (!(Pos & (Alignment - 1))) { 145*0b57cec5SDimitry Andric P->second = Pos; 146*0b57cec5SDimitry Andric continue; 147*0b57cec5SDimitry Andric } 148*0b57cec5SDimitry Andric } 149*0b57cec5SDimitry Andric 150*0b57cec5SDimitry Andric Size = alignTo(Size, Alignment); 151*0b57cec5SDimitry Andric P->second = Size; 152*0b57cec5SDimitry Andric 153*0b57cec5SDimitry Andric Size += S.size(); 154*0b57cec5SDimitry Andric if (K != RAW) 155*0b57cec5SDimitry Andric ++Size; 156*0b57cec5SDimitry Andric Previous = S; 157*0b57cec5SDimitry Andric } 158*0b57cec5SDimitry Andric } 159*0b57cec5SDimitry Andric 160*0b57cec5SDimitry Andric if (K == MachO) 161*0b57cec5SDimitry Andric Size = alignTo(Size, 4); // Pad to multiple of 4. 162*0b57cec5SDimitry Andric 163*0b57cec5SDimitry Andric // The first byte in an ELF string table must be null, according to the ELF 164*0b57cec5SDimitry Andric // specification. In 'initSize()' we reserved the first byte to hold null for 165*0b57cec5SDimitry Andric // this purpose and here we actually add the string to allow 'getOffset()' to 166*0b57cec5SDimitry Andric // be called on an empty string. 167*0b57cec5SDimitry Andric if (K == ELF) 168*0b57cec5SDimitry Andric StringIndexMap[CachedHashStringRef("")] = 0; 169*0b57cec5SDimitry Andric } 170*0b57cec5SDimitry Andric 171*0b57cec5SDimitry Andric void StringTableBuilder::clear() { 172*0b57cec5SDimitry Andric Finalized = false; 173*0b57cec5SDimitry Andric StringIndexMap.clear(); 174*0b57cec5SDimitry Andric } 175*0b57cec5SDimitry Andric 176*0b57cec5SDimitry Andric size_t StringTableBuilder::getOffset(CachedHashStringRef S) const { 177*0b57cec5SDimitry Andric assert(isFinalized()); 178*0b57cec5SDimitry Andric auto I = StringIndexMap.find(S); 179*0b57cec5SDimitry Andric assert(I != StringIndexMap.end() && "String is not in table!"); 180*0b57cec5SDimitry Andric return I->second; 181*0b57cec5SDimitry Andric } 182*0b57cec5SDimitry Andric 183*0b57cec5SDimitry Andric size_t StringTableBuilder::add(CachedHashStringRef S) { 184*0b57cec5SDimitry Andric if (K == WinCOFF) 185*0b57cec5SDimitry Andric assert(S.size() > COFF::NameSize && "Short string in COFF string table!"); 186*0b57cec5SDimitry Andric 187*0b57cec5SDimitry Andric assert(!isFinalized()); 188*0b57cec5SDimitry Andric auto P = StringIndexMap.insert(std::make_pair(S, 0)); 189*0b57cec5SDimitry Andric if (P.second) { 190*0b57cec5SDimitry Andric size_t Start = alignTo(Size, Alignment); 191*0b57cec5SDimitry Andric P.first->second = Start; 192*0b57cec5SDimitry Andric Size = Start + S.size() + (K != RAW); 193*0b57cec5SDimitry Andric } 194*0b57cec5SDimitry Andric return P.first->second; 195*0b57cec5SDimitry Andric } 196