10b57cec5SDimitry Andric //===- StringTableBuilder.cpp - String table building utility -------------===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric 90b57cec5SDimitry Andric #include "llvm/MC/StringTableBuilder.h" 100b57cec5SDimitry Andric #include "llvm/ADT/CachedHashString.h" 110b57cec5SDimitry Andric #include "llvm/ADT/SmallString.h" 120b57cec5SDimitry Andric #include "llvm/ADT/StringRef.h" 130b57cec5SDimitry Andric #include "llvm/BinaryFormat/COFF.h" 140b57cec5SDimitry Andric #include "llvm/Support/Endian.h" 150b57cec5SDimitry Andric #include "llvm/Support/MathExtras.h" 160b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h" 170b57cec5SDimitry Andric #include <cassert> 180b57cec5SDimitry Andric #include <cstddef> 190b57cec5SDimitry Andric #include <cstdint> 200b57cec5SDimitry Andric #include <cstring> 210b57cec5SDimitry Andric #include <utility> 220b57cec5SDimitry Andric #include <vector> 230b57cec5SDimitry Andric 240b57cec5SDimitry Andric using namespace llvm; 250b57cec5SDimitry Andric 260b57cec5SDimitry Andric StringTableBuilder::~StringTableBuilder() = default; 270b57cec5SDimitry Andric 280b57cec5SDimitry Andric void StringTableBuilder::initSize() { 290b57cec5SDimitry Andric // Account for leading bytes in table so that offsets returned from add are 300b57cec5SDimitry Andric // correct. 310b57cec5SDimitry Andric switch (K) { 320b57cec5SDimitry Andric case RAW: 330b57cec5SDimitry Andric case DWARF: 340b57cec5SDimitry Andric Size = 0; 350b57cec5SDimitry Andric break; 36*e8d8bef9SDimitry Andric case MachOLinked: 37*e8d8bef9SDimitry Andric case MachO64Linked: 38*e8d8bef9SDimitry Andric Size = 2; 39*e8d8bef9SDimitry Andric break; 400b57cec5SDimitry Andric case MachO: 41*e8d8bef9SDimitry Andric case MachO64: 420b57cec5SDimitry Andric case ELF: 430b57cec5SDimitry Andric // Start the table with a NUL byte. 440b57cec5SDimitry Andric Size = 1; 450b57cec5SDimitry Andric break; 468bcb0991SDimitry Andric case XCOFF: 470b57cec5SDimitry Andric case WinCOFF: 480b57cec5SDimitry Andric // Make room to write the table size later. 490b57cec5SDimitry Andric Size = 4; 500b57cec5SDimitry Andric break; 510b57cec5SDimitry Andric } 520b57cec5SDimitry Andric } 530b57cec5SDimitry Andric 540b57cec5SDimitry Andric StringTableBuilder::StringTableBuilder(Kind K, unsigned Alignment) 550b57cec5SDimitry Andric : K(K), Alignment(Alignment) { 560b57cec5SDimitry Andric initSize(); 570b57cec5SDimitry Andric } 580b57cec5SDimitry Andric 590b57cec5SDimitry Andric void StringTableBuilder::write(raw_ostream &OS) const { 600b57cec5SDimitry Andric assert(isFinalized()); 610b57cec5SDimitry Andric SmallString<0> Data; 620b57cec5SDimitry Andric Data.resize(getSize()); 630b57cec5SDimitry Andric write((uint8_t *)Data.data()); 640b57cec5SDimitry Andric OS << Data; 650b57cec5SDimitry Andric } 660b57cec5SDimitry Andric 670b57cec5SDimitry Andric using StringPair = std::pair<CachedHashStringRef, size_t>; 680b57cec5SDimitry Andric 690b57cec5SDimitry Andric void StringTableBuilder::write(uint8_t *Buf) const { 700b57cec5SDimitry Andric assert(isFinalized()); 710b57cec5SDimitry Andric for (const StringPair &P : StringIndexMap) { 720b57cec5SDimitry Andric StringRef Data = P.first.val(); 730b57cec5SDimitry Andric if (!Data.empty()) 740b57cec5SDimitry Andric memcpy(Buf + P.second, Data.data(), Data.size()); 750b57cec5SDimitry Andric } 768bcb0991SDimitry Andric // The COFF formats store the size of the string table in the first 4 bytes. 778bcb0991SDimitry Andric // For Windows, the format is little-endian; for AIX, it is big-endian. 788bcb0991SDimitry Andric if (K == WinCOFF) 790b57cec5SDimitry Andric support::endian::write32le(Buf, Size); 808bcb0991SDimitry Andric else if (K == XCOFF) 818bcb0991SDimitry Andric support::endian::write32be(Buf, Size); 820b57cec5SDimitry Andric } 830b57cec5SDimitry Andric 840b57cec5SDimitry Andric // Returns the character at Pos from end of a string. 850b57cec5SDimitry Andric static int charTailAt(StringPair *P, size_t Pos) { 860b57cec5SDimitry Andric StringRef S = P->first.val(); 870b57cec5SDimitry Andric if (Pos >= S.size()) 880b57cec5SDimitry Andric return -1; 890b57cec5SDimitry Andric return (unsigned char)S[S.size() - Pos - 1]; 900b57cec5SDimitry Andric } 910b57cec5SDimitry Andric 920b57cec5SDimitry Andric // Three-way radix quicksort. This is much faster than std::sort with strcmp 930b57cec5SDimitry Andric // because it does not compare characters that we already know the same. 940b57cec5SDimitry Andric static void multikeySort(MutableArrayRef<StringPair *> Vec, int Pos) { 950b57cec5SDimitry Andric tailcall: 960b57cec5SDimitry Andric if (Vec.size() <= 1) 970b57cec5SDimitry Andric return; 980b57cec5SDimitry Andric 990b57cec5SDimitry Andric // Partition items so that items in [0, I) are greater than the pivot, 1000b57cec5SDimitry Andric // [I, J) are the same as the pivot, and [J, Vec.size()) are less than 1010b57cec5SDimitry Andric // the pivot. 1020b57cec5SDimitry Andric int Pivot = charTailAt(Vec[0], Pos); 1030b57cec5SDimitry Andric size_t I = 0; 1040b57cec5SDimitry Andric size_t J = Vec.size(); 1050b57cec5SDimitry Andric for (size_t K = 1; K < J;) { 1060b57cec5SDimitry Andric int C = charTailAt(Vec[K], Pos); 1070b57cec5SDimitry Andric if (C > Pivot) 1080b57cec5SDimitry Andric std::swap(Vec[I++], Vec[K++]); 1090b57cec5SDimitry Andric else if (C < Pivot) 1100b57cec5SDimitry Andric std::swap(Vec[--J], Vec[K]); 1110b57cec5SDimitry Andric else 1120b57cec5SDimitry Andric K++; 1130b57cec5SDimitry Andric } 1140b57cec5SDimitry Andric 1150b57cec5SDimitry Andric multikeySort(Vec.slice(0, I), Pos); 1160b57cec5SDimitry Andric multikeySort(Vec.slice(J), Pos); 1170b57cec5SDimitry Andric 1180b57cec5SDimitry Andric // multikeySort(Vec.slice(I, J - I), Pos + 1), but with 1190b57cec5SDimitry Andric // tail call optimization. 1200b57cec5SDimitry Andric if (Pivot != -1) { 1210b57cec5SDimitry Andric Vec = Vec.slice(I, J - I); 1220b57cec5SDimitry Andric ++Pos; 1230b57cec5SDimitry Andric goto tailcall; 1240b57cec5SDimitry Andric } 1250b57cec5SDimitry Andric } 1260b57cec5SDimitry Andric 1270b57cec5SDimitry Andric void StringTableBuilder::finalize() { 1280b57cec5SDimitry Andric assert(K != DWARF); 1290b57cec5SDimitry Andric finalizeStringTable(/*Optimize=*/true); 1300b57cec5SDimitry Andric } 1310b57cec5SDimitry Andric 1320b57cec5SDimitry Andric void StringTableBuilder::finalizeInOrder() { 1330b57cec5SDimitry Andric finalizeStringTable(/*Optimize=*/false); 1340b57cec5SDimitry Andric } 1350b57cec5SDimitry Andric 1360b57cec5SDimitry Andric void StringTableBuilder::finalizeStringTable(bool Optimize) { 1370b57cec5SDimitry Andric Finalized = true; 1380b57cec5SDimitry Andric 1390b57cec5SDimitry Andric if (Optimize) { 1400b57cec5SDimitry Andric std::vector<StringPair *> Strings; 1410b57cec5SDimitry Andric Strings.reserve(StringIndexMap.size()); 1420b57cec5SDimitry Andric for (StringPair &P : StringIndexMap) 1430b57cec5SDimitry Andric Strings.push_back(&P); 1440b57cec5SDimitry Andric 1450b57cec5SDimitry Andric multikeySort(Strings, 0); 1460b57cec5SDimitry Andric initSize(); 1470b57cec5SDimitry Andric 1480b57cec5SDimitry Andric StringRef Previous; 1490b57cec5SDimitry Andric for (StringPair *P : Strings) { 1500b57cec5SDimitry Andric StringRef S = P->first.val(); 1510b57cec5SDimitry Andric if (Previous.endswith(S)) { 1520b57cec5SDimitry Andric size_t Pos = Size - S.size() - (K != RAW); 1530b57cec5SDimitry Andric if (!(Pos & (Alignment - 1))) { 1540b57cec5SDimitry Andric P->second = Pos; 1550b57cec5SDimitry Andric continue; 1560b57cec5SDimitry Andric } 1570b57cec5SDimitry Andric } 1580b57cec5SDimitry Andric 1590b57cec5SDimitry Andric Size = alignTo(Size, Alignment); 1600b57cec5SDimitry Andric P->second = Size; 1610b57cec5SDimitry Andric 1620b57cec5SDimitry Andric Size += S.size(); 1630b57cec5SDimitry Andric if (K != RAW) 1640b57cec5SDimitry Andric ++Size; 1650b57cec5SDimitry Andric Previous = S; 1660b57cec5SDimitry Andric } 1670b57cec5SDimitry Andric } 1680b57cec5SDimitry Andric 169*e8d8bef9SDimitry Andric if (K == MachO || K == MachOLinked) 1700b57cec5SDimitry Andric Size = alignTo(Size, 4); // Pad to multiple of 4. 171*e8d8bef9SDimitry Andric if (K == MachO64 || K == MachO64Linked) 172*e8d8bef9SDimitry Andric Size = alignTo(Size, 8); // Pad to multiple of 8. 173*e8d8bef9SDimitry Andric 174*e8d8bef9SDimitry Andric // According to ld64 the string table of a final linked Mach-O binary starts 175*e8d8bef9SDimitry Andric // with " ", i.e. the first byte is ' ' and the second byte is zero. In 176*e8d8bef9SDimitry Andric // 'initSize()' we reserved the first two bytes for holding this string. 177*e8d8bef9SDimitry Andric if (K == MachOLinked || K == MachO64Linked) 178*e8d8bef9SDimitry Andric StringIndexMap[CachedHashStringRef(" ")] = 0; 1790b57cec5SDimitry Andric 1800b57cec5SDimitry Andric // The first byte in an ELF string table must be null, according to the ELF 1810b57cec5SDimitry Andric // specification. In 'initSize()' we reserved the first byte to hold null for 1820b57cec5SDimitry Andric // this purpose and here we actually add the string to allow 'getOffset()' to 1830b57cec5SDimitry Andric // be called on an empty string. 1840b57cec5SDimitry Andric if (K == ELF) 1850b57cec5SDimitry Andric StringIndexMap[CachedHashStringRef("")] = 0; 1860b57cec5SDimitry Andric } 1870b57cec5SDimitry Andric 1880b57cec5SDimitry Andric void StringTableBuilder::clear() { 1890b57cec5SDimitry Andric Finalized = false; 1900b57cec5SDimitry Andric StringIndexMap.clear(); 1910b57cec5SDimitry Andric } 1920b57cec5SDimitry Andric 1930b57cec5SDimitry Andric size_t StringTableBuilder::getOffset(CachedHashStringRef S) const { 1940b57cec5SDimitry Andric assert(isFinalized()); 1950b57cec5SDimitry Andric auto I = StringIndexMap.find(S); 1960b57cec5SDimitry Andric assert(I != StringIndexMap.end() && "String is not in table!"); 1970b57cec5SDimitry Andric return I->second; 1980b57cec5SDimitry Andric } 1990b57cec5SDimitry Andric 2000b57cec5SDimitry Andric size_t StringTableBuilder::add(CachedHashStringRef S) { 2010b57cec5SDimitry Andric if (K == WinCOFF) 2020b57cec5SDimitry Andric assert(S.size() > COFF::NameSize && "Short string in COFF string table!"); 2030b57cec5SDimitry Andric 2040b57cec5SDimitry Andric assert(!isFinalized()); 2050b57cec5SDimitry Andric auto P = StringIndexMap.insert(std::make_pair(S, 0)); 2060b57cec5SDimitry Andric if (P.second) { 2070b57cec5SDimitry Andric size_t Start = alignTo(Size, Alignment); 2080b57cec5SDimitry Andric P.first->second = Start; 2090b57cec5SDimitry Andric Size = Start + S.size() + (K != RAW); 2100b57cec5SDimitry Andric } 2110b57cec5SDimitry Andric return P.first->second; 2120b57cec5SDimitry Andric } 213