xref: /openbsd-src/gnu/llvm/lld/wasm/InputChunks.h (revision dfe94b169149f14cc1aee2cf6dad58a8d9a1860c)
1ece8a530Spatrick //===- InputChunks.h --------------------------------------------*- C++ -*-===//
2ece8a530Spatrick //
3ece8a530Spatrick // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4ece8a530Spatrick // See https://llvm.org/LICENSE.txt for license information.
5ece8a530Spatrick // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6ece8a530Spatrick //
7ece8a530Spatrick //===----------------------------------------------------------------------===//
8ece8a530Spatrick //
// An InputChunk represents an indivisible opaque region of an input wasm file,
// i.e. a single wasm data segment or a single wasm function.
//
// They are written directly to the mmap'd output file after which relocations
// are applied.  Because each chunk is independent they can be written in
// parallel.
//
// Chunks are also the unit on which garbage collection (--gc-sections)
// operates.
17ece8a530Spatrick //
18ece8a530Spatrick //===----------------------------------------------------------------------===//
19ece8a530Spatrick 
20ece8a530Spatrick #ifndef LLD_WASM_INPUT_CHUNKS_H
21ece8a530Spatrick #define LLD_WASM_INPUT_CHUNKS_H
22ece8a530Spatrick 
#include "Config.h"
#include "InputFiles.h"
#include "lld/Common/ErrorHandler.h"
#include "lld/Common/LLVM.h"
#include "llvm/ADT/CachedHashString.h"
#include "llvm/MC/StringTableBuilder.h"
#include "llvm/Object/Wasm.h"
#include <optional>
#include <utility>
31ece8a530Spatrick 
32ece8a530Spatrick namespace lld {
33ece8a530Spatrick namespace wasm {
34ece8a530Spatrick 
35ece8a530Spatrick class ObjFile;
36ece8a530Spatrick class OutputSegment;
37ece8a530Spatrick class OutputSection;
38ece8a530Spatrick 
39ece8a530Spatrick class InputChunk {
40ece8a530Spatrick public:
411cf9926bSpatrick   enum Kind {
421cf9926bSpatrick     DataSegment,
431cf9926bSpatrick     Merge,
441cf9926bSpatrick     MergedChunk,
451cf9926bSpatrick     Function,
461cf9926bSpatrick     SyntheticFunction,
471cf9926bSpatrick     Section,
481cf9926bSpatrick   };
49ece8a530Spatrick 
501cf9926bSpatrick   StringRef name;
511cf9926bSpatrick   StringRef debugName;
52ece8a530Spatrick 
kind()531cf9926bSpatrick   Kind kind() const { return (Kind)sectionKind; }
54ece8a530Spatrick 
551cf9926bSpatrick   uint32_t getSize() const;
561cf9926bSpatrick   uint32_t getInputSize() const;
571cf9926bSpatrick 
581cf9926bSpatrick   void writeTo(uint8_t *buf) const;
591cf9926bSpatrick   void relocate(uint8_t *buf) const;
60ece8a530Spatrick 
getRelocations()61ece8a530Spatrick   ArrayRef<WasmRelocation> getRelocations() const { return relocations; }
setRelocations(ArrayRef<WasmRelocation> rs)62ece8a530Spatrick   void setRelocations(ArrayRef<WasmRelocation> rs) { relocations = rs; }
63ece8a530Spatrick 
641cf9926bSpatrick   // Translate an offset into the input chunk to an offset in the output
651cf9926bSpatrick   // section.
661cf9926bSpatrick   uint64_t getOffset(uint64_t offset) const;
671cf9926bSpatrick   // Translate an offset into the input chunk into an offset into the output
681cf9926bSpatrick   // chunk.  For data segments (InputSegment) this will return and offset into
691cf9926bSpatrick   // the output segment.  For MergeInputChunk, this will return an offset into
701cf9926bSpatrick   // the parent merged chunk.  For other chunk types this is no-op and we just
711cf9926bSpatrick   // return unmodified offset.
721cf9926bSpatrick   uint64_t getChunkOffset(uint64_t offset) const;
731cf9926bSpatrick   uint64_t getVA(uint64_t offset = 0) const;
741cf9926bSpatrick 
getComdat()751cf9926bSpatrick   uint32_t getComdat() const { return comdat; }
76ece8a530Spatrick   StringRef getComdatName() const;
getInputSectionOffset()771cf9926bSpatrick   uint32_t getInputSectionOffset() const { return inputSectionOffset; }
78ece8a530Spatrick 
getNumRelocations()79ece8a530Spatrick   size_t getNumRelocations() const { return relocations.size(); }
80ece8a530Spatrick   void writeRelocations(llvm::raw_ostream &os) const;
811cf9926bSpatrick   void generateRelocationCode(raw_ostream &os) const;
821cf9926bSpatrick 
isTLS()83*dfe94b16Srobert   bool isTLS() const { return flags & llvm::wasm::WASM_SEG_FLAG_TLS; }
84ece8a530Spatrick 
85ece8a530Spatrick   ObjFile *file;
861cf9926bSpatrick   OutputSection *outputSec = nullptr;
871cf9926bSpatrick   uint32_t comdat = UINT32_MAX;
881cf9926bSpatrick   uint32_t inputSectionOffset = 0;
891cf9926bSpatrick   uint32_t alignment;
901cf9926bSpatrick   uint32_t flags;
911cf9926bSpatrick 
921cf9926bSpatrick   // Only applies to data segments.
931cf9926bSpatrick   uint32_t outputSegmentOffset = 0;
941cf9926bSpatrick   const OutputSegment *outputSeg = nullptr;
951cf9926bSpatrick 
961cf9926bSpatrick   // After assignAddresses is called, this represents the offset from
971cf9926bSpatrick   // the beginning of the output section this chunk was assigned to.
981cf9926bSpatrick   int32_t outSecOff = 0;
991cf9926bSpatrick 
1001cf9926bSpatrick   uint8_t sectionKind : 3;
101ece8a530Spatrick 
102ece8a530Spatrick   // Signals that the section is part of the output.  The garbage collector,
103ece8a530Spatrick   // and COMDAT handling can set a sections' Live bit.
104ece8a530Spatrick   // If GC is disabled, all sections start out as live by default.
105ece8a530Spatrick   unsigned live : 1;
106ece8a530Spatrick 
107ece8a530Spatrick   // Signals the chunk was discarded by COMDAT handling.
108ece8a530Spatrick   unsigned discarded : 1;
109ece8a530Spatrick 
110ece8a530Spatrick protected:
1111cf9926bSpatrick   InputChunk(ObjFile *f, Kind k, StringRef name, uint32_t alignment = 0,
1121cf9926bSpatrick              uint32_t flags = 0)
name(name)1131cf9926bSpatrick       : name(name), file(f), alignment(alignment), flags(flags), sectionKind(k),
1141cf9926bSpatrick         live(!config->gcSections), discarded(false) {}
data()1151cf9926bSpatrick   ArrayRef<uint8_t> data() const { return rawData; }
1161cf9926bSpatrick   uint64_t getTombstone() const;
117ece8a530Spatrick 
118ece8a530Spatrick   ArrayRef<WasmRelocation> relocations;
1191cf9926bSpatrick   ArrayRef<uint8_t> rawData;
120ece8a530Spatrick };
121ece8a530Spatrick 
122ece8a530Spatrick // Represents a WebAssembly data segment which can be included as part of
123ece8a530Spatrick // an output data segments.  Note that in WebAssembly, unlike ELF and other
124ece8a530Spatrick // formats, used the term "data segment" to refer to the continuous regions of
125ece8a530Spatrick // memory that make on the data section. See:
126ece8a530Spatrick // https://webassembly.github.io/spec/syntax/modules.html#syntax-data
127ece8a530Spatrick //
128ece8a530Spatrick // For example, by default, clang will produce a separate data section for
129ece8a530Spatrick // each global variable.
130ece8a530Spatrick class InputSegment : public InputChunk {
131ece8a530Spatrick public:
InputSegment(const WasmSegment & seg,ObjFile * f)132ece8a530Spatrick   InputSegment(const WasmSegment &seg, ObjFile *f)
1331cf9926bSpatrick       : InputChunk(f, InputChunk::DataSegment, seg.Data.Name,
1341cf9926bSpatrick                    seg.Data.Alignment, seg.Data.LinkingFlags),
1351cf9926bSpatrick         segment(seg) {
1361cf9926bSpatrick     rawData = segment.Data.Content;
1371cf9926bSpatrick     comdat = segment.Data.Comdat;
1381cf9926bSpatrick     inputSectionOffset = segment.SectionOffset;
1391cf9926bSpatrick   }
140ece8a530Spatrick 
classof(const InputChunk * c)141ece8a530Spatrick   static bool classof(const InputChunk *c) { return c->kind() == DataSegment; }
142ece8a530Spatrick 
1431cf9926bSpatrick protected:
1441cf9926bSpatrick   const WasmSegment &segment;
1451cf9926bSpatrick };
146ece8a530Spatrick 
1471cf9926bSpatrick class SyntheticMergedChunk;
1481cf9926bSpatrick 
1491cf9926bSpatrick // Merge segment handling copied from lld/ELF/InputSection.h.  Keep in sync
1501cf9926bSpatrick // where possible.
1511cf9926bSpatrick 
1521cf9926bSpatrick // SectionPiece represents a piece of splittable segment contents.
1531cf9926bSpatrick // We allocate a lot of these and binary search on them. This means that they
1541cf9926bSpatrick // have to be as compact as possible, which is why we don't store the size (can
1551cf9926bSpatrick // be found by looking at the next one).
1561cf9926bSpatrick struct SectionPiece {
SectionPieceSectionPiece1571cf9926bSpatrick   SectionPiece(size_t off, uint32_t hash, bool live)
1581cf9926bSpatrick       : inputOff(off), live(live || !config->gcSections), hash(hash >> 1) {}
1591cf9926bSpatrick 
1601cf9926bSpatrick   uint32_t inputOff;
1611cf9926bSpatrick   uint32_t live : 1;
1621cf9926bSpatrick   uint32_t hash : 31;
1631cf9926bSpatrick   uint64_t outputOff = 0;
1641cf9926bSpatrick };
1651cf9926bSpatrick 
1661cf9926bSpatrick static_assert(sizeof(SectionPiece) == 16, "SectionPiece is too big");
1671cf9926bSpatrick 
1681cf9926bSpatrick // This corresponds segments marked as WASM_SEG_FLAG_STRINGS.
1691cf9926bSpatrick class MergeInputChunk : public InputChunk {
1701cf9926bSpatrick public:
MergeInputChunk(const WasmSegment & seg,ObjFile * f)1711cf9926bSpatrick   MergeInputChunk(const WasmSegment &seg, ObjFile *f)
1721cf9926bSpatrick       : InputChunk(f, Merge, seg.Data.Name, seg.Data.Alignment,
1731cf9926bSpatrick                    seg.Data.LinkingFlags) {
1741cf9926bSpatrick     rawData = seg.Data.Content;
1751cf9926bSpatrick     comdat = seg.Data.Comdat;
1761cf9926bSpatrick     inputSectionOffset = seg.SectionOffset;
177ece8a530Spatrick   }
178ece8a530Spatrick 
MergeInputChunk(const WasmSection & s,ObjFile * f)1791cf9926bSpatrick   MergeInputChunk(const WasmSection &s, ObjFile *f)
1801cf9926bSpatrick       : InputChunk(f, Merge, s.Name, 0, llvm::wasm::WASM_SEG_FLAG_STRINGS) {
1811cf9926bSpatrick     assert(s.Type == llvm::wasm::WASM_SEC_CUSTOM);
1821cf9926bSpatrick     comdat = s.Comdat;
1831cf9926bSpatrick     rawData = s.Content;
1841cf9926bSpatrick   }
1851cf9926bSpatrick 
classof(const InputChunk * s)1861cf9926bSpatrick   static bool classof(const InputChunk *s) { return s->kind() == Merge; }
1871cf9926bSpatrick   void splitIntoPieces();
1881cf9926bSpatrick 
1891cf9926bSpatrick   // Translate an offset in the input section to an offset in the parent
1901cf9926bSpatrick   // MergeSyntheticSection.
1911cf9926bSpatrick   uint64_t getParentOffset(uint64_t offset) const;
1921cf9926bSpatrick 
1931cf9926bSpatrick   // Splittable sections are handled as a sequence of data
1941cf9926bSpatrick   // rather than a single large blob of data.
1951cf9926bSpatrick   std::vector<SectionPiece> pieces;
1961cf9926bSpatrick 
1971cf9926bSpatrick   // Returns I'th piece's data. This function is very hot when
1981cf9926bSpatrick   // string merging is enabled, so we want to inline.
1991cf9926bSpatrick   LLVM_ATTRIBUTE_ALWAYS_INLINE
getData(size_t i)2001cf9926bSpatrick   llvm::CachedHashStringRef getData(size_t i) const {
2011cf9926bSpatrick     size_t begin = pieces[i].inputOff;
2021cf9926bSpatrick     size_t end =
2031cf9926bSpatrick         (pieces.size() - 1 == i) ? data().size() : pieces[i + 1].inputOff;
2041cf9926bSpatrick     return {toStringRef(data().slice(begin, end - begin)), pieces[i].hash};
2051cf9926bSpatrick   }
2061cf9926bSpatrick 
2071cf9926bSpatrick   // Returns the SectionPiece at a given input section offset.
2081cf9926bSpatrick   SectionPiece *getSectionPiece(uint64_t offset);
getSectionPiece(uint64_t offset)2091cf9926bSpatrick   const SectionPiece *getSectionPiece(uint64_t offset) const {
2101cf9926bSpatrick     return const_cast<MergeInputChunk *>(this)->getSectionPiece(offset);
2111cf9926bSpatrick   }
2121cf9926bSpatrick 
2131cf9926bSpatrick   SyntheticMergedChunk *parent = nullptr;
2141cf9926bSpatrick 
2151cf9926bSpatrick private:
2161cf9926bSpatrick   void splitStrings(ArrayRef<uint8_t> a);
2171cf9926bSpatrick };
2181cf9926bSpatrick 
2191cf9926bSpatrick // SyntheticMergedChunk is a class that allows us to put mergeable
2201cf9926bSpatrick // sections with different attributes in a single output sections. To do that we
2211cf9926bSpatrick // put them into SyntheticMergedChunk synthetic input sections which are
2221cf9926bSpatrick // attached to regular output sections.
2231cf9926bSpatrick class SyntheticMergedChunk : public InputChunk {
2241cf9926bSpatrick public:
SyntheticMergedChunk(StringRef name,uint32_t alignment,uint32_t flags)2251cf9926bSpatrick   SyntheticMergedChunk(StringRef name, uint32_t alignment, uint32_t flags)
2261cf9926bSpatrick       : InputChunk(nullptr, InputChunk::MergedChunk, name, alignment, flags),
227*dfe94b16Srobert         builder(llvm::StringTableBuilder::RAW, llvm::Align(1ULL << alignment)) {
228*dfe94b16Srobert   }
2291cf9926bSpatrick 
classof(const InputChunk * c)2301cf9926bSpatrick   static bool classof(const InputChunk *c) {
2311cf9926bSpatrick     return c->kind() == InputChunk::MergedChunk;
2321cf9926bSpatrick   }
2331cf9926bSpatrick 
addMergeChunk(MergeInputChunk * ms)2341cf9926bSpatrick   void addMergeChunk(MergeInputChunk *ms) {
2351cf9926bSpatrick     comdat = ms->getComdat();
2361cf9926bSpatrick     ms->parent = this;
2371cf9926bSpatrick     chunks.push_back(ms);
2381cf9926bSpatrick   }
2391cf9926bSpatrick 
2401cf9926bSpatrick   void finalizeContents();
2411cf9926bSpatrick 
2421cf9926bSpatrick   llvm::StringTableBuilder builder;
243ece8a530Spatrick 
244ece8a530Spatrick protected:
2451cf9926bSpatrick   std::vector<MergeInputChunk *> chunks;
246ece8a530Spatrick };
247ece8a530Spatrick 
248ece8a530Spatrick // Represents a single wasm function within and input file.  These are
249ece8a530Spatrick // combined to create the final output CODE section.
250ece8a530Spatrick class InputFunction : public InputChunk {
251ece8a530Spatrick public:
InputFunction(const WasmSignature & s,const WasmFunction * func,ObjFile * f)252ece8a530Spatrick   InputFunction(const WasmSignature &s, const WasmFunction *func, ObjFile *f)
2531cf9926bSpatrick       : InputChunk(f, InputChunk::Function, func->SymbolName), signature(s),
254*dfe94b16Srobert         function(func),
255*dfe94b16Srobert         exportName(func && func->ExportName ? (*func->ExportName).str()
256*dfe94b16Srobert                                             : std::optional<std::string>()) {
2571cf9926bSpatrick     inputSectionOffset = function->CodeSectionOffset;
2581cf9926bSpatrick     rawData =
2591cf9926bSpatrick         file->codeSection->Content.slice(inputSectionOffset, function->Size);
2601cf9926bSpatrick     debugName = function->DebugName;
2611cf9926bSpatrick     comdat = function->Comdat;
2621cf9926bSpatrick   }
2631cf9926bSpatrick 
InputFunction(StringRef name,const WasmSignature & s)2641cf9926bSpatrick   InputFunction(StringRef name, const WasmSignature &s)
2651cf9926bSpatrick       : InputChunk(nullptr, InputChunk::Function, name), signature(s) {}
266ece8a530Spatrick 
classof(const InputChunk * c)267ece8a530Spatrick   static bool classof(const InputChunk *c) {
268ece8a530Spatrick     return c->kind() == InputChunk::Function ||
269ece8a530Spatrick            c->kind() == InputChunk::SyntheticFunction;
270ece8a530Spatrick   }
271ece8a530Spatrick 
getExportName()272*dfe94b16Srobert   std::optional<StringRef> getExportName() const {
273*dfe94b16Srobert     return exportName ? std::optional<StringRef>(*exportName)
274*dfe94b16Srobert                       : std::optional<StringRef>();
275ece8a530Spatrick   }
setExportName(std::string exportName)2761cf9926bSpatrick   void setExportName(std::string exportName) { this->exportName = exportName; }
getFunctionInputOffset()277ece8a530Spatrick   uint32_t getFunctionInputOffset() const { return getInputSectionOffset(); }
getFunctionCodeOffset()278ece8a530Spatrick   uint32_t getFunctionCodeOffset() const { return function->CodeOffset; }
getFunctionIndex()279*dfe94b16Srobert   uint32_t getFunctionIndex() const { return *functionIndex; }
hasFunctionIndex()280*dfe94b16Srobert   bool hasFunctionIndex() const { return functionIndex.has_value(); }
281ece8a530Spatrick   void setFunctionIndex(uint32_t index);
getTableIndex()282*dfe94b16Srobert   uint32_t getTableIndex() const { return *tableIndex; }
hasTableIndex()283*dfe94b16Srobert   bool hasTableIndex() const { return tableIndex.has_value(); }
284ece8a530Spatrick   void setTableIndex(uint32_t index);
2851cf9926bSpatrick   void writeCompressed(uint8_t *buf) const;
286ece8a530Spatrick 
287ece8a530Spatrick   // The size of a given input function can depend on the values of the
288ece8a530Spatrick   // LEB relocations within it.  This finalizeContents method is called after
289ece8a530Spatrick   // all the symbol values have be calculated but before getSize() is ever
290ece8a530Spatrick   // called.
291ece8a530Spatrick   void calculateSize();
292ece8a530Spatrick 
293ece8a530Spatrick   const WasmSignature &signature;
294ece8a530Spatrick 
getCompressedSize()2951cf9926bSpatrick   uint32_t getCompressedSize() const {
2961cf9926bSpatrick     assert(compressedSize);
2971cf9926bSpatrick     return compressedSize;
298ece8a530Spatrick   }
299ece8a530Spatrick 
300ece8a530Spatrick   const WasmFunction *function;
3011cf9926bSpatrick 
3021cf9926bSpatrick protected:
303*dfe94b16Srobert   std::optional<std::string> exportName;
304*dfe94b16Srobert   std::optional<uint32_t> functionIndex;
305*dfe94b16Srobert   std::optional<uint32_t> tableIndex;
306ece8a530Spatrick   uint32_t compressedFuncSize = 0;
307ece8a530Spatrick   uint32_t compressedSize = 0;
308ece8a530Spatrick };
309ece8a530Spatrick 
310ece8a530Spatrick class SyntheticFunction : public InputFunction {
311ece8a530Spatrick public:
312ece8a530Spatrick   SyntheticFunction(const WasmSignature &s, StringRef name,
313ece8a530Spatrick                     StringRef debugName = {})
InputFunction(name,s)3141cf9926bSpatrick       : InputFunction(name, s) {
315ece8a530Spatrick     sectionKind = InputChunk::SyntheticFunction;
3161cf9926bSpatrick     this->debugName = debugName;
317ece8a530Spatrick   }
318ece8a530Spatrick 
classof(const InputChunk * c)319ece8a530Spatrick   static bool classof(const InputChunk *c) {
320ece8a530Spatrick     return c->kind() == InputChunk::SyntheticFunction;
321ece8a530Spatrick   }
322ece8a530Spatrick 
setBody(ArrayRef<uint8_t> body)3231cf9926bSpatrick   void setBody(ArrayRef<uint8_t> body) { rawData = body; }
324ece8a530Spatrick };
325ece8a530Spatrick 
326ece8a530Spatrick // Represents a single Wasm Section within an input file.
327ece8a530Spatrick class InputSection : public InputChunk {
328ece8a530Spatrick public:
InputSection(const WasmSection & s,ObjFile * f)329ece8a530Spatrick   InputSection(const WasmSection &s, ObjFile *f)
3301cf9926bSpatrick       : InputChunk(f, InputChunk::Section, s.Name),
3311cf9926bSpatrick         tombstoneValue(getTombstoneForSection(s.Name)), section(s) {
332ece8a530Spatrick     assert(section.Type == llvm::wasm::WASM_SEC_CUSTOM);
3331cf9926bSpatrick     comdat = section.Comdat;
3341cf9926bSpatrick     rawData = section.Content;
335ece8a530Spatrick   }
336ece8a530Spatrick 
classof(const InputChunk * c)3371cf9926bSpatrick   static bool classof(const InputChunk *c) {
3381cf9926bSpatrick     return c->kind() == InputChunk::Section;
3391cf9926bSpatrick   }
340ece8a530Spatrick 
3411cf9926bSpatrick   const uint64_t tombstoneValue;
342ece8a530Spatrick 
343ece8a530Spatrick protected:
3441cf9926bSpatrick   static uint64_t getTombstoneForSection(StringRef name);
345ece8a530Spatrick   const WasmSection &section;
346ece8a530Spatrick };
347ece8a530Spatrick 
348ece8a530Spatrick } // namespace wasm
349ece8a530Spatrick 
350ece8a530Spatrick std::string toString(const wasm::InputChunk *);
351ece8a530Spatrick StringRef relocTypeToString(uint8_t relocType);
352ece8a530Spatrick 
353ece8a530Spatrick } // namespace lld
354ece8a530Spatrick 
355ece8a530Spatrick #endif // LLD_WASM_INPUT_CHUNKS_H
356