xref: /llvm-project/lld/MachO/BPSectionOrderer.cpp (revision e0c7f081f1582d49f81ec4c6cdbf5d6ef13c58ba)
1 //===- BPSectionOrderer.cpp -----------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "BPSectionOrderer.h"
10 #include "InputSection.h"
11 #include "Relocations.h"
12 #include "Symbols.h"
13 #include "lld/Common/BPSectionOrdererBase.inc"
14 #include "llvm/ADT/DenseMap.h"
15 #include "llvm/ADT/StableHashing.h"
16 #include "llvm/Support/Endian.h"
17 #include "llvm/Support/xxhash.h"
18 
19 #define DEBUG_TYPE "bp-section-orderer"
20 
21 using namespace llvm;
22 using namespace lld::macho;
23 
24 namespace {
25 struct BPOrdererMachO;
26 }
27 template <> struct lld::BPOrdererTraits<struct BPOrdererMachO> {
28   using Section = macho::InputSection;
29   using Symbol = macho::Symbol;
30 };
31 namespace {
32 struct BPOrdererMachO : lld::BPOrderer<BPOrdererMachO> {
33   static uint64_t getSize(const Section &sec) { return sec.getSize(); }
34   static bool isCodeSection(const Section &sec) {
35     return macho::isCodeSection(&sec);
36   }
37   static SmallVector<Symbol *, 0> getSymbols(const Section &sec) {
38     SmallVector<Symbol *, 0> symbols;
39     for (auto *sym : sec.symbols)
40       if (auto *d = llvm::dyn_cast_or_null<Defined>(sym))
41         symbols.emplace_back(d);
42     return symbols;
43   }
44 
45   // Linkage names can be prefixed with "_" or "l_" on Mach-O. See
46   // Mangler::getNameWithPrefix() for details.
47   std::optional<StringRef> static getResolvedLinkageName(llvm::StringRef name) {
48     if (name.consume_front("_") || name.consume_front("l_"))
49       return name;
50     return {};
51   }
52 
53   static void
54   getSectionHashes(const Section &sec, llvm::SmallVectorImpl<uint64_t> &hashes,
55                    const llvm::DenseMap<const void *, uint64_t> &sectionToIdx) {
56     constexpr unsigned windowSize = 4;
57 
58     // Calculate content hashes: k-mers and the last k-1 bytes.
59     ArrayRef<uint8_t> data = sec.data;
60     if (data.size() >= windowSize)
61       for (size_t i = 0; i <= data.size() - windowSize; ++i)
62         hashes.push_back(llvm::support::endian::read32le(data.data() + i));
63     for (uint8_t byte : data.take_back(windowSize - 1))
64       hashes.push_back(byte);
65 
66     // Calculate relocation hashes
67     for (const auto &r : sec.relocs) {
68       if (r.length == 0 || r.referent.isNull() || r.offset >= data.size())
69         continue;
70 
71       uint64_t relocHash = getRelocHash(r, sectionToIdx);
72       uint32_t start = (r.offset < windowSize) ? 0 : r.offset - windowSize + 1;
73       for (uint32_t i = start; i < r.offset + r.length; i++) {
74         auto window = data.drop_front(i).take_front(windowSize);
75         hashes.push_back(xxh3_64bits(window) ^ relocHash);
76       }
77     }
78 
79     llvm::sort(hashes);
80     hashes.erase(std::unique(hashes.begin(), hashes.end()), hashes.end());
81   }
82 
83   static llvm::StringRef getSymName(const Symbol &sym) { return sym.getName(); }
84   static uint64_t getSymValue(const Symbol &sym) {
85     if (auto *d = dyn_cast<Defined>(&sym))
86       return d->value;
87     return 0;
88   }
89   static uint64_t getSymSize(const Symbol &sym) {
90     if (auto *d = dyn_cast<Defined>(&sym))
91       return d->size;
92     return 0;
93   }
94 
95 private:
96   static uint64_t
97   getRelocHash(const Reloc &reloc,
98                const llvm::DenseMap<const void *, uint64_t> &sectionToIdx) {
99     auto *isec = reloc.getReferentInputSection();
100     std::optional<uint64_t> sectionIdx;
101     if (auto it = sectionToIdx.find(isec); it != sectionToIdx.end())
102       sectionIdx = it->second;
103     uint64_t kind = -1, value = 0;
104     if (isec)
105       kind = uint64_t(isec->kind());
106 
107     if (auto *sym = reloc.referent.dyn_cast<Symbol *>()) {
108       kind = (kind << 8) | uint8_t(sym->kind());
109       if (auto *d = llvm::dyn_cast<Defined>(sym))
110         value = d->value;
111     }
112     return llvm::stable_hash_combine(kind, sectionIdx.value_or(0), value,
113                                      reloc.addend);
114   }
115 };
116 } // namespace
117 
118 DenseMap<const InputSection *, int> lld::macho::runBalancedPartitioning(
119     StringRef profilePath, bool forFunctionCompression, bool forDataCompression,
120     bool compressionSortStartupFunctions, bool verbose) {
121   // Collect candidate sections and associated symbols.
122   SmallVector<InputSection *> sections;
123   DenseMap<CachedHashStringRef, DenseSet<unsigned>> rootSymbolToSectionIdxs;
124   for (const auto *file : inputFiles) {
125     for (auto *sec : file->sections) {
126       for (auto &subsec : sec->subsections) {
127         auto *isec = subsec.isec;
128         if (!isec || isec->data.empty())
129           continue;
130         size_t idx = sections.size();
131         sections.emplace_back(isec);
132         for (auto *sym : BPOrdererMachO::getSymbols(*isec)) {
133           auto rootName = getRootSymbol(sym->getName());
134           rootSymbolToSectionIdxs[CachedHashStringRef(rootName)].insert(idx);
135           if (auto linkageName =
136                   BPOrdererMachO::getResolvedLinkageName(rootName))
137             rootSymbolToSectionIdxs[CachedHashStringRef(*linkageName)].insert(
138                 idx);
139         }
140       }
141     }
142   }
143 
144   return BPOrdererMachO::computeOrder(profilePath, forFunctionCompression,
145                                       forDataCompression,
146                                       compressionSortStartupFunctions, verbose,
147                                       sections, rootSymbolToSectionIdxs);
148 }
149