xref: /llvm-project/bolt/lib/Core/BinarySection.cpp (revision 5daf2001a1e4d71ce1273a1e7e31cf6e6ac37c10)
1 //===- bolt/Core/BinarySection.cpp - Section in a binary file -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the BinarySection class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "bolt/Core/BinarySection.h"
14 #include "bolt/Core/BinaryContext.h"
15 #include "bolt/Utils/Utils.h"
16 #include "llvm/MC/MCStreamer.h"
17 #include "llvm/Support/CommandLine.h"
18 
19 #define DEBUG_TYPE "bolt"
20 
21 using namespace llvm;
22 using namespace bolt;
23 
// Command-line options that are only declared here (extern); their definitions
// live in another translation unit.
namespace opts {
// Read by BinarySection::print() to decide whether relocations are listed.
extern cl::opt<bool> PrintRelocations;
// Read by BinarySection::emitAsData() to gate the __hot_data_start and
// __hot_data_end labels.
extern cl::opt<bool> HotData;
} // namespace opts
28 
// Out-of-class definition of the static member BinarySection::Count, starting
// at zero. NOTE(review): nothing in this file reads or updates it; presumably
// it is maintained from the header -- confirm against BinarySection.h.
uint64_t BinarySection::Count = 0;
30 
31 bool BinarySection::isELF() const { return BC.isELF(); }
32 
33 bool BinarySection::isMachO() const { return BC.isMachO(); }
34 
35 uint64_t
36 BinarySection::hash(const BinaryData &BD,
37                     std::map<const BinaryData *, uint64_t> &Cache) const {
38   auto Itr = Cache.find(&BD);
39   if (Itr != Cache.end())
40     return Itr->second;
41 
42   hash_code Hash =
43       hash_combine(hash_value(BD.getSize()), hash_value(BD.getSectionName()));
44 
45   Cache[&BD] = Hash;
46 
47   if (!containsRange(BD.getAddress(), BD.getSize()))
48     return Hash;
49 
50   uint64_t Offset = BD.getAddress() - getAddress();
51   const uint64_t EndOffset = BD.getEndAddress() - getAddress();
52   auto Begin = Relocations.lower_bound(Relocation{Offset, 0, 0, 0, 0});
53   auto End = Relocations.upper_bound(Relocation{EndOffset, 0, 0, 0, 0});
54   const StringRef Contents = getContents();
55 
56   while (Begin != End) {
57     const Relocation &Rel = *Begin++;
58     Hash = hash_combine(
59         Hash, hash_value(Contents.substr(Offset, Begin->Offset - Offset)));
60     if (BinaryData *RelBD = BC.getBinaryDataByName(Rel.Symbol->getName()))
61       Hash = hash_combine(Hash, hash(*RelBD, Cache));
62     Offset = Rel.Offset + Rel.getSize();
63   }
64 
65   Hash = hash_combine(Hash,
66                       hash_value(Contents.substr(Offset, EndOffset - Offset)));
67 
68   Cache[&BD] = Hash;
69 
70   return Hash;
71 }
72 
73 void BinarySection::emitAsData(MCStreamer &Streamer,
74                                const Twine &SectionName) const {
75   StringRef SectionContents =
76       isFinalized() ? getOutputContents() : getContents();
77   MCSectionELF *ELFSection =
78       BC.Ctx->getELFSection(SectionName, getELFType(), getELFFlags());
79 
80   Streamer.switchSection(ELFSection);
81   Streamer.emitValueToAlignment(getAlign());
82 
83   if (BC.HasRelocations && opts::HotData && isReordered())
84     Streamer.emitLabel(BC.Ctx->getOrCreateSymbol("__hot_data_start"));
85 
86   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: emitting "
87                     << (isAllocatable() ? "" : "non-")
88                     << "allocatable data section " << SectionName << '\n');
89 
90   if (!hasRelocations()) {
91     Streamer.emitBytes(SectionContents);
92   } else {
93     uint64_t SectionOffset = 0;
94     for (auto RI = Relocations.begin(), RE = Relocations.end(); RI != RE;) {
95       auto RelocationOffset = RI->Offset;
96       assert(RelocationOffset < SectionContents.size() && "overflow detected");
97 
98       if (SectionOffset < RelocationOffset) {
99         Streamer.emitBytes(SectionContents.substr(
100             SectionOffset, RelocationOffset - SectionOffset));
101         SectionOffset = RelocationOffset;
102       }
103 
104       // Get iterators to all relocations with the same offset. Usually, there
105       // is only one such relocation but there can be more for composed
106       // relocations.
107       auto ROI = RI;
108       auto ROE = Relocations.upper_bound(RelocationOffset);
109 
110       // Start from the next offset on the next iteration.
111       RI = ROE;
112 
113       // Skip undefined symbols.
114       auto HasUndefSym = [this](const auto &Relocation) {
115         return BC.UndefinedSymbols.count(Relocation.Symbol);
116       };
117 
118       if (std::any_of(ROI, ROE, HasUndefSym))
119         continue;
120 
121 #ifndef NDEBUG
122       for (const auto &Relocation : make_range(ROI, ROE)) {
123         LLVM_DEBUG(
124             dbgs() << "BOLT-DEBUG: emitting relocation for symbol "
125                    << (Relocation.Symbol ? Relocation.Symbol->getName()
126                                          : StringRef("<none>"))
127                    << " at offset 0x" << Twine::utohexstr(Relocation.Offset)
128                    << " with size "
129                    << Relocation::getSizeForType(Relocation.Type) << '\n');
130       }
131 #endif
132 
133       size_t RelocationSize = Relocation::emit(ROI, ROE, &Streamer);
134       SectionOffset += RelocationSize;
135     }
136     assert(SectionOffset <= SectionContents.size() && "overflow error");
137     if (SectionOffset < SectionContents.size())
138       Streamer.emitBytes(SectionContents.substr(SectionOffset));
139   }
140 
141   if (BC.HasRelocations && opts::HotData && isReordered())
142     Streamer.emitLabel(BC.Ctx->getOrCreateSymbol("__hot_data_end"));
143 }
144 
145 void BinarySection::flushPendingRelocations(raw_pwrite_stream &OS,
146                                             SymbolResolverFuncTy Resolver) {
147   if (PendingRelocations.empty() && Patches.empty())
148     return;
149 
150   const uint64_t SectionAddress = getAddress();
151 
152   // We apply relocations to original section contents. For allocatable sections
153   // this means using their input file offsets, since the output file offset
154   // could change (e.g. for new instance of .text). For non-allocatable
155   // sections, the output offset should always be a valid one.
156   const uint64_t SectionFileOffset =
157       isAllocatable() ? getInputFileOffset() : getOutputFileOffset();
158   LLVM_DEBUG(
159       dbgs() << "BOLT-DEBUG: flushing pending relocations for section "
160              << getName() << '\n'
161              << "  address: 0x" << Twine::utohexstr(SectionAddress) << '\n'
162              << "  offset: 0x" << Twine::utohexstr(SectionFileOffset) << '\n');
163 
164   for (BinaryPatch &Patch : Patches)
165     OS.pwrite(Patch.Bytes.data(), Patch.Bytes.size(),
166               SectionFileOffset + Patch.Offset);
167 
168   for (Relocation &Reloc : PendingRelocations) {
169     uint64_t Value = Reloc.Addend;
170     if (Reloc.Symbol)
171       Value += Resolver(Reloc.Symbol);
172 
173     Value = Relocation::encodeValue(Reloc.Type, Value,
174                                     SectionAddress + Reloc.Offset);
175 
176     OS.pwrite(reinterpret_cast<const char *>(&Value),
177               Relocation::getSizeForType(Reloc.Type),
178               SectionFileOffset + Reloc.Offset);
179 
180     LLVM_DEBUG(
181         dbgs() << "BOLT-DEBUG: writing value 0x" << Twine::utohexstr(Value)
182                << " of size " << Relocation::getSizeForType(Reloc.Type)
183                << " at section offset 0x" << Twine::utohexstr(Reloc.Offset)
184                << " address 0x"
185                << Twine::utohexstr(SectionAddress + Reloc.Offset)
186                << " file offset 0x"
187                << Twine::utohexstr(SectionFileOffset + Reloc.Offset) << '\n';);
188   }
189 
190   clearList(PendingRelocations);
191 }
192 
193 BinarySection::~BinarySection() { updateContents(nullptr, 0); }
194 
195 void BinarySection::clearRelocations() { clearList(Relocations); }
196 
197 void BinarySection::print(raw_ostream &OS) const {
198   OS << getName() << ", "
199      << "0x" << Twine::utohexstr(getAddress()) << ", " << getSize() << " (0x"
200      << Twine::utohexstr(getOutputAddress()) << ", " << getOutputSize() << ")"
201      << ", data = " << getData() << ", output data = " << getOutputData();
202 
203   if (isAllocatable())
204     OS << " (allocatable)";
205 
206   if (isVirtual())
207     OS << " (virtual)";
208 
209   if (isTLS())
210     OS << " (tls)";
211 
212   if (opts::PrintRelocations)
213     for (const Relocation &R : relocations())
214       OS << "\n  " << R;
215 }
216 
217 BinarySection::RelocationSetType
218 BinarySection::reorderRelocations(bool Inplace) const {
219   assert(PendingRelocations.empty() &&
220          "reordering pending relocations not supported");
221   RelocationSetType NewRelocations;
222   for (const Relocation &Rel : relocations()) {
223     uint64_t RelAddr = Rel.Offset + getAddress();
224     BinaryData *BD = BC.getBinaryDataContainingAddress(RelAddr);
225     BD = BD->getAtomicRoot();
226     assert(BD);
227 
228     if ((!BD->isMoved() && !Inplace) || BD->isJumpTable())
229       continue;
230 
231     Relocation NewRel(Rel);
232     uint64_t RelOffset = RelAddr - BD->getAddress();
233     NewRel.Offset = BD->getOutputOffset() + RelOffset;
234     assert(NewRel.Offset < getSize());
235     LLVM_DEBUG(dbgs() << "BOLT-DEBUG: moving " << Rel << " -> " << NewRel
236                       << "\n");
237     NewRelocations.emplace(std::move(NewRel));
238   }
239   return NewRelocations;
240 }
241 
242 void BinarySection::reorderContents(const std::vector<BinaryData *> &Order,
243                                     bool Inplace) {
244   IsReordered = true;
245 
246   Relocations = reorderRelocations(Inplace);
247 
248   std::string Str;
249   raw_string_ostream OS(Str);
250   const char *Src = Contents.data();
251   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: reorderContents for " << Name << "\n");
252   for (BinaryData *BD : Order) {
253     assert((BD->isMoved() || !Inplace) && !BD->isJumpTable());
254     assert(BD->isAtomic() && BD->isMoveable());
255     const uint64_t SrcOffset = BD->getAddress() - getAddress();
256     assert(SrcOffset < Contents.size());
257     assert(SrcOffset == BD->getOffset());
258     while (OS.tell() < BD->getOutputOffset())
259       OS.write((unsigned char)0);
260     LLVM_DEBUG(dbgs() << "BOLT-DEBUG: " << BD->getName() << " @ " << OS.tell()
261                       << "\n");
262     OS.write(&Src[SrcOffset], BD->getOutputSize());
263   }
264   if (Relocations.empty()) {
265     // If there are no existing relocations, tack a phony one at the end
266     // of the reordered segment to force LLVM to recognize and map this
267     // section.
268     MCSymbol *ZeroSym = BC.registerNameAtAddress("Zero", 0, 0, 0);
269     addRelocation(OS.tell(), ZeroSym, Relocation::getAbs64(), 0xdeadbeef);
270 
271     uint64_t Zero = 0;
272     OS.write(reinterpret_cast<const char *>(&Zero), sizeof(Zero));
273   }
274   auto *NewData = reinterpret_cast<char *>(copyByteArray(OS.str()));
275   Contents = OutputContents = StringRef(NewData, OS.str().size());
276   OutputSize = Contents.size();
277 }
278 
279 std::string BinarySection::encodeELFNote(StringRef NameStr, StringRef DescStr,
280                                          uint32_t Type) {
281   std::string Str;
282   raw_string_ostream OS(Str);
283   const uint32_t NameSz = NameStr.size() + 1;
284   const uint32_t DescSz = DescStr.size();
285   OS.write(reinterpret_cast<const char *>(&(NameSz)), 4);
286   OS.write(reinterpret_cast<const char *>(&(DescSz)), 4);
287   OS.write(reinterpret_cast<const char *>(&(Type)), 4);
288   OS << NameStr << '\0';
289   for (uint64_t I = NameSz; I < alignTo(NameSz, 4); ++I)
290     OS << '\0';
291   OS << DescStr;
292   for (uint64_t I = DescStr.size(); I < alignTo(DescStr.size(), 4); ++I)
293     OS << '\0';
294   return OS.str();
295 }
296