xref: /llvm-project/bolt/lib/Core/BinarySection.cpp (revision 996553228f8b2f3219451a2514bd6f9380f13e28)
1 //===- bolt/Core/BinarySection.cpp - Section in a binary file -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the BinarySection class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "bolt/Core/BinarySection.h"
14 #include "bolt/Core/BinaryContext.h"
15 #include "bolt/Utils/Utils.h"
16 #include "llvm/MC/MCStreamer.h"
17 #include "llvm/Support/CommandLine.h"
18 
19 #define DEBUG_TYPE "bolt"
20 
21 using namespace llvm;
22 using namespace bolt;
23 
// Command-line options used by this file; they are only declared here
// (extern) and are defined elsewhere.
namespace opts {
// When set, BinarySection::print() appends every relocation of the section
// to its output.
extern cl::opt<bool> PrintRelocations;
// When set (and the binary has relocations and the section was reordered),
// BinarySection::emitAsData() brackets the emitted section with the
// __hot_data_start / __hot_data_end labels.
extern cl::opt<bool> HotData;
} // namespace opts
28 
// Out-of-line definition of the static BinarySection::Count member,
// initialized to zero.
// NOTE(review): how the counter is advanced is not visible in this file --
// presumably it tracks created sections; confirm against BinarySection.h.
uint64_t BinarySection::Count = 0;
30 
31 bool BinarySection::isELF() const { return BC.isELF(); }
32 
33 bool BinarySection::isMachO() const { return BC.isMachO(); }
34 
35 uint64_t
36 BinarySection::hash(const BinaryData &BD,
37                     std::map<const BinaryData *, uint64_t> &Cache) const {
38   auto Itr = Cache.find(&BD);
39   if (Itr != Cache.end())
40     return Itr->second;
41 
42   hash_code Hash =
43       hash_combine(hash_value(BD.getSize()), hash_value(BD.getSectionName()));
44 
45   Cache[&BD] = Hash;
46 
47   if (!containsRange(BD.getAddress(), BD.getSize()))
48     return Hash;
49 
50   uint64_t Offset = BD.getAddress() - getAddress();
51   const uint64_t EndOffset = BD.getEndAddress() - getAddress();
52   auto Begin = Relocations.lower_bound(Relocation{Offset, 0, 0, 0, 0});
53   auto End = Relocations.upper_bound(Relocation{EndOffset, 0, 0, 0, 0});
54   const StringRef Contents = getContents();
55 
56   while (Begin != End) {
57     const Relocation &Rel = *Begin++;
58     Hash = hash_combine(
59         Hash, hash_value(Contents.substr(Offset, Begin->Offset - Offset)));
60     if (BinaryData *RelBD = BC.getBinaryDataByName(Rel.Symbol->getName()))
61       Hash = hash_combine(Hash, hash(*RelBD, Cache));
62     Offset = Rel.Offset + Rel.getSize();
63   }
64 
65   Hash = hash_combine(Hash,
66                       hash_value(Contents.substr(Offset, EndOffset - Offset)));
67 
68   Cache[&BD] = Hash;
69 
70   return Hash;
71 }
72 
73 void BinarySection::emitAsData(MCStreamer &Streamer,
74                                const Twine &SectionName) const {
75   StringRef SectionContents =
76       isFinalized() ? getOutputContents() : getContents();
77   MCSectionELF *ELFSection =
78       BC.Ctx->getELFSection(SectionName, getELFType(), getELFFlags());
79 
80   Streamer.switchSection(ELFSection);
81   Streamer.emitValueToAlignment(getAlign());
82 
83   if (BC.HasRelocations && opts::HotData && isReordered())
84     Streamer.emitLabel(BC.Ctx->getOrCreateSymbol("__hot_data_start"));
85 
86   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: emitting "
87                     << (isAllocatable() ? "" : "non-")
88                     << "allocatable data section " << SectionName << '\n');
89 
90   if (!hasRelocations()) {
91     Streamer.emitBytes(SectionContents);
92   } else {
93     uint64_t SectionOffset = 0;
94     for (auto RI = Relocations.begin(), RE = Relocations.end(); RI != RE;) {
95       auto RelocationOffset = RI->Offset;
96       assert(RelocationOffset < SectionContents.size() && "overflow detected");
97 
98       if (SectionOffset < RelocationOffset) {
99         Streamer.emitBytes(SectionContents.substr(
100             SectionOffset, RelocationOffset - SectionOffset));
101         SectionOffset = RelocationOffset;
102       }
103 
104       // Get iterators to all relocations with the same offset. Usually, there
105       // is only one such relocation but there can be more for composed
106       // relocations.
107       auto ROI = RI;
108       auto ROE = Relocations.upper_bound(RelocationOffset);
109 
110       // Start from the next offset on the next iteration.
111       RI = ROE;
112 
113       // Skip undefined symbols.
114       auto HasUndefSym = [this](const auto &Relocation) {
115         return BC.UndefinedSymbols.count(Relocation.Symbol);
116       };
117 
118       if (std::any_of(ROI, ROE, HasUndefSym))
119         continue;
120 
121 #ifndef NDEBUG
122       for (const auto &Relocation : make_range(ROI, ROE)) {
123         LLVM_DEBUG(
124             dbgs() << "BOLT-DEBUG: emitting relocation for symbol "
125                    << (Relocation.Symbol ? Relocation.Symbol->getName()
126                                          : StringRef("<none>"))
127                    << " at offset 0x" << Twine::utohexstr(Relocation.Offset)
128                    << " with size "
129                    << Relocation::getSizeForType(Relocation.Type) << '\n');
130       }
131 #endif
132 
133       size_t RelocationSize = Relocation::emit(ROI, ROE, &Streamer);
134       SectionOffset += RelocationSize;
135     }
136     assert(SectionOffset <= SectionContents.size() && "overflow error");
137     if (SectionOffset < SectionContents.size())
138       Streamer.emitBytes(SectionContents.substr(SectionOffset));
139   }
140 
141   if (BC.HasRelocations && opts::HotData && isReordered())
142     Streamer.emitLabel(BC.Ctx->getOrCreateSymbol("__hot_data_end"));
143 }
144 
145 uint64_t BinarySection::write(raw_ostream &OS) const {
146   const uint64_t NumValidContentBytes =
147       std::min<uint64_t>(getOutputContents().size(), getOutputSize());
148   OS.write(getOutputContents().data(), NumValidContentBytes);
149   if (getOutputSize() > NumValidContentBytes)
150     OS.write_zeros(getOutputSize() - NumValidContentBytes);
151   return getOutputSize();
152 }
153 
154 void BinarySection::flushPendingRelocations(raw_pwrite_stream &OS,
155                                             SymbolResolverFuncTy Resolver) {
156   if (PendingRelocations.empty() && Patches.empty())
157     return;
158 
159   const uint64_t SectionAddress = getAddress();
160 
161   // We apply relocations to original section contents. For allocatable sections
162   // this means using their input file offsets, since the output file offset
163   // could change (e.g. for new instance of .text). For non-allocatable
164   // sections, the output offset should always be a valid one.
165   const uint64_t SectionFileOffset =
166       isAllocatable() ? getInputFileOffset() : getOutputFileOffset();
167   LLVM_DEBUG(
168       dbgs() << "BOLT-DEBUG: flushing pending relocations for section "
169              << getName() << '\n'
170              << "  address: 0x" << Twine::utohexstr(SectionAddress) << '\n'
171              << "  offset: 0x" << Twine::utohexstr(SectionFileOffset) << '\n');
172 
173   for (BinaryPatch &Patch : Patches)
174     OS.pwrite(Patch.Bytes.data(), Patch.Bytes.size(),
175               SectionFileOffset + Patch.Offset);
176 
177   for (Relocation &Reloc : PendingRelocations) {
178     uint64_t Value = Reloc.Addend;
179     if (Reloc.Symbol)
180       Value += Resolver(Reloc.Symbol);
181 
182     Value = Relocation::encodeValue(Reloc.Type, Value,
183                                     SectionAddress + Reloc.Offset);
184 
185     OS.pwrite(reinterpret_cast<const char *>(&Value),
186               Relocation::getSizeForType(Reloc.Type),
187               SectionFileOffset + Reloc.Offset);
188 
189     LLVM_DEBUG(
190         dbgs() << "BOLT-DEBUG: writing value 0x" << Twine::utohexstr(Value)
191                << " of size " << Relocation::getSizeForType(Reloc.Type)
192                << " at section offset 0x" << Twine::utohexstr(Reloc.Offset)
193                << " address 0x"
194                << Twine::utohexstr(SectionAddress + Reloc.Offset)
195                << " file offset 0x"
196                << Twine::utohexstr(SectionFileOffset + Reloc.Offset) << '\n';);
197   }
198 
199   clearList(PendingRelocations);
200 }
201 
202 BinarySection::~BinarySection() { updateContents(nullptr, 0); }
203 
204 void BinarySection::clearRelocations() { clearList(Relocations); }
205 
206 void BinarySection::print(raw_ostream &OS) const {
207   OS << getName() << ", "
208      << "0x" << Twine::utohexstr(getAddress()) << ", " << getSize() << " (0x"
209      << Twine::utohexstr(getOutputAddress()) << ", " << getOutputSize() << ")"
210      << ", data = " << getData() << ", output data = " << getOutputData();
211 
212   if (isAllocatable())
213     OS << " (allocatable)";
214 
215   if (isVirtual())
216     OS << " (virtual)";
217 
218   if (isTLS())
219     OS << " (tls)";
220 
221   if (opts::PrintRelocations)
222     for (const Relocation &R : relocations())
223       OS << "\n  " << R;
224 }
225 
226 BinarySection::RelocationSetType
227 BinarySection::reorderRelocations(bool Inplace) const {
228   assert(PendingRelocations.empty() &&
229          "reordering pending relocations not supported");
230   RelocationSetType NewRelocations;
231   for (const Relocation &Rel : relocations()) {
232     uint64_t RelAddr = Rel.Offset + getAddress();
233     BinaryData *BD = BC.getBinaryDataContainingAddress(RelAddr);
234     BD = BD->getAtomicRoot();
235     assert(BD);
236 
237     if ((!BD->isMoved() && !Inplace) || BD->isJumpTable())
238       continue;
239 
240     Relocation NewRel(Rel);
241     uint64_t RelOffset = RelAddr - BD->getAddress();
242     NewRel.Offset = BD->getOutputOffset() + RelOffset;
243     assert(NewRel.Offset < getSize());
244     LLVM_DEBUG(dbgs() << "BOLT-DEBUG: moving " << Rel << " -> " << NewRel
245                       << "\n");
246     NewRelocations.emplace(std::move(NewRel));
247   }
248   return NewRelocations;
249 }
250 
251 void BinarySection::reorderContents(const std::vector<BinaryData *> &Order,
252                                     bool Inplace) {
253   IsReordered = true;
254 
255   Relocations = reorderRelocations(Inplace);
256 
257   std::string Str;
258   raw_string_ostream OS(Str);
259   const char *Src = Contents.data();
260   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: reorderContents for " << Name << "\n");
261   for (BinaryData *BD : Order) {
262     assert((BD->isMoved() || !Inplace) && !BD->isJumpTable());
263     assert(BD->isAtomic() && BD->isMoveable());
264     const uint64_t SrcOffset = BD->getAddress() - getAddress();
265     assert(SrcOffset < Contents.size());
266     assert(SrcOffset == BD->getOffset());
267     while (OS.tell() < BD->getOutputOffset())
268       OS.write((unsigned char)0);
269     LLVM_DEBUG(dbgs() << "BOLT-DEBUG: " << BD->getName() << " @ " << OS.tell()
270                       << "\n");
271     OS.write(&Src[SrcOffset], BD->getOutputSize());
272   }
273   if (Relocations.empty()) {
274     // If there are no existing relocations, tack a phony one at the end
275     // of the reordered segment to force LLVM to recognize and map this
276     // section.
277     MCSymbol *ZeroSym = BC.registerNameAtAddress("Zero", 0, 0, 0);
278     addRelocation(OS.tell(), ZeroSym, Relocation::getAbs64(), 0xdeadbeef);
279 
280     uint64_t Zero = 0;
281     OS.write(reinterpret_cast<const char *>(&Zero), sizeof(Zero));
282   }
283   auto *NewData = reinterpret_cast<char *>(copyByteArray(OS.str()));
284   Contents = OutputContents = StringRef(NewData, OS.str().size());
285   OutputSize = Contents.size();
286 }
287 
288 std::string BinarySection::encodeELFNote(StringRef NameStr, StringRef DescStr,
289                                          uint32_t Type) {
290   std::string Str;
291   raw_string_ostream OS(Str);
292   const uint32_t NameSz = NameStr.size() + 1;
293   const uint32_t DescSz = DescStr.size();
294   OS.write(reinterpret_cast<const char *>(&(NameSz)), 4);
295   OS.write(reinterpret_cast<const char *>(&(DescSz)), 4);
296   OS.write(reinterpret_cast<const char *>(&(Type)), 4);
297   OS << NameStr << '\0';
298   for (uint64_t I = NameSz; I < alignTo(NameSz, 4); ++I)
299     OS << '\0';
300   OS << DescStr;
301   for (uint64_t I = DescStr.size(); I < alignTo(DescStr.size(), 4); ++I)
302     OS << '\0';
303   return OS.str();
304 }
305