xref: /llvm-project/bolt/lib/Core/BinarySection.cpp (revision 1a2f83366b86433bb86f3b60fa19b3f096313a21)
1 //===- bolt/Core/BinarySection.cpp - Section in a binary file -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the BinarySection class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "bolt/Core/BinarySection.h"
14 #include "bolt/Core/BinaryContext.h"
15 #include "bolt/Utils/Utils.h"
16 #include "llvm/MC/MCStreamer.h"
17 #include "llvm/Support/CommandLine.h"
18 
19 #define DEBUG_TYPE "bolt"
20 
21 using namespace llvm;
22 using namespace bolt;
23 
24 namespace opts {
25 extern cl::opt<bool> PrintRelocations;
26 extern cl::opt<bool> HotData;
27 } // namespace opts
28 
// Out-of-line definition of the static member BinarySection::Count, starting
// at zero.
// NOTE(review): presumably a running counter of created sections; its users
// are not visible in this file - confirm against BinarySection.h.
uint64_t BinarySection::Count = 0;
30 
31 bool BinarySection::isELF() const { return BC.isELF(); }
32 
33 bool BinarySection::isMachO() const { return BC.isMachO(); }
34 
35 uint64_t
36 BinarySection::hash(const BinaryData &BD,
37                     std::map<const BinaryData *, uint64_t> &Cache) const {
38   auto Itr = Cache.find(&BD);
39   if (Itr != Cache.end())
40     return Itr->second;
41 
42   hash_code Hash =
43       hash_combine(hash_value(BD.getSize()), hash_value(BD.getSectionName()));
44 
45   Cache[&BD] = Hash;
46 
47   if (!containsRange(BD.getAddress(), BD.getSize()))
48     return Hash;
49 
50   uint64_t Offset = BD.getAddress() - getAddress();
51   const uint64_t EndOffset = BD.getEndAddress() - getAddress();
52   auto Begin = Relocations.lower_bound(Relocation{Offset, 0, 0, 0, 0});
53   auto End = Relocations.upper_bound(Relocation{EndOffset, 0, 0, 0, 0});
54   const StringRef Contents = getContents();
55 
56   while (Begin != End) {
57     const Relocation &Rel = *Begin++;
58     Hash = hash_combine(
59         Hash, hash_value(Contents.substr(Offset, Begin->Offset - Offset)));
60     if (BinaryData *RelBD = BC.getBinaryDataByName(Rel.Symbol->getName()))
61       Hash = hash_combine(Hash, hash(*RelBD, Cache));
62     Offset = Rel.Offset + Rel.getSize();
63   }
64 
65   Hash = hash_combine(Hash,
66                       hash_value(Contents.substr(Offset, EndOffset - Offset)));
67 
68   Cache[&BD] = Hash;
69 
70   return Hash;
71 }
72 
73 void BinarySection::emitAsData(MCStreamer &Streamer,
74                                const Twine &SectionName) const {
75   StringRef SectionContents = getContents();
76   MCSectionELF *ELFSection =
77       BC.Ctx->getELFSection(SectionName, getELFType(), getELFFlags());
78 
79   Streamer.switchSection(ELFSection);
80   Streamer.emitValueToAlignment(getAlign());
81 
82   if (BC.HasRelocations && opts::HotData && isReordered())
83     Streamer.emitLabel(BC.Ctx->getOrCreateSymbol("__hot_data_start"));
84 
85   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: emitting "
86                     << (isAllocatable() ? "" : "non-")
87                     << "allocatable data section " << SectionName << '\n');
88 
89   if (!hasRelocations()) {
90     Streamer.emitBytes(SectionContents);
91   } else {
92     uint64_t SectionOffset = 0;
93     for (auto RI = Relocations.begin(), RE = Relocations.end(); RI != RE;) {
94       auto RelocationOffset = RI->Offset;
95       assert(RelocationOffset < SectionContents.size() && "overflow detected");
96 
97       if (SectionOffset < RelocationOffset) {
98         Streamer.emitBytes(SectionContents.substr(
99             SectionOffset, RelocationOffset - SectionOffset));
100         SectionOffset = RelocationOffset;
101       }
102 
103       // Get iterators to all relocations with the same offset. Usually, there
104       // is only one such relocation but there can be more for composed
105       // relocations.
106       auto ROI = RI;
107       auto ROE = Relocations.upper_bound(RelocationOffset);
108 
109       // Start from the next offset on the next iteration.
110       RI = ROE;
111 
112       // Skip undefined symbols.
113       auto HasUndefSym = [this](const auto &Relocation) {
114         return BC.UndefinedSymbols.count(Relocation.Symbol);
115       };
116 
117       if (std::any_of(ROI, ROE, HasUndefSym))
118         continue;
119 
120 #ifndef NDEBUG
121       for (const auto &Relocation : make_range(ROI, ROE)) {
122         LLVM_DEBUG(
123             dbgs() << "BOLT-DEBUG: emitting relocation for symbol "
124                    << (Relocation.Symbol ? Relocation.Symbol->getName()
125                                          : StringRef("<none>"))
126                    << " at offset 0x" << Twine::utohexstr(Relocation.Offset)
127                    << " with size "
128                    << Relocation::getSizeForType(Relocation.Type) << '\n');
129       }
130 #endif
131 
132       size_t RelocationSize = Relocation::emit(ROI, ROE, &Streamer);
133       SectionOffset += RelocationSize;
134     }
135     assert(SectionOffset <= SectionContents.size() && "overflow error");
136     if (SectionOffset < SectionContents.size())
137       Streamer.emitBytes(SectionContents.substr(SectionOffset));
138   }
139 
140   if (BC.HasRelocations && opts::HotData && isReordered())
141     Streamer.emitLabel(BC.Ctx->getOrCreateSymbol("__hot_data_end"));
142 }
143 
144 void BinarySection::flushPendingRelocations(raw_pwrite_stream &OS,
145                                             SymbolResolverFuncTy Resolver) {
146   if (PendingRelocations.empty() && Patches.empty())
147     return;
148 
149   const uint64_t SectionAddress = getAddress();
150 
151   // We apply relocations to original section contents. For allocatable sections
152   // this means using their input file offsets, since the output file offset
153   // could change (e.g. for new instance of .text). For non-allocatable
154   // sections, the output offset should always be a valid one.
155   const uint64_t SectionFileOffset =
156       isAllocatable() ? getInputFileOffset() : getOutputFileOffset();
157   LLVM_DEBUG(
158       dbgs() << "BOLT-DEBUG: flushing pending relocations for section "
159              << getName() << '\n'
160              << "  address: 0x" << Twine::utohexstr(SectionAddress) << '\n'
161              << "  offset: 0x" << Twine::utohexstr(SectionFileOffset) << '\n');
162 
163   for (BinaryPatch &Patch : Patches)
164     OS.pwrite(Patch.Bytes.data(), Patch.Bytes.size(),
165               SectionFileOffset + Patch.Offset);
166 
167   for (Relocation &Reloc : PendingRelocations) {
168     uint64_t Value = Reloc.Addend;
169     if (Reloc.Symbol)
170       Value += Resolver(Reloc.Symbol);
171 
172     Value = Relocation::encodeValue(Reloc.Type, Value,
173                                     SectionAddress + Reloc.Offset);
174 
175     OS.pwrite(reinterpret_cast<const char *>(&Value),
176               Relocation::getSizeForType(Reloc.Type),
177               SectionFileOffset + Reloc.Offset);
178 
179     LLVM_DEBUG(
180         dbgs() << "BOLT-DEBUG: writing value 0x" << Twine::utohexstr(Value)
181                << " of size " << Relocation::getSizeForType(Reloc.Type)
182                << " at section offset 0x" << Twine::utohexstr(Reloc.Offset)
183                << " address 0x"
184                << Twine::utohexstr(SectionAddress + Reloc.Offset)
185                << " file offset 0x"
186                << Twine::utohexstr(SectionFileOffset + Reloc.Offset) << '\n';);
187   }
188 
189   clearList(PendingRelocations);
190 }
191 
192 BinarySection::~BinarySection() {
193   if (isReordered()) {
194     delete[] getData();
195     return;
196   }
197 
198   if (!isAllocatable() && !hasValidSectionID() &&
199       (!hasSectionRef() ||
200        OutputContents.data() != getContents(Section).data())) {
201     delete[] getOutputData();
202   }
203 }
204 
205 void BinarySection::clearRelocations() { clearList(Relocations); }
206 
207 void BinarySection::print(raw_ostream &OS) const {
208   OS << getName() << ", "
209      << "0x" << Twine::utohexstr(getAddress()) << ", " << getSize() << " (0x"
210      << Twine::utohexstr(getOutputAddress()) << ", " << getOutputSize() << ")"
211      << ", data = " << getData() << ", output data = " << getOutputData();
212 
213   if (isAllocatable())
214     OS << " (allocatable)";
215 
216   if (isVirtual())
217     OS << " (virtual)";
218 
219   if (isTLS())
220     OS << " (tls)";
221 
222   if (opts::PrintRelocations)
223     for (const Relocation &R : relocations())
224       OS << "\n  " << R;
225 }
226 
227 BinarySection::RelocationSetType
228 BinarySection::reorderRelocations(bool Inplace) const {
229   assert(PendingRelocations.empty() &&
230          "reordering pending relocations not supported");
231   RelocationSetType NewRelocations;
232   for (const Relocation &Rel : relocations()) {
233     uint64_t RelAddr = Rel.Offset + getAddress();
234     BinaryData *BD = BC.getBinaryDataContainingAddress(RelAddr);
235     BD = BD->getAtomicRoot();
236     assert(BD);
237 
238     if ((!BD->isMoved() && !Inplace) || BD->isJumpTable())
239       continue;
240 
241     Relocation NewRel(Rel);
242     uint64_t RelOffset = RelAddr - BD->getAddress();
243     NewRel.Offset = BD->getOutputOffset() + RelOffset;
244     assert(NewRel.Offset < getSize());
245     LLVM_DEBUG(dbgs() << "BOLT-DEBUG: moving " << Rel << " -> " << NewRel
246                       << "\n");
247     NewRelocations.emplace(std::move(NewRel));
248   }
249   return NewRelocations;
250 }
251 
252 void BinarySection::reorderContents(const std::vector<BinaryData *> &Order,
253                                     bool Inplace) {
254   IsReordered = true;
255 
256   Relocations = reorderRelocations(Inplace);
257 
258   std::string Str;
259   raw_string_ostream OS(Str);
260   const char *Src = Contents.data();
261   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: reorderContents for " << Name << "\n");
262   for (BinaryData *BD : Order) {
263     assert((BD->isMoved() || !Inplace) && !BD->isJumpTable());
264     assert(BD->isAtomic() && BD->isMoveable());
265     const uint64_t SrcOffset = BD->getAddress() - getAddress();
266     assert(SrcOffset < Contents.size());
267     assert(SrcOffset == BD->getOffset());
268     while (OS.tell() < BD->getOutputOffset())
269       OS.write((unsigned char)0);
270     LLVM_DEBUG(dbgs() << "BOLT-DEBUG: " << BD->getName() << " @ " << OS.tell()
271                       << "\n");
272     OS.write(&Src[SrcOffset], BD->getOutputSize());
273   }
274   if (Relocations.empty()) {
275     // If there are no existing relocations, tack a phony one at the end
276     // of the reordered segment to force LLVM to recognize and map this
277     // section.
278     MCSymbol *ZeroSym = BC.registerNameAtAddress("Zero", 0, 0, 0);
279     addRelocation(OS.tell(), ZeroSym, Relocation::getAbs64(), 0xdeadbeef);
280 
281     uint64_t Zero = 0;
282     OS.write(reinterpret_cast<const char *>(&Zero), sizeof(Zero));
283   }
284   auto *NewData = reinterpret_cast<char *>(copyByteArray(OS.str()));
285   Contents = OutputContents = StringRef(NewData, OS.str().size());
286   OutputSize = Contents.size();
287 }
288 
289 std::string BinarySection::encodeELFNote(StringRef NameStr, StringRef DescStr,
290                                          uint32_t Type) {
291   std::string Str;
292   raw_string_ostream OS(Str);
293   const uint32_t NameSz = NameStr.size() + 1;
294   const uint32_t DescSz = DescStr.size();
295   OS.write(reinterpret_cast<const char *>(&(NameSz)), 4);
296   OS.write(reinterpret_cast<const char *>(&(DescSz)), 4);
297   OS.write(reinterpret_cast<const char *>(&(Type)), 4);
298   OS << NameStr << '\0';
299   for (uint64_t I = NameSz; I < alignTo(NameSz, 4); ++I)
300     OS << '\0';
301   OS << DescStr;
302   for (uint64_t I = DescStr.size(); I < alignTo(DescStr.size(), 4); ++I)
303     OS << '\0';
304   return OS.str();
305 }
306