xref: /llvm-project/bolt/lib/Core/BinarySection.cpp (revision 996553228f8b2f3219451a2514bd6f9380f13e28)
12f09f445SMaksim Panchenko //===- bolt/Core/BinarySection.cpp - Section in a binary file -------------===//
2a34c753fSRafael Auler //
3a34c753fSRafael Auler // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4a34c753fSRafael Auler // See https://llvm.org/LICENSE.txt for license information.
5a34c753fSRafael Auler // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6a34c753fSRafael Auler //
7a34c753fSRafael Auler //===----------------------------------------------------------------------===//
8a34c753fSRafael Auler //
92f09f445SMaksim Panchenko // This file implements the BinarySection class.
102f09f445SMaksim Panchenko //
11a34c753fSRafael Auler //===----------------------------------------------------------------------===//
12a34c753fSRafael Auler 
13a34c753fSRafael Auler #include "bolt/Core/BinarySection.h"
14a34c753fSRafael Auler #include "bolt/Core/BinaryContext.h"
15a34c753fSRafael Auler #include "bolt/Utils/Utils.h"
16a34c753fSRafael Auler #include "llvm/MC/MCStreamer.h"
17a34c753fSRafael Auler #include "llvm/Support/CommandLine.h"
18a34c753fSRafael Auler 
19a34c753fSRafael Auler #define DEBUG_TYPE "bolt"
20a34c753fSRafael Auler 
21a34c753fSRafael Auler using namespace llvm;
22a34c753fSRafael Auler using namespace bolt;
23a34c753fSRafael Auler 
24a34c753fSRafael Auler namespace opts {
25a34c753fSRafael Auler extern cl::opt<bool> PrintRelocations;
26a34c753fSRafael Auler extern cl::opt<bool> HotData;
2740c2e0faSMaksim Panchenko } // namespace opts
28a34c753fSRafael Auler 
295fca9c57SMaksim Panchenko uint64_t BinarySection::Count = 0;
305fca9c57SMaksim Panchenko 
3140c2e0faSMaksim Panchenko bool BinarySection::isELF() const { return BC.isELF(); }
32a34c753fSRafael Auler 
3340c2e0faSMaksim Panchenko bool BinarySection::isMachO() const { return BC.isMachO(); }
34a34c753fSRafael Auler 
35a34c753fSRafael Auler uint64_t
36a34c753fSRafael Auler BinarySection::hash(const BinaryData &BD,
37a34c753fSRafael Auler                     std::map<const BinaryData *, uint64_t> &Cache) const {
38a34c753fSRafael Auler   auto Itr = Cache.find(&BD);
39a34c753fSRafael Auler   if (Itr != Cache.end())
40a34c753fSRafael Auler     return Itr->second;
41a34c753fSRafael Auler 
4222a4aaf2SDenis Revunov   hash_code Hash =
4322a4aaf2SDenis Revunov       hash_combine(hash_value(BD.getSize()), hash_value(BD.getSectionName()));
4422a4aaf2SDenis Revunov 
4522a4aaf2SDenis Revunov   Cache[&BD] = Hash;
4622a4aaf2SDenis Revunov 
4722a4aaf2SDenis Revunov   if (!containsRange(BD.getAddress(), BD.getSize()))
4822a4aaf2SDenis Revunov     return Hash;
49a34c753fSRafael Auler 
50a34c753fSRafael Auler   uint64_t Offset = BD.getAddress() - getAddress();
51a34c753fSRafael Auler   const uint64_t EndOffset = BD.getEndAddress() - getAddress();
52a34c753fSRafael Auler   auto Begin = Relocations.lower_bound(Relocation{Offset, 0, 0, 0, 0});
53a34c753fSRafael Auler   auto End = Relocations.upper_bound(Relocation{EndOffset, 0, 0, 0, 0});
54a34c753fSRafael Auler   const StringRef Contents = getContents();
55a34c753fSRafael Auler 
56a34c753fSRafael Auler   while (Begin != End) {
57a34c753fSRafael Auler     const Relocation &Rel = *Begin++;
58a34c753fSRafael Auler     Hash = hash_combine(
5940c2e0faSMaksim Panchenko         Hash, hash_value(Contents.substr(Offset, Begin->Offset - Offset)));
603652483cSRafael Auler     if (BinaryData *RelBD = BC.getBinaryDataByName(Rel.Symbol->getName()))
61a34c753fSRafael Auler       Hash = hash_combine(Hash, hash(*RelBD, Cache));
62a34c753fSRafael Auler     Offset = Rel.Offset + Rel.getSize();
63a34c753fSRafael Auler   }
64a34c753fSRafael Auler 
6540c2e0faSMaksim Panchenko   Hash = hash_combine(Hash,
66a34c753fSRafael Auler                       hash_value(Contents.substr(Offset, EndOffset - Offset)));
67a34c753fSRafael Auler 
68a34c753fSRafael Auler   Cache[&BD] = Hash;
69a34c753fSRafael Auler 
70a34c753fSRafael Auler   return Hash;
71a34c753fSRafael Auler }
72a34c753fSRafael Auler 
734d3a0cadSMaksim Panchenko void BinarySection::emitAsData(MCStreamer &Streamer,
744d3a0cadSMaksim Panchenko                                const Twine &SectionName) const {
758075f0dbSMaksim Panchenko   StringRef SectionContents =
768075f0dbSMaksim Panchenko       isFinalized() ? getOutputContents() : getContents();
77a34c753fSRafael Auler   MCSectionELF *ELFSection =
78a34c753fSRafael Auler       BC.Ctx->getELFSection(SectionName, getELFType(), getELFFlags());
79a34c753fSRafael Auler 
80adf4142fSFangrui Song   Streamer.switchSection(ELFSection);
816c09ea3fSGuillaume Chatelet   Streamer.emitValueToAlignment(getAlign());
82a34c753fSRafael Auler 
83a34c753fSRafael Auler   if (BC.HasRelocations && opts::HotData && isReordered())
84a34c753fSRafael Auler     Streamer.emitLabel(BC.Ctx->getOrCreateSymbol("__hot_data_start"));
85a34c753fSRafael Auler 
86a34c753fSRafael Auler   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: emitting "
87a34c753fSRafael Auler                     << (isAllocatable() ? "" : "non-")
88a34c753fSRafael Auler                     << "allocatable data section " << SectionName << '\n');
89a34c753fSRafael Auler 
90a34c753fSRafael Auler   if (!hasRelocations()) {
91a34c753fSRafael Auler     Streamer.emitBytes(SectionContents);
92a34c753fSRafael Auler   } else {
93a34c753fSRafael Auler     uint64_t SectionOffset = 0;
94b4bb6211SJob Noorman     for (auto RI = Relocations.begin(), RE = Relocations.end(); RI != RE;) {
95b4bb6211SJob Noorman       auto RelocationOffset = RI->Offset;
96b4bb6211SJob Noorman       assert(RelocationOffset < SectionContents.size() && "overflow detected");
97b4bb6211SJob Noorman 
98b4bb6211SJob Noorman       if (SectionOffset < RelocationOffset) {
9940c2e0faSMaksim Panchenko         Streamer.emitBytes(SectionContents.substr(
100b4bb6211SJob Noorman             SectionOffset, RelocationOffset - SectionOffset));
101b4bb6211SJob Noorman         SectionOffset = RelocationOffset;
102a34c753fSRafael Auler       }
103b4bb6211SJob Noorman 
104b4bb6211SJob Noorman       // Get iterators to all relocations with the same offset. Usually, there
105b4bb6211SJob Noorman       // is only one such relocation but there can be more for composed
106b4bb6211SJob Noorman       // relocations.
107b4bb6211SJob Noorman       auto ROI = RI;
108b4bb6211SJob Noorman       auto ROE = Relocations.upper_bound(RelocationOffset);
109b4bb6211SJob Noorman 
110b4bb6211SJob Noorman       // Start from the next offset on the next iteration.
111b4bb6211SJob Noorman       RI = ROE;
112b4bb6211SJob Noorman 
113b4bb6211SJob Noorman       // Skip undefined symbols.
114b4bb6211SJob Noorman       auto HasUndefSym = [this](const auto &Relocation) {
115b4bb6211SJob Noorman         return BC.UndefinedSymbols.count(Relocation.Symbol);
116b4bb6211SJob Noorman       };
117b4bb6211SJob Noorman 
118b4bb6211SJob Noorman       if (std::any_of(ROI, ROE, HasUndefSym))
119b4bb6211SJob Noorman         continue;
120b4bb6211SJob Noorman 
121a132f5ebSKazu Hirata #ifndef NDEBUG
122b4bb6211SJob Noorman       for (const auto &Relocation : make_range(ROI, ROE)) {
123b4bb6211SJob Noorman         LLVM_DEBUG(
124b4bb6211SJob Noorman             dbgs() << "BOLT-DEBUG: emitting relocation for symbol "
125a34c753fSRafael Auler                    << (Relocation.Symbol ? Relocation.Symbol->getName()
126a34c753fSRafael Auler                                          : StringRef("<none>"))
127b4bb6211SJob Noorman                    << " at offset 0x" << Twine::utohexstr(Relocation.Offset)
128b4bb6211SJob Noorman                    << " with size "
129a34c753fSRafael Auler                    << Relocation::getSizeForType(Relocation.Type) << '\n');
130b4bb6211SJob Noorman       }
131a132f5ebSKazu Hirata #endif
132b4bb6211SJob Noorman 
133b4bb6211SJob Noorman       size_t RelocationSize = Relocation::emit(ROI, ROE, &Streamer);
134a34c753fSRafael Auler       SectionOffset += RelocationSize;
135a34c753fSRafael Auler     }
136a34c753fSRafael Auler     assert(SectionOffset <= SectionContents.size() && "overflow error");
1373652483cSRafael Auler     if (SectionOffset < SectionContents.size())
138a34c753fSRafael Auler       Streamer.emitBytes(SectionContents.substr(SectionOffset));
139a34c753fSRafael Auler   }
140a34c753fSRafael Auler 
141a34c753fSRafael Auler   if (BC.HasRelocations && opts::HotData && isReordered())
142a34c753fSRafael Auler     Streamer.emitLabel(BC.Ctx->getOrCreateSymbol("__hot_data_end"));
143a34c753fSRafael Auler }
144a34c753fSRafael Auler 
145*99655322SMaksim Panchenko uint64_t BinarySection::write(raw_ostream &OS) const {
146*99655322SMaksim Panchenko   const uint64_t NumValidContentBytes =
147*99655322SMaksim Panchenko       std::min<uint64_t>(getOutputContents().size(), getOutputSize());
148*99655322SMaksim Panchenko   OS.write(getOutputContents().data(), NumValidContentBytes);
149*99655322SMaksim Panchenko   if (getOutputSize() > NumValidContentBytes)
150*99655322SMaksim Panchenko     OS.write_zeros(getOutputSize() - NumValidContentBytes);
151*99655322SMaksim Panchenko   return getOutputSize();
152*99655322SMaksim Panchenko }
153*99655322SMaksim Panchenko 
154a34c753fSRafael Auler void BinarySection::flushPendingRelocations(raw_pwrite_stream &OS,
155a34c753fSRafael Auler                                             SymbolResolverFuncTy Resolver) {
156a34c753fSRafael Auler   if (PendingRelocations.empty() && Patches.empty())
157a34c753fSRafael Auler     return;
158a34c753fSRafael Auler 
159a34c753fSRafael Auler   const uint64_t SectionAddress = getAddress();
160a34c753fSRafael Auler 
161a34c753fSRafael Auler   // We apply relocations to original section contents. For allocatable sections
162a34c753fSRafael Auler   // this means using their input file offsets, since the output file offset
163a34c753fSRafael Auler   // could change (e.g. for new instance of .text). For non-allocatable
164a34c753fSRafael Auler   // sections, the output offset should always be a valid one.
16540c2e0faSMaksim Panchenko   const uint64_t SectionFileOffset =
16640c2e0faSMaksim Panchenko       isAllocatable() ? getInputFileOffset() : getOutputFileOffset();
167a34c753fSRafael Auler   LLVM_DEBUG(
168a34c753fSRafael Auler       dbgs() << "BOLT-DEBUG: flushing pending relocations for section "
169a34c753fSRafael Auler              << getName() << '\n'
170a34c753fSRafael Auler              << "  address: 0x" << Twine::utohexstr(SectionAddress) << '\n'
171a34c753fSRafael Auler              << "  offset: 0x" << Twine::utohexstr(SectionFileOffset) << '\n');
172a34c753fSRafael Auler 
1733652483cSRafael Auler   for (BinaryPatch &Patch : Patches)
17440c2e0faSMaksim Panchenko     OS.pwrite(Patch.Bytes.data(), Patch.Bytes.size(),
175a34c753fSRafael Auler               SectionFileOffset + Patch.Offset);
176a34c753fSRafael Auler 
177a34c753fSRafael Auler   for (Relocation &Reloc : PendingRelocations) {
178a34c753fSRafael Auler     uint64_t Value = Reloc.Addend;
179a34c753fSRafael Auler     if (Reloc.Symbol)
180a34c753fSRafael Auler       Value += Resolver(Reloc.Symbol);
1814a4045f7SElvina Yakubova 
18277811752SRafael Auler     Value = Relocation::encodeValue(Reloc.Type, Value,
1834a4045f7SElvina Yakubova                                     SectionAddress + Reloc.Offset);
1844a4045f7SElvina Yakubova 
185a34c753fSRafael Auler     OS.pwrite(reinterpret_cast<const char *>(&Value),
186a34c753fSRafael Auler               Relocation::getSizeForType(Reloc.Type),
187a34c753fSRafael Auler               SectionFileOffset + Reloc.Offset);
1884a4045f7SElvina Yakubova 
189a34c753fSRafael Auler     LLVM_DEBUG(
190a34c753fSRafael Auler         dbgs() << "BOLT-DEBUG: writing value 0x" << Twine::utohexstr(Value)
191a34c753fSRafael Auler                << " of size " << Relocation::getSizeForType(Reloc.Type)
192a34c753fSRafael Auler                << " at section offset 0x" << Twine::utohexstr(Reloc.Offset)
193a34c753fSRafael Auler                << " address 0x"
194a34c753fSRafael Auler                << Twine::utohexstr(SectionAddress + Reloc.Offset)
195a34c753fSRafael Auler                << " file offset 0x"
196a34c753fSRafael Auler                << Twine::utohexstr(SectionFileOffset + Reloc.Offset) << '\n';);
197a34c753fSRafael Auler   }
198a34c753fSRafael Auler 
199a34c753fSRafael Auler   clearList(PendingRelocations);
200a34c753fSRafael Auler }
201a34c753fSRafael Auler 
2025daf2001SMaksim Panchenko BinarySection::~BinarySection() { updateContents(nullptr, 0); }
203a34c753fSRafael Auler 
20440c2e0faSMaksim Panchenko void BinarySection::clearRelocations() { clearList(Relocations); }
205a34c753fSRafael Auler 
206a34c753fSRafael Auler void BinarySection::print(raw_ostream &OS) const {
207a34c753fSRafael Auler   OS << getName() << ", "
20840c2e0faSMaksim Panchenko      << "0x" << Twine::utohexstr(getAddress()) << ", " << getSize() << " (0x"
20940c2e0faSMaksim Panchenko      << Twine::utohexstr(getOutputAddress()) << ", " << getOutputSize() << ")"
21040c2e0faSMaksim Panchenko      << ", data = " << getData() << ", output data = " << getOutputData();
211a34c753fSRafael Auler 
212a34c753fSRafael Auler   if (isAllocatable())
213a34c753fSRafael Auler     OS << " (allocatable)";
214a34c753fSRafael Auler 
215a34c753fSRafael Auler   if (isVirtual())
216a34c753fSRafael Auler     OS << " (virtual)";
217a34c753fSRafael Auler 
218a34c753fSRafael Auler   if (isTLS())
219a34c753fSRafael Auler     OS << " (tls)";
220a34c753fSRafael Auler 
2213652483cSRafael Auler   if (opts::PrintRelocations)
222a34c753fSRafael Auler     for (const Relocation &R : relocations())
223a34c753fSRafael Auler       OS << "\n  " << R;
224a34c753fSRafael Auler }
225a34c753fSRafael Auler 
226a34c753fSRafael Auler BinarySection::RelocationSetType
227a34c753fSRafael Auler BinarySection::reorderRelocations(bool Inplace) const {
228a34c753fSRafael Auler   assert(PendingRelocations.empty() &&
2291a2f8336Sspaette          "reordering pending relocations not supported");
230a34c753fSRafael Auler   RelocationSetType NewRelocations;
231a34c753fSRafael Auler   for (const Relocation &Rel : relocations()) {
232a34c753fSRafael Auler     uint64_t RelAddr = Rel.Offset + getAddress();
233a34c753fSRafael Auler     BinaryData *BD = BC.getBinaryDataContainingAddress(RelAddr);
234a34c753fSRafael Auler     BD = BD->getAtomicRoot();
235a34c753fSRafael Auler     assert(BD);
236a34c753fSRafael Auler 
237a34c753fSRafael Auler     if ((!BD->isMoved() && !Inplace) || BD->isJumpTable())
238a34c753fSRafael Auler       continue;
239a34c753fSRafael Auler 
240a34c753fSRafael Auler     Relocation NewRel(Rel);
241a34c753fSRafael Auler     uint64_t RelOffset = RelAddr - BD->getAddress();
242a34c753fSRafael Auler     NewRel.Offset = BD->getOutputOffset() + RelOffset;
243a34c753fSRafael Auler     assert(NewRel.Offset < getSize());
244a34c753fSRafael Auler     LLVM_DEBUG(dbgs() << "BOLT-DEBUG: moving " << Rel << " -> " << NewRel
245a34c753fSRafael Auler                       << "\n");
246b4bb6211SJob Noorman     NewRelocations.emplace(std::move(NewRel));
247a34c753fSRafael Auler   }
248a34c753fSRafael Auler   return NewRelocations;
249a34c753fSRafael Auler }
250a34c753fSRafael Auler 
251a34c753fSRafael Auler void BinarySection::reorderContents(const std::vector<BinaryData *> &Order,
252a34c753fSRafael Auler                                     bool Inplace) {
253a34c753fSRafael Auler   IsReordered = true;
254a34c753fSRafael Auler 
255a34c753fSRafael Auler   Relocations = reorderRelocations(Inplace);
256a34c753fSRafael Auler 
257a34c753fSRafael Auler   std::string Str;
258a34c753fSRafael Auler   raw_string_ostream OS(Str);
259a34c753fSRafael Auler   const char *Src = Contents.data();
260a34c753fSRafael Auler   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: reorderContents for " << Name << "\n");
261a34c753fSRafael Auler   for (BinaryData *BD : Order) {
262a34c753fSRafael Auler     assert((BD->isMoved() || !Inplace) && !BD->isJumpTable());
263a34c753fSRafael Auler     assert(BD->isAtomic() && BD->isMoveable());
264a34c753fSRafael Auler     const uint64_t SrcOffset = BD->getAddress() - getAddress();
265a34c753fSRafael Auler     assert(SrcOffset < Contents.size());
266a34c753fSRafael Auler     assert(SrcOffset == BD->getOffset());
2673652483cSRafael Auler     while (OS.tell() < BD->getOutputOffset())
268a34c753fSRafael Auler       OS.write((unsigned char)0);
269a34c753fSRafael Auler     LLVM_DEBUG(dbgs() << "BOLT-DEBUG: " << BD->getName() << " @ " << OS.tell()
270a34c753fSRafael Auler                       << "\n");
271a34c753fSRafael Auler     OS.write(&Src[SrcOffset], BD->getOutputSize());
272a34c753fSRafael Auler   }
273a34c753fSRafael Auler   if (Relocations.empty()) {
274a34c753fSRafael Auler     // If there are no existing relocations, tack a phony one at the end
275a34c753fSRafael Auler     // of the reordered segment to force LLVM to recognize and map this
276a34c753fSRafael Auler     // section.
277a34c753fSRafael Auler     MCSymbol *ZeroSym = BC.registerNameAtAddress("Zero", 0, 0, 0);
278b0d1f87bSVladislav Khmelevsky     addRelocation(OS.tell(), ZeroSym, Relocation::getAbs64(), 0xdeadbeef);
279a34c753fSRafael Auler 
280a34c753fSRafael Auler     uint64_t Zero = 0;
281a34c753fSRafael Auler     OS.write(reinterpret_cast<const char *>(&Zero), sizeof(Zero));
282a34c753fSRafael Auler   }
283a34c753fSRafael Auler   auto *NewData = reinterpret_cast<char *>(copyByteArray(OS.str()));
284a34c753fSRafael Auler   Contents = OutputContents = StringRef(NewData, OS.str().size());
285a34c753fSRafael Auler   OutputSize = Contents.size();
286a34c753fSRafael Auler }
287a34c753fSRafael Auler 
288a34c753fSRafael Auler std::string BinarySection::encodeELFNote(StringRef NameStr, StringRef DescStr,
289a34c753fSRafael Auler                                          uint32_t Type) {
290a34c753fSRafael Auler   std::string Str;
291a34c753fSRafael Auler   raw_string_ostream OS(Str);
292a34c753fSRafael Auler   const uint32_t NameSz = NameStr.size() + 1;
293a34c753fSRafael Auler   const uint32_t DescSz = DescStr.size();
294a34c753fSRafael Auler   OS.write(reinterpret_cast<const char *>(&(NameSz)), 4);
295a34c753fSRafael Auler   OS.write(reinterpret_cast<const char *>(&(DescSz)), 4);
296a34c753fSRafael Auler   OS.write(reinterpret_cast<const char *>(&(Type)), 4);
297a34c753fSRafael Auler   OS << NameStr << '\0';
2983652483cSRafael Auler   for (uint64_t I = NameSz; I < alignTo(NameSz, 4); ++I)
299a34c753fSRafael Auler     OS << '\0';
300a34c753fSRafael Auler   OS << DescStr;
3013652483cSRafael Auler   for (uint64_t I = DescStr.size(); I < alignTo(DescStr.size(), 4); ++I)
302a34c753fSRafael Auler     OS << '\0';
303a34c753fSRafael Auler   return OS.str();
304a34c753fSRafael Auler }
305