12f09f445SMaksim Panchenko //===- bolt/Core/BinarySection.cpp - Section in a binary file -------------===// 2a34c753fSRafael Auler // 3a34c753fSRafael Auler // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4a34c753fSRafael Auler // See https://llvm.org/LICENSE.txt for license information. 5a34c753fSRafael Auler // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6a34c753fSRafael Auler // 7a34c753fSRafael Auler //===----------------------------------------------------------------------===// 8a34c753fSRafael Auler // 92f09f445SMaksim Panchenko // This file implements the BinarySection class. 102f09f445SMaksim Panchenko // 11a34c753fSRafael Auler //===----------------------------------------------------------------------===// 12a34c753fSRafael Auler 13a34c753fSRafael Auler #include "bolt/Core/BinarySection.h" 14a34c753fSRafael Auler #include "bolt/Core/BinaryContext.h" 15a34c753fSRafael Auler #include "bolt/Utils/Utils.h" 16a34c753fSRafael Auler #include "llvm/MC/MCStreamer.h" 17a34c753fSRafael Auler #include "llvm/Support/CommandLine.h" 18a34c753fSRafael Auler 19a34c753fSRafael Auler #define DEBUG_TYPE "bolt" 20a34c753fSRafael Auler 21a34c753fSRafael Auler using namespace llvm; 22a34c753fSRafael Auler using namespace bolt; 23a34c753fSRafael Auler 24a34c753fSRafael Auler namespace opts { 25a34c753fSRafael Auler extern cl::opt<bool> PrintRelocations; 26a34c753fSRafael Auler extern cl::opt<bool> HotData; 2740c2e0faSMaksim Panchenko } // namespace opts 28a34c753fSRafael Auler 295fca9c57SMaksim Panchenko uint64_t BinarySection::Count = 0; 305fca9c57SMaksim Panchenko 3140c2e0faSMaksim Panchenko bool BinarySection::isELF() const { return BC.isELF(); } 32a34c753fSRafael Auler 3340c2e0faSMaksim Panchenko bool BinarySection::isMachO() const { return BC.isMachO(); } 34a34c753fSRafael Auler 35a34c753fSRafael Auler uint64_t 36a34c753fSRafael Auler BinarySection::hash(const BinaryData &BD, 37a34c753fSRafael Auler std::map<const BinaryData *, uint64_t> &Cache) const { 38a34c753fSRafael Auler auto Itr = Cache.find(&BD); 39a34c753fSRafael Auler if (Itr != Cache.end()) 40a34c753fSRafael Auler return Itr->second; 41a34c753fSRafael Auler 4222a4aaf2SDenis Revunov hash_code Hash = 4322a4aaf2SDenis Revunov hash_combine(hash_value(BD.getSize()), hash_value(BD.getSectionName())); 4422a4aaf2SDenis Revunov 4522a4aaf2SDenis Revunov Cache[&BD] = Hash; 4622a4aaf2SDenis Revunov 4722a4aaf2SDenis Revunov if (!containsRange(BD.getAddress(), BD.getSize())) 4822a4aaf2SDenis Revunov return Hash; 49a34c753fSRafael Auler 50a34c753fSRafael Auler uint64_t Offset = BD.getAddress() - getAddress(); 51a34c753fSRafael Auler const uint64_t EndOffset = BD.getEndAddress() - getAddress(); 52a34c753fSRafael Auler auto Begin = Relocations.lower_bound(Relocation{Offset, 0, 0, 0, 0}); 53a34c753fSRafael Auler auto End = Relocations.upper_bound(Relocation{EndOffset, 0, 0, 0, 0}); 54a34c753fSRafael Auler const StringRef Contents = getContents(); 55a34c753fSRafael Auler 56a34c753fSRafael Auler while (Begin != End) { 57a34c753fSRafael Auler const Relocation &Rel = *Begin++; 58a34c753fSRafael Auler Hash = hash_combine( 5940c2e0faSMaksim Panchenko Hash, hash_value(Contents.substr(Offset, Begin->Offset - Offset))); 603652483cSRafael Auler if (BinaryData *RelBD = BC.getBinaryDataByName(Rel.Symbol->getName())) 61a34c753fSRafael Auler Hash = hash_combine(Hash, hash(*RelBD, Cache)); 62a34c753fSRafael Auler Offset = Rel.Offset + Rel.getSize(); 63a34c753fSRafael Auler } 64a34c753fSRafael Auler 6540c2e0faSMaksim Panchenko Hash = hash_combine(Hash, 66a34c753fSRafael Auler hash_value(Contents.substr(Offset, EndOffset - Offset))); 67a34c753fSRafael Auler 68a34c753fSRafael Auler Cache[&BD] = Hash; 69a34c753fSRafael Auler 70a34c753fSRafael Auler return Hash; 71a34c753fSRafael Auler } 72a34c753fSRafael Auler 734d3a0cadSMaksim Panchenko void BinarySection::emitAsData(MCStreamer &Streamer, 744d3a0cadSMaksim Panchenko const Twine &SectionName) const { 758075f0dbSMaksim Panchenko StringRef SectionContents = 768075f0dbSMaksim Panchenko isFinalized() ? getOutputContents() : getContents(); 77a34c753fSRafael Auler MCSectionELF *ELFSection = 78a34c753fSRafael Auler BC.Ctx->getELFSection(SectionName, getELFType(), getELFFlags()); 79a34c753fSRafael Auler 80adf4142fSFangrui Song Streamer.switchSection(ELFSection); 816c09ea3fSGuillaume Chatelet Streamer.emitValueToAlignment(getAlign()); 82a34c753fSRafael Auler 83a34c753fSRafael Auler if (BC.HasRelocations && opts::HotData && isReordered()) 84a34c753fSRafael Auler Streamer.emitLabel(BC.Ctx->getOrCreateSymbol("__hot_data_start")); 85a34c753fSRafael Auler 86a34c753fSRafael Auler LLVM_DEBUG(dbgs() << "BOLT-DEBUG: emitting " 87a34c753fSRafael Auler << (isAllocatable() ? "" : "non-") 88a34c753fSRafael Auler << "allocatable data section " << SectionName << '\n'); 89a34c753fSRafael Auler 90a34c753fSRafael Auler if (!hasRelocations()) { 91a34c753fSRafael Auler Streamer.emitBytes(SectionContents); 92a34c753fSRafael Auler } else { 93a34c753fSRafael Auler uint64_t SectionOffset = 0; 94b4bb6211SJob Noorman for (auto RI = Relocations.begin(), RE = Relocations.end(); RI != RE;) { 95b4bb6211SJob Noorman auto RelocationOffset = RI->Offset; 96b4bb6211SJob Noorman assert(RelocationOffset < SectionContents.size() && "overflow detected"); 97b4bb6211SJob Noorman 98b4bb6211SJob Noorman if (SectionOffset < RelocationOffset) { 9940c2e0faSMaksim Panchenko Streamer.emitBytes(SectionContents.substr( 100b4bb6211SJob Noorman SectionOffset, RelocationOffset - SectionOffset)); 101b4bb6211SJob Noorman SectionOffset = RelocationOffset; 102a34c753fSRafael Auler } 103b4bb6211SJob Noorman 104b4bb6211SJob Noorman // Get iterators to all relocations with the same offset. Usually, there 105b4bb6211SJob Noorman // is only one such relocation but there can be more for composed 106b4bb6211SJob Noorman // relocations. 107b4bb6211SJob Noorman auto ROI = RI; 108b4bb6211SJob Noorman auto ROE = Relocations.upper_bound(RelocationOffset); 109b4bb6211SJob Noorman 110b4bb6211SJob Noorman // Start from the next offset on the next iteration. 111b4bb6211SJob Noorman RI = ROE; 112b4bb6211SJob Noorman 113b4bb6211SJob Noorman // Skip undefined symbols. 114b4bb6211SJob Noorman auto HasUndefSym = [this](const auto &Relocation) { 115b4bb6211SJob Noorman return BC.UndefinedSymbols.count(Relocation.Symbol); 116b4bb6211SJob Noorman }; 117b4bb6211SJob Noorman 118b4bb6211SJob Noorman if (std::any_of(ROI, ROE, HasUndefSym)) 119b4bb6211SJob Noorman continue; 120b4bb6211SJob Noorman 121a132f5ebSKazu Hirata #ifndef NDEBUG 122b4bb6211SJob Noorman for (const auto &Relocation : make_range(ROI, ROE)) { 123b4bb6211SJob Noorman LLVM_DEBUG( 124b4bb6211SJob Noorman dbgs() << "BOLT-DEBUG: emitting relocation for symbol " 125a34c753fSRafael Auler << (Relocation.Symbol ? Relocation.Symbol->getName() 126a34c753fSRafael Auler : StringRef("<none>")) 127b4bb6211SJob Noorman << " at offset 0x" << Twine::utohexstr(Relocation.Offset) 128b4bb6211SJob Noorman << " with size " 129a34c753fSRafael Auler << Relocation::getSizeForType(Relocation.Type) << '\n'); 130b4bb6211SJob Noorman } 131a132f5ebSKazu Hirata #endif 132b4bb6211SJob Noorman 133b4bb6211SJob Noorman size_t RelocationSize = Relocation::emit(ROI, ROE, &Streamer); 134a34c753fSRafael Auler SectionOffset += RelocationSize; 135a34c753fSRafael Auler } 136a34c753fSRafael Auler assert(SectionOffset <= SectionContents.size() && "overflow error"); 1373652483cSRafael Auler if (SectionOffset < SectionContents.size()) 138a34c753fSRafael Auler Streamer.emitBytes(SectionContents.substr(SectionOffset)); 139a34c753fSRafael Auler } 140a34c753fSRafael Auler 141a34c753fSRafael Auler if (BC.HasRelocations && opts::HotData && isReordered()) 142a34c753fSRafael Auler Streamer.emitLabel(BC.Ctx->getOrCreateSymbol("__hot_data_end")); 143a34c753fSRafael Auler } 144a34c753fSRafael Auler 145*99655322SMaksim Panchenko uint64_t BinarySection::write(raw_ostream &OS) const { 146*99655322SMaksim Panchenko const uint64_t NumValidContentBytes = 147*99655322SMaksim Panchenko std::min<uint64_t>(getOutputContents().size(), getOutputSize()); 148*99655322SMaksim Panchenko OS.write(getOutputContents().data(), NumValidContentBytes); 149*99655322SMaksim Panchenko if (getOutputSize() > NumValidContentBytes) 150*99655322SMaksim Panchenko OS.write_zeros(getOutputSize() - NumValidContentBytes); 151*99655322SMaksim Panchenko return getOutputSize(); 152*99655322SMaksim Panchenko } 153*99655322SMaksim Panchenko 154a34c753fSRafael Auler void BinarySection::flushPendingRelocations(raw_pwrite_stream &OS, 155a34c753fSRafael Auler SymbolResolverFuncTy Resolver) { 156a34c753fSRafael Auler if (PendingRelocations.empty() && Patches.empty()) 157a34c753fSRafael Auler return; 158a34c753fSRafael Auler 159a34c753fSRafael Auler const uint64_t SectionAddress = getAddress(); 160a34c753fSRafael Auler 161a34c753fSRafael Auler // We apply relocations to original section contents. For allocatable sections 162a34c753fSRafael Auler // this means using their input file offsets, since the output file offset 163a34c753fSRafael Auler // could change (e.g. for new instance of .text). For non-allocatable 164a34c753fSRafael Auler // sections, the output offset should always be a valid one. 16540c2e0faSMaksim Panchenko const uint64_t SectionFileOffset = 16640c2e0faSMaksim Panchenko isAllocatable() ? getInputFileOffset() : getOutputFileOffset(); 167a34c753fSRafael Auler LLVM_DEBUG( 168a34c753fSRafael Auler dbgs() << "BOLT-DEBUG: flushing pending relocations for section " 169a34c753fSRafael Auler << getName() << '\n' 170a34c753fSRafael Auler << " address: 0x" << Twine::utohexstr(SectionAddress) << '\n' 171a34c753fSRafael Auler << " offset: 0x" << Twine::utohexstr(SectionFileOffset) << '\n'); 172a34c753fSRafael Auler 1733652483cSRafael Auler for (BinaryPatch &Patch : Patches) 17440c2e0faSMaksim Panchenko OS.pwrite(Patch.Bytes.data(), Patch.Bytes.size(), 175a34c753fSRafael Auler SectionFileOffset + Patch.Offset); 176a34c753fSRafael Auler 177a34c753fSRafael Auler for (Relocation &Reloc : PendingRelocations) { 178a34c753fSRafael Auler uint64_t Value = Reloc.Addend; 179a34c753fSRafael Auler if (Reloc.Symbol) 180a34c753fSRafael Auler Value += Resolver(Reloc.Symbol); 1814a4045f7SElvina Yakubova 18277811752SRafael Auler Value = Relocation::encodeValue(Reloc.Type, Value, 1834a4045f7SElvina Yakubova SectionAddress + Reloc.Offset); 1844a4045f7SElvina Yakubova 185a34c753fSRafael Auler OS.pwrite(reinterpret_cast<const char *>(&Value), 186a34c753fSRafael Auler Relocation::getSizeForType(Reloc.Type), 187a34c753fSRafael Auler SectionFileOffset + Reloc.Offset); 1884a4045f7SElvina Yakubova 189a34c753fSRafael Auler LLVM_DEBUG( 190a34c753fSRafael Auler dbgs() << "BOLT-DEBUG: writing value 0x" << Twine::utohexstr(Value) 191a34c753fSRafael Auler << " of size " << Relocation::getSizeForType(Reloc.Type) 192a34c753fSRafael Auler << " at section offset 0x" << Twine::utohexstr(Reloc.Offset) 193a34c753fSRafael Auler << " address 0x" 194a34c753fSRafael Auler << Twine::utohexstr(SectionAddress + Reloc.Offset) 195a34c753fSRafael Auler << " file offset 0x" 196a34c753fSRafael Auler << Twine::utohexstr(SectionFileOffset + Reloc.Offset) << '\n';); 197a34c753fSRafael Auler } 198a34c753fSRafael Auler 199a34c753fSRafael Auler clearList(PendingRelocations); 200a34c753fSRafael Auler } 201a34c753fSRafael Auler 2025daf2001SMaksim Panchenko BinarySection::~BinarySection() { updateContents(nullptr, 0); } 203a34c753fSRafael Auler 20440c2e0faSMaksim Panchenko void BinarySection::clearRelocations() { clearList(Relocations); } 205a34c753fSRafael Auler 206a34c753fSRafael Auler void BinarySection::print(raw_ostream &OS) const { 207a34c753fSRafael Auler OS << getName() << ", " 20840c2e0faSMaksim Panchenko << "0x" << Twine::utohexstr(getAddress()) << ", " << getSize() << " (0x" 20940c2e0faSMaksim Panchenko << Twine::utohexstr(getOutputAddress()) << ", " << getOutputSize() << ")" 21040c2e0faSMaksim Panchenko << ", data = " << getData() << ", output data = " << getOutputData(); 211a34c753fSRafael Auler 212a34c753fSRafael Auler if (isAllocatable()) 213a34c753fSRafael Auler OS << " (allocatable)"; 214a34c753fSRafael Auler 215a34c753fSRafael Auler if (isVirtual()) 216a34c753fSRafael Auler OS << " (virtual)"; 217a34c753fSRafael Auler 218a34c753fSRafael Auler if (isTLS()) 219a34c753fSRafael Auler OS << " (tls)"; 220a34c753fSRafael Auler 2213652483cSRafael Auler if (opts::PrintRelocations) 222a34c753fSRafael Auler for (const Relocation &R : relocations()) 223a34c753fSRafael Auler OS << "\n " << R; 224a34c753fSRafael Auler } 225a34c753fSRafael Auler 226a34c753fSRafael Auler BinarySection::RelocationSetType 227a34c753fSRafael Auler BinarySection::reorderRelocations(bool Inplace) const { 228a34c753fSRafael Auler assert(PendingRelocations.empty() && 2291a2f8336Sspaette "reordering pending relocations not supported"); 230a34c753fSRafael Auler RelocationSetType NewRelocations; 231a34c753fSRafael Auler for (const Relocation &Rel : relocations()) { 232a34c753fSRafael Auler uint64_t RelAddr = Rel.Offset + getAddress(); 233a34c753fSRafael Auler BinaryData *BD = BC.getBinaryDataContainingAddress(RelAddr); 234a34c753fSRafael Auler BD = BD->getAtomicRoot(); 235a34c753fSRafael Auler assert(BD); 236a34c753fSRafael Auler 237a34c753fSRafael Auler if ((!BD->isMoved() && !Inplace) || BD->isJumpTable()) 238a34c753fSRafael Auler continue; 239a34c753fSRafael Auler 240a34c753fSRafael Auler Relocation NewRel(Rel); 241a34c753fSRafael Auler uint64_t RelOffset = RelAddr - BD->getAddress(); 242a34c753fSRafael Auler NewRel.Offset = BD->getOutputOffset() + RelOffset; 243a34c753fSRafael Auler assert(NewRel.Offset < getSize()); 244a34c753fSRafael Auler LLVM_DEBUG(dbgs() << "BOLT-DEBUG: moving " << Rel << " -> " << NewRel 245a34c753fSRafael Auler << "\n"); 246b4bb6211SJob Noorman NewRelocations.emplace(std::move(NewRel)); 247a34c753fSRafael Auler } 248a34c753fSRafael Auler return NewRelocations; 249a34c753fSRafael Auler } 250a34c753fSRafael Auler 251a34c753fSRafael Auler void BinarySection::reorderContents(const std::vector<BinaryData *> &Order, 252a34c753fSRafael Auler bool Inplace) { 253a34c753fSRafael Auler IsReordered = true; 254a34c753fSRafael Auler 255a34c753fSRafael Auler Relocations = reorderRelocations(Inplace); 256a34c753fSRafael Auler 257a34c753fSRafael Auler std::string Str; 258a34c753fSRafael Auler raw_string_ostream OS(Str); 259a34c753fSRafael Auler const char *Src = Contents.data(); 260a34c753fSRafael Auler LLVM_DEBUG(dbgs() << "BOLT-DEBUG: reorderContents for " << Name << "\n"); 261a34c753fSRafael Auler for (BinaryData *BD : Order) { 262a34c753fSRafael Auler assert((BD->isMoved() || !Inplace) && !BD->isJumpTable()); 263a34c753fSRafael Auler assert(BD->isAtomic() && BD->isMoveable()); 264a34c753fSRafael Auler const uint64_t SrcOffset = BD->getAddress() - getAddress(); 265a34c753fSRafael Auler assert(SrcOffset < Contents.size()); 266a34c753fSRafael Auler assert(SrcOffset == BD->getOffset()); 2673652483cSRafael Auler while (OS.tell() < BD->getOutputOffset()) 268a34c753fSRafael Auler OS.write((unsigned char)0); 269a34c753fSRafael Auler LLVM_DEBUG(dbgs() << "BOLT-DEBUG: " << BD->getName() << " @ " << OS.tell() 270a34c753fSRafael Auler << "\n"); 271a34c753fSRafael Auler OS.write(&Src[SrcOffset], BD->getOutputSize()); 272a34c753fSRafael Auler } 273a34c753fSRafael Auler if (Relocations.empty()) { 274a34c753fSRafael Auler // If there are no existing relocations, tack a phony one at the end 275a34c753fSRafael Auler // of the reordered segment to force LLVM to recognize and map this 276a34c753fSRafael Auler // section. 277a34c753fSRafael Auler MCSymbol *ZeroSym = BC.registerNameAtAddress("Zero", 0, 0, 0); 278b0d1f87bSVladislav Khmelevsky addRelocation(OS.tell(), ZeroSym, Relocation::getAbs64(), 0xdeadbeef); 279a34c753fSRafael Auler 280a34c753fSRafael Auler uint64_t Zero = 0; 281a34c753fSRafael Auler OS.write(reinterpret_cast<const char *>(&Zero), sizeof(Zero)); 282a34c753fSRafael Auler } 283a34c753fSRafael Auler auto *NewData = reinterpret_cast<char *>(copyByteArray(OS.str())); 284a34c753fSRafael Auler Contents = OutputContents = StringRef(NewData, OS.str().size()); 285a34c753fSRafael Auler OutputSize = Contents.size(); 286a34c753fSRafael Auler } 287a34c753fSRafael Auler 288a34c753fSRafael Auler std::string BinarySection::encodeELFNote(StringRef NameStr, StringRef DescStr, 289a34c753fSRafael Auler uint32_t Type) { 290a34c753fSRafael Auler std::string Str; 291a34c753fSRafael Auler raw_string_ostream OS(Str); 292a34c753fSRafael Auler const uint32_t NameSz = NameStr.size() + 1; 293a34c753fSRafael Auler const uint32_t DescSz = DescStr.size(); 294a34c753fSRafael Auler OS.write(reinterpret_cast<const char *>(&(NameSz)), 4); 295a34c753fSRafael Auler OS.write(reinterpret_cast<const char *>(&(DescSz)), 4); 296a34c753fSRafael Auler OS.write(reinterpret_cast<const char *>(&(Type)), 4); 297a34c753fSRafael Auler OS << NameStr << '\0'; 2983652483cSRafael Auler for (uint64_t I = NameSz; I < alignTo(NameSz, 4); ++I) 299a34c753fSRafael Auler OS << '\0'; 300a34c753fSRafael Auler OS << DescStr; 3013652483cSRafael Auler for (uint64_t I = DescStr.size(); I < alignTo(DescStr.size(), 4); ++I) 302a34c753fSRafael Auler OS << '\0'; 303a34c753fSRafael Auler return OS.str(); 304a34c753fSRafael Auler } 305