//===- bolt/Core/BinarySection.cpp - Section in a binary file -------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file implements the BinarySection class. // //===----------------------------------------------------------------------===// #include "bolt/Core/BinarySection.h" #include "bolt/Core/BinaryContext.h" #include "bolt/Utils/Utils.h" #include "llvm/MC/MCStreamer.h" #include "llvm/Support/CommandLine.h" #define DEBUG_TYPE "bolt" using namespace llvm; using namespace bolt; namespace opts { extern cl::opt PrintRelocations; extern cl::opt HotData; } // namespace opts uint64_t BinarySection::Count = 0; bool BinarySection::isELF() const { return BC.isELF(); } bool BinarySection::isMachO() const { return BC.isMachO(); } uint64_t BinarySection::hash(const BinaryData &BD, std::map &Cache) const { auto Itr = Cache.find(&BD); if (Itr != Cache.end()) return Itr->second; hash_code Hash = hash_combine(hash_value(BD.getSize()), hash_value(BD.getSectionName())); Cache[&BD] = Hash; if (!containsRange(BD.getAddress(), BD.getSize())) return Hash; uint64_t Offset = BD.getAddress() - getAddress(); const uint64_t EndOffset = BD.getEndAddress() - getAddress(); auto Begin = Relocations.lower_bound(Relocation{Offset, 0, 0, 0, 0}); auto End = Relocations.upper_bound(Relocation{EndOffset, 0, 0, 0, 0}); const StringRef Contents = getContents(); while (Begin != End) { const Relocation &Rel = *Begin++; Hash = hash_combine( Hash, hash_value(Contents.substr(Offset, Begin->Offset - Offset))); if (BinaryData *RelBD = BC.getBinaryDataByName(Rel.Symbol->getName())) Hash = hash_combine(Hash, hash(*RelBD, Cache)); Offset = Rel.Offset + Rel.getSize(); } Hash = hash_combine(Hash, hash_value(Contents.substr(Offset, EndOffset - Offset))); Cache[&BD] = Hash; return Hash; } void BinarySection::emitAsData(MCStreamer &Streamer, const Twine &SectionName) const { StringRef SectionContents = isFinalized() ? getOutputContents() : getContents(); MCSectionELF *ELFSection = BC.Ctx->getELFSection(SectionName, getELFType(), getELFFlags()); Streamer.switchSection(ELFSection); Streamer.emitValueToAlignment(getAlign()); if (BC.HasRelocations && opts::HotData && isReordered()) Streamer.emitLabel(BC.Ctx->getOrCreateSymbol("__hot_data_start")); LLVM_DEBUG(dbgs() << "BOLT-DEBUG: emitting " << (isAllocatable() ? "" : "non-") << "allocatable data section " << SectionName << '\n'); if (!hasRelocations()) { Streamer.emitBytes(SectionContents); } else { uint64_t SectionOffset = 0; for (auto RI = Relocations.begin(), RE = Relocations.end(); RI != RE;) { auto RelocationOffset = RI->Offset; assert(RelocationOffset < SectionContents.size() && "overflow detected"); if (SectionOffset < RelocationOffset) { Streamer.emitBytes(SectionContents.substr( SectionOffset, RelocationOffset - SectionOffset)); SectionOffset = RelocationOffset; } // Get iterators to all relocations with the same offset. Usually, there // is only one such relocation but there can be more for composed // relocations. auto ROI = RI; auto ROE = Relocations.upper_bound(RelocationOffset); // Start from the next offset on the next iteration. RI = ROE; // Skip undefined symbols. auto HasUndefSym = [this](const auto &Relocation) { return BC.UndefinedSymbols.count(Relocation.Symbol); }; if (std::any_of(ROI, ROE, HasUndefSym)) continue; #ifndef NDEBUG for (const auto &Relocation : make_range(ROI, ROE)) { LLVM_DEBUG( dbgs() << "BOLT-DEBUG: emitting relocation for symbol " << (Relocation.Symbol ? Relocation.Symbol->getName() : StringRef("")) << " at offset 0x" << Twine::utohexstr(Relocation.Offset) << " with size " << Relocation::getSizeForType(Relocation.Type) << '\n'); } #endif size_t RelocationSize = Relocation::emit(ROI, ROE, &Streamer); SectionOffset += RelocationSize; } assert(SectionOffset <= SectionContents.size() && "overflow error"); if (SectionOffset < SectionContents.size()) Streamer.emitBytes(SectionContents.substr(SectionOffset)); } if (BC.HasRelocations && opts::HotData && isReordered()) Streamer.emitLabel(BC.Ctx->getOrCreateSymbol("__hot_data_end")); } uint64_t BinarySection::write(raw_ostream &OS) const { const uint64_t NumValidContentBytes = std::min(getOutputContents().size(), getOutputSize()); OS.write(getOutputContents().data(), NumValidContentBytes); if (getOutputSize() > NumValidContentBytes) OS.write_zeros(getOutputSize() - NumValidContentBytes); return getOutputSize(); } void BinarySection::flushPendingRelocations(raw_pwrite_stream &OS, SymbolResolverFuncTy Resolver) { if (PendingRelocations.empty() && Patches.empty()) return; const uint64_t SectionAddress = getAddress(); // We apply relocations to original section contents. For allocatable sections // this means using their input file offsets, since the output file offset // could change (e.g. for new instance of .text). For non-allocatable // sections, the output offset should always be a valid one. const uint64_t SectionFileOffset = isAllocatable() ? getInputFileOffset() : getOutputFileOffset(); LLVM_DEBUG( dbgs() << "BOLT-DEBUG: flushing pending relocations for section " << getName() << '\n' << " address: 0x" << Twine::utohexstr(SectionAddress) << '\n' << " offset: 0x" << Twine::utohexstr(SectionFileOffset) << '\n'); for (BinaryPatch &Patch : Patches) OS.pwrite(Patch.Bytes.data(), Patch.Bytes.size(), SectionFileOffset + Patch.Offset); for (Relocation &Reloc : PendingRelocations) { uint64_t Value = Reloc.Addend; if (Reloc.Symbol) Value += Resolver(Reloc.Symbol); Value = Relocation::encodeValue(Reloc.Type, Value, SectionAddress + Reloc.Offset); OS.pwrite(reinterpret_cast(&Value), Relocation::getSizeForType(Reloc.Type), SectionFileOffset + Reloc.Offset); LLVM_DEBUG( dbgs() << "BOLT-DEBUG: writing value 0x" << Twine::utohexstr(Value) << " of size " << Relocation::getSizeForType(Reloc.Type) << " at section offset 0x" << Twine::utohexstr(Reloc.Offset) << " address 0x" << Twine::utohexstr(SectionAddress + Reloc.Offset) << " file offset 0x" << Twine::utohexstr(SectionFileOffset + Reloc.Offset) << '\n';); } clearList(PendingRelocations); } BinarySection::~BinarySection() { updateContents(nullptr, 0); } void BinarySection::clearRelocations() { clearList(Relocations); } void BinarySection::print(raw_ostream &OS) const { OS << getName() << ", " << "0x" << Twine::utohexstr(getAddress()) << ", " << getSize() << " (0x" << Twine::utohexstr(getOutputAddress()) << ", " << getOutputSize() << ")" << ", data = " << getData() << ", output data = " << getOutputData(); if (isAllocatable()) OS << " (allocatable)"; if (isVirtual()) OS << " (virtual)"; if (isTLS()) OS << " (tls)"; if (opts::PrintRelocations) for (const Relocation &R : relocations()) OS << "\n " << R; } BinarySection::RelocationSetType BinarySection::reorderRelocations(bool Inplace) const { assert(PendingRelocations.empty() && "reordering pending relocations not supported"); RelocationSetType NewRelocations; for (const Relocation &Rel : relocations()) { uint64_t RelAddr = Rel.Offset + getAddress(); BinaryData *BD = BC.getBinaryDataContainingAddress(RelAddr); BD = BD->getAtomicRoot(); assert(BD); if ((!BD->isMoved() && !Inplace) || BD->isJumpTable()) continue; Relocation NewRel(Rel); uint64_t RelOffset = RelAddr - BD->getAddress(); NewRel.Offset = BD->getOutputOffset() + RelOffset; assert(NewRel.Offset < getSize()); LLVM_DEBUG(dbgs() << "BOLT-DEBUG: moving " << Rel << " -> " << NewRel << "\n"); NewRelocations.emplace(std::move(NewRel)); } return NewRelocations; } void BinarySection::reorderContents(const std::vector &Order, bool Inplace) { IsReordered = true; Relocations = reorderRelocations(Inplace); std::string Str; raw_string_ostream OS(Str); const char *Src = Contents.data(); LLVM_DEBUG(dbgs() << "BOLT-DEBUG: reorderContents for " << Name << "\n"); for (BinaryData *BD : Order) { assert((BD->isMoved() || !Inplace) && !BD->isJumpTable()); assert(BD->isAtomic() && BD->isMoveable()); const uint64_t SrcOffset = BD->getAddress() - getAddress(); assert(SrcOffset < Contents.size()); assert(SrcOffset == BD->getOffset()); while (OS.tell() < BD->getOutputOffset()) OS.write((unsigned char)0); LLVM_DEBUG(dbgs() << "BOLT-DEBUG: " << BD->getName() << " @ " << OS.tell() << "\n"); OS.write(&Src[SrcOffset], BD->getOutputSize()); } if (Relocations.empty()) { // If there are no existing relocations, tack a phony one at the end // of the reordered segment to force LLVM to recognize and map this // section. MCSymbol *ZeroSym = BC.registerNameAtAddress("Zero", 0, 0, 0); addRelocation(OS.tell(), ZeroSym, Relocation::getAbs64(), 0xdeadbeef); uint64_t Zero = 0; OS.write(reinterpret_cast(&Zero), sizeof(Zero)); } auto *NewData = reinterpret_cast(copyByteArray(OS.str())); Contents = OutputContents = StringRef(NewData, OS.str().size()); OutputSize = Contents.size(); } std::string BinarySection::encodeELFNote(StringRef NameStr, StringRef DescStr, uint32_t Type) { std::string Str; raw_string_ostream OS(Str); const uint32_t NameSz = NameStr.size() + 1; const uint32_t DescSz = DescStr.size(); OS.write(reinterpret_cast(&(NameSz)), 4); OS.write(reinterpret_cast(&(DescSz)), 4); OS.write(reinterpret_cast(&(Type)), 4); OS << NameStr << '\0'; for (uint64_t I = NameSz; I < alignTo(NameSz, 4); ++I) OS << '\0'; OS << DescStr; for (uint64_t I = DescStr.size(); I < alignTo(DescStr.size(), 4); ++I) OS << '\0'; return OS.str(); }