1 //===- bolt/Core/BinarySection.cpp - Section in a binary file -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the BinarySection class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "bolt/Core/BinarySection.h" 14 #include "bolt/Core/BinaryContext.h" 15 #include "bolt/Utils/Utils.h" 16 #include "llvm/MC/MCStreamer.h" 17 #include "llvm/Support/CommandLine.h" 18 19 #define DEBUG_TYPE "bolt" 20 21 using namespace llvm; 22 using namespace bolt; 23 24 namespace opts { 25 extern cl::opt<bool> PrintRelocations; 26 extern cl::opt<bool> HotData; 27 } // namespace opts 28 29 uint64_t BinarySection::Count = 0; 30 31 bool BinarySection::isELF() const { return BC.isELF(); } 32 33 bool BinarySection::isMachO() const { return BC.isMachO(); } 34 35 uint64_t 36 BinarySection::hash(const BinaryData &BD, 37 std::map<const BinaryData *, uint64_t> &Cache) const { 38 auto Itr = Cache.find(&BD); 39 if (Itr != Cache.end()) 40 return Itr->second; 41 42 hash_code Hash = 43 hash_combine(hash_value(BD.getSize()), hash_value(BD.getSectionName())); 44 45 Cache[&BD] = Hash; 46 47 if (!containsRange(BD.getAddress(), BD.getSize())) 48 return Hash; 49 50 uint64_t Offset = BD.getAddress() - getAddress(); 51 const uint64_t EndOffset = BD.getEndAddress() - getAddress(); 52 auto Begin = Relocations.lower_bound(Relocation{Offset, 0, 0, 0, 0}); 53 auto End = Relocations.upper_bound(Relocation{EndOffset, 0, 0, 0, 0}); 54 const StringRef Contents = getContents(); 55 56 while (Begin != End) { 57 const Relocation &Rel = *Begin++; 58 Hash = hash_combine( 59 Hash, hash_value(Contents.substr(Offset, Begin->Offset - Offset))); 60 if (BinaryData *RelBD = BC.getBinaryDataByName(Rel.Symbol->getName())) 61 Hash = hash_combine(Hash, hash(*RelBD, Cache)); 62 Offset = Rel.Offset + Rel.getSize(); 63 } 64 65 Hash = hash_combine(Hash, 66 hash_value(Contents.substr(Offset, EndOffset - Offset))); 67 68 Cache[&BD] = Hash; 69 70 return Hash; 71 } 72 73 void BinarySection::emitAsData(MCStreamer &Streamer, 74 const Twine &SectionName) const { 75 StringRef SectionContents = 76 isFinalized() ? getOutputContents() : getContents(); 77 MCSectionELF *ELFSection = 78 BC.Ctx->getELFSection(SectionName, getELFType(), getELFFlags()); 79 80 Streamer.switchSection(ELFSection); 81 Streamer.emitValueToAlignment(getAlign()); 82 83 if (BC.HasRelocations && opts::HotData && isReordered()) 84 Streamer.emitLabel(BC.Ctx->getOrCreateSymbol("__hot_data_start")); 85 86 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: emitting " 87 << (isAllocatable() ? "" : "non-") 88 << "allocatable data section " << SectionName << '\n'); 89 90 if (!hasRelocations()) { 91 Streamer.emitBytes(SectionContents); 92 } else { 93 uint64_t SectionOffset = 0; 94 for (auto RI = Relocations.begin(), RE = Relocations.end(); RI != RE;) { 95 auto RelocationOffset = RI->Offset; 96 assert(RelocationOffset < SectionContents.size() && "overflow detected"); 97 98 if (SectionOffset < RelocationOffset) { 99 Streamer.emitBytes(SectionContents.substr( 100 SectionOffset, RelocationOffset - SectionOffset)); 101 SectionOffset = RelocationOffset; 102 } 103 104 // Get iterators to all relocations with the same offset. Usually, there 105 // is only one such relocation but there can be more for composed 106 // relocations. 107 auto ROI = RI; 108 auto ROE = Relocations.upper_bound(RelocationOffset); 109 110 // Start from the next offset on the next iteration. 111 RI = ROE; 112 113 // Skip undefined symbols. 114 auto HasUndefSym = [this](const auto &Relocation) { 115 return BC.UndefinedSymbols.count(Relocation.Symbol); 116 }; 117 118 if (std::any_of(ROI, ROE, HasUndefSym)) 119 continue; 120 121 #ifndef NDEBUG 122 for (const auto &Relocation : make_range(ROI, ROE)) { 123 LLVM_DEBUG( 124 dbgs() << "BOLT-DEBUG: emitting relocation for symbol " 125 << (Relocation.Symbol ? Relocation.Symbol->getName() 126 : StringRef("<none>")) 127 << " at offset 0x" << Twine::utohexstr(Relocation.Offset) 128 << " with size " 129 << Relocation::getSizeForType(Relocation.Type) << '\n'); 130 } 131 #endif 132 133 size_t RelocationSize = Relocation::emit(ROI, ROE, &Streamer); 134 SectionOffset += RelocationSize; 135 } 136 assert(SectionOffset <= SectionContents.size() && "overflow error"); 137 if (SectionOffset < SectionContents.size()) 138 Streamer.emitBytes(SectionContents.substr(SectionOffset)); 139 } 140 141 if (BC.HasRelocations && opts::HotData && isReordered()) 142 Streamer.emitLabel(BC.Ctx->getOrCreateSymbol("__hot_data_end")); 143 } 144 145 uint64_t BinarySection::write(raw_ostream &OS) const { 146 const uint64_t NumValidContentBytes = 147 std::min<uint64_t>(getOutputContents().size(), getOutputSize()); 148 OS.write(getOutputContents().data(), NumValidContentBytes); 149 if (getOutputSize() > NumValidContentBytes) 150 OS.write_zeros(getOutputSize() - NumValidContentBytes); 151 return getOutputSize(); 152 } 153 154 void BinarySection::flushPendingRelocations(raw_pwrite_stream &OS, 155 SymbolResolverFuncTy Resolver) { 156 if (PendingRelocations.empty() && Patches.empty()) 157 return; 158 159 const uint64_t SectionAddress = getAddress(); 160 161 // We apply relocations to original section contents. For allocatable sections 162 // this means using their input file offsets, since the output file offset 163 // could change (e.g. for new instance of .text). For non-allocatable 164 // sections, the output offset should always be a valid one. 165 const uint64_t SectionFileOffset = 166 isAllocatable() ? getInputFileOffset() : getOutputFileOffset(); 167 LLVM_DEBUG( 168 dbgs() << "BOLT-DEBUG: flushing pending relocations for section " 169 << getName() << '\n' 170 << " address: 0x" << Twine::utohexstr(SectionAddress) << '\n' 171 << " offset: 0x" << Twine::utohexstr(SectionFileOffset) << '\n'); 172 173 for (BinaryPatch &Patch : Patches) 174 OS.pwrite(Patch.Bytes.data(), Patch.Bytes.size(), 175 SectionFileOffset + Patch.Offset); 176 177 for (Relocation &Reloc : PendingRelocations) { 178 uint64_t Value = Reloc.Addend; 179 if (Reloc.Symbol) 180 Value += Resolver(Reloc.Symbol); 181 182 Value = Relocation::encodeValue(Reloc.Type, Value, 183 SectionAddress + Reloc.Offset); 184 185 OS.pwrite(reinterpret_cast<const char *>(&Value), 186 Relocation::getSizeForType(Reloc.Type), 187 SectionFileOffset + Reloc.Offset); 188 189 LLVM_DEBUG( 190 dbgs() << "BOLT-DEBUG: writing value 0x" << Twine::utohexstr(Value) 191 << " of size " << Relocation::getSizeForType(Reloc.Type) 192 << " at section offset 0x" << Twine::utohexstr(Reloc.Offset) 193 << " address 0x" 194 << Twine::utohexstr(SectionAddress + Reloc.Offset) 195 << " file offset 0x" 196 << Twine::utohexstr(SectionFileOffset + Reloc.Offset) << '\n';); 197 } 198 199 clearList(PendingRelocations); 200 } 201 202 BinarySection::~BinarySection() { updateContents(nullptr, 0); } 203 204 void BinarySection::clearRelocations() { clearList(Relocations); } 205 206 void BinarySection::print(raw_ostream &OS) const { 207 OS << getName() << ", " 208 << "0x" << Twine::utohexstr(getAddress()) << ", " << getSize() << " (0x" 209 << Twine::utohexstr(getOutputAddress()) << ", " << getOutputSize() << ")" 210 << ", data = " << getData() << ", output data = " << getOutputData(); 211 212 if (isAllocatable()) 213 OS << " (allocatable)"; 214 215 if (isVirtual()) 216 OS << " (virtual)"; 217 218 if (isTLS()) 219 OS << " (tls)"; 220 221 if (opts::PrintRelocations) 222 for (const Relocation &R : relocations()) 223 OS << "\n " << R; 224 } 225 226 BinarySection::RelocationSetType 227 BinarySection::reorderRelocations(bool Inplace) const { 228 assert(PendingRelocations.empty() && 229 "reordering pending relocations not supported"); 230 RelocationSetType NewRelocations; 231 for (const Relocation &Rel : relocations()) { 232 uint64_t RelAddr = Rel.Offset + getAddress(); 233 BinaryData *BD = BC.getBinaryDataContainingAddress(RelAddr); 234 BD = BD->getAtomicRoot(); 235 assert(BD); 236 237 if ((!BD->isMoved() && !Inplace) || BD->isJumpTable()) 238 continue; 239 240 Relocation NewRel(Rel); 241 uint64_t RelOffset = RelAddr - BD->getAddress(); 242 NewRel.Offset = BD->getOutputOffset() + RelOffset; 243 assert(NewRel.Offset < getSize()); 244 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: moving " << Rel << " -> " << NewRel 245 << "\n"); 246 NewRelocations.emplace(std::move(NewRel)); 247 } 248 return NewRelocations; 249 } 250 251 void BinarySection::reorderContents(const std::vector<BinaryData *> &Order, 252 bool Inplace) { 253 IsReordered = true; 254 255 Relocations = reorderRelocations(Inplace); 256 257 std::string Str; 258 raw_string_ostream OS(Str); 259 const char *Src = Contents.data(); 260 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: reorderContents for " << Name << "\n"); 261 for (BinaryData *BD : Order) { 262 assert((BD->isMoved() || !Inplace) && !BD->isJumpTable()); 263 assert(BD->isAtomic() && BD->isMoveable()); 264 const uint64_t SrcOffset = BD->getAddress() - getAddress(); 265 assert(SrcOffset < Contents.size()); 266 assert(SrcOffset == BD->getOffset()); 267 while (OS.tell() < BD->getOutputOffset()) 268 OS.write((unsigned char)0); 269 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: " << BD->getName() << " @ " << OS.tell() 270 << "\n"); 271 OS.write(&Src[SrcOffset], BD->getOutputSize()); 272 } 273 if (Relocations.empty()) { 274 // If there are no existing relocations, tack a phony one at the end 275 // of the reordered segment to force LLVM to recognize and map this 276 // section. 277 MCSymbol *ZeroSym = BC.registerNameAtAddress("Zero", 0, 0, 0); 278 addRelocation(OS.tell(), ZeroSym, Relocation::getAbs64(), 0xdeadbeef); 279 280 uint64_t Zero = 0; 281 OS.write(reinterpret_cast<const char *>(&Zero), sizeof(Zero)); 282 } 283 auto *NewData = reinterpret_cast<char *>(copyByteArray(OS.str())); 284 Contents = OutputContents = StringRef(NewData, OS.str().size()); 285 OutputSize = Contents.size(); 286 } 287 288 std::string BinarySection::encodeELFNote(StringRef NameStr, StringRef DescStr, 289 uint32_t Type) { 290 std::string Str; 291 raw_string_ostream OS(Str); 292 const uint32_t NameSz = NameStr.size() + 1; 293 const uint32_t DescSz = DescStr.size(); 294 OS.write(reinterpret_cast<const char *>(&(NameSz)), 4); 295 OS.write(reinterpret_cast<const char *>(&(DescSz)), 4); 296 OS.write(reinterpret_cast<const char *>(&(Type)), 4); 297 OS << NameStr << '\0'; 298 for (uint64_t I = NameSz; I < alignTo(NameSz, 4); ++I) 299 OS << '\0'; 300 OS << DescStr; 301 for (uint64_t I = DescStr.size(); I < alignTo(DescStr.size(), 4); ++I) 302 OS << '\0'; 303 return OS.str(); 304 } 305