//===- bolt/Passes/LongJmp.cpp --------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the LongJmpPass class.
//
//===----------------------------------------------------------------------===//

#include "bolt/Passes/LongJmp.h"
#include "bolt/Core/ParallelUtilities.h"
#include "llvm/Support/MathExtras.h"

#define DEBUG_TYPE "longjmp"

using namespace llvm;

namespace opts {
extern cl::OptionCategory BoltCategory;
extern cl::OptionCategory BoltOptCategory;
extern llvm::cl::opt<unsigned> AlignText;
extern cl::opt<unsigned> AlignFunctions;
extern cl::opt<bool> UseOldText;
extern cl::opt<bool> HotFunctionsAtEnd;

static cl::opt<bool>
    CompactCodeModel("compact-code-model",
                     cl::desc("generate code for binaries <128MB on AArch64"),
                     cl::init(false), cl::cat(BoltCategory));

static cl::opt<bool> GroupStubs("group-stubs",
                                cl::desc("share stubs across functions"),
                                cl::init(true), cl::cat(BoltOptCategory));
}

namespace llvm {
namespace bolt {

constexpr unsigned ColdFragAlign = 16;

static void relaxStubToShortJmp(BinaryBasicBlock &StubBB, const MCSymbol *Tgt) {
  const BinaryContext &BC = StubBB.getFunction()->getBinaryContext();
  InstructionListType Seq;
  BC.MIB->createShortJmp(Seq, Tgt, BC.Ctx.get());
  StubBB.clear();
  StubBB.addInstructions(Seq.begin(), Seq.end());
}

static void relaxStubToLongJmp(BinaryBasicBlock &StubBB, const MCSymbol *Tgt) {
  const BinaryContext &BC = StubBB.getFunction()->getBinaryContext();
  InstructionListType Seq;
  BC.MIB->createLongJmp(Seq, Tgt, BC.Ctx.get());
  StubBB.clear();
  StubBB.addInstructions(Seq.begin(), Seq.end());
}

static BinaryBasicBlock *getBBAtHotColdSplitPoint(BinaryFunction &Func) {
  if (!Func.isSplit() || Func.empty())
    return nullptr;

  assert(!(*Func.begin()).isCold() && "Entry cannot be cold");
  for (auto I = Func.getLayout().block_begin(),
            E = Func.getLayout().block_end();
       I != E; ++I) {
    auto Next = std::next(I);
    if (Next != E && (*Next)->isCold())
      return *I;
  }
  llvm_unreachable("No hot-cold split point found");
}

static bool mayNeedStub(const BinaryContext &BC, const MCInst &Inst) {
  return (BC.MIB->isBranch(Inst) || BC.MIB->isCall(Inst)) &&
         !BC.MIB->isIndirectBranch(Inst) && !BC.MIB->isIndirectCall(Inst);
}

std::pair<std::unique_ptr<BinaryBasicBlock>, MCSymbol *>
LongJmpPass::createNewStub(BinaryBasicBlock &SourceBB, const MCSymbol *TgtSym,
                           bool TgtIsFunc, uint64_t AtAddress) {
  BinaryFunction &Func = *SourceBB.getFunction();
  const BinaryContext &BC = Func.getBinaryContext();
  const bool IsCold = SourceBB.isCold();
  MCSymbol *StubSym = BC.Ctx->createNamedTempSymbol("Stub");
  std::unique_ptr<BinaryBasicBlock> StubBB = Func.createBasicBlock(StubSym);
  MCInst Inst;
  BC.MIB->createUncondBranch(Inst, TgtSym, BC.Ctx.get());
  if (TgtIsFunc)
    BC.MIB->convertJmpToTailCall(Inst);
  StubBB->addInstruction(Inst);
  StubBB->setExecutionCount(0);

  // Register this in stubs maps
  auto registerInMap = [&](StubGroupsTy &Map) {
    StubGroupTy &StubGroup = Map[TgtSym];
    StubGroup.insert(
        llvm::lower_bound(
            StubGroup, std::make_pair(AtAddress, nullptr),
            [&](const std::pair<uint64_t, BinaryBasicBlock *> &LHS,
                const std::pair<uint64_t, BinaryBasicBlock *> &RHS) {
              return LHS.first < RHS.first;
            }),
        std::make_pair(AtAddress, StubBB.get()));
  };

  Stubs[&Func].insert(StubBB.get());
  StubBits[StubBB.get()] = BC.MIB->getUncondBranchEncodingSize();
  if (IsCold) {
    registerInMap(ColdLocalStubs[&Func]);
    if (opts::GroupStubs && TgtIsFunc)
      registerInMap(ColdStubGroups);
    ++NumColdStubs;
  } else {
    registerInMap(HotLocalStubs[&Func]);
    if (opts::GroupStubs && TgtIsFunc)
      registerInMap(HotStubGroups);
    ++NumHotStubs;
  }

  return std::make_pair(std::move(StubBB), StubSym);
}

BinaryBasicBlock *LongJmpPass::lookupStubFromGroup(
    const StubGroupsTy &StubGroups, const BinaryFunction &Func,
    const MCInst &Inst, const MCSymbol *TgtSym, uint64_t DotAddress) const {
  const BinaryContext &BC = Func.getBinaryContext();
  auto CandidatesIter = StubGroups.find(TgtSym);
  if (CandidatesIter == StubGroups.end())
    return nullptr;
  const StubGroupTy &Candidates = CandidatesIter->second;
  if (Candidates.empty())
    return nullptr;
  auto Cand = llvm::lower_bound(
      Candidates, std::make_pair(DotAddress, nullptr),
      [&](const std::pair<uint64_t, BinaryBasicBlock *> &LHS,
          const std::pair<uint64_t, BinaryBasicBlock *> &RHS) {
        return LHS.first < RHS.first;
      });
  if (Cand == Candidates.end()) {
    Cand = std::prev(Cand);
  } else if (Cand != Candidates.begin()) {
    const StubTy *LeftCand = std::prev(Cand);
    if (Cand->first - DotAddress > DotAddress - LeftCand->first)
      Cand = LeftCand;
  }
  int BitsAvail = BC.MIB->getPCRelEncodingSize(Inst) - 1;
  assert(BitsAvail < 63 && "PCRelEncodingSize is too large to use int64_t to "
                           "check for out-of-bounds.");
  int64_t MaxVal = (1ULL << BitsAvail) - 1;
  int64_t MinVal = -(1ULL << BitsAvail);
  uint64_t PCRelTgtAddress = Cand->first;
  int64_t PCOffset = (int64_t)(PCRelTgtAddress - DotAddress);

  LLVM_DEBUG({
    if (Candidates.size() > 1)
      dbgs() << "Considering stub group with " << Candidates.size()
             << " candidates. DotAddress is " << Twine::utohexstr(DotAddress)
             << ", chosen candidate address is "
             << Twine::utohexstr(Cand->first) << "\n";
  });
  return (PCOffset < MinVal || PCOffset > MaxVal) ? nullptr : Cand->second;
}

BinaryBasicBlock *
LongJmpPass::lookupGlobalStub(const BinaryBasicBlock &SourceBB,
                              const MCInst &Inst, const MCSymbol *TgtSym,
                              uint64_t DotAddress) const {
  const BinaryFunction &Func = *SourceBB.getFunction();
  const StubGroupsTy &StubGroups =
      SourceBB.isCold() ? ColdStubGroups : HotStubGroups;
  return lookupStubFromGroup(StubGroups, Func, Inst, TgtSym, DotAddress);
}

BinaryBasicBlock *LongJmpPass::lookupLocalStub(const BinaryBasicBlock &SourceBB,
                                               const MCInst &Inst,
                                               const MCSymbol *TgtSym,
                                               uint64_t DotAddress) const {
  const BinaryFunction &Func = *SourceBB.getFunction();
  const DenseMap<const BinaryFunction *, StubGroupsTy> &StubGroups =
      SourceBB.isCold() ? ColdLocalStubs : HotLocalStubs;
  const auto Iter = StubGroups.find(&Func);
  if (Iter == StubGroups.end())
    return nullptr;
  return lookupStubFromGroup(Iter->second, Func, Inst, TgtSym, DotAddress);
}

std::unique_ptr<BinaryBasicBlock>
LongJmpPass::replaceTargetWithStub(BinaryBasicBlock &BB, MCInst &Inst,
                                   uint64_t DotAddress,
                                   uint64_t StubCreationAddress) {
  const BinaryFunction &Func = *BB.getFunction();
  const BinaryContext &BC = Func.getBinaryContext();
  std::unique_ptr<BinaryBasicBlock> NewBB;
  const MCSymbol *TgtSym = BC.MIB->getTargetSymbol(Inst);
  assert(TgtSym && "getTargetSymbol failed");

  BinaryBasicBlock::BinaryBranchInfo BI{0, 0};
  BinaryBasicBlock *TgtBB = BB.getSuccessor(TgtSym, BI);
  auto LocalStubsIter = Stubs.find(&Func);

  // If already using a stub and the stub is from another function, create a
  // local stub, since the foreign stub is now out of range
  if (!TgtBB) {
    auto SSIter = SharedStubs.find(TgtSym);
    if (SSIter != SharedStubs.end()) {
      TgtSym = BC.MIB->getTargetSymbol(*SSIter->second->begin());
      --NumSharedStubs;
    }
  } else if (LocalStubsIter != Stubs.end() &&
             LocalStubsIter->second.count(TgtBB)) {
    // The TgtBB and TgtSym now are the local out-of-range stub and its label.
    // So, we are attempting to restore BB to its previous state without using
    // this stub.
    TgtSym = BC.MIB->getTargetSymbol(*TgtBB->begin());
    assert(TgtSym &&
           "First instruction is expected to contain a target symbol.");
    BinaryBasicBlock *TgtBBSucc = TgtBB->getSuccessor(TgtSym, BI);

    // TgtBB might have no successor. e.g. a stub for a function call.
    if (TgtBBSucc) {
      BB.replaceSuccessor(TgtBB, TgtBBSucc, BI.Count, BI.MispredictedCount);
      assert(TgtBB->getExecutionCount() >= BI.Count &&
             "At least equal or greater than the branch count.");
      TgtBB->setExecutionCount(TgtBB->getExecutionCount() - BI.Count);
    }

    TgtBB = TgtBBSucc;
  }

  BinaryBasicBlock *StubBB = lookupLocalStub(BB, Inst, TgtSym, DotAddress);
  // If not found, look it up in globally shared stub maps if it is a function
  // call (TgtBB is not set)
  if (!StubBB && !TgtBB) {
    StubBB = lookupGlobalStub(BB, Inst, TgtSym, DotAddress);
    if (StubBB) {
      SharedStubs[StubBB->getLabel()] = StubBB;
      ++NumSharedStubs;
    }
  }
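  // Reuse the label of an in-range stub found above; otherwise a fresh stub
  // block is created right below.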
  MCSymbol *StubSymbol = StubBB ? StubBB->getLabel() : nullptr;

  if (!StubBB) {
    std::tie(NewBB, StubSymbol) =
        createNewStub(BB, TgtSym, /*is func?*/ !TgtBB, StubCreationAddress);
    StubBB = NewBB.get();
  }

  // Local branch
  if (TgtBB) {
    uint64_t OrigCount = BI.Count;
    uint64_t OrigMispreds = BI.MispredictedCount;
    BB.replaceSuccessor(TgtBB, StubBB, OrigCount, OrigMispreds);
    StubBB->setExecutionCount(StubBB->getExecutionCount() + OrigCount);
    if (NewBB) {
      StubBB->addSuccessor(TgtBB, OrigCount, OrigMispreds);
      StubBB->setIsCold(BB.isCold());
    }
    // Call / tail call
  } else {
    StubBB->setExecutionCount(StubBB->getExecutionCount() +
                              BB.getExecutionCount());
    if (NewBB) {
      assert(TgtBB == nullptr);
      StubBB->setIsCold(BB.isCold());
      // Set as entry point because this block is valid but we have no preds
      StubBB->getFunction()->addEntryPoint(*StubBB);
    }
  }
  BC.MIB->replaceBranchTarget(Inst, StubSymbol, BC.Ctx.get());

  return NewBB;
}

void LongJmpPass::updateStubGroups() {
  auto update = [&](StubGroupsTy &StubGroups) {
    for (auto &KeyVal : StubGroups) {
      for (StubTy &Elem : KeyVal.second)
        Elem.first = BBAddresses[Elem.second];
      llvm::sort(KeyVal.second, llvm::less_first());
    }
  };

  for (auto &KeyVal : HotLocalStubs)
    update(KeyVal.second);
  for (auto &KeyVal : ColdLocalStubs)
    update(KeyVal.second);
  update(HotStubGroups);
  update(ColdStubGroups);
}

void LongJmpPass::tentativeBBLayout(const BinaryFunction &Func) {
  const BinaryContext &BC = Func.getBinaryContext();
  uint64_t HotDot = HotAddresses[&Func];
  uint64_t ColdDot = ColdAddresses[&Func];
  bool Cold = false;
  for (const BinaryBasicBlock *BB : Func.getLayout().blocks()) {
    if (Cold || BB->isCold()) {
      Cold = true;
      BBAddresses[BB] = ColdDot;
      ColdDot += BC.computeCodeSize(BB->begin(), BB->end());
    } else {
      BBAddresses[BB] = HotDot;
      HotDot += BC.computeCodeSize(BB->begin(), BB->end());
    }
  }
}

uint64_t LongJmpPass::tentativeLayoutRelocColdPart(
    const BinaryContext &BC, std::vector<BinaryFunction *> &SortedFunctions,
    uint64_t DotAddress) {
  DotAddress = alignTo(DotAddress, llvm::Align(opts::AlignFunctions));
  for (BinaryFunction *Func : SortedFunctions) {
    if (!Func->isSplit())
      continue;
    DotAddress = alignTo(DotAddress, Func->getMinAlignment());
    uint64_t Pad =
        offsetToAlignment(DotAddress, llvm::Align(Func->getAlignment()));
    if (Pad <= Func->getMaxColdAlignmentBytes())
      DotAddress += Pad;
    ColdAddresses[Func] = DotAddress;
    LLVM_DEBUG(dbgs() << Func->getPrintName() << " cold tentative: "
                      << Twine::utohexstr(DotAddress) << "\n");
    DotAddress += Func->estimateColdSize();
    DotAddress = alignTo(DotAddress, Func->getConstantIslandAlignment());
    DotAddress += Func->estimateConstantIslandSize();
  }
  return DotAddress;
}

uint64_t LongJmpPass::tentativeLayoutRelocMode(
    const BinaryContext &BC, std::vector<BinaryFunction *> &SortedFunctions,
    uint64_t DotAddress) {
  // Compute hot cold frontier
  int64_t LastHotIndex = -1u;
  uint32_t CurrentIndex = 0;
  if (opts::HotFunctionsAtEnd) {
    for (BinaryFunction *BF : SortedFunctions) {
      if (BF->hasValidIndex()) {
        LastHotIndex = CurrentIndex;
        break;
      }

      ++CurrentIndex;
    }
  } else {
    for (BinaryFunction *BF : SortedFunctions) {
      if (!BF->hasValidIndex()) {
        LastHotIndex = CurrentIndex;
        break;
      }

      ++CurrentIndex;
    }
  }

  // Hot
  CurrentIndex = 0;
  bool ColdLayoutDone = false;
  auto runColdLayout = [&]() {
    DotAddress = tentativeLayoutRelocColdPart(BC, SortedFunctions, DotAddress);
    ColdLayoutDone = true;
    if (opts::HotFunctionsAtEnd)
      DotAddress = alignTo(DotAddress, opts::AlignText);
  };
  for (BinaryFunction *Func : SortedFunctions) {
    if (!BC.shouldEmit(*Func)) {
      HotAddresses[Func] = Func->getAddress();
      continue;
    }

    if (!ColdLayoutDone && CurrentIndex >= LastHotIndex)
      runColdLayout();

    DotAddress = alignTo(DotAddress, Func->getMinAlignment());
    uint64_t Pad =
        offsetToAlignment(DotAddress, llvm::Align(Func->getAlignment()));
    if (Pad <= Func->getMaxAlignmentBytes())
      DotAddress += Pad;
    HotAddresses[Func] = DotAddress;
    LLVM_DEBUG(dbgs() << Func->getPrintName() << " tentative: "
                      << Twine::utohexstr(DotAddress) << "\n");
    if (!Func->isSplit())
      DotAddress += Func->estimateSize();
    else
      DotAddress += Func->estimateHotSize();

    DotAddress = alignTo(DotAddress, Func->getConstantIslandAlignment());
    DotAddress += Func->estimateConstantIslandSize();
    ++CurrentIndex;
  }

  // Ensure that tentative code layout always runs for cold blocks.
  if (!ColdLayoutDone)
    runColdLayout();

  // BBs
  for (BinaryFunction *Func : SortedFunctions)
    tentativeBBLayout(*Func);

  return DotAddress;
}

void LongJmpPass::tentativeLayout(
    const BinaryContext &BC, std::vector<BinaryFunction *> &SortedFunctions) {
  uint64_t DotAddress = BC.LayoutStartAddress;

  if (!BC.HasRelocations) {
    for (BinaryFunction *Func : SortedFunctions) {
      HotAddresses[Func] = Func->getAddress();
      DotAddress = alignTo(DotAddress, ColdFragAlign);
      ColdAddresses[Func] = DotAddress;
      if (Func->isSplit())
        DotAddress += Func->estimateColdSize();
      tentativeBBLayout(*Func);
    }

    return;
  }

  // Relocation mode
  uint64_t EstimatedTextSize = 0;
  if (opts::UseOldText) {
    EstimatedTextSize = tentativeLayoutRelocMode(BC, SortedFunctions, 0);

    // Initial padding
    if (EstimatedTextSize <= BC.OldTextSectionSize) {
      DotAddress = BC.OldTextSectionAddress;
      uint64_t Pad =
          offsetToAlignment(DotAddress, llvm::Align(opts::AlignText));
      if (Pad + EstimatedTextSize <= BC.OldTextSectionSize) {
        DotAddress += Pad;
      }
    }
  }

  if (!EstimatedTextSize || EstimatedTextSize > BC.OldTextSectionSize)
    DotAddress = alignTo(BC.LayoutStartAddress, opts::AlignText);

  tentativeLayoutRelocMode(BC, SortedFunctions, DotAddress);
}

bool LongJmpPass::usesStub(const BinaryFunction &Func,
                           const MCInst &Inst) const {
  const MCSymbol *TgtSym = Func.getBinaryContext().MIB->getTargetSymbol(Inst);
  const BinaryBasicBlock *TgtBB = Func.getBasicBlockForLabel(TgtSym);
  auto Iter = Stubs.find(&Func);
  if (Iter != Stubs.end())
    return Iter->second.count(TgtBB);
  return false;
}

uint64_t LongJmpPass::getSymbolAddress(const BinaryContext &BC,
                                       const MCSymbol *Target,
                                       const BinaryBasicBlock *TgtBB) const {
  if (TgtBB) {
    auto Iter = BBAddresses.find(TgtBB);
    assert(Iter != BBAddresses.end() && "Unrecognized BB");
    return Iter->second;
  }
  uint64_t EntryID = 0;
  const BinaryFunction *TargetFunc = BC.getFunctionForSymbol(Target, &EntryID);
  auto Iter = HotAddresses.find(TargetFunc);
  if (Iter == HotAddresses.end() || (TargetFunc && EntryID)) {
    // Look at BinaryContext's resolution for this symbol - this is a symbol not
    // mapped to a BinaryFunction
    ErrorOr<uint64_t> ValueOrError = BC.getSymbolValue(*Target);
    assert(ValueOrError && "Unrecognized symbol");
    return *ValueOrError;
  }
  return Iter->second;
}

Error LongJmpPass::relaxStub(BinaryBasicBlock &StubBB, bool &Modified) {
  const BinaryFunction &Func = *StubBB.getFunction();
  const BinaryContext &BC = Func.getBinaryContext();
  const int Bits = StubBits[&StubBB];
  // Already working with the largest range?
  if (Bits == static_cast<int>(BC.AsmInfo->getCodePointerSize() * 8))
    return Error::success();

  const static int RangeShortJmp = BC.MIB->getShortJmpEncodingSize();
  const static int RangeSingleInstr = BC.MIB->getUncondBranchEncodingSize();
  const static uint64_t ShortJmpMask = ~((1ULL << RangeShortJmp) - 1);
  const static uint64_t SingleInstrMask =
      ~((1ULL << (RangeSingleInstr - 1)) - 1);

  const MCSymbol *RealTargetSym = BC.MIB->getTargetSymbol(*StubBB.begin());
  const BinaryBasicBlock *TgtBB = Func.getBasicBlockForLabel(RealTargetSym);
  uint64_t TgtAddress = getSymbolAddress(BC, RealTargetSym, TgtBB);
  uint64_t DotAddress = BBAddresses[&StubBB];
  uint64_t PCRelTgtAddress = DotAddress > TgtAddress ? DotAddress - TgtAddress
                                                     : TgtAddress - DotAddress;
  // If it fits in one instruction, do not relax
  if (!(PCRelTgtAddress & SingleInstrMask))
    return Error::success();

  // Fits short jmp
  if (!(PCRelTgtAddress & ShortJmpMask)) {
    if (Bits >= RangeShortJmp)
      return Error::success();

    LLVM_DEBUG(dbgs() << "Relaxing stub to short jump. PCRelTgtAddress = "
                      << Twine::utohexstr(PCRelTgtAddress)
                      << " RealTargetSym = " << RealTargetSym->getName()
                      << "\n");
    relaxStubToShortJmp(StubBB, RealTargetSym);
    StubBits[&StubBB] = RangeShortJmp;
    Modified = true;
    return Error::success();
  }

  // The long jmp uses an absolute address on AArch64, so we cannot use it for
  // PIC binaries.
  if (BC.isAArch64() && !BC.HasFixedLoadAddress)
    return createFatalBOLTError(
        "BOLT-ERROR: Unable to relax stub for PIC binary\n");

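  // Use the longest-range form: materialize the absolute target address and
  // branch to it.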
  LLVM_DEBUG(dbgs() << "Relaxing stub to long jump. PCRelTgtAddress = "
                    << Twine::utohexstr(PCRelTgtAddress)
                    << " RealTargetSym = " << RealTargetSym->getName() << "\n");
  relaxStubToLongJmp(StubBB, RealTargetSym);
  StubBits[&StubBB] = static_cast<int>(BC.AsmInfo->getCodePointerSize() * 8);
  Modified = true;
  return Error::success();
}

bool LongJmpPass::needsStub(const BinaryBasicBlock &BB, const MCInst &Inst,
                            uint64_t DotAddress) const {
  const BinaryFunction &Func = *BB.getFunction();
  const BinaryContext &BC = Func.getBinaryContext();
  const MCSymbol *TgtSym = BC.MIB->getTargetSymbol(Inst);
  assert(TgtSym && "getTargetSymbol failed");

  const BinaryBasicBlock *TgtBB = Func.getBasicBlockForLabel(TgtSym);
  // Check for shared stubs from foreign functions
  if (!TgtBB) {
    auto SSIter = SharedStubs.find(TgtSym);
    if (SSIter != SharedStubs.end())
      TgtBB = SSIter->second;
  }

  int BitsAvail = BC.MIB->getPCRelEncodingSize(Inst) - 1;
  assert(BitsAvail < 63 && "PCRelEncodingSize is too large to use int64_t to "
                           "check for out-of-bounds.");
  int64_t MaxVal = (1ULL << BitsAvail) - 1;
  int64_t MinVal = -(1ULL << BitsAvail);

  uint64_t PCRelTgtAddress = getSymbolAddress(BC, TgtSym, TgtBB);
  int64_t PCOffset = (int64_t)(PCRelTgtAddress - DotAddress);

  return PCOffset < MinVal || PCOffset > MaxVal;
}

Error LongJmpPass::relax(BinaryFunction &Func, bool &Modified) {
  const BinaryContext &BC = Func.getBinaryContext();

  assert(BC.isAArch64() && "Unsupported arch");
  constexpr int InsnSize = 4; // AArch64
  std::vector<std::pair<BinaryBasicBlock *, std::unique_ptr<BinaryBasicBlock>>>
      Insertions;

  BinaryBasicBlock *Frontier = getBBAtHotColdSplitPoint(Func);
  uint64_t FrontierAddress = Frontier ? BBAddresses[Frontier] : 0;
  if (FrontierAddress)
    FrontierAddress += Frontier->getNumNonPseudos() * InsnSize;

  // Add necessary stubs for branch targets we know we can't fit in the
  // instruction
  for (BinaryBasicBlock &BB : Func) {
    uint64_t DotAddress = BBAddresses[&BB];
    // Stubs themselves are relaxed on the next loop
    if (Stubs[&Func].count(&BB))
      continue;

    for (MCInst &Inst : BB) {
      if (BC.MIB->isPseudo(Inst))
        continue;

      if (!mayNeedStub(BC, Inst)) {
        DotAddress += InsnSize;
        continue;
      }

      // Check and relax direct branch or call
      if (!needsStub(BB, Inst, DotAddress)) {
        DotAddress += InsnSize;
        continue;
      }
      Modified = true;

      // Insert stubs close to the patched BB if call, but far away from the
      // hot path if a branch, since this branch target is the cold region
      // (but first check that the far away stub will be in range).
      BinaryBasicBlock *InsertionPoint = &BB;
      if (Func.isSimple() && !BC.MIB->isCall(Inst) && FrontierAddress &&
          !BB.isCold()) {
        int BitsAvail = BC.MIB->getPCRelEncodingSize(Inst) - 1;
        uint64_t Mask = ~((1ULL << BitsAvail) - 1);
        assert(FrontierAddress > DotAddress &&
               "Hot code should be before the frontier");
        uint64_t PCRelTgt = FrontierAddress - DotAddress;
        if (!(PCRelTgt & Mask))
          InsertionPoint = Frontier;
      }
      // Always put stubs at the end of the function if non-simple. We can't
      // change the layout of non-simple functions because they have jump
      // tables that we do not control.
      if (!Func.isSimple())
        InsertionPoint = &*std::prev(Func.end());

      // Create a stub to handle a far-away target
      Insertions.emplace_back(InsertionPoint,
                              replaceTargetWithStub(BB, Inst, DotAddress,
                                                    InsertionPoint == Frontier
                                                        ? FrontierAddress
                                                        : DotAddress));

      DotAddress += InsnSize;
    }
  }

  // Relax stubs if necessary
  for (BinaryBasicBlock &BB : Func) {
    if (!Stubs[&Func].count(&BB) || !BB.isValid())
      continue;

    if (auto E = relaxStub(BB, Modified))
      return Error(std::move(E));
  }

  for (std::pair<BinaryBasicBlock *, std::unique_ptr<BinaryBasicBlock>> &Elmt :
       Insertions) {
    if (!Elmt.second)
      continue;
    std::vector<std::unique_ptr<BinaryBasicBlock>> NewBBs;
    NewBBs.emplace_back(std::move(Elmt.second));
    Func.insertBasicBlocks(Elmt.first, std::move(NewBBs), true);
  }

  return Error::success();
}

void LongJmpPass::relaxLocalBranches(BinaryFunction &BF) {
  BinaryContext &BC = BF.getBinaryContext();
  auto &MIB = BC.MIB;

  // Quick path.
  if (!BF.isSplit() && BF.estimateSize() < ShortestJumpSpan)
    return;

  auto isBranchOffsetInRange = [&](const MCInst &Inst, int64_t Offset) {
    const unsigned Bits = MIB->getPCRelEncodingSize(Inst);
    return isIntN(Bits, Offset);
  };

  auto isBlockInRange = [&](const MCInst &Inst, uint64_t InstAddress,
                            const BinaryBasicBlock &BB) {
    const int64_t Offset = BB.getOutputStartAddress() - InstAddress;
    return isBranchOffsetInRange(Inst, Offset);
  };

  // Keep track of *all* function trampolines that are going to be added to the
  // function layout at the end of relaxation.
  std::vector<std::pair<BinaryBasicBlock *, std::unique_ptr<BinaryBasicBlock>>>
      FunctionTrampolines;

  // Function fragments are relaxed independently.
  for (FunctionFragment &FF : BF.getLayout().fragments()) {
    // Fill out code size estimation for the fragment. Use output BB address
    // ranges to store offsets from the start of the function fragment.
    uint64_t CodeSize = 0;
    for (BinaryBasicBlock *BB : FF) {
      BB->setOutputStartAddress(CodeSize);
      CodeSize += BB->estimateSize();
      BB->setOutputEndAddress(CodeSize);
    }

    // Dynamically-updated size of the fragment.
    uint64_t FragmentSize = CodeSize;

    // Size of the trampoline in bytes.
    constexpr uint64_t TrampolineSize = 4;

    // Trampolines created for the fragment. DestinationBB -> TrampolineBB.
    // NB: here we store only the first trampoline created for DestinationBB.
    DenseMap<const BinaryBasicBlock *, BinaryBasicBlock *> FragmentTrampolines;

    // Create a trampoline code after \p BB or at the end of the fragment if BB
    // is nullptr. If \p UpdateOffsets is true, update FragmentSize and offsets
    // for basic blocks affected by the insertion of the trampoline.
    auto addTrampolineAfter = [&](BinaryBasicBlock *BB,
                                  BinaryBasicBlock *TargetBB, uint64_t Count,
                                  bool UpdateOffsets = true) {
      FunctionTrampolines.emplace_back(BB ? BB : FF.back(),
                                       BF.createBasicBlock());
      BinaryBasicBlock *TrampolineBB = FunctionTrampolines.back().second.get();

      MCInst Inst;
      {
        auto L = BC.scopeLock();
        MIB->createUncondBranch(Inst, TargetBB->getLabel(), BC.Ctx.get());
      }
      TrampolineBB->addInstruction(Inst);
      TrampolineBB->addSuccessor(TargetBB, Count);
      TrampolineBB->setExecutionCount(Count);
      const uint64_t TrampolineAddress =
          BB ? BB->getOutputEndAddress() : FragmentSize;
      TrampolineBB->setOutputStartAddress(TrampolineAddress);
      TrampolineBB->setOutputEndAddress(TrampolineAddress + TrampolineSize);
      TrampolineBB->setFragmentNum(FF.getFragmentNum());

      if (!FragmentTrampolines.lookup(TargetBB))
        FragmentTrampolines[TargetBB] = TrampolineBB;

      if (!UpdateOffsets)
        return TrampolineBB;

      FragmentSize += TrampolineSize;

      // If the trampoline was added at the end of the fragment, offsets of
      // other fragments should stay intact.
      if (!BB)
        return TrampolineBB;

      // Update offsets for blocks after BB.
      for (BinaryBasicBlock *IBB : FF) {
        if (IBB->getOutputStartAddress() >= TrampolineAddress) {
          IBB->setOutputStartAddress(IBB->getOutputStartAddress() +
                                     TrampolineSize);
          IBB->setOutputEndAddress(IBB->getOutputEndAddress() + TrampolineSize);
        }
      }

      // Update offsets for trampolines in this fragment that are placed after
      // the new trampoline. Note that trampoline blocks are not part of the
      // function/fragment layout until we add them right before the return
      // from relaxLocalBranches().
      for (auto &Pair : FunctionTrampolines) {
        BinaryBasicBlock *IBB = Pair.second.get();
        if (IBB->getFragmentNum() != TrampolineBB->getFragmentNum())
          continue;
        if (IBB == TrampolineBB)
          continue;
        if (IBB->getOutputStartAddress() >= TrampolineAddress) {
          IBB->setOutputStartAddress(IBB->getOutputStartAddress() +
                                     TrampolineSize);
          IBB->setOutputEndAddress(IBB->getOutputEndAddress() + TrampolineSize);
        }
      }

      return TrampolineBB;
    };

    // Pre-populate trampolines by splitting unconditional branches from the
    // containing basic block.
    for (BinaryBasicBlock *BB : FF) {
      MCInst *Inst = BB->getLastNonPseudoInstr();
      if (!Inst || !MIB->isUnconditionalBranch(*Inst))
        continue;

      const MCSymbol *TargetSymbol = MIB->getTargetSymbol(*Inst);
      BB->eraseInstruction(BB->findInstruction(Inst));
      BB->setOutputEndAddress(BB->getOutputEndAddress() - TrampolineSize);

      BinaryBasicBlock::BinaryBranchInfo BI;
      BinaryBasicBlock *TargetBB = BB->getSuccessor(TargetSymbol, BI);

      BinaryBasicBlock *TrampolineBB =
          addTrampolineAfter(BB, TargetBB, BI.Count, /*UpdateOffsets*/ false);
      BB->replaceSuccessor(TargetBB, TrampolineBB, BI.Count);
    }

    /// Relax the branch \p Inst in basic block \p BB that targets \p TargetBB.
    /// \p InstAddress contains offset of the branch from the start of the
    /// containing function fragment.
    auto relaxBranch = [&](BinaryBasicBlock *BB, MCInst &Inst,
                           uint64_t InstAddress, BinaryBasicBlock *TargetBB) {
      BinaryFunction *BF = BB->getParent();

      // Use branch taken count for optimal relaxation.
      const uint64_t Count = BB->getBranchInfo(*TargetBB).Count;
      assert(Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
             "Expected valid branch execution count");

      // Try to reuse an existing trampoline without introducing any new code.
      BinaryBasicBlock *TrampolineBB = FragmentTrampolines.lookup(TargetBB);
      if (TrampolineBB && isBlockInRange(Inst, InstAddress, *TrampolineBB)) {
        BB->replaceSuccessor(TargetBB, TrampolineBB, Count);
        TrampolineBB->setExecutionCount(TrampolineBB->getExecutionCount() +
                                        Count);
        auto L = BC.scopeLock();
        MIB->replaceBranchTarget(Inst, TrampolineBB->getLabel(), BC.Ctx.get());
        return;
      }

      // For cold branches, check if we can introduce a trampoline at the end
      // of the fragment that is within the branch reach. Note that such a
      // trampoline may change address later and become unreachable, in which
      // case we will need further relaxation.
      const int64_t OffsetToEnd = FragmentSize - InstAddress;
      if (Count == 0 && isBranchOffsetInRange(Inst, OffsetToEnd)) {
        TrampolineBB = addTrampolineAfter(nullptr, TargetBB, Count);
        BB->replaceSuccessor(TargetBB, TrampolineBB, Count);
        auto L = BC.scopeLock();
        MIB->replaceBranchTarget(Inst, TrampolineBB->getLabel(), BC.Ctx.get());

        return;
      }

      // Insert a new block after the current one and use it as a trampoline.
      TrampolineBB = addTrampolineAfter(BB, TargetBB, Count);

      // If the other successor is a fall-through, invert the condition code.
      const BinaryBasicBlock *const NextBB =
          BF->getLayout().getBasicBlockAfter(BB, /*IgnoreSplits*/ false);
      if (BB->getConditionalSuccessor(false) == NextBB) {
        BB->swapConditionalSuccessors();
        auto L = BC.scopeLock();
        MIB->reverseBranchCondition(Inst, NextBB->getLabel(), BC.Ctx.get());
      } else {
        auto L = BC.scopeLock();
        MIB->replaceBranchTarget(Inst, TrampolineBB->getLabel(), BC.Ctx.get());
      }
      BB->replaceSuccessor(TargetBB, TrampolineBB, Count);
    };

    bool MayNeedRelaxation;
    uint64_t NumIterations = 0;
    do {
      MayNeedRelaxation = false;
      ++NumIterations;
      for (auto BBI = FF.begin(); BBI != FF.end(); ++BBI) {
        BinaryBasicBlock *BB = *BBI;
        uint64_t NextInstOffset = BB->getOutputStartAddress();
        for (MCInst &Inst : *BB) {
          const size_t InstAddress = NextInstOffset;
          if (!MIB->isPseudo(Inst))
            NextInstOffset += 4;

          if (!mayNeedStub(BF.getBinaryContext(), Inst))
            continue;

          const size_t BitsAvailable = MIB->getPCRelEncodingSize(Inst);

          // Span of +/-128MB.
          if (BitsAvailable == LongestJumpBits)
            continue;

          const MCSymbol *TargetSymbol = MIB->getTargetSymbol(Inst);
          BinaryBasicBlock *TargetBB = BB->getSuccessor(TargetSymbol);
          assert(TargetBB &&
                 "Basic block target expected for conditional branch.");

          // Check if the relaxation is needed.
          if (TargetBB->getFragmentNum() == FF.getFragmentNum() &&
              isBlockInRange(Inst, InstAddress, *TargetBB))
            continue;

          relaxBranch(BB, Inst, InstAddress, TargetBB);

          MayNeedRelaxation = true;
        }
      }

      // We may have added new instructions, but the whole fragment is less than
      // the minimum branch span.
      if (FragmentSize < ShortestJumpSpan)
        MayNeedRelaxation = false;

    } while (MayNeedRelaxation);

    LLVM_DEBUG({
      if (NumIterations > 2) {
        dbgs() << "BOLT-DEBUG: relaxed fragment " << FF.getFragmentNum().get()
               << " of " << BF << " in " << NumIterations << " iterations\n";
      }
    });
    (void)NumIterations;
  }

  // Add trampoline blocks from all fragments to the layout.
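  // Group them by the block they are inserted after so that
  // insertBasicBlocks() is called only once per insertion point.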
  DenseMap<BinaryBasicBlock *, std::vector<std::unique_ptr<BinaryBasicBlock>>>
      Insertions;
  for (std::pair<BinaryBasicBlock *, std::unique_ptr<BinaryBasicBlock>> &Pair :
       FunctionTrampolines) {
    if (!Pair.second)
      continue;
    Insertions[Pair.first].emplace_back(std::move(Pair.second));
  }

  for (auto &Pair : Insertions) {
    BF.insertBasicBlocks(Pair.first, std::move(Pair.second),
                         /*UpdateLayout*/ true, /*UpdateCFI*/ true,
                         /*RecomputeLPs*/ false);
  }
}

Error LongJmpPass::runOnFunctions(BinaryContext &BC) {

  if (opts::CompactCodeModel) {
    BC.outs()
        << "BOLT-INFO: relaxing branches for compact code model (<128MB)\n";

    ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) {
      relaxLocalBranches(BF);
    };

    ParallelUtilities::PredicateTy SkipPredicate =
        [&](const BinaryFunction &BF) {
          return !BC.shouldEmit(BF) || !BF.isSimple();
        };

    ParallelUtilities::runOnEachFunction(
        BC, ParallelUtilities::SchedulingPolicy::SP_INST_LINEAR, WorkFun,
        SkipPredicate, "RelaxLocalBranches");

    return Error::success();
  }

  BC.outs() << "BOLT-INFO: Starting stub-insertion pass\n";
  std::vector<BinaryFunction *> Sorted = BC.getSortedFunctions();
  bool Modified;
  uint32_t Iterations = 0;
  do {
    ++Iterations;
    Modified = false;
    tentativeLayout(BC, Sorted);
    updateStubGroups();
    for (BinaryFunction *Func : Sorted) {
      if (auto E = relax(*Func, Modified))
        return Error(std::move(E));
      // Don't ruin non-simple functions; they can't afford to have the layout
      // changed.
      if (Modified && Func->isSimple())
        Func->fixBranches();
    }
  } while (Modified);
  BC.outs() << "BOLT-INFO: Inserted " << NumHotStubs
            << " stubs in the hot area and " << NumColdStubs
            << " stubs in the cold area. Shared " << NumSharedStubs
            << " times, iterated " << Iterations << " times.\n";
  return Error::success();
}
} // namespace bolt
} // namespace llvm