xref: /llvm-project/bolt/lib/Passes/LongJmp.cpp (revision 671095b452365826b1ccb65483d6ae890a2a81f7)
1 //===- bolt/Passes/LongJmp.cpp --------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the LongJmpPass class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "bolt/Passes/LongJmp.h"
14 #include "bolt/Core/ParallelUtilities.h"
15 #include "llvm/Support/MathExtras.h"
16 
17 #define DEBUG_TYPE "longjmp"
18 
19 using namespace llvm;
20 
21 namespace opts {
22 extern cl::OptionCategory BoltCategory;
23 extern cl::OptionCategory BoltOptCategory;
24 extern llvm::cl::opt<unsigned> AlignText;
25 extern cl::opt<unsigned> AlignFunctions;
26 extern cl::opt<bool> UseOldText;
27 extern cl::opt<bool> HotFunctionsAtEnd;
28 
29 static cl::opt<bool>
30     CompactCodeModel("compact-code-model",
31                      cl::desc("generate code for binaries <128MB on AArch64"),
32                      cl::init(false), cl::cat(BoltCategory));
33 
34 static cl::opt<bool> GroupStubs("group-stubs",
35                                 cl::desc("share stubs across functions"),
36                                 cl::init(true), cl::cat(BoltOptCategory));
37 }
38 
39 namespace llvm {
40 namespace bolt {
41 
42 constexpr unsigned ColdFragAlign = 16;
43 
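// Rewrite the body of \p StubBB with a short-jump instruction sequence that
// reaches \p Tgt.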
44 static void relaxStubToShortJmp(BinaryBasicBlock &StubBB, const MCSymbol *Tgt) {
45   const BinaryContext &BC = StubBB.getFunction()->getBinaryContext();
46   InstructionListType Seq;
47   BC.MIB->createShortJmp(Seq, Tgt, BC.Ctx.get());
48   StubBB.clear();
49   StubBB.addInstructions(Seq.begin(), Seq.end());
50 }
51 
52 static void relaxStubToLongJmp(BinaryBasicBlock &StubBB, const MCSymbol *Tgt) {
53   const BinaryContext &BC = StubBB.getFunction()->getBinaryContext();
54   InstructionListType Seq;
55   BC.MIB->createLongJmp(Seq, Tgt, BC.Ctx.get());
56   StubBB.clear();
57   StubBB.addInstructions(Seq.begin(), Seq.end());
58 }
59 
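// Return the last hot basic block in the layout of \p Func, i.e. the block
// immediately preceding the first cold block, or nullptr if the function is
// not split.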
60 static BinaryBasicBlock *getBBAtHotColdSplitPoint(BinaryFunction &Func) {
61   if (!Func.isSplit() || Func.empty())
62     return nullptr;
63 
64   assert(!(*Func.begin()).isCold() && "Entry cannot be cold");
65   for (auto I = Func.getLayout().block_begin(),
66             E = Func.getLayout().block_end();
67        I != E; ++I) {
68     auto Next = std::next(I);
69     if (Next != E && (*Next)->isCold())
70       return *I;
71   }
72   llvm_unreachable("No hot-cold split point found");
73 }
74 
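// Return true if \p Inst is a direct branch or call, i.e. an instruction whose
// target may end up out of encoding range and therefore require a stub.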
75 static bool mayNeedStub(const BinaryContext &BC, const MCInst &Inst) {
76   return (BC.MIB->isBranch(Inst) || BC.MIB->isCall(Inst)) &&
77          !BC.MIB->isIndirectBranch(Inst) && !BC.MIB->isIndirectCall(Inst);
78 }
79 
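// Create a stub basic block in \p SourceBB's function that branches to
// \p TgtSym (as a tail call if \p TgtIsFunc) and register it in the local and,
// if stub grouping is enabled, the shared stub maps, keyed by the tentative
// address \p AtAddress.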
80 std::pair<std::unique_ptr<BinaryBasicBlock>, MCSymbol *>
81 LongJmpPass::createNewStub(BinaryBasicBlock &SourceBB, const MCSymbol *TgtSym,
82                            bool TgtIsFunc, uint64_t AtAddress) {
83   BinaryFunction &Func = *SourceBB.getFunction();
84   const BinaryContext &BC = Func.getBinaryContext();
85   const bool IsCold = SourceBB.isCold();
86   MCSymbol *StubSym = BC.Ctx->createNamedTempSymbol("Stub");
87   std::unique_ptr<BinaryBasicBlock> StubBB = Func.createBasicBlock(StubSym);
88   MCInst Inst;
89   BC.MIB->createUncondBranch(Inst, TgtSym, BC.Ctx.get());
90   if (TgtIsFunc)
91     BC.MIB->convertJmpToTailCall(Inst);
92   StubBB->addInstruction(Inst);
93   StubBB->setExecutionCount(0);
94 
95   // Register this in stubs maps
96   auto registerInMap = [&](StubGroupsTy &Map) {
97     StubGroupTy &StubGroup = Map[TgtSym];
98     StubGroup.insert(
99         llvm::lower_bound(
100             StubGroup, std::make_pair(AtAddress, nullptr),
101             [&](const std::pair<uint64_t, BinaryBasicBlock *> &LHS,
102                 const std::pair<uint64_t, BinaryBasicBlock *> &RHS) {
103               return LHS.first < RHS.first;
104             }),
105         std::make_pair(AtAddress, StubBB.get()));
106   };
107 
108   Stubs[&Func].insert(StubBB.get());
109   StubBits[StubBB.get()] = BC.MIB->getUncondBranchEncodingSize();
110   if (IsCold) {
111     registerInMap(ColdLocalStubs[&Func]);
112     if (opts::GroupStubs && TgtIsFunc)
113       registerInMap(ColdStubGroups);
114     ++NumColdStubs;
115   } else {
116     registerInMap(HotLocalStubs[&Func]);
117     if (opts::GroupStubs && TgtIsFunc)
118       registerInMap(HotStubGroups);
119     ++NumHotStubs;
120   }
121 
122   return std::make_pair(std::move(StubBB), StubSym);
123 }
124 
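// Search \p StubGroups for an existing stub targeting \p TgtSym. Pick the
// candidate whose address is closest to \p DotAddress and return it only if it
// is within the range encodable by \p Inst, otherwise return nullptr.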
125 BinaryBasicBlock *LongJmpPass::lookupStubFromGroup(
126     const StubGroupsTy &StubGroups, const BinaryFunction &Func,
127     const MCInst &Inst, const MCSymbol *TgtSym, uint64_t DotAddress) const {
128   const BinaryContext &BC = Func.getBinaryContext();
129   auto CandidatesIter = StubGroups.find(TgtSym);
130   if (CandidatesIter == StubGroups.end())
131     return nullptr;
132   const StubGroupTy &Candidates = CandidatesIter->second;
133   if (Candidates.empty())
134     return nullptr;
135   auto Cand = llvm::lower_bound(
136       Candidates, std::make_pair(DotAddress, nullptr),
137       [&](const std::pair<uint64_t, BinaryBasicBlock *> &LHS,
138           const std::pair<uint64_t, BinaryBasicBlock *> &RHS) {
139         return LHS.first < RHS.first;
140       });
141   if (Cand == Candidates.end()) {
142     Cand = std::prev(Cand);
143   } else if (Cand != Candidates.begin()) {
144     const StubTy *LeftCand = std::prev(Cand);
145     if (Cand->first - DotAddress > DotAddress - LeftCand->first)
146       Cand = LeftCand;
147   }
148   int BitsAvail = BC.MIB->getPCRelEncodingSize(Inst) - 1;
149   assert(BitsAvail < 63 && "PCRelEncodingSize is too large to use int64_t to "
150                            "check for out-of-bounds.");
151   int64_t MaxVal = (1ULL << BitsAvail) - 1;
152   int64_t MinVal = -(1ULL << BitsAvail);
153   uint64_t PCRelTgtAddress = Cand->first;
154   int64_t PCOffset = (int64_t)(PCRelTgtAddress - DotAddress);
155 
156   LLVM_DEBUG({
157     if (Candidates.size() > 1)
158       dbgs() << "Considering stub group with " << Candidates.size()
159              << " candidates. DotAddress is " << Twine::utohexstr(DotAddress)
160              << ", chosen candidate address is "
161              << Twine::utohexstr(Cand->first) << "\n";
162   });
163   return (PCOffset < MinVal || PCOffset > MaxVal) ? nullptr : Cand->second;
164 }
165 
166 BinaryBasicBlock *
167 LongJmpPass::lookupGlobalStub(const BinaryBasicBlock &SourceBB,
168                               const MCInst &Inst, const MCSymbol *TgtSym,
169                               uint64_t DotAddress) const {
170   const BinaryFunction &Func = *SourceBB.getFunction();
171   const StubGroupsTy &StubGroups =
172       SourceBB.isCold() ? ColdStubGroups : HotStubGroups;
173   return lookupStubFromGroup(StubGroups, Func, Inst, TgtSym, DotAddress);
174 }
175 
176 BinaryBasicBlock *LongJmpPass::lookupLocalStub(const BinaryBasicBlock &SourceBB,
177                                                const MCInst &Inst,
178                                                const MCSymbol *TgtSym,
179                                                uint64_t DotAddress) const {
180   const BinaryFunction &Func = *SourceBB.getFunction();
181   const DenseMap<const BinaryFunction *, StubGroupsTy> &StubGroups =
182       SourceBB.isCold() ? ColdLocalStubs : HotLocalStubs;
183   const auto Iter = StubGroups.find(&Func);
184   if (Iter == StubGroups.end())
185     return nullptr;
186   return lookupStubFromGroup(Iter->second, Func, Inst, TgtSym, DotAddress);
187 }
188 
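// Redirect \p Inst in \p BB to a stub that can reach its target. If the
// instruction already goes through an out-of-range stub, restore its original
// target first; then reuse a reachable local or shared stub, or create a new
// one at \p StubCreationAddress. Returns the newly created stub block, if any,
// so the caller can insert it into the function layout.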
189 std::unique_ptr<BinaryBasicBlock>
190 LongJmpPass::replaceTargetWithStub(BinaryBasicBlock &BB, MCInst &Inst,
191                                    uint64_t DotAddress,
192                                    uint64_t StubCreationAddress) {
193   const BinaryFunction &Func = *BB.getFunction();
194   const BinaryContext &BC = Func.getBinaryContext();
195   std::unique_ptr<BinaryBasicBlock> NewBB;
196   const MCSymbol *TgtSym = BC.MIB->getTargetSymbol(Inst);
197   assert(TgtSym && "getTargetSymbol failed");
198 
199   BinaryBasicBlock::BinaryBranchInfo BI{0, 0};
200   BinaryBasicBlock *TgtBB = BB.getSuccessor(TgtSym, BI);
201   auto LocalStubsIter = Stubs.find(&Func);
202 
203   // If the branch already uses a stub from another function, create a local
204   // stub, since the foreign stub is now out of range.
205   if (!TgtBB) {
206     auto SSIter = SharedStubs.find(TgtSym);
207     if (SSIter != SharedStubs.end()) {
208       TgtSym = BC.MIB->getTargetSymbol(*SSIter->second->begin());
209       --NumSharedStubs;
210     }
211   } else if (LocalStubsIter != Stubs.end() &&
212              LocalStubsIter->second.count(TgtBB)) {
213     // TgtBB and TgtSym are now the local out-of-range stub and its label.
214     // So we are attempting to restore BB to its previous state without using
215     // this stub.
216     TgtSym = BC.MIB->getTargetSymbol(*TgtBB->begin());
217     assert(TgtSym &&
218            "First instruction is expected to contain a target symbol.");
219     BinaryBasicBlock *TgtBBSucc = TgtBB->getSuccessor(TgtSym, BI);
220 
221     // TgtBB might have no successor. e.g. a stub for a function call.
222     if (TgtBBSucc) {
223       BB.replaceSuccessor(TgtBB, TgtBBSucc, BI.Count, BI.MispredictedCount);
224       assert(TgtBB->getExecutionCount() >= BI.Count &&
225              "Execution count must be at least the branch count.");
226       TgtBB->setExecutionCount(TgtBB->getExecutionCount() - BI.Count);
227     }
228 
229     TgtBB = TgtBBSucc;
230   }
231 
232   BinaryBasicBlock *StubBB = lookupLocalStub(BB, Inst, TgtSym, DotAddress);
233   // If not found, look it up in globally shared stub maps if it is a function
234   // call (TgtBB is not set)
235   if (!StubBB && !TgtBB) {
236     StubBB = lookupGlobalStub(BB, Inst, TgtSym, DotAddress);
237     if (StubBB) {
238       SharedStubs[StubBB->getLabel()] = StubBB;
239       ++NumSharedStubs;
240     }
241   }
242   MCSymbol *StubSymbol = StubBB ? StubBB->getLabel() : nullptr;
243 
244   if (!StubBB) {
245     std::tie(NewBB, StubSymbol) =
246         createNewStub(BB, TgtSym, /*TgtIsFunc=*/!TgtBB, StubCreationAddress);
247     StubBB = NewBB.get();
248   }
249 
250   // Local branch
251   if (TgtBB) {
252     uint64_t OrigCount = BI.Count;
253     uint64_t OrigMispreds = BI.MispredictedCount;
254     BB.replaceSuccessor(TgtBB, StubBB, OrigCount, OrigMispreds);
255     StubBB->setExecutionCount(StubBB->getExecutionCount() + OrigCount);
256     if (NewBB) {
257       StubBB->addSuccessor(TgtBB, OrigCount, OrigMispreds);
258       StubBB->setIsCold(BB.isCold());
259     }
260     // Call / tail call
261   } else {
262     StubBB->setExecutionCount(StubBB->getExecutionCount() +
263                               BB.getExecutionCount());
264     if (NewBB) {
265       assert(TgtBB == nullptr);
266       StubBB->setIsCold(BB.isCold());
267       // Set as an entry point because this block is valid but has no preds.
268       StubBB->getFunction()->addEntryPoint(*StubBB);
269     }
270   }
271   BC.MIB->replaceBranchTarget(Inst, StubSymbol, BC.Ctx.get());
272 
273   return NewBB;
274 }
275 
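// After a new tentative layout, refresh the recorded address of every stub and
// keep each stub group sorted by address.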
276 void LongJmpPass::updateStubGroups() {
277   auto update = [&](StubGroupsTy &StubGroups) {
278     for (auto &KeyVal : StubGroups) {
279       for (StubTy &Elem : KeyVal.second)
280         Elem.first = BBAddresses[Elem.second];
281       llvm::sort(KeyVal.second, llvm::less_first());
282     }
283   };
284 
285   for (auto &KeyVal : HotLocalStubs)
286     update(KeyVal.second);
287   for (auto &KeyVal : ColdLocalStubs)
288     update(KeyVal.second);
289   update(HotStubGroups);
290   update(ColdStubGroups);
291 }
292 
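// Assign tentative output addresses to all basic blocks of \p Func, growing
// the hot and cold fragments separately from the function's tentative
// addresses.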
293 void LongJmpPass::tentativeBBLayout(const BinaryFunction &Func) {
294   const BinaryContext &BC = Func.getBinaryContext();
295   uint64_t HotDot = HotAddresses[&Func];
296   uint64_t ColdDot = ColdAddresses[&Func];
297   bool Cold = false;
298   for (const BinaryBasicBlock *BB : Func.getLayout().blocks()) {
299     if (Cold || BB->isCold()) {
300       Cold = true;
301       BBAddresses[BB] = ColdDot;
302       ColdDot += BC.computeCodeSize(BB->begin(), BB->end());
303     } else {
304       BBAddresses[BB] = HotDot;
305       HotDot += BC.computeCodeSize(BB->begin(), BB->end());
306     }
307   }
308 }
309 
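// Lay out the cold fragments of all split functions starting at \p DotAddress,
// recording per-function cold addresses, and return the next available
// address.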
310 uint64_t LongJmpPass::tentativeLayoutRelocColdPart(
311     const BinaryContext &BC, std::vector<BinaryFunction *> &SortedFunctions,
312     uint64_t DotAddress) {
313   DotAddress = alignTo(DotAddress, llvm::Align(opts::AlignFunctions));
314   for (BinaryFunction *Func : SortedFunctions) {
315     if (!Func->isSplit())
316       continue;
317     DotAddress = alignTo(DotAddress, Func->getMinAlignment());
318     uint64_t Pad =
319         offsetToAlignment(DotAddress, llvm::Align(Func->getAlignment()));
320     if (Pad <= Func->getMaxColdAlignmentBytes())
321       DotAddress += Pad;
322     ColdAddresses[Func] = DotAddress;
323     LLVM_DEBUG(dbgs() << Func->getPrintName() << " cold tentative: "
324                       << Twine::utohexstr(DotAddress) << "\n");
325     DotAddress += Func->estimateColdSize();
326     DotAddress = alignTo(DotAddress, Func->getConstantIslandAlignment());
327     DotAddress += Func->estimateConstantIslandSize();
328   }
329   return DotAddress;
330 }
331 
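// Tentatively lay out all functions in relocation mode: hot code in sorted
// order, with the cold fragments of split functions placed at the hot-cold
// frontier, followed by per-block address assignment. Returns the first
// address past the laid-out code.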
332 uint64_t LongJmpPass::tentativeLayoutRelocMode(
333     const BinaryContext &BC, std::vector<BinaryFunction *> &SortedFunctions,
334     uint64_t DotAddress) {
335   // Compute the hot-cold frontier.
336   int64_t LastHotIndex = -1u;
337   uint32_t CurrentIndex = 0;
338   if (opts::HotFunctionsAtEnd) {
339     for (BinaryFunction *BF : SortedFunctions) {
340       if (BF->hasValidIndex()) {
341         LastHotIndex = CurrentIndex;
342         break;
343       }
344 
345       ++CurrentIndex;
346     }
347   } else {
348     for (BinaryFunction *BF : SortedFunctions) {
349       if (!BF->hasValidIndex()) {
350         LastHotIndex = CurrentIndex;
351         break;
352       }
353 
354       ++CurrentIndex;
355     }
356   }
357 
358   // Hot
359   CurrentIndex = 0;
360   bool ColdLayoutDone = false;
361   auto runColdLayout = [&]() {
362     DotAddress = tentativeLayoutRelocColdPart(BC, SortedFunctions, DotAddress);
363     ColdLayoutDone = true;
364     if (opts::HotFunctionsAtEnd)
365       DotAddress = alignTo(DotAddress, opts::AlignText);
366   };
367   for (BinaryFunction *Func : SortedFunctions) {
368     if (!BC.shouldEmit(*Func)) {
369       HotAddresses[Func] = Func->getAddress();
370       continue;
371     }
372 
373     if (!ColdLayoutDone && CurrentIndex >= LastHotIndex)
374       runColdLayout();
375 
376     DotAddress = alignTo(DotAddress, Func->getMinAlignment());
377     uint64_t Pad =
378         offsetToAlignment(DotAddress, llvm::Align(Func->getAlignment()));
379     if (Pad <= Func->getMaxAlignmentBytes())
380       DotAddress += Pad;
381     HotAddresses[Func] = DotAddress;
382     LLVM_DEBUG(dbgs() << Func->getPrintName() << " tentative: "
383                       << Twine::utohexstr(DotAddress) << "\n");
384     if (!Func->isSplit())
385       DotAddress += Func->estimateSize();
386     else
387       DotAddress += Func->estimateHotSize();
388 
389     DotAddress = alignTo(DotAddress, Func->getConstantIslandAlignment());
390     DotAddress += Func->estimateConstantIslandSize();
391     ++CurrentIndex;
392   }
393 
394   // Ensure that tentative code layout always runs for cold blocks.
395   if (!ColdLayoutDone)
396     runColdLayout();
397 
398   // BBs
399   for (BinaryFunction *Func : SortedFunctions)
400     tentativeBBLayout(*Func);
401 
402   return DotAddress;
403 }
404 
405 void LongJmpPass::tentativeLayout(
406     const BinaryContext &BC, std::vector<BinaryFunction *> &SortedFunctions) {
407   uint64_t DotAddress = BC.LayoutStartAddress;
408 
409   if (!BC.HasRelocations) {
410     for (BinaryFunction *Func : SortedFunctions) {
411       HotAddresses[Func] = Func->getAddress();
412       DotAddress = alignTo(DotAddress, ColdFragAlign);
413       ColdAddresses[Func] = DotAddress;
414       if (Func->isSplit())
415         DotAddress += Func->estimateColdSize();
416       tentativeBBLayout(*Func);
417     }
418 
419     return;
420   }
421 
422   // Relocation mode
423   uint64_t EstimatedTextSize = 0;
424   if (opts::UseOldText) {
425     EstimatedTextSize = tentativeLayoutRelocMode(BC, SortedFunctions, 0);
426 
427     // Initial padding
428     if (EstimatedTextSize <= BC.OldTextSectionSize) {
429       DotAddress = BC.OldTextSectionAddress;
430       uint64_t Pad =
431           offsetToAlignment(DotAddress, llvm::Align(opts::AlignText));
432       if (Pad + EstimatedTextSize <= BC.OldTextSectionSize) {
433         DotAddress += Pad;
434       }
435     }
436   }
437 
438   if (!EstimatedTextSize || EstimatedTextSize > BC.OldTextSectionSize)
439     DotAddress = alignTo(BC.LayoutStartAddress, opts::AlignText);
440 
441   tentativeLayoutRelocMode(BC, SortedFunctions, DotAddress);
442 }
443 
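// Return true if \p Inst already targets one of the stubs created for \p Func.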
444 bool LongJmpPass::usesStub(const BinaryFunction &Func,
445                            const MCInst &Inst) const {
446   const MCSymbol *TgtSym = Func.getBinaryContext().MIB->getTargetSymbol(Inst);
447   const BinaryBasicBlock *TgtBB = Func.getBasicBlockForLabel(TgtSym);
448   auto Iter = Stubs.find(&Func);
449   if (Iter != Stubs.end())
450     return Iter->second.count(TgtBB);
451   return false;
452 }
453 
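// Return the tentative address of \p Target: the block address if \p TgtBB is
// known, the tentative hot address of the containing function for a primary
// entry point, or the symbol value recorded in the BinaryContext otherwise.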
454 uint64_t LongJmpPass::getSymbolAddress(const BinaryContext &BC,
455                                        const MCSymbol *Target,
456                                        const BinaryBasicBlock *TgtBB) const {
457   if (TgtBB) {
458     auto Iter = BBAddresses.find(TgtBB);
459     assert(Iter != BBAddresses.end() && "Unrecognized BB");
460     return Iter->second;
461   }
462   uint64_t EntryID = 0;
463   const BinaryFunction *TargetFunc = BC.getFunctionForSymbol(Target, &EntryID);
464   auto Iter = HotAddresses.find(TargetFunc);
465   if (Iter == HotAddresses.end() || (TargetFunc && EntryID)) {
466     // Look at BinaryContext's resolution for this symbol - this is a symbol not
467     // mapped to a BinaryFunction
468     ErrorOr<uint64_t> ValueOrError = BC.getSymbolValue(*Target);
469     assert(ValueOrError && "Unrecognized symbol");
470     return *ValueOrError;
471   }
472   return Iter->second;
473 }
474 
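// Relax \p StubBB if its current branch sequence cannot reach the target:
// upgrade a single-instruction branch to a short jump, and a short jump to an
// absolute long jump (unavailable for PIC binaries on AArch64). Sets
// \p Modified when the stub is rewritten.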
475 Error LongJmpPass::relaxStub(BinaryBasicBlock &StubBB, bool &Modified) {
476   const BinaryFunction &Func = *StubBB.getFunction();
477   const BinaryContext &BC = Func.getBinaryContext();
478   const int Bits = StubBits[&StubBB];
479   // Already working with the largest range?
480   if (Bits == static_cast<int>(BC.AsmInfo->getCodePointerSize() * 8))
481     return Error::success();
482 
483   const static int RangeShortJmp = BC.MIB->getShortJmpEncodingSize();
484   const static int RangeSingleInstr = BC.MIB->getUncondBranchEncodingSize();
485   const static uint64_t ShortJmpMask = ~((1ULL << RangeShortJmp) - 1);
486   const static uint64_t SingleInstrMask =
487       ~((1ULL << (RangeSingleInstr - 1)) - 1);
488 
489   const MCSymbol *RealTargetSym = BC.MIB->getTargetSymbol(*StubBB.begin());
490   const BinaryBasicBlock *TgtBB = Func.getBasicBlockForLabel(RealTargetSym);
491   uint64_t TgtAddress = getSymbolAddress(BC, RealTargetSym, TgtBB);
492   uint64_t DotAddress = BBAddresses[&StubBB];
493   uint64_t PCRelTgtAddress = DotAddress > TgtAddress ? DotAddress - TgtAddress
494                                                      : TgtAddress - DotAddress;
495   // If it fits in one instruction, do not relax
496   if (!(PCRelTgtAddress & SingleInstrMask))
497     return Error::success();
498 
499   // Fits short jmp
500   if (!(PCRelTgtAddress & ShortJmpMask)) {
501     if (Bits >= RangeShortJmp)
502       return Error::success();
503 
504     LLVM_DEBUG(dbgs() << "Relaxing stub to short jump. PCRelTgtAddress = "
505                       << Twine::utohexstr(PCRelTgtAddress)
506                       << " RealTargetSym = " << RealTargetSym->getName()
507                       << "\n");
508     relaxStubToShortJmp(StubBB, RealTargetSym);
509     StubBits[&StubBB] = RangeShortJmp;
510     Modified = true;
511     return Error::success();
512   }
513 
514   // The long jmp sequence uses an absolute address on AArch64, so we cannot
515   // use it for PIC binaries.
516   if (BC.isAArch64() && !BC.HasFixedLoadAddress)
517     return createFatalBOLTError(
518         "BOLT-ERROR: Unable to relax stub for PIC binary\n");
519 
520   LLVM_DEBUG(dbgs() << "Relaxing stub to long jump. PCRelTgtAddress = "
521                     << Twine::utohexstr(PCRelTgtAddress)
522                     << " RealTargetSym = " << RealTargetSym->getName() << "\n");
523   relaxStubToLongJmp(StubBB, RealTargetSym);
524   StubBits[&StubBB] = static_cast<int>(BC.AsmInfo->getCodePointerSize() * 8);
525   Modified = true;
526   return Error::success();
527 }
528 
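// Return true if the target of \p Inst cannot be encoded as a PC-relative
// offset from \p DotAddress, i.e. the branch or call has to go through a stub.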
529 bool LongJmpPass::needsStub(const BinaryBasicBlock &BB, const MCInst &Inst,
530                             uint64_t DotAddress) const {
531   const BinaryFunction &Func = *BB.getFunction();
532   const BinaryContext &BC = Func.getBinaryContext();
533   const MCSymbol *TgtSym = BC.MIB->getTargetSymbol(Inst);
534   assert(TgtSym && "getTargetSymbol failed");
535 
536   const BinaryBasicBlock *TgtBB = Func.getBasicBlockForLabel(TgtSym);
537   // Check for shared stubs from foreign functions
538   if (!TgtBB) {
539     auto SSIter = SharedStubs.find(TgtSym);
540     if (SSIter != SharedStubs.end())
541       TgtBB = SSIter->second;
542   }
543 
544   int BitsAvail = BC.MIB->getPCRelEncodingSize(Inst) - 1;
545   assert(BitsAvail < 63 && "PCRelEncodingSize is too large to use int64_t to "
546                            "check for out-of-bounds.");
547   int64_t MaxVal = (1ULL << BitsAvail) - 1;
548   int64_t MinVal = -(1ULL << BitsAvail);
549 
550   uint64_t PCRelTgtAddress = getSymbolAddress(BC, TgtSym, TgtBB);
551   int64_t PCOffset = (int64_t)(PCRelTgtAddress - DotAddress);
552 
553   return PCOffset < MinVal || PCOffset > MaxVal;
554 }
555 
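// Insert stubs for out-of-range direct branches and calls in \p Func and relax
// existing stubs, based on the current tentative addresses. Sets \p Modified
// when any change is made.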
556 Error LongJmpPass::relax(BinaryFunction &Func, bool &Modified) {
557   const BinaryContext &BC = Func.getBinaryContext();
558 
559   assert(BC.isAArch64() && "Unsupported arch");
560   constexpr int InsnSize = 4; // AArch64
561   std::vector<std::pair<BinaryBasicBlock *, std::unique_ptr<BinaryBasicBlock>>>
562       Insertions;
563 
564   BinaryBasicBlock *Frontier = getBBAtHotColdSplitPoint(Func);
565   uint64_t FrontierAddress = Frontier ? BBAddresses[Frontier] : 0;
566   if (FrontierAddress)
567     FrontierAddress += Frontier->getNumNonPseudos() * InsnSize;
568 
569   // Add necessary stubs for branch targets that we know cannot be encoded in
570   // the instruction itself.
571   for (BinaryBasicBlock &BB : Func) {
572     uint64_t DotAddress = BBAddresses[&BB];
573     // Stubs themselves are relaxed in the next loop.
574     if (Stubs[&Func].count(&BB))
575       continue;
576 
577     for (MCInst &Inst : BB) {
578       if (BC.MIB->isPseudo(Inst))
579         continue;
580 
581       if (!mayNeedStub(BC, Inst)) {
582         DotAddress += InsnSize;
583         continue;
584       }
585 
586       // Check and relax direct branch or call
587       if (!needsStub(BB, Inst, DotAddress)) {
588         DotAddress += InsnSize;
589         continue;
590       }
591       Modified = true;
592 
593       // Insert stubs close to the patched BB for a call, but far away from the
594       // hot path for a branch, since the branch target is in the cold region
595       // (but first check that the far-away stub will be in range).
596       BinaryBasicBlock *InsertionPoint = &BB;
597       if (Func.isSimple() && !BC.MIB->isCall(Inst) && FrontierAddress &&
598           !BB.isCold()) {
599         int BitsAvail = BC.MIB->getPCRelEncodingSize(Inst) - 1;
600         uint64_t Mask = ~((1ULL << BitsAvail) - 1);
601         assert(FrontierAddress > DotAddress &&
602                "Hot code should be before the frontier");
603         uint64_t PCRelTgt = FrontierAddress - DotAddress;
604         if (!(PCRelTgt & Mask))
605           InsertionPoint = Frontier;
606       }
607       // Always put stubs at the end of the function if it is non-simple. We
608       // can't change the layout of non-simple functions because they may have
609       // jump tables that we do not control.
610       if (!Func.isSimple())
611         InsertionPoint = &*std::prev(Func.end());
612 
613       // Create a stub to handle a far-away target
614       Insertions.emplace_back(InsertionPoint,
615                               replaceTargetWithStub(BB, Inst, DotAddress,
616                                                     InsertionPoint == Frontier
617                                                         ? FrontierAddress
618                                                         : DotAddress));
619 
620       DotAddress += InsnSize;
621     }
622   }
623 
624   // Relax stubs if necessary
625   for (BinaryBasicBlock &BB : Func) {
626     if (!Stubs[&Func].count(&BB) || !BB.isValid())
627       continue;
628 
629     if (auto E = relaxStub(BB, Modified))
630       return Error(std::move(E));
631   }
632 
633   for (std::pair<BinaryBasicBlock *, std::unique_ptr<BinaryBasicBlock>> &Elmt :
634        Insertions) {
635     if (!Elmt.second)
636       continue;
637     std::vector<std::unique_ptr<BinaryBasicBlock>> NewBBs;
638     NewBBs.emplace_back(std::move(Elmt.second));
639     Func.insertBasicBlocks(Elmt.first, std::move(NewBBs), true);
640   }
641 
642   return Error::success();
643 }
644 
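// Relax intra-function branches for the compact (<128MB) code model: estimate
// block offsets within each fragment, split unconditional branches into
// trampoline blocks, and iteratively redirect out-of-range conditional
// branches through trampolines until all of them are in range.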
645 void LongJmpPass::relaxLocalBranches(BinaryFunction &BF) {
646   BinaryContext &BC = BF.getBinaryContext();
647   auto &MIB = BC.MIB;
648 
649   // Quick path.
650   if (!BF.isSplit() && BF.estimateSize() < ShortestJumpSpan)
651     return;
652 
653   auto isBranchOffsetInRange = [&](const MCInst &Inst, int64_t Offset) {
654     const unsigned Bits = MIB->getPCRelEncodingSize(Inst);
655     return isIntN(Bits, Offset);
656   };
657 
658   auto isBlockInRange = [&](const MCInst &Inst, uint64_t InstAddress,
659                             const BinaryBasicBlock &BB) {
660     const int64_t Offset = BB.getOutputStartAddress() - InstAddress;
661     return isBranchOffsetInRange(Inst, Offset);
662   };
663 
664   // Keep track of *all* function trampolines that are going to be added to the
665   // function layout at the end of relaxation.
666   std::vector<std::pair<BinaryBasicBlock *, std::unique_ptr<BinaryBasicBlock>>>
667       FunctionTrampolines;
668 
669   // Function fragments are relaxed independently.
670   for (FunctionFragment &FF : BF.getLayout().fragments()) {
671     // Fill out code size estimation for the fragment. Use output BB address
672     // ranges to store offsets from the start of the function fragment.
673     uint64_t CodeSize = 0;
674     for (BinaryBasicBlock *BB : FF) {
675       BB->setOutputStartAddress(CodeSize);
676       CodeSize += BB->estimateSize();
677       BB->setOutputEndAddress(CodeSize);
678     }
679 
680     // Dynamically-updated size of the fragment.
681     uint64_t FragmentSize = CodeSize;
682 
683     // Size of the trampoline in bytes.
684     constexpr uint64_t TrampolineSize = 4;
685 
686     // Trampolines created for the fragment. DestinationBB -> TrampolineBB.
687     // NB: here we store only the first trampoline created for DestinationBB.
688     DenseMap<const BinaryBasicBlock *, BinaryBasicBlock *> FragmentTrampolines;
689 
690     // Create trampoline code after \p BB, or at the end of the fragment if BB
691     // is nullptr. If \p UpdateOffsets is true, update FragmentSize and offsets
692     // for basic blocks affected by the insertion of the trampoline.
693     auto addTrampolineAfter = [&](BinaryBasicBlock *BB,
694                                   BinaryBasicBlock *TargetBB, uint64_t Count,
695                                   bool UpdateOffsets = true) {
696       FunctionTrampolines.emplace_back(BB ? BB : FF.back(),
697                                        BF.createBasicBlock());
698       BinaryBasicBlock *TrampolineBB = FunctionTrampolines.back().second.get();
699 
700       MCInst Inst;
701       {
702         auto L = BC.scopeLock();
703         MIB->createUncondBranch(Inst, TargetBB->getLabel(), BC.Ctx.get());
704       }
705       TrampolineBB->addInstruction(Inst);
706       TrampolineBB->addSuccessor(TargetBB, Count);
707       TrampolineBB->setExecutionCount(Count);
708       const uint64_t TrampolineAddress =
709           BB ? BB->getOutputEndAddress() : FragmentSize;
710       TrampolineBB->setOutputStartAddress(TrampolineAddress);
711       TrampolineBB->setOutputEndAddress(TrampolineAddress + TrampolineSize);
712       TrampolineBB->setFragmentNum(FF.getFragmentNum());
713 
714       if (!FragmentTrampolines.lookup(TargetBB))
715         FragmentTrampolines[TargetBB] = TrampolineBB;
716 
717       if (!UpdateOffsets)
718         return TrampolineBB;
719 
720       FragmentSize += TrampolineSize;
721 
722       // If the trampoline was added at the end of the fragment, offsets of the
723       // other blocks in this fragment stay intact.
724       if (!BB)
725         return TrampolineBB;
726 
727       // Update offsets for blocks after BB.
728       for (BinaryBasicBlock *IBB : FF) {
729         if (IBB->getOutputStartAddress() >= TrampolineAddress) {
730           IBB->setOutputStartAddress(IBB->getOutputStartAddress() +
731                                      TrampolineSize);
732           IBB->setOutputEndAddress(IBB->getOutputEndAddress() + TrampolineSize);
733         }
734       }
735 
736       // Update offsets for trampolines in this fragment that are placed after
737       // the new trampoline. Note that trampoline blocks are not part of the
738       // function/fragment layout until we add them right before the return
739       // from relaxLocalBranches().
740       for (auto &Pair : FunctionTrampolines) {
741         BinaryBasicBlock *IBB = Pair.second.get();
742         if (IBB->getFragmentNum() != TrampolineBB->getFragmentNum())
743           continue;
744         if (IBB == TrampolineBB)
745           continue;
746         if (IBB->getOutputStartAddress() >= TrampolineAddress) {
747           IBB->setOutputStartAddress(IBB->getOutputStartAddress() +
748                                      TrampolineSize);
749           IBB->setOutputEndAddress(IBB->getOutputEndAddress() + TrampolineSize);
750         }
751       }
752 
753       return TrampolineBB;
754     };
755 
756     // Pre-populate trampolines by splitting unconditional branches from the
757     // containing basic block.
758     for (BinaryBasicBlock *BB : FF) {
759       MCInst *Inst = BB->getLastNonPseudoInstr();
760       if (!Inst || !MIB->isUnconditionalBranch(*Inst))
761         continue;
762 
763       const MCSymbol *TargetSymbol = MIB->getTargetSymbol(*Inst);
764       BB->eraseInstruction(BB->findInstruction(Inst));
765       BB->setOutputEndAddress(BB->getOutputEndAddress() - TrampolineSize);
766 
767       BinaryBasicBlock::BinaryBranchInfo BI;
768       BinaryBasicBlock *TargetBB = BB->getSuccessor(TargetSymbol, BI);
769 
770       BinaryBasicBlock *TrampolineBB =
771           addTrampolineAfter(BB, TargetBB, BI.Count, /*UpdateOffsets*/ false);
772       BB->replaceSuccessor(TargetBB, TrampolineBB, BI.Count);
773     }
774 
775     /// Relax the branch \p Inst in basic block \p BB that targets \p TargetBB.
776     /// \p InstAddress contains the offset of the branch from the start of the
777     /// containing function fragment.
778     auto relaxBranch = [&](BinaryBasicBlock *BB, MCInst &Inst,
779                            uint64_t InstAddress, BinaryBasicBlock *TargetBB) {
780       BinaryFunction *BF = BB->getParent();
781 
782       // Use branch taken count for optimal relaxation.
783       const uint64_t Count = BB->getBranchInfo(*TargetBB).Count;
784       assert(Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
785              "Expected valid branch execution count");
786 
787       // Try to reuse an existing trampoline without introducing any new code.
788       BinaryBasicBlock *TrampolineBB = FragmentTrampolines.lookup(TargetBB);
789       if (TrampolineBB && isBlockInRange(Inst, InstAddress, *TrampolineBB)) {
790         BB->replaceSuccessor(TargetBB, TrampolineBB, Count);
791         TrampolineBB->setExecutionCount(TrampolineBB->getExecutionCount() +
792                                         Count);
793         auto L = BC.scopeLock();
794         MIB->replaceBranchTarget(Inst, TrampolineBB->getLabel(), BC.Ctx.get());
795         return;
796       }
797 
798       // For cold branches, check if we can introduce a trampoline at the end
799       // of the fragment that is within the branch's reach. Note that such a
800       // trampoline may change its address later and become unreachable, in
801       // which case we will need further relaxation.
802       const int64_t OffsetToEnd = FragmentSize - InstAddress;
803       if (Count == 0 && isBranchOffsetInRange(Inst, OffsetToEnd)) {
804         TrampolineBB = addTrampolineAfter(nullptr, TargetBB, Count);
805         BB->replaceSuccessor(TargetBB, TrampolineBB, Count);
806         auto L = BC.scopeLock();
807         MIB->replaceBranchTarget(Inst, TrampolineBB->getLabel(), BC.Ctx.get());
808 
809         return;
810       }
811 
812       // Insert a new block after the current one and use it as a trampoline.
813       TrampolineBB = addTrampolineAfter(BB, TargetBB, Count);
814 
815       // If the other successor is a fall-through, invert the condition code.
816       const BinaryBasicBlock *const NextBB =
817           BF->getLayout().getBasicBlockAfter(BB, /*IgnoreSplits*/ false);
818       if (BB->getConditionalSuccessor(false) == NextBB) {
819         BB->swapConditionalSuccessors();
820         auto L = BC.scopeLock();
821         MIB->reverseBranchCondition(Inst, NextBB->getLabel(), BC.Ctx.get());
822       } else {
823         auto L = BC.scopeLock();
824         MIB->replaceBranchTarget(Inst, TrampolineBB->getLabel(), BC.Ctx.get());
825       }
826       BB->replaceSuccessor(TargetBB, TrampolineBB, Count);
827     };
828 
829     bool MayNeedRelaxation;
830     uint64_t NumIterations = 0;
831     do {
832       MayNeedRelaxation = false;
833       ++NumIterations;
834       for (auto BBI = FF.begin(); BBI != FF.end(); ++BBI) {
835         BinaryBasicBlock *BB = *BBI;
836         uint64_t NextInstOffset = BB->getOutputStartAddress();
837         for (MCInst &Inst : *BB) {
838           const size_t InstAddress = NextInstOffset;
839           if (!MIB->isPseudo(Inst))
840             NextInstOffset += 4;
841 
842           if (!mayNeedStub(BF.getBinaryContext(), Inst))
843             continue;
844 
845           const size_t BitsAvailable = MIB->getPCRelEncodingSize(Inst);
846 
847           // Span of +/-128MB.
848           if (BitsAvailable == LongestJumpBits)
849             continue;
850 
851           const MCSymbol *TargetSymbol = MIB->getTargetSymbol(Inst);
852           BinaryBasicBlock *TargetBB = BB->getSuccessor(TargetSymbol);
853           assert(TargetBB &&
854                  "Basic block target expected for conditional branch.");
855 
856           // Check if the relaxation is needed.
857           if (TargetBB->getFragmentNum() == FF.getFragmentNum() &&
858               isBlockInRange(Inst, InstAddress, *TargetBB))
859             continue;
860 
861           relaxBranch(BB, Inst, InstAddress, TargetBB);
862 
863           MayNeedRelaxation = true;
864         }
865       }
866 
867       // We may have added new instructions, but the whole fragment is still
868       // smaller than the minimum branch span, so no branch can be out of range.
869       if (FragmentSize < ShortestJumpSpan)
870         MayNeedRelaxation = false;
871 
872     } while (MayNeedRelaxation);
873 
874     LLVM_DEBUG({
875       if (NumIterations > 2) {
876         dbgs() << "BOLT-DEBUG: relaxed fragment " << FF.getFragmentNum().get()
877                << " of " << BF << " in " << NumIterations << " iterations\n";
878       }
879     });
880     (void)NumIterations;
881   }
882 
883   // Add trampoline blocks from all fragments to the layout.
884   DenseMap<BinaryBasicBlock *, std::vector<std::unique_ptr<BinaryBasicBlock>>>
885       Insertions;
886   for (std::pair<BinaryBasicBlock *, std::unique_ptr<BinaryBasicBlock>> &Pair :
887        FunctionTrampolines) {
888     if (!Pair.second)
889       continue;
890     Insertions[Pair.first].emplace_back(std::move(Pair.second));
891   }
892 
893   for (auto &Pair : Insertions) {
894     BF.insertBasicBlocks(Pair.first, std::move(Pair.second),
895                          /*UpdateLayout*/ true, /*UpdateCFI*/ true,
896                          /*RecomputeLPs*/ false);
897   }
898 }
899 
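// Pass entry point: with -compact-code-model, relax local branches in
// parallel; otherwise iterate tentative layout and stub insertion/relaxation
// until no more changes are made.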
900 Error LongJmpPass::runOnFunctions(BinaryContext &BC) {
901 
902   if (opts::CompactCodeModel) {
903     BC.outs()
904         << "BOLT-INFO: relaxing branches for compact code model (<128MB)\n";
905 
906     ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) {
907       relaxLocalBranches(BF);
908     };
909 
910     ParallelUtilities::PredicateTy SkipPredicate =
911         [&](const BinaryFunction &BF) {
912           return !BC.shouldEmit(BF) || !BF.isSimple();
913         };
914 
915     ParallelUtilities::runOnEachFunction(
916         BC, ParallelUtilities::SchedulingPolicy::SP_INST_LINEAR, WorkFun,
917         SkipPredicate, "RelaxLocalBranches");
918 
919     return Error::success();
920   }
921 
922   BC.outs() << "BOLT-INFO: Starting stub-insertion pass\n";
923   std::vector<BinaryFunction *> Sorted = BC.getSortedFunctions();
924   bool Modified;
925   uint32_t Iterations = 0;
926   do {
927     ++Iterations;
928     Modified = false;
929     tentativeLayout(BC, Sorted);
930     updateStubGroups();
931     for (BinaryFunction *Func : Sorted) {
932       if (auto E = relax(*Func, Modified))
933         return Error(std::move(E));
934       // Don't ruin non-simple functions, they can't afford to have the layout
935       // changed.
936       if (Modified && Func->isSimple())
937         Func->fixBranches();
938     }
939   } while (Modified);
940   BC.outs() << "BOLT-INFO: Inserted " << NumHotStubs
941             << " stubs in the hot area and " << NumColdStubs
942             << " stubs in the cold area. Shared " << NumSharedStubs
943             << " times, iterated " << Iterations << " times.\n";
944   return Error::success();
945 }
946 } // namespace bolt
947 } // namespace llvm
948