xref: /llvm-project/bolt/lib/Core/BinaryFunction.cpp (revision 4be3083bb39836466680a15f970ecc883d2e362a)
1 //===- bolt/Core/BinaryFunction.cpp - Low-level function ------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the BinaryFunction class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "bolt/Core/BinaryFunction.h"
14 #include "bolt/Core/BinaryBasicBlock.h"
15 #include "bolt/Core/DynoStats.h"
16 #include "bolt/Core/HashUtilities.h"
17 #include "bolt/Core/MCPlusBuilder.h"
18 #include "bolt/Utils/NameResolver.h"
19 #include "bolt/Utils/NameShortener.h"
20 #include "bolt/Utils/Utils.h"
21 #include "llvm/ADT/STLExtras.h"
22 #include "llvm/ADT/SmallSet.h"
23 #include "llvm/ADT/StringExtras.h"
24 #include "llvm/ADT/StringRef.h"
25 #include "llvm/Demangle/Demangle.h"
26 #include "llvm/MC/MCAsmInfo.h"
27 #include "llvm/MC/MCContext.h"
28 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
29 #include "llvm/MC/MCExpr.h"
30 #include "llvm/MC/MCInst.h"
31 #include "llvm/MC/MCInstPrinter.h"
32 #include "llvm/MC/MCRegisterInfo.h"
33 #include "llvm/MC/MCSymbol.h"
34 #include "llvm/Object/ObjectFile.h"
35 #include "llvm/Support/CommandLine.h"
36 #include "llvm/Support/Debug.h"
37 #include "llvm/Support/GenericDomTreeConstruction.h"
38 #include "llvm/Support/GenericLoopInfoImpl.h"
39 #include "llvm/Support/GraphWriter.h"
40 #include "llvm/Support/LEB128.h"
41 #include "llvm/Support/Regex.h"
42 #include "llvm/Support/Timer.h"
43 #include "llvm/Support/raw_ostream.h"
44 #include "llvm/Support/xxhash.h"
45 #include <functional>
46 #include <limits>
47 #include <numeric>
48 #include <string>
49 
50 #define DEBUG_TYPE "bolt"
51 
52 using namespace llvm;
53 using namespace bolt;
54 
55 namespace opts {
56 
57 extern cl::OptionCategory BoltCategory;
58 extern cl::OptionCategory BoltOptCategory;
59 
60 extern cl::opt<bool> EnableBAT;
61 extern cl::opt<bool> Instrument;
62 extern cl::opt<bool> StrictMode;
63 extern cl::opt<bool> UpdateDebugSections;
64 extern cl::opt<unsigned> Verbosity;
65 
66 extern bool processAllFunctions();
67 
68 cl::opt<bool> CheckEncoding(
69     "check-encoding",
70     cl::desc("perform verification of LLVM instruction encoding/decoding. "
71              "Every instruction in the input is decoded and re-encoded. "
72              "If the resulting bytes do not match the input, a warning message "
73              "is printed."),
74     cl::Hidden, cl::cat(BoltCategory));
75 
76 static cl::opt<bool> DotToolTipCode(
77     "dot-tooltip-code",
78     cl::desc("add basic block instructions as tool tips on nodes"), cl::Hidden,
79     cl::cat(BoltCategory));
80 
81 cl::opt<JumpTableSupportLevel>
82 JumpTables("jump-tables",
83   cl::desc("jump tables support (default=basic)"),
84   cl::init(JTS_BASIC),
85   cl::values(
86       clEnumValN(JTS_NONE, "none",
87                  "do not optimize functions with jump tables"),
88       clEnumValN(JTS_BASIC, "basic",
89                  "optimize functions with jump tables"),
90       clEnumValN(JTS_MOVE, "move",
91                  "move jump tables to a separate section"),
92       clEnumValN(JTS_SPLIT, "split",
93                  "split jump tables section into hot and cold based on "
94                  "function execution frequency"),
95       clEnumValN(JTS_AGGRESSIVE, "aggressive",
96                  "aggressively split jump tables section based on usage "
97                  "of the tables")),
98   cl::ZeroOrMore,
99   cl::cat(BoltOptCategory));
100 
101 static cl::opt<bool> NoScan(
102     "no-scan",
103     cl::desc(
104         "do not scan cold functions for external references (may result in "
105         "slower binary)"),
106     cl::Hidden, cl::cat(BoltOptCategory));
107 
108 cl::opt<bool>
109     PreserveBlocksAlignment("preserve-blocks-alignment",
110                             cl::desc("try to preserve basic block alignment"),
111                             cl::cat(BoltOptCategory));
112 
113 static cl::opt<bool> PrintOutputAddressRange(
114     "print-output-address-range",
115     cl::desc(
116         "print output address range for each basic block in the function when"
117         "BinaryFunction::print is called"),
118     cl::Hidden, cl::cat(BoltOptCategory));
119 
120 cl::opt<bool>
121 PrintDynoStats("dyno-stats",
122   cl::desc("print execution info based on profile"),
123   cl::cat(BoltCategory));
124 
125 static cl::opt<bool>
126 PrintDynoStatsOnly("print-dyno-stats-only",
127   cl::desc("while printing functions output dyno-stats and skip instructions"),
128   cl::init(false),
129   cl::Hidden,
130   cl::cat(BoltCategory));
131 
132 static cl::list<std::string>
133 PrintOnly("print-only",
134   cl::CommaSeparated,
135   cl::desc("list of functions to print"),
136   cl::value_desc("func1,func2,func3,..."),
137   cl::Hidden,
138   cl::cat(BoltCategory));
139 
140 cl::opt<bool>
141     TimeBuild("time-build",
142               cl::desc("print time spent constructing binary functions"),
143               cl::Hidden, cl::cat(BoltCategory));
144 
145 cl::opt<bool>
146 TrapOnAVX512("trap-avx512",
147   cl::desc("in relocation mode trap upon entry to any function that uses "
148             "AVX-512 instructions"),
149   cl::init(false),
150   cl::ZeroOrMore,
151   cl::Hidden,
152   cl::cat(BoltCategory));
153 
154 bool shouldPrint(const BinaryFunction &Function) {
155   if (Function.isIgnored())
156     return false;
157 
158   if (PrintOnly.empty())
159     return true;
160 
161   for (std::string &Name : opts::PrintOnly) {
162     if (Function.hasNameRegex(Name)) {
163       return true;
164     }
165   }
166 
167   return false;
168 }
169 
170 } // namespace opts
171 
172 namespace llvm {
173 namespace bolt {
174 
175 template <typename R> static bool emptyRange(const R &Range) {
176   return Range.begin() == Range.end();
177 }
178 
179 /// Gets debug line information for the instruction located at the given
180 /// address in the original binary. The SMLoc's pointer is used
181 /// to point to this information, which is represented by a
182 /// DebugLineTableRowRef. The returned pointer is null if no debug line
183 /// information for this instruction was found.
184 static SMLoc findDebugLineInformationForInstructionAt(
185     uint64_t Address, DWARFUnit *Unit,
186     const DWARFDebugLine::LineTable *LineTable) {
187   // We use the pointer in SMLoc to store an instance of DebugLineTableRowRef,
188   // which occupies 64 bits. Thus, we can only proceed if the struct fits into
189   // the pointer itself.
190   static_assert(
191       sizeof(decltype(SMLoc().getPointer())) >= sizeof(DebugLineTableRowRef),
192       "Cannot fit instruction debug line information into SMLoc's pointer");
193 
194   SMLoc NullResult = DebugLineTableRowRef::NULL_ROW.toSMLoc();
195   uint32_t RowIndex = LineTable->lookupAddress(
196       {Address, object::SectionedAddress::UndefSection});
197   if (RowIndex == LineTable->UnknownRowIndex)
198     return NullResult;
199 
200   assert(RowIndex < LineTable->Rows.size() &&
201          "Line Table lookup returned invalid index.");
202 
203   decltype(SMLoc().getPointer()) Ptr;
204   DebugLineTableRowRef *InstructionLocation =
205       reinterpret_cast<DebugLineTableRowRef *>(&Ptr);
206 
207   InstructionLocation->DwCompileUnitIndex = Unit->getOffset();
208   InstructionLocation->RowIndex = RowIndex + 1;
209 
210   return SMLoc::getFromPointer(Ptr);
211 }
212 
213 static std::string buildSectionName(StringRef Prefix, StringRef Name,
214                                     const BinaryContext &BC) {
215   if (BC.isELF())
216     return (Prefix + Name).str();
217   static NameShortener NS;
218   return (Prefix + Twine(NS.getID(Name))).str();
219 }
220 
221 static raw_ostream &operator<<(raw_ostream &OS,
222                                const BinaryFunction::State State) {
223   switch (State) {
224   case BinaryFunction::State::Empty:         OS << "empty"; break;
225   case BinaryFunction::State::Disassembled:  OS << "disassembled"; break;
226   case BinaryFunction::State::CFG:           OS << "CFG constructed"; break;
227   case BinaryFunction::State::CFG_Finalized: OS << "CFG finalized"; break;
228   case BinaryFunction::State::EmittedCFG:    OS << "emitted with CFG"; break;
229   case BinaryFunction::State::Emitted:       OS << "emitted"; break;
230   }
231 
232   return OS;
233 }
234 
235 std::string BinaryFunction::buildCodeSectionName(StringRef Name,
236                                                  const BinaryContext &BC) {
237   return buildSectionName(BC.isELF() ? ".local.text." : ".l.text.", Name, BC);
238 }
239 
240 std::string BinaryFunction::buildColdCodeSectionName(StringRef Name,
241                                                      const BinaryContext &BC) {
242   return buildSectionName(BC.isELF() ? ".local.cold.text." : ".l.c.text.", Name,
243                           BC);
244 }
245 
246 uint64_t BinaryFunction::Count = 0;
247 
248 std::optional<StringRef>
249 BinaryFunction::hasNameRegex(const StringRef Name) const {
250   const std::string RegexName = (Twine("^") + StringRef(Name) + "$").str();
251   Regex MatchName(RegexName);
252   return forEachName(
253       [&MatchName](StringRef Name) { return MatchName.match(Name); });
254 }
255 
256 std::optional<StringRef>
257 BinaryFunction::hasRestoredNameRegex(const StringRef Name) const {
258   const std::string RegexName = (Twine("^") + StringRef(Name) + "$").str();
259   Regex MatchName(RegexName);
260   return forEachName([&MatchName](StringRef Name) {
261     return MatchName.match(NameResolver::restore(Name));
262   });
263 }
264 
265 std::string BinaryFunction::getDemangledName() const {
266   StringRef MangledName = NameResolver::restore(getOneName());
267   return demangle(MangledName.str());
268 }
269 
270 BinaryBasicBlock *
271 BinaryFunction::getBasicBlockContainingOffset(uint64_t Offset) {
272   if (Offset > Size)
273     return nullptr;
274 
275   if (BasicBlockOffsets.empty())
276     return nullptr;
277 
278   /*
279    * This is commented out because it makes BOLT too slow.
280    * assert(std::is_sorted(BasicBlockOffsets.begin(),
281    *                       BasicBlockOffsets.end(),
282    *                       CompareBasicBlockOffsets())));
283    */
284   auto I =
285       llvm::upper_bound(BasicBlockOffsets, BasicBlockOffset(Offset, nullptr),
286                         CompareBasicBlockOffsets());
287   assert(I != BasicBlockOffsets.begin() && "first basic block not at offset 0");
288   --I;
289   BinaryBasicBlock *BB = I->second;
290   return (Offset < BB->getOffset() + BB->getOriginalSize()) ? BB : nullptr;
291 }
292 
293 void BinaryFunction::markUnreachableBlocks() {
294   std::stack<BinaryBasicBlock *> Stack;
295 
296   for (BinaryBasicBlock &BB : blocks())
297     BB.markValid(false);
298 
299   // Add all entries and landing pads as roots.
300   for (BinaryBasicBlock *BB : BasicBlocks) {
301     if (isEntryPoint(*BB) || BB->isLandingPad()) {
302       Stack.push(BB);
303       BB->markValid(true);
304       continue;
305     }
306     // FIXME:
307     // Also mark BBs with indirect jumps as reachable, since we do not
308     // support removing unused jump tables yet (GH-issue20).
309     for (const MCInst &Inst : *BB) {
310       if (BC.MIB->getJumpTable(Inst)) {
311         Stack.push(BB);
312         BB->markValid(true);
313         break;
314       }
315     }
316   }
317 
318   // Determine reachable BBs from the entry point
319   while (!Stack.empty()) {
320     BinaryBasicBlock *BB = Stack.top();
321     Stack.pop();
322     for (BinaryBasicBlock *Succ : BB->successors()) {
323       if (Succ->isValid())
324         continue;
325       Succ->markValid(true);
326       Stack.push(Succ);
327     }
328   }
329 }
330 
331 // Any unnecessary fallthrough jumps revealed after calling eraseInvalidBBs
332 // will be cleaned up by fixBranches().
333 std::pair<unsigned, uint64_t>
334 BinaryFunction::eraseInvalidBBs(const MCCodeEmitter *Emitter) {
335   DenseSet<const BinaryBasicBlock *> InvalidBBs;
336   unsigned Count = 0;
337   uint64_t Bytes = 0;
338   for (BinaryBasicBlock *const BB : BasicBlocks) {
339     if (!BB->isValid()) {
340       assert(!isEntryPoint(*BB) && "all entry blocks must be valid");
341       InvalidBBs.insert(BB);
342       ++Count;
343       Bytes += BC.computeCodeSize(BB->begin(), BB->end(), Emitter);
344     }
345   }
346 
347   Layout.eraseBasicBlocks(InvalidBBs);
348 
349   BasicBlockListType NewBasicBlocks;
350   for (auto I = BasicBlocks.begin(), E = BasicBlocks.end(); I != E; ++I) {
351     BinaryBasicBlock *BB = *I;
352     if (InvalidBBs.contains(BB)) {
353       // Make sure the block is removed from the list of predecessors.
354       BB->removeAllSuccessors();
355       DeletedBasicBlocks.push_back(BB);
356     } else {
357       NewBasicBlocks.push_back(BB);
358     }
359   }
360   BasicBlocks = std::move(NewBasicBlocks);
361 
362   assert(BasicBlocks.size() == Layout.block_size());
363 
364   // Update CFG state if needed
365   if (Count > 0)
366     recomputeLandingPads();
367 
368   return std::make_pair(Count, Bytes);
369 }
370 
371 bool BinaryFunction::isForwardCall(const MCSymbol *CalleeSymbol) const {
372   // This function should work properly before and after function reordering.
373   // In order to accomplish this, we use the function index (if it is valid).
374   // If the function indices are not valid, we fall back to the original
375   // addresses.  This should be ok because the functions without valid indices
376   // should have been ordered with a stable sort.
377   const BinaryFunction *CalleeBF = BC.getFunctionForSymbol(CalleeSymbol);
378   if (CalleeBF) {
379     if (CalleeBF->isInjected())
380       return true;
381 
382     if (hasValidIndex() && CalleeBF->hasValidIndex()) {
383       return getIndex() < CalleeBF->getIndex();
384     } else if (hasValidIndex() && !CalleeBF->hasValidIndex()) {
385       return true;
386     } else if (!hasValidIndex() && CalleeBF->hasValidIndex()) {
387       return false;
388     } else {
389       return getAddress() < CalleeBF->getAddress();
390     }
391   } else {
392     // Absolute symbol.
393     ErrorOr<uint64_t> CalleeAddressOrError = BC.getSymbolValue(*CalleeSymbol);
394     assert(CalleeAddressOrError && "unregistered symbol found");
395     return *CalleeAddressOrError > getAddress();
396   }
397 }
398 
399 void BinaryFunction::dump() const {
400   // getDynoStats calls FunctionLayout::updateLayoutIndices and
401   // BasicBlock::analyzeBranch. The former cannot be const, but should be
402   // removed, the latter should be made const, but seems to require refactoring.
403   // Forcing all callers to have a non-const reference to BinaryFunction to call
404   // dump non-const however is not ideal either. Adding this const_cast is right
405   // now the best solution. It is safe, because BinaryFunction itself is not
406   // modified. Only BinaryBasicBlocks are actually modified (if it all) and we
407   // have mutable pointers to those regardless whether this function is
408   // const-qualified or not.
409   const_cast<BinaryFunction &>(*this).print(dbgs(), "");
410 }
411 
412 void BinaryFunction::print(raw_ostream &OS, std::string Annotation) {
413   if (!opts::shouldPrint(*this))
414     return;
415 
416   StringRef SectionName =
417       OriginSection ? OriginSection->getName() : "<no origin section>";
418   OS << "Binary Function \"" << *this << "\" " << Annotation << " {";
419   std::vector<StringRef> AllNames = getNames();
420   if (AllNames.size() > 1) {
421     OS << "\n  All names   : ";
422     const char *Sep = "";
423     for (const StringRef &Name : AllNames) {
424       OS << Sep << Name;
425       Sep = "\n                ";
426     }
427   }
428   OS << "\n  Number      : " << FunctionNumber;
429   OS << "\n  State       : " << CurrentState;
430   OS << "\n  Address     : 0x" << Twine::utohexstr(Address);
431   OS << "\n  Size        : 0x" << Twine::utohexstr(Size);
432   OS << "\n  MaxSize     : 0x" << Twine::utohexstr(MaxSize);
433   OS << "\n  Offset      : 0x" << Twine::utohexstr(getFileOffset());
434   OS << "\n  Section     : " << SectionName;
435   OS << "\n  Orc Section : " << getCodeSectionName();
436   OS << "\n  LSDA        : 0x" << Twine::utohexstr(getLSDAAddress());
437   OS << "\n  IsSimple    : " << IsSimple;
438   OS << "\n  IsMultiEntry: " << isMultiEntry();
439   OS << "\n  IsSplit     : " << isSplit();
440   OS << "\n  BB Count    : " << size();
441 
442   if (HasUnknownControlFlow)
443     OS << "\n  Unknown CF  : true";
444   if (getPersonalityFunction())
445     OS << "\n  Personality : " << getPersonalityFunction()->getName();
446   if (IsFragment)
447     OS << "\n  IsFragment  : true";
448   if (isFolded())
449     OS << "\n  FoldedInto  : " << *getFoldedIntoFunction();
450   for (BinaryFunction *ParentFragment : ParentFragments)
451     OS << "\n  Parent      : " << *ParentFragment;
452   if (!Fragments.empty()) {
453     OS << "\n  Fragments   : ";
454     ListSeparator LS;
455     for (BinaryFunction *Frag : Fragments)
456       OS << LS << *Frag;
457   }
458   if (hasCFG())
459     OS << "\n  Hash        : " << Twine::utohexstr(computeHash());
460   if (isMultiEntry()) {
461     OS << "\n  Secondary Entry Points : ";
462     ListSeparator LS;
463     for (const auto &KV : SecondaryEntryPoints)
464       OS << LS << KV.second->getName();
465   }
466   if (FrameInstructions.size())
467     OS << "\n  CFI Instrs  : " << FrameInstructions.size();
468   if (!Layout.block_empty()) {
469     OS << "\n  BB Layout   : ";
470     ListSeparator LS;
471     for (const BinaryBasicBlock *BB : Layout.blocks())
472       OS << LS << BB->getName();
473   }
474   if (getImageAddress())
475     OS << "\n  Image       : 0x" << Twine::utohexstr(getImageAddress());
476   if (ExecutionCount != COUNT_NO_PROFILE) {
477     OS << "\n  Exec Count  : " << ExecutionCount;
478     OS << "\n  Branch Count: " << RawBranchCount;
479     OS << "\n  Profile Acc : " << format("%.1f%%", ProfileMatchRatio * 100.0f);
480   }
481 
482   if (opts::PrintDynoStats && !getLayout().block_empty()) {
483     OS << '\n';
484     DynoStats dynoStats = getDynoStats(*this);
485     OS << dynoStats;
486   }
487 
488   OS << "\n}\n";
489 
490   if (opts::PrintDynoStatsOnly || !BC.InstPrinter)
491     return;
492 
493   // Offset of the instruction in function.
494   uint64_t Offset = 0;
495 
496   if (BasicBlocks.empty() && !Instructions.empty()) {
497     // Print before CFG was built.
498     for (const std::pair<const uint32_t, MCInst> &II : Instructions) {
499       Offset = II.first;
500 
501       // Print label if exists at this offset.
502       auto LI = Labels.find(Offset);
503       if (LI != Labels.end()) {
504         if (const MCSymbol *EntrySymbol =
505                 getSecondaryEntryPointSymbol(LI->second))
506           OS << EntrySymbol->getName() << " (Entry Point):\n";
507         OS << LI->second->getName() << ":\n";
508       }
509 
510       BC.printInstruction(OS, II.second, Offset, this);
511     }
512   }
513 
514   StringRef SplitPointMsg = "";
515   for (const FunctionFragment &FF : Layout.fragments()) {
516     OS << SplitPointMsg;
517     SplitPointMsg = "-------   HOT-COLD SPLIT POINT   -------\n\n";
518     for (const BinaryBasicBlock *BB : FF) {
519       OS << BB->getName() << " (" << BB->size()
520          << " instructions, align : " << BB->getAlignment() << ")\n";
521 
522       if (opts::PrintOutputAddressRange)
523         OS << formatv("  Output Address Range: [{0:x}, {1:x}) ({2} bytes)\n",
524                       BB->getOutputAddressRange().first,
525                       BB->getOutputAddressRange().second, BB->getOutputSize());
526 
527       if (isEntryPoint(*BB)) {
528         if (MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(*BB))
529           OS << "  Secondary Entry Point: " << EntrySymbol->getName() << '\n';
530         else
531           OS << "  Entry Point\n";
532       }
533 
534       if (BB->isLandingPad())
535         OS << "  Landing Pad\n";
536 
537       uint64_t BBExecCount = BB->getExecutionCount();
538       if (hasValidProfile()) {
539         OS << "  Exec Count : ";
540         if (BB->getExecutionCount() != BinaryBasicBlock::COUNT_NO_PROFILE)
541           OS << BBExecCount << '\n';
542         else
543           OS << "<unknown>\n";
544       }
545       if (BB->getCFIState() >= 0)
546         OS << "  CFI State : " << BB->getCFIState() << '\n';
547       if (opts::EnableBAT) {
548         OS << "  Input offset: 0x" << Twine::utohexstr(BB->getInputOffset())
549            << "\n";
550       }
551       if (!BB->pred_empty()) {
552         OS << "  Predecessors: ";
553         ListSeparator LS;
554         for (BinaryBasicBlock *Pred : BB->predecessors())
555           OS << LS << Pred->getName();
556         OS << '\n';
557       }
558       if (!BB->throw_empty()) {
559         OS << "  Throwers: ";
560         ListSeparator LS;
561         for (BinaryBasicBlock *Throw : BB->throwers())
562           OS << LS << Throw->getName();
563         OS << '\n';
564       }
565 
566       Offset = alignTo(Offset, BB->getAlignment());
567 
568       // Note: offsets are imprecise since this is happening prior to
569       // relaxation.
570       Offset = BC.printInstructions(OS, BB->begin(), BB->end(), Offset, this);
571 
572       if (!BB->succ_empty()) {
573         OS << "  Successors: ";
574         // For more than 2 successors, sort them based on frequency.
575         std::vector<uint64_t> Indices(BB->succ_size());
576         std::iota(Indices.begin(), Indices.end(), 0);
577         if (BB->succ_size() > 2 && BB->getKnownExecutionCount()) {
578           llvm::stable_sort(Indices, [&](const uint64_t A, const uint64_t B) {
579             return BB->BranchInfo[B] < BB->BranchInfo[A];
580           });
581         }
582         ListSeparator LS;
583         for (unsigned I = 0; I < Indices.size(); ++I) {
584           BinaryBasicBlock *Succ = BB->Successors[Indices[I]];
585           const BinaryBasicBlock::BinaryBranchInfo &BI =
586               BB->BranchInfo[Indices[I]];
587           OS << LS << Succ->getName();
588           if (ExecutionCount != COUNT_NO_PROFILE &&
589               BI.MispredictedCount != BinaryBasicBlock::COUNT_INFERRED) {
590             OS << " (mispreds: " << BI.MispredictedCount
591                << ", count: " << BI.Count << ")";
592           } else if (ExecutionCount != COUNT_NO_PROFILE &&
593                      BI.Count != BinaryBasicBlock::COUNT_NO_PROFILE) {
594             OS << " (inferred count: " << BI.Count << ")";
595           }
596         }
597         OS << '\n';
598       }
599 
600       if (!BB->lp_empty()) {
601         OS << "  Landing Pads: ";
602         ListSeparator LS;
603         for (BinaryBasicBlock *LP : BB->landing_pads()) {
604           OS << LS << LP->getName();
605           if (ExecutionCount != COUNT_NO_PROFILE) {
606             OS << " (count: " << LP->getExecutionCount() << ")";
607           }
608         }
609         OS << '\n';
610       }
611 
612       // In CFG_Finalized state we can miscalculate CFI state at exit.
613       if (CurrentState == State::CFG) {
614         const int32_t CFIStateAtExit = BB->getCFIStateAtExit();
615         if (CFIStateAtExit >= 0)
616           OS << "  CFI State: " << CFIStateAtExit << '\n';
617       }
618 
619       OS << '\n';
620     }
621   }
622 
623   // Dump new exception ranges for the function.
624   if (!CallSites.empty()) {
625     OS << "EH table:\n";
626     for (const FunctionFragment &FF : getLayout().fragments()) {
627       for (const auto &FCSI : getCallSites(FF.getFragmentNum())) {
628         const CallSite &CSI = FCSI.second;
629         OS << "  [" << *CSI.Start << ", " << *CSI.End << ") landing pad : ";
630         if (CSI.LP)
631           OS << *CSI.LP;
632         else
633           OS << "0";
634         OS << ", action : " << CSI.Action << '\n';
635       }
636     }
637     OS << '\n';
638   }
639 
640   // Print all jump tables.
641   for (const std::pair<const uint64_t, JumpTable *> &JTI : JumpTables)
642     JTI.second->print(OS);
643 
644   OS << "DWARF CFI Instructions:\n";
645   if (OffsetToCFI.size()) {
646     // Pre-buildCFG information
647     for (const std::pair<const uint32_t, uint32_t> &Elmt : OffsetToCFI) {
648       OS << format("    %08x:\t", Elmt.first);
649       assert(Elmt.second < FrameInstructions.size() && "Incorrect CFI offset");
650       BinaryContext::printCFI(OS, FrameInstructions[Elmt.second]);
651       OS << "\n";
652     }
653   } else {
654     // Post-buildCFG information
655     for (uint32_t I = 0, E = FrameInstructions.size(); I != E; ++I) {
656       const MCCFIInstruction &CFI = FrameInstructions[I];
657       OS << format("    %d:\t", I);
658       BinaryContext::printCFI(OS, CFI);
659       OS << "\n";
660     }
661   }
662   if (FrameInstructions.empty())
663     OS << "    <empty>\n";
664 
665   OS << "End of Function \"" << *this << "\"\n\n";
666 }
667 
668 void BinaryFunction::printRelocations(raw_ostream &OS, uint64_t Offset,
669                                       uint64_t Size) const {
670   const char *Sep = " # Relocs: ";
671 
672   auto RI = Relocations.lower_bound(Offset);
673   while (RI != Relocations.end() && RI->first < Offset + Size) {
674     OS << Sep << "(R: " << RI->second << ")";
675     Sep = ", ";
676     ++RI;
677   }
678 }
679 
680 static std::string mutateDWARFExpressionTargetReg(const MCCFIInstruction &Instr,
681                                                   MCPhysReg NewReg) {
682   StringRef ExprBytes = Instr.getValues();
683   assert(ExprBytes.size() > 1 && "DWARF expression CFI is too short");
684   uint8_t Opcode = ExprBytes[0];
685   assert((Opcode == dwarf::DW_CFA_expression ||
686           Opcode == dwarf::DW_CFA_val_expression) &&
687          "invalid DWARF expression CFI");
688   (void)Opcode;
689   const uint8_t *const Start =
690       reinterpret_cast<const uint8_t *>(ExprBytes.drop_front(1).data());
691   const uint8_t *const End =
692       reinterpret_cast<const uint8_t *>(Start + ExprBytes.size() - 1);
693   unsigned Size = 0;
694   decodeULEB128(Start, &Size, End);
695   assert(Size > 0 && "Invalid reg encoding for DWARF expression CFI");
696   SmallString<8> Tmp;
697   raw_svector_ostream OSE(Tmp);
698   encodeULEB128(NewReg, OSE);
699   return Twine(ExprBytes.slice(0, 1))
700       .concat(OSE.str())
701       .concat(ExprBytes.drop_front(1 + Size))
702       .str();
703 }
704 
705 void BinaryFunction::mutateCFIRegisterFor(const MCInst &Instr,
706                                           MCPhysReg NewReg) {
707   const MCCFIInstruction *OldCFI = getCFIFor(Instr);
708   assert(OldCFI && "invalid CFI instr");
709   switch (OldCFI->getOperation()) {
710   default:
711     llvm_unreachable("Unexpected instruction");
712   case MCCFIInstruction::OpDefCfa:
713     setCFIFor(Instr, MCCFIInstruction::cfiDefCfa(nullptr, NewReg,
714                                                  OldCFI->getOffset()));
715     break;
716   case MCCFIInstruction::OpDefCfaRegister:
717     setCFIFor(Instr, MCCFIInstruction::createDefCfaRegister(nullptr, NewReg));
718     break;
719   case MCCFIInstruction::OpOffset:
720     setCFIFor(Instr, MCCFIInstruction::createOffset(nullptr, NewReg,
721                                                     OldCFI->getOffset()));
722     break;
723   case MCCFIInstruction::OpRegister:
724     setCFIFor(Instr, MCCFIInstruction::createRegister(nullptr, NewReg,
725                                                       OldCFI->getRegister2()));
726     break;
727   case MCCFIInstruction::OpSameValue:
728     setCFIFor(Instr, MCCFIInstruction::createSameValue(nullptr, NewReg));
729     break;
730   case MCCFIInstruction::OpEscape:
731     setCFIFor(Instr,
732               MCCFIInstruction::createEscape(
733                   nullptr,
734                   StringRef(mutateDWARFExpressionTargetReg(*OldCFI, NewReg))));
735     break;
736   case MCCFIInstruction::OpRestore:
737     setCFIFor(Instr, MCCFIInstruction::createRestore(nullptr, NewReg));
738     break;
739   case MCCFIInstruction::OpUndefined:
740     setCFIFor(Instr, MCCFIInstruction::createUndefined(nullptr, NewReg));
741     break;
742   }
743 }
744 
745 const MCCFIInstruction *BinaryFunction::mutateCFIOffsetFor(const MCInst &Instr,
746                                                            int64_t NewOffset) {
747   const MCCFIInstruction *OldCFI = getCFIFor(Instr);
748   assert(OldCFI && "invalid CFI instr");
749   switch (OldCFI->getOperation()) {
750   default:
751     llvm_unreachable("Unexpected instruction");
752   case MCCFIInstruction::OpDefCfaOffset:
753     setCFIFor(Instr, MCCFIInstruction::cfiDefCfaOffset(nullptr, NewOffset));
754     break;
755   case MCCFIInstruction::OpAdjustCfaOffset:
756     setCFIFor(Instr,
757               MCCFIInstruction::createAdjustCfaOffset(nullptr, NewOffset));
758     break;
759   case MCCFIInstruction::OpDefCfa:
760     setCFIFor(Instr, MCCFIInstruction::cfiDefCfa(nullptr, OldCFI->getRegister(),
761                                                  NewOffset));
762     break;
763   case MCCFIInstruction::OpOffset:
764     setCFIFor(Instr, MCCFIInstruction::createOffset(
765                          nullptr, OldCFI->getRegister(), NewOffset));
766     break;
767   }
768   return getCFIFor(Instr);
769 }
770 
771 IndirectBranchType
772 BinaryFunction::processIndirectBranch(MCInst &Instruction, unsigned Size,
773                                       uint64_t Offset,
774                                       uint64_t &TargetAddress) {
775   const unsigned PtrSize = BC.AsmInfo->getCodePointerSize();
776 
777   // The instruction referencing memory used by the branch instruction.
778   // It could be the branch instruction itself or one of the instructions
779   // setting the value of the register used by the branch.
780   MCInst *MemLocInstr;
781 
782   // Address of the table referenced by MemLocInstr. Could be either an
783   // array of function pointers, or a jump table.
784   uint64_t ArrayStart = 0;
785 
786   unsigned BaseRegNum, IndexRegNum;
787   int64_t DispValue;
788   const MCExpr *DispExpr;
789 
790   // In AArch, identify the instruction adding the PC-relative offset to
791   // jump table entries to correctly decode it.
792   MCInst *PCRelBaseInstr;
793   uint64_t PCRelAddr = 0;
794 
795   auto Begin = Instructions.begin();
796   if (BC.isAArch64()) {
797     PreserveNops = BC.HasRelocations;
798     // Start at the last label as an approximation of the current basic block.
799     // This is a heuristic, since the full set of labels have yet to be
800     // determined
801     for (const uint32_t Offset :
802          llvm::make_first_range(llvm::reverse(Labels))) {
803       auto II = Instructions.find(Offset);
804       if (II != Instructions.end()) {
805         Begin = II;
806         break;
807       }
808     }
809   }
810 
811   IndirectBranchType BranchType = BC.MIB->analyzeIndirectBranch(
812       Instruction, Begin, Instructions.end(), PtrSize, MemLocInstr, BaseRegNum,
813       IndexRegNum, DispValue, DispExpr, PCRelBaseInstr);
814 
815   if (BranchType == IndirectBranchType::UNKNOWN && !MemLocInstr)
816     return BranchType;
817 
818   if (MemLocInstr != &Instruction)
819     IndexRegNum = BC.MIB->getNoRegister();
820 
821   if (BC.isAArch64()) {
822     const MCSymbol *Sym = BC.MIB->getTargetSymbol(*PCRelBaseInstr, 1);
823     assert(Sym && "Symbol extraction failed");
824     ErrorOr<uint64_t> SymValueOrError = BC.getSymbolValue(*Sym);
825     if (SymValueOrError) {
826       PCRelAddr = *SymValueOrError;
827     } else {
828       for (std::pair<const uint32_t, MCSymbol *> &Elmt : Labels) {
829         if (Elmt.second == Sym) {
830           PCRelAddr = Elmt.first + getAddress();
831           break;
832         }
833       }
834     }
835     uint64_t InstrAddr = 0;
836     for (auto II = Instructions.rbegin(); II != Instructions.rend(); ++II) {
837       if (&II->second == PCRelBaseInstr) {
838         InstrAddr = II->first + getAddress();
839         break;
840       }
841     }
842     assert(InstrAddr != 0 && "instruction not found");
843     // We do this to avoid spurious references to code locations outside this
844     // function (for example, if the indirect jump lives in the last basic
845     // block of the function, it will create a reference to the next function).
846     // This replaces a symbol reference with an immediate.
847     BC.MIB->replaceMemOperandDisp(*PCRelBaseInstr,
848                                   MCOperand::createImm(PCRelAddr - InstrAddr));
849     // FIXME: Disable full jump table processing for AArch64 until we have a
850     // proper way of determining the jump table limits.
851     return IndirectBranchType::UNKNOWN;
852   }
853 
854   auto getExprValue = [&](const MCExpr *Expr) {
855     const MCSymbol *TargetSym;
856     uint64_t TargetOffset;
857     std::tie(TargetSym, TargetOffset) = BC.MIB->getTargetSymbolInfo(Expr);
858     ErrorOr<uint64_t> SymValueOrError = BC.getSymbolValue(*TargetSym);
859     assert(SymValueOrError && "Global symbol needs a value");
860     return *SymValueOrError + TargetOffset;
861   };
862 
863   // RIP-relative addressing should be converted to symbol form by now
864   // in processed instructions (but not in jump).
865   if (DispExpr) {
866     ArrayStart = getExprValue(DispExpr);
867     BaseRegNum = BC.MIB->getNoRegister();
868     if (BC.isAArch64()) {
869       ArrayStart &= ~0xFFFULL;
870       ArrayStart += DispValue & 0xFFFULL;
871     }
872   } else {
873     ArrayStart = static_cast<uint64_t>(DispValue);
874   }
875 
876   if (BaseRegNum == BC.MRI->getProgramCounter())
877     ArrayStart += getAddress() + Offset + Size;
878 
879   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: addressed memory is 0x"
880                     << Twine::utohexstr(ArrayStart) << '\n');
881 
882   ErrorOr<BinarySection &> Section = BC.getSectionForAddress(ArrayStart);
883   if (!Section) {
884     // No section - possibly an absolute address. Since we don't allow
885     // internal function addresses to escape the function scope - we
886     // consider it a tail call.
887     if (opts::Verbosity >= 1) {
888       BC.errs() << "BOLT-WARNING: no section for address 0x"
889                 << Twine::utohexstr(ArrayStart) << " referenced from function "
890                 << *this << '\n';
891     }
892     return IndirectBranchType::POSSIBLE_TAIL_CALL;
893   }
894   if (Section->isVirtual()) {
895     // The contents are filled at runtime.
896     return IndirectBranchType::POSSIBLE_TAIL_CALL;
897   }
898 
899   if (BranchType == IndirectBranchType::POSSIBLE_FIXED_BRANCH) {
900     ErrorOr<uint64_t> Value = BC.getPointerAtAddress(ArrayStart);
901     if (!Value)
902       return IndirectBranchType::UNKNOWN;
903 
904     if (BC.getSectionForAddress(ArrayStart)->isWritable())
905       return IndirectBranchType::UNKNOWN;
906 
907     BC.outs() << "BOLT-INFO: fixed indirect branch detected in " << *this
908               << " at 0x" << Twine::utohexstr(getAddress() + Offset)
909               << " referencing data at 0x" << Twine::utohexstr(ArrayStart)
910               << " the destination value is 0x" << Twine::utohexstr(*Value)
911               << '\n';
912 
913     TargetAddress = *Value;
914     return BranchType;
915   }
916 
917   // Check if there's already a jump table registered at this address.
918   MemoryContentsType MemType;
919   if (JumpTable *JT = BC.getJumpTableContainingAddress(ArrayStart)) {
920     switch (JT->Type) {
921     case JumpTable::JTT_NORMAL:
922       MemType = MemoryContentsType::POSSIBLE_JUMP_TABLE;
923       break;
924     case JumpTable::JTT_PIC:
925       MemType = MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE;
926       break;
927     }
928   } else {
929     MemType = BC.analyzeMemoryAt(ArrayStart, *this);
930   }
931 
932   // Check that jump table type in instruction pattern matches memory contents.
933   JumpTable::JumpTableType JTType;
934   if (BranchType == IndirectBranchType::POSSIBLE_PIC_JUMP_TABLE) {
935     if (MemType != MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE)
936       return IndirectBranchType::UNKNOWN;
937     JTType = JumpTable::JTT_PIC;
938   } else {
939     if (MemType == MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE)
940       return IndirectBranchType::UNKNOWN;
941 
942     if (MemType == MemoryContentsType::UNKNOWN)
943       return IndirectBranchType::POSSIBLE_TAIL_CALL;
944 
945     BranchType = IndirectBranchType::POSSIBLE_JUMP_TABLE;
946     JTType = JumpTable::JTT_NORMAL;
947   }
948 
949   // Convert the instruction into jump table branch.
950   const MCSymbol *JTLabel = BC.getOrCreateJumpTable(*this, ArrayStart, JTType);
951   BC.MIB->replaceMemOperandDisp(*MemLocInstr, JTLabel, BC.Ctx.get());
952   BC.MIB->setJumpTable(Instruction, ArrayStart, IndexRegNum);
953 
954   JTSites.emplace_back(Offset, ArrayStart);
955 
956   return BranchType;
957 }
958 
959 MCSymbol *BinaryFunction::getOrCreateLocalLabel(uint64_t Address,
960                                                 bool CreatePastEnd) {
961   const uint64_t Offset = Address - getAddress();
962 
963   if ((Offset == getSize()) && CreatePastEnd)
964     return getFunctionEndLabel();
965 
966   auto LI = Labels.find(Offset);
967   if (LI != Labels.end())
968     return LI->second;
969 
970   // For AArch64, check if this address is part of a constant island.
971   if (BC.isAArch64()) {
972     if (MCSymbol *IslandSym = getOrCreateIslandAccess(Address))
973       return IslandSym;
974   }
975 
976   MCSymbol *Label = BC.Ctx->createNamedTempSymbol();
977   Labels[Offset] = Label;
978 
979   return Label;
980 }
981 
982 ErrorOr<ArrayRef<uint8_t>> BinaryFunction::getData() const {
983   BinarySection &Section = *getOriginSection();
984   assert(Section.containsRange(getAddress(), getMaxSize()) &&
985          "wrong section for function");
986 
987   if (!Section.isText() || Section.isVirtual() || !Section.getSize())
988     return std::make_error_code(std::errc::bad_address);
989 
990   StringRef SectionContents = Section.getContents();
991 
992   assert(SectionContents.size() == Section.getSize() &&
993          "section size mismatch");
994 
995   // Function offset from the section start.
996   uint64_t Offset = getAddress() - Section.getAddress();
997   auto *Bytes = reinterpret_cast<const uint8_t *>(SectionContents.data());
998   return ArrayRef<uint8_t>(Bytes + Offset, getMaxSize());
999 }
1000 
1001 size_t BinaryFunction::getSizeOfDataInCodeAt(uint64_t Offset) const {
1002   if (!Islands)
1003     return 0;
1004 
1005   if (!llvm::is_contained(Islands->DataOffsets, Offset))
1006     return 0;
1007 
1008   auto Iter = Islands->CodeOffsets.upper_bound(Offset);
1009   if (Iter != Islands->CodeOffsets.end())
1010     return *Iter - Offset;
1011   return getSize() - Offset;
1012 }
1013 
1014 bool BinaryFunction::isZeroPaddingAt(uint64_t Offset) const {
1015   ArrayRef<uint8_t> FunctionData = *getData();
1016   uint64_t EndOfCode = getSize();
1017   if (Islands) {
1018     auto Iter = Islands->DataOffsets.upper_bound(Offset);
1019     if (Iter != Islands->DataOffsets.end())
1020       EndOfCode = *Iter;
1021   }
1022   for (uint64_t I = Offset; I < EndOfCode; ++I)
1023     if (FunctionData[I] != 0)
1024       return false;
1025 
1026   return true;
1027 }
1028 
1029 Error BinaryFunction::handlePCRelOperand(MCInst &Instruction, uint64_t Address,
1030                                          uint64_t Size) {
1031   auto &MIB = BC.MIB;
1032   uint64_t TargetAddress = 0;
1033   if (!MIB->evaluateMemOperandTarget(Instruction, TargetAddress, Address,
1034                                      Size)) {
1035     std::string Msg;
1036     raw_string_ostream SS(Msg);
1037     SS << "BOLT-ERROR: PC-relative operand can't be evaluated:\n";
1038     BC.InstPrinter->printInst(&Instruction, 0, "", *BC.STI, SS);
1039     SS << '\n';
1040     Instruction.dump_pretty(SS, BC.InstPrinter.get());
1041     SS << '\n';
1042     SS << "BOLT-ERROR: cannot handle PC-relative operand at 0x"
1043        << Twine::utohexstr(Address) << ". Skipping function " << *this << ".\n";
1044     if (BC.HasRelocations)
1045       return createFatalBOLTError(Msg);
1046     IsSimple = false;
1047     return createNonFatalBOLTError(Msg);
1048   }
1049   if (TargetAddress == 0 && opts::Verbosity >= 1) {
1050     BC.outs() << "BOLT-INFO: PC-relative operand is zero in function " << *this
1051               << '\n';
1052   }
1053 
1054   const MCSymbol *TargetSymbol;
1055   uint64_t TargetOffset;
1056   std::tie(TargetSymbol, TargetOffset) =
1057       BC.handleAddressRef(TargetAddress, *this, /*IsPCRel*/ true);
1058 
1059   bool ReplaceSuccess = MIB->replaceMemOperandDisp(
1060       Instruction, TargetSymbol, static_cast<int64_t>(TargetOffset), &*BC.Ctx);
1061   (void)ReplaceSuccess;
1062   assert(ReplaceSuccess && "Failed to replace mem operand with symbol+off.");
1063   return Error::success();
1064 }
1065 
1066 MCSymbol *BinaryFunction::handleExternalReference(MCInst &Instruction,
1067                                                   uint64_t Size,
1068                                                   uint64_t Offset,
1069                                                   uint64_t TargetAddress,
1070                                                   bool &IsCall) {
1071   auto &MIB = BC.MIB;
1072 
1073   const uint64_t AbsoluteInstrAddr = getAddress() + Offset;
1074   BC.addInterproceduralReference(this, TargetAddress);
1075   if (opts::Verbosity >= 2 && !IsCall && Size == 2 && !BC.HasRelocations) {
1076     BC.errs() << "BOLT-WARNING: relaxed tail call detected at 0x"
1077               << Twine::utohexstr(AbsoluteInstrAddr) << " in function " << *this
1078               << ". Code size will be increased.\n";
1079   }
1080 
1081   assert(!MIB->isTailCall(Instruction) &&
1082          "synthetic tail call instruction found");
1083 
1084   // This is a call regardless of the opcode.
1085   // Assign proper opcode for tail calls, so that they could be
1086   // treated as calls.
1087   if (!IsCall) {
1088     if (!MIB->convertJmpToTailCall(Instruction)) {
1089       assert(MIB->isConditionalBranch(Instruction) &&
1090              "unknown tail call instruction");
1091       if (opts::Verbosity >= 2) {
1092         BC.errs() << "BOLT-WARNING: conditional tail call detected in "
1093                   << "function " << *this << " at 0x"
1094                   << Twine::utohexstr(AbsoluteInstrAddr) << ".\n";
1095       }
1096     }
1097     IsCall = true;
1098   }
1099 
1100   if (opts::Verbosity >= 2 && TargetAddress == 0) {
1101     // We actually see calls to address 0 in presence of weak
1102     // symbols originating from libraries. This code is never meant
1103     // to be executed.
1104     BC.outs() << "BOLT-INFO: Function " << *this
1105               << " has a call to address zero.\n";
1106   }
1107 
1108   return BC.getOrCreateGlobalSymbol(TargetAddress, "FUNCat");
1109 }
1110 
1111 void BinaryFunction::handleIndirectBranch(MCInst &Instruction, uint64_t Size,
1112                                           uint64_t Offset) {
1113   auto &MIB = BC.MIB;
1114   uint64_t IndirectTarget = 0;
1115   IndirectBranchType Result =
1116       processIndirectBranch(Instruction, Size, Offset, IndirectTarget);
1117   switch (Result) {
1118   default:
1119     llvm_unreachable("unexpected result");
1120   case IndirectBranchType::POSSIBLE_TAIL_CALL: {
1121     bool Result = MIB->convertJmpToTailCall(Instruction);
1122     (void)Result;
1123     assert(Result);
1124     break;
1125   }
1126   case IndirectBranchType::POSSIBLE_JUMP_TABLE:
1127   case IndirectBranchType::POSSIBLE_PIC_JUMP_TABLE:
1128     if (opts::JumpTables == JTS_NONE)
1129       IsSimple = false;
1130     break;
1131   case IndirectBranchType::POSSIBLE_FIXED_BRANCH: {
1132     if (containsAddress(IndirectTarget)) {
1133       const MCSymbol *TargetSymbol = getOrCreateLocalLabel(IndirectTarget);
1134       Instruction.clear();
1135       MIB->createUncondBranch(Instruction, TargetSymbol, BC.Ctx.get());
1136       TakenBranches.emplace_back(Offset, IndirectTarget - getAddress());
1137       addEntryPointAtOffset(IndirectTarget - getAddress());
1138     } else {
1139       MIB->convertJmpToTailCall(Instruction);
1140       BC.addInterproceduralReference(this, IndirectTarget);
1141     }
1142     break;
1143   }
1144   case IndirectBranchType::UNKNOWN:
1145     // Keep processing. We'll do more checks and fixes in
1146     // postProcessIndirectBranches().
1147     UnknownIndirectBranchOffsets.emplace(Offset);
1148     break;
1149   }
1150 }
1151 
1152 void BinaryFunction::handleAArch64IndirectCall(MCInst &Instruction,
1153                                                const uint64_t Offset) {
1154   auto &MIB = BC.MIB;
1155   const uint64_t AbsoluteInstrAddr = getAddress() + Offset;
1156   MCInst *TargetHiBits, *TargetLowBits;
1157   uint64_t TargetAddress, Count;
1158   Count = MIB->matchLinkerVeneer(Instructions.begin(), Instructions.end(),
1159                                  AbsoluteInstrAddr, Instruction, TargetHiBits,
1160                                  TargetLowBits, TargetAddress);
1161   if (Count) {
1162     MIB->addAnnotation(Instruction, "AArch64Veneer", true);
1163     --Count;
1164     for (auto It = std::prev(Instructions.end()); Count != 0;
1165          It = std::prev(It), --Count) {
1166       MIB->addAnnotation(It->second, "AArch64Veneer", true);
1167     }
1168 
1169     BC.addAdrpAddRelocAArch64(*this, *TargetLowBits, *TargetHiBits,
1170                               TargetAddress);
1171   }
1172 }
1173 
1174 std::optional<MCInst>
1175 BinaryFunction::disassembleInstructionAtOffset(uint64_t Offset) const {
1176   assert(CurrentState == State::Empty && "Function should not be disassembled");
1177   assert(Offset < MaxSize && "Invalid offset");
1178   ErrorOr<ArrayRef<unsigned char>> FunctionData = getData();
1179   assert(FunctionData && "Cannot get function as data");
1180   MCInst Instr;
1181   uint64_t InstrSize = 0;
1182   const uint64_t InstrAddress = getAddress() + Offset;
1183   if (BC.DisAsm->getInstruction(Instr, InstrSize, FunctionData->slice(Offset),
1184                                 InstrAddress, nulls()))
1185     return Instr;
1186   return std::nullopt;
1187 }
1188 
1189 Error BinaryFunction::disassemble() {
1190   NamedRegionTimer T("disassemble", "Disassemble function", "buildfuncs",
1191                      "Build Binary Functions", opts::TimeBuild);
1192   ErrorOr<ArrayRef<uint8_t>> ErrorOrFunctionData = getData();
1193   assert(ErrorOrFunctionData && "function data is not available");
1194   ArrayRef<uint8_t> FunctionData = *ErrorOrFunctionData;
1195   assert(FunctionData.size() == getMaxSize() &&
1196          "function size does not match raw data size");
1197 
1198   auto &Ctx = BC.Ctx;
1199   auto &MIB = BC.MIB;
1200 
1201   BC.SymbolicDisAsm->setSymbolizer(MIB->createTargetSymbolizer(*this));
1202 
1203   // Insert a label at the beginning of the function. This will be our first
1204   // basic block.
1205   Labels[0] = Ctx->createNamedTempSymbol("BB0");
1206 
1207   // Map offsets in the function to a label that should always point to the
1208   // corresponding instruction. This is used for labels that shouldn't point to
1209   // the start of a basic block but always to a specific instruction. This is
1210   // used, for example, on RISC-V where %pcrel_lo relocations point to the
1211   // corresponding %pcrel_hi.
1212   LabelsMapType InstructionLabels;
1213 
1214   uint64_t Size = 0; // instruction size
1215   for (uint64_t Offset = 0; Offset < getSize(); Offset += Size) {
1216     MCInst Instruction;
1217     const uint64_t AbsoluteInstrAddr = getAddress() + Offset;
1218 
1219     // Check for data inside code and ignore it
1220     if (const size_t DataInCodeSize = getSizeOfDataInCodeAt(Offset)) {
1221       Size = DataInCodeSize;
1222       continue;
1223     }
1224 
1225     if (!BC.SymbolicDisAsm->getInstruction(Instruction, Size,
1226                                            FunctionData.slice(Offset),
1227                                            AbsoluteInstrAddr, nulls())) {
1228       // Functions with "soft" boundaries, e.g. coming from assembly source,
1229       // can have 0-byte padding at the end.
1230       if (isZeroPaddingAt(Offset))
1231         break;
1232 
1233       BC.errs()
1234           << "BOLT-WARNING: unable to disassemble instruction at offset 0x"
1235           << Twine::utohexstr(Offset) << " (address 0x"
1236           << Twine::utohexstr(AbsoluteInstrAddr) << ") in function " << *this
1237           << '\n';
1238       // Some AVX-512 instructions could not be disassembled at all.
1239       if (BC.HasRelocations && opts::TrapOnAVX512 && BC.isX86()) {
1240         setTrapOnEntry();
1241         BC.TrappedFunctions.push_back(this);
1242       } else {
1243         setIgnored();
1244       }
1245 
1246       break;
1247     }
1248 
1249     // Check integrity of LLVM assembler/disassembler.
1250     if (opts::CheckEncoding && !BC.MIB->isBranch(Instruction) &&
1251         !BC.MIB->isCall(Instruction) && !BC.MIB->isNoop(Instruction)) {
1252       if (!BC.validateInstructionEncoding(FunctionData.slice(Offset, Size))) {
1253         BC.errs() << "BOLT-WARNING: mismatching LLVM encoding detected in "
1254                   << "function " << *this << " for instruction :\n";
1255         BC.printInstruction(BC.errs(), Instruction, AbsoluteInstrAddr);
1256         BC.errs() << '\n';
1257       }
1258     }
1259 
1260     // Special handling for AVX-512 instructions.
1261     if (MIB->hasEVEXEncoding(Instruction)) {
1262       if (BC.HasRelocations && opts::TrapOnAVX512) {
1263         setTrapOnEntry();
1264         BC.TrappedFunctions.push_back(this);
1265         break;
1266       }
1267 
1268       if (!BC.validateInstructionEncoding(FunctionData.slice(Offset, Size))) {
1269         BC.errs() << "BOLT-WARNING: internal assembler/disassembler error "
1270                      "detected for AVX512 instruction:\n";
1271         BC.printInstruction(BC.errs(), Instruction, AbsoluteInstrAddr);
1272         BC.errs() << " in function " << *this << '\n';
1273         setIgnored();
1274         break;
1275       }
1276     }
1277 
1278     if (MIB->isBranch(Instruction) || MIB->isCall(Instruction)) {
1279       uint64_t TargetAddress = 0;
1280       if (MIB->evaluateBranch(Instruction, AbsoluteInstrAddr, Size,
1281                               TargetAddress)) {
1282         // Check if the target is within the same function. Otherwise it's
1283         // a call, possibly a tail call.
1284         //
1285         // If the target *is* the function address it could be either a branch
1286         // or a recursive call.
1287         bool IsCall = MIB->isCall(Instruction);
1288         const bool IsCondBranch = MIB->isConditionalBranch(Instruction);
1289         MCSymbol *TargetSymbol = nullptr;
1290 
1291         if (!BC.MIB->isReversibleBranch(Instruction)) {
1292           setIgnored();
1293           if (BinaryFunction *TargetFunc =
1294                   BC.getBinaryFunctionContainingAddress(TargetAddress))
1295             TargetFunc->setIgnored();
1296         }
1297 
1298         if (IsCall && containsAddress(TargetAddress)) {
1299           if (TargetAddress == getAddress()) {
1300             // Recursive call.
1301             TargetSymbol = getSymbol();
1302           } else {
1303             if (BC.isX86()) {
1304               // Dangerous old-style x86 PIC code. We may need to freeze this
1305               // function, so preserve the function as is for now.
1306               PreserveNops = true;
1307             } else {
1308               BC.errs() << "BOLT-WARNING: internal call detected at 0x"
1309                         << Twine::utohexstr(AbsoluteInstrAddr)
1310                         << " in function " << *this << ". Skipping.\n";
1311               IsSimple = false;
1312             }
1313           }
1314         }
1315 
1316         if (!TargetSymbol) {
1317           // Create either local label or external symbol.
1318           if (containsAddress(TargetAddress)) {
1319             TargetSymbol = getOrCreateLocalLabel(TargetAddress);
1320           } else {
1321             if (TargetAddress == getAddress() + getSize() &&
1322                 TargetAddress < getAddress() + getMaxSize() &&
1323                 !(BC.isAArch64() &&
1324                   BC.handleAArch64Veneer(TargetAddress, /*MatchOnly*/ true))) {
1325               // Result of __builtin_unreachable().
1326               LLVM_DEBUG(dbgs() << "BOLT-DEBUG: jump past end detected at 0x"
1327                                 << Twine::utohexstr(AbsoluteInstrAddr)
1328                                 << " in function " << *this
1329                                 << " : replacing with nop.\n");
1330               BC.MIB->createNoop(Instruction);
1331               if (IsCondBranch) {
1332                 // Register branch offset for profile validation.
1333                 IgnoredBranches.emplace_back(Offset, Offset + Size);
1334               }
1335               goto add_instruction;
1336             }
1337             // May update Instruction and IsCall
1338             TargetSymbol = handleExternalReference(Instruction, Size, Offset,
1339                                                    TargetAddress, IsCall);
1340           }
1341         }
1342 
1343         if (!IsCall) {
1344           // Add taken branch info.
1345           TakenBranches.emplace_back(Offset, TargetAddress - getAddress());
1346         }
1347         BC.MIB->replaceBranchTarget(Instruction, TargetSymbol, &*Ctx);
1348 
1349         // Mark CTC.
1350         if (IsCondBranch && IsCall)
1351           MIB->setConditionalTailCall(Instruction, TargetAddress);
1352       } else {
1353         // Could not evaluate branch. Should be an indirect call or an
1354         // indirect branch. Bail out on the latter case.
1355         if (MIB->isIndirectBranch(Instruction))
1356           handleIndirectBranch(Instruction, Size, Offset);
1357         // Indirect call. We only need to fix it if the operand is RIP-relative.
1358         if (IsSimple && MIB->hasPCRelOperand(Instruction)) {
1359           if (auto NewE = handleErrors(
1360                   handlePCRelOperand(Instruction, AbsoluteInstrAddr, Size),
1361                   [&](const BOLTError &E) -> Error {
1362                     if (E.isFatal())
1363                       return Error(std::make_unique<BOLTError>(std::move(E)));
1364                     if (!E.getMessage().empty())
1365                       E.log(BC.errs());
1366                     return Error::success();
1367                   })) {
1368             return Error(std::move(NewE));
1369           }
1370         }
1371 
1372         if (BC.isAArch64())
1373           handleAArch64IndirectCall(Instruction, Offset);
1374       }
1375     } else if (BC.isAArch64() || BC.isRISCV()) {
1376       // Check if there's a relocation associated with this instruction.
1377       bool UsedReloc = false;
1378       for (auto Itr = Relocations.lower_bound(Offset),
1379                 ItrE = Relocations.lower_bound(Offset + Size);
1380            Itr != ItrE; ++Itr) {
1381         const Relocation &Relocation = Itr->second;
1382         MCSymbol *Symbol = Relocation.Symbol;
1383 
1384         if (Relocation::isInstructionReference(Relocation.Type)) {
1385           uint64_t RefOffset = Relocation.Value - getAddress();
1386           LabelsMapType::iterator LI = InstructionLabels.find(RefOffset);
1387 
1388           if (LI == InstructionLabels.end()) {
1389             Symbol = BC.Ctx->createNamedTempSymbol();
1390             InstructionLabels.emplace(RefOffset, Symbol);
1391           } else {
1392             Symbol = LI->second;
1393           }
1394         }
1395 
1396         int64_t Value = Relocation.Value;
1397         const bool Result = BC.MIB->replaceImmWithSymbolRef(
1398             Instruction, Symbol, Relocation.Addend, Ctx.get(), Value,
1399             Relocation.Type);
1400         (void)Result;
1401         assert(Result && "cannot replace immediate with relocation");
1402 
1403         // For aarch64, if we replaced an immediate with a symbol from a
1404         // relocation, we mark it so we do not try to further process a
1405         // pc-relative operand. All we need is the symbol.
1406         UsedReloc = true;
1407       }
1408 
1409       if (!BC.isRISCV() && MIB->hasPCRelOperand(Instruction) && !UsedReloc) {
1410         if (auto NewE = handleErrors(
1411                 handlePCRelOperand(Instruction, AbsoluteInstrAddr, Size),
1412                 [&](const BOLTError &E) -> Error {
1413                   if (E.isFatal())
1414                     return Error(std::make_unique<BOLTError>(std::move(E)));
1415                   if (!E.getMessage().empty())
1416                     E.log(BC.errs());
1417                   return Error::success();
1418                 }))
1419           return Error(std::move(NewE));
1420       }
1421     }
1422 
1423 add_instruction:
1424     if (getDWARFLineTable()) {
1425       Instruction.setLoc(findDebugLineInformationForInstructionAt(
1426           AbsoluteInstrAddr, getDWARFUnit(), getDWARFLineTable()));
1427     }
1428 
1429     // Record offset of the instruction for profile matching.
1430     if (BC.keepOffsetForInstruction(Instruction))
1431       MIB->setOffset(Instruction, static_cast<uint32_t>(Offset));
1432 
1433     if (BC.isX86() && BC.MIB->isNoop(Instruction)) {
1434       // NOTE: disassembly loses the correct size information for noops on x86.
1435       //       E.g. nopw 0x0(%rax,%rax,1) is 9 bytes, but re-encoded it's only
1436       //       5 bytes. Preserve the size info using annotations.
1437       MIB->setSize(Instruction, Size);
1438     }
1439 
1440     addInstruction(Offset, std::move(Instruction));
1441   }
1442 
1443   for (auto [Offset, Label] : InstructionLabels) {
1444     InstrMapType::iterator II = Instructions.find(Offset);
1445     assert(II != Instructions.end() && "reference to non-existing instruction");
1446 
1447     BC.MIB->setInstLabel(II->second, Label);
1448   }
1449 
1450   // Reset symbolizer for the disassembler.
1451   BC.SymbolicDisAsm->setSymbolizer(nullptr);
1452 
1453   if (uint64_t Offset = getFirstInstructionOffset())
1454     Labels[Offset] = BC.Ctx->createNamedTempSymbol();
1455 
1456   clearList(Relocations);
1457 
1458   if (!IsSimple) {
1459     clearList(Instructions);
1460     return createNonFatalBOLTError("");
1461   }
1462 
1463   updateState(State::Disassembled);
1464 
1465   return Error::success();
1466 }
1467 
1468 MCSymbol *BinaryFunction::registerBranch(uint64_t Src, uint64_t Dst) {
1469   assert(CurrentState == State::Disassembled &&
1470          "Cannot register branch unless function is in disassembled state.");
1471   assert(containsAddress(Src) && containsAddress(Dst) &&
1472          "Cannot register external branch.");
1473   MCSymbol *Target = getOrCreateLocalLabel(Dst);
1474   TakenBranches.emplace_back(Src - getAddress(), Dst - getAddress());
1475   return Target;
1476 }
1477 
1478 bool BinaryFunction::scanExternalRefs() {
1479   bool Success = true;
1480   bool DisassemblyFailed = false;
1481 
1482   // Ignore pseudo functions.
1483   if (isPseudo())
1484     return Success;
1485 
1486   if (opts::NoScan) {
1487     clearList(Relocations);
1488     clearList(ExternallyReferencedOffsets);
1489 
1490     return false;
1491   }
1492 
1493   // List of external references for this function.
1494   std::vector<Relocation> FunctionRelocations;
1495 
1496   static BinaryContext::IndependentCodeEmitter Emitter =
1497       BC.createIndependentMCCodeEmitter();
1498 
1499   ErrorOr<ArrayRef<uint8_t>> ErrorOrFunctionData = getData();
1500   assert(ErrorOrFunctionData && "function data is not available");
1501   ArrayRef<uint8_t> FunctionData = *ErrorOrFunctionData;
1502   assert(FunctionData.size() == getMaxSize() &&
1503          "function size does not match raw data size");
1504 
1505   BC.SymbolicDisAsm->setSymbolizer(
1506       BC.MIB->createTargetSymbolizer(*this, /*CreateSymbols*/ false));
1507 
1508   // Disassemble contents of the function. Detect code entry points and create
1509   // relocations for references to code that will be moved.
1510   uint64_t Size = 0; // instruction size
1511   for (uint64_t Offset = 0; Offset < getSize(); Offset += Size) {
1512     // Check for data inside code and ignore it
1513     if (const size_t DataInCodeSize = getSizeOfDataInCodeAt(Offset)) {
1514       Size = DataInCodeSize;
1515       continue;
1516     }
1517 
1518     const uint64_t AbsoluteInstrAddr = getAddress() + Offset;
1519     MCInst Instruction;
1520     if (!BC.SymbolicDisAsm->getInstruction(Instruction, Size,
1521                                            FunctionData.slice(Offset),
1522                                            AbsoluteInstrAddr, nulls())) {
1523       if (opts::Verbosity >= 1 && !isZeroPaddingAt(Offset)) {
1524         BC.errs()
1525             << "BOLT-WARNING: unable to disassemble instruction at offset 0x"
1526             << Twine::utohexstr(Offset) << " (address 0x"
1527             << Twine::utohexstr(AbsoluteInstrAddr) << ") in function " << *this
1528             << '\n';
1529       }
1530       Success = false;
1531       DisassemblyFailed = true;
1532       break;
1533     }
1534 
1535     // Return true if we can skip handling the Target function reference.
1536     auto ignoreFunctionRef = [&](const BinaryFunction &Target) {
1537       if (&Target == this)
1538         return true;
1539 
1540       // Note that later we may decide not to emit Target function. In that
1541       // case, we conservatively create references that will be ignored or
1542       // resolved to the same function.
1543       if (!BC.shouldEmit(Target))
1544         return true;
1545 
1546       return false;
1547     };
1548 
1549     // Return true if we can ignore reference to the symbol.
1550     auto ignoreReference = [&](const MCSymbol *TargetSymbol) {
1551       if (!TargetSymbol)
1552         return true;
1553 
1554       if (BC.forceSymbolRelocations(TargetSymbol->getName()))
1555         return false;
1556 
1557       BinaryFunction *TargetFunction = BC.getFunctionForSymbol(TargetSymbol);
1558       if (!TargetFunction)
1559         return true;
1560 
1561       return ignoreFunctionRef(*TargetFunction);
1562     };
1563 
1564     // Handle calls and branches separately as symbolization doesn't work for
1565     // them yet.
1566     MCSymbol *BranchTargetSymbol = nullptr;
1567     if (BC.MIB->isCall(Instruction) || BC.MIB->isBranch(Instruction)) {
1568       uint64_t TargetAddress = 0;
1569       BC.MIB->evaluateBranch(Instruction, AbsoluteInstrAddr, Size,
1570                              TargetAddress);
1571 
1572       // Create an entry point at reference address if needed.
1573       BinaryFunction *TargetFunction =
1574           BC.getBinaryFunctionContainingAddress(TargetAddress);
1575 
1576       if (!TargetFunction || ignoreFunctionRef(*TargetFunction))
1577         continue;
1578 
1579       const uint64_t FunctionOffset =
1580           TargetAddress - TargetFunction->getAddress();
1581       BranchTargetSymbol =
1582           FunctionOffset ? TargetFunction->addEntryPointAtOffset(FunctionOffset)
1583                          : TargetFunction->getSymbol();
1584     }
1585 
1586     // Can't find more references. Not creating relocations since we are not
1587     // moving code.
1588     if (!BC.HasRelocations)
1589       continue;
1590 
1591     if (BranchTargetSymbol) {
1592       BC.MIB->replaceBranchTarget(Instruction, BranchTargetSymbol,
1593                                   Emitter.LocalCtx.get());
1594     } else if (!llvm::any_of(Instruction,
1595                              [](const MCOperand &Op) { return Op.isExpr(); })) {
1596       // Skip assembly if the instruction may not have any symbolic operands.
1597       continue;
1598     }
1599 
1600     // Emit the instruction using temp emitter and generate relocations.
1601     SmallString<256> Code;
1602     SmallVector<MCFixup, 4> Fixups;
1603     Emitter.MCE->encodeInstruction(Instruction, Code, Fixups, *BC.STI);
1604 
1605     // Create relocation for every fixup.
1606     for (const MCFixup &Fixup : Fixups) {
1607       std::optional<Relocation> Rel = BC.MIB->createRelocation(Fixup, *BC.MAB);
1608       if (!Rel) {
1609         Success = false;
1610         continue;
1611       }
1612 
1613       if (ignoreReference(Rel->Symbol))
1614         continue;
1615 
1616       if (Relocation::getSizeForType(Rel->Type) < 4) {
1617         // If the instruction uses a short form, then we might not be able
1618         // to handle the rewrite without relaxation, and hence cannot reliably
1619         // create an external reference relocation.
1620         Success = false;
1621         continue;
1622       }
1623       Rel->Offset += getAddress() - getOriginSection()->getAddress() + Offset;
1624       FunctionRelocations.push_back(*Rel);
1625     }
1626 
1627     if (!Success)
1628       break;
1629   }
1630 
1631   // Reset symbolizer for the disassembler.
1632   BC.SymbolicDisAsm->setSymbolizer(nullptr);
1633 
1634   // Add relocations unless disassembly failed for this function.
1635   if (!DisassemblyFailed)
1636     for (Relocation &Rel : FunctionRelocations)
1637       getOriginSection()->addPendingRelocation(Rel);
1638 
1639   // Inform BinaryContext that this function symbols will not be defined and
1640   // relocations should not be created against them.
1641   if (BC.HasRelocations) {
1642     for (std::pair<const uint32_t, MCSymbol *> &LI : Labels)
1643       BC.UndefinedSymbols.insert(LI.second);
1644     for (MCSymbol *const EndLabel : FunctionEndLabels)
1645       if (EndLabel)
1646         BC.UndefinedSymbols.insert(EndLabel);
1647   }
1648 
1649   clearList(Relocations);
1650   clearList(ExternallyReferencedOffsets);
1651 
1652   if (Success && BC.HasRelocations)
1653     HasExternalRefRelocations = true;
1654 
1655   if (opts::Verbosity >= 1 && !Success)
1656     BC.outs() << "BOLT-INFO: failed to scan refs for  " << *this << '\n';
1657 
1658   return Success;
1659 }
1660 
1661 void BinaryFunction::postProcessEntryPoints() {
1662   if (!isSimple())
1663     return;
1664 
1665   for (auto &KV : Labels) {
1666     MCSymbol *Label = KV.second;
1667     if (!getSecondaryEntryPointSymbol(Label))
1668       continue;
1669 
1670     // In non-relocation mode there's potentially an external undetectable
1671     // reference to the entry point and hence we cannot move this entry
1672     // point. Optimizing without moving could be difficult.
1673     // In BAT mode, register any known entry points for CFG construction.
1674     if (!BC.HasRelocations && !BC.HasBATSection)
1675       setSimple(false);
1676 
1677     const uint32_t Offset = KV.first;
1678 
1679     // If we are at Offset 0 and there is no instruction associated with it,
1680     // this means this is an empty function. Just ignore. If we find an
1681     // instruction at this offset, this entry point is valid.
1682     if (!Offset || getInstructionAtOffset(Offset))
1683       continue;
1684 
1685     // On AArch64 there are legitimate reasons to have references past the
1686     // end of the function, e.g. jump tables.
1687     if (BC.isAArch64() && Offset == getSize())
1688       continue;
1689 
1690     BC.errs() << "BOLT-WARNING: reference in the middle of instruction "
1691                  "detected in function "
1692               << *this << " at offset 0x" << Twine::utohexstr(Offset) << '\n';
1693     if (BC.HasRelocations)
1694       setIgnored();
1695     setSimple(false);
1696     return;
1697   }
1698 }
1699 
1700 void BinaryFunction::postProcessJumpTables() {
1701   // Create labels for all entries.
1702   for (auto &JTI : JumpTables) {
1703     JumpTable &JT = *JTI.second;
1704     if (JT.Type == JumpTable::JTT_PIC && opts::JumpTables == JTS_BASIC) {
1705       opts::JumpTables = JTS_MOVE;
1706       BC.outs() << "BOLT-INFO: forcing -jump-tables=move as PIC jump table was "
1707                    "detected in function "
1708                 << *this << '\n';
1709     }
1710     const uint64_t BDSize =
1711         BC.getBinaryDataAtAddress(JT.getAddress())->getSize();
1712     if (!BDSize) {
1713       BC.setBinaryDataSize(JT.getAddress(), JT.getSize());
1714     } else {
1715       assert(BDSize >= JT.getSize() &&
1716              "jump table cannot be larger than the containing object");
1717     }
1718     if (!JT.Entries.empty())
1719       continue;
1720 
1721     bool HasOneParent = (JT.Parents.size() == 1);
1722     for (uint64_t EntryAddress : JT.EntriesAsAddress) {
1723       // builtin_unreachable does not belong to any function
1724       // Need to handle separately
1725       bool IsBuiltinUnreachable =
1726           llvm::any_of(JT.Parents, [&](const BinaryFunction *Parent) {
1727             return EntryAddress == Parent->getAddress() + Parent->getSize();
1728           });
1729       if (IsBuiltinUnreachable) {
1730         MCSymbol *Label = getOrCreateLocalLabel(EntryAddress, true);
1731         JT.Entries.push_back(Label);
1732         continue;
1733       }
1734       // Create a local label for targets that cannot be reached by other
1735       // fragments. Otherwise, create a secondary entry point in the target
1736       // function.
1737       BinaryFunction *TargetBF =
1738           BC.getBinaryFunctionContainingAddress(EntryAddress);
1739       MCSymbol *Label;
1740       if (HasOneParent && TargetBF == this) {
1741         Label = getOrCreateLocalLabel(EntryAddress, true);
1742       } else {
1743         const uint64_t Offset = EntryAddress - TargetBF->getAddress();
1744         Label = Offset ? TargetBF->addEntryPointAtOffset(Offset)
1745                        : TargetBF->getSymbol();
1746       }
1747       JT.Entries.push_back(Label);
1748     }
1749   }
1750 
1751   // Add TakenBranches from JumpTables.
1752   //
1753   // We want to do it after initial processing since we don't know jump tables'
1754   // boundaries until we process them all.
1755   for (auto &JTSite : JTSites) {
1756     const uint64_t JTSiteOffset = JTSite.first;
1757     const uint64_t JTAddress = JTSite.second;
1758     const JumpTable *JT = getJumpTableContainingAddress(JTAddress);
1759     assert(JT && "cannot find jump table for address");
1760 
1761     uint64_t EntryOffset = JTAddress - JT->getAddress();
1762     while (EntryOffset < JT->getSize()) {
1763       uint64_t EntryAddress = JT->EntriesAsAddress[EntryOffset / JT->EntrySize];
1764       uint64_t TargetOffset = EntryAddress - getAddress();
1765       if (TargetOffset < getSize()) {
1766         TakenBranches.emplace_back(JTSiteOffset, TargetOffset);
1767 
1768         if (opts::StrictMode)
1769           registerReferencedOffset(TargetOffset);
1770       }
1771 
1772       EntryOffset += JT->EntrySize;
1773 
1774       // A label at the next entry means the end of this jump table.
1775       if (JT->Labels.count(EntryOffset))
1776         break;
1777     }
1778   }
1779   clearList(JTSites);
1780 
1781   // Conservatively populate all possible destinations for unknown indirect
1782   // branches.
1783   if (opts::StrictMode && hasInternalReference()) {
1784     for (uint64_t Offset : UnknownIndirectBranchOffsets) {
1785       for (uint64_t PossibleDestination : ExternallyReferencedOffsets) {
1786         // Ignore __builtin_unreachable().
1787         if (PossibleDestination == getSize())
1788           continue;
1789         TakenBranches.emplace_back(Offset, PossibleDestination);
1790       }
1791     }
1792   }
1793 }
1794 
1795 bool BinaryFunction::validateExternallyReferencedOffsets() {
1796   SmallPtrSet<MCSymbol *, 4> JTTargets;
1797   for (const JumpTable *JT : llvm::make_second_range(JumpTables))
1798     JTTargets.insert(JT->Entries.begin(), JT->Entries.end());
1799 
1800   bool HasUnclaimedReference = false;
1801   for (uint64_t Destination : ExternallyReferencedOffsets) {
1802     // Ignore __builtin_unreachable().
1803     if (Destination == getSize())
1804       continue;
1805     // Ignore constant islands
1806     if (isInConstantIsland(Destination + getAddress()))
1807       continue;
1808 
1809     if (BinaryBasicBlock *BB = getBasicBlockAtOffset(Destination)) {
1810       // Check if the externally referenced offset is a recognized jump table
1811       // target.
1812       if (JTTargets.contains(BB->getLabel()))
1813         continue;
1814 
1815       if (opts::Verbosity >= 1) {
1816         BC.errs() << "BOLT-WARNING: unclaimed data to code reference (possibly "
1817                   << "an unrecognized jump table entry) to " << BB->getName()
1818                   << " in " << *this << "\n";
1819       }
1820       auto L = BC.scopeLock();
1821       addEntryPoint(*BB);
1822     } else {
1823       BC.errs() << "BOLT-WARNING: unknown data to code reference to offset "
1824                 << Twine::utohexstr(Destination) << " in " << *this << "\n";
1825       setIgnored();
1826     }
1827     HasUnclaimedReference = true;
1828   }
1829   return !HasUnclaimedReference;
1830 }
1831 
1832 bool BinaryFunction::postProcessIndirectBranches(
1833     MCPlusBuilder::AllocatorIdTy AllocId) {
1834   auto addUnknownControlFlow = [&](BinaryBasicBlock &BB) {
1835     LLVM_DEBUG(dbgs() << "BOLT-DEBUG: adding unknown control flow in " << *this
1836                       << " for " << BB.getName() << "\n");
1837     HasUnknownControlFlow = true;
1838     BB.removeAllSuccessors();
1839     for (uint64_t PossibleDestination : ExternallyReferencedOffsets)
1840       if (BinaryBasicBlock *SuccBB = getBasicBlockAtOffset(PossibleDestination))
1841         BB.addSuccessor(SuccBB);
1842   };
1843 
1844   uint64_t NumIndirectJumps = 0;
1845   MCInst *LastIndirectJump = nullptr;
1846   BinaryBasicBlock *LastIndirectJumpBB = nullptr;
1847   uint64_t LastJT = 0;
1848   uint16_t LastJTIndexReg = BC.MIB->getNoRegister();
1849   for (BinaryBasicBlock &BB : blocks()) {
1850     for (BinaryBasicBlock::iterator II = BB.begin(); II != BB.end(); ++II) {
1851       MCInst &Instr = *II;
1852       if (!BC.MIB->isIndirectBranch(Instr))
1853         continue;
1854 
1855       // If there's an indirect branch in a single-block function -
1856       // it must be a tail call.
1857       if (BasicBlocks.size() == 1) {
1858         BC.MIB->convertJmpToTailCall(Instr);
1859         return true;
1860       }
1861 
1862       ++NumIndirectJumps;
1863 
1864       if (opts::StrictMode && !hasInternalReference()) {
1865         BC.MIB->convertJmpToTailCall(Instr);
1866         break;
1867       }
1868 
1869       // Validate the tail call or jump table assumptions now that we know
1870       // basic block boundaries.
1871       if (BC.MIB->isTailCall(Instr) || BC.MIB->getJumpTable(Instr)) {
1872         const unsigned PtrSize = BC.AsmInfo->getCodePointerSize();
1873         MCInst *MemLocInstr;
1874         unsigned BaseRegNum, IndexRegNum;
1875         int64_t DispValue;
1876         const MCExpr *DispExpr;
1877         MCInst *PCRelBaseInstr;
1878         IndirectBranchType Type = BC.MIB->analyzeIndirectBranch(
1879             Instr, BB.begin(), II, PtrSize, MemLocInstr, BaseRegNum,
1880             IndexRegNum, DispValue, DispExpr, PCRelBaseInstr);
1881         if (Type != IndirectBranchType::UNKNOWN || MemLocInstr != nullptr)
1882           continue;
1883 
1884         if (!opts::StrictMode)
1885           return false;
1886 
1887         if (BC.MIB->isTailCall(Instr)) {
1888           BC.MIB->convertTailCallToJmp(Instr);
1889         } else {
1890           LastIndirectJump = &Instr;
1891           LastIndirectJumpBB = &BB;
1892           LastJT = BC.MIB->getJumpTable(Instr);
1893           LastJTIndexReg = BC.MIB->getJumpTableIndexReg(Instr);
1894           BC.MIB->unsetJumpTable(Instr);
1895 
1896           JumpTable *JT = BC.getJumpTableContainingAddress(LastJT);
1897           if (JT->Type == JumpTable::JTT_NORMAL) {
1898             // Invalidating the jump table may also invalidate other jump table
1899             // boundaries. Until we have/need a support for this, mark the
1900             // function as non-simple.
1901             LLVM_DEBUG(dbgs() << "BOLT-DEBUG: rejected jump table reference"
1902                               << JT->getName() << " in " << *this << '\n');
1903             return false;
1904           }
1905         }
1906 
1907         addUnknownControlFlow(BB);
1908         continue;
1909       }
1910 
1911       // If this block contains an epilogue code and has an indirect branch,
1912       // then most likely it's a tail call. Otherwise, we cannot tell for sure
1913       // what it is and conservatively reject the function's CFG.
1914       bool IsEpilogue = llvm::any_of(BB, [&](const MCInst &Instr) {
1915         return BC.MIB->isLeave(Instr) || BC.MIB->isPop(Instr);
1916       });
1917       if (IsEpilogue) {
1918         BC.MIB->convertJmpToTailCall(Instr);
1919         BB.removeAllSuccessors();
1920         continue;
1921       }
1922 
1923       if (opts::Verbosity >= 2) {
1924         BC.outs() << "BOLT-INFO: rejected potential indirect tail call in "
1925                   << "function " << *this << " in basic block " << BB.getName()
1926                   << ".\n";
1927         LLVM_DEBUG(BC.printInstructions(dbgs(), BB.begin(), BB.end(),
1928                                         BB.getOffset(), this, true));
1929       }
1930 
1931       if (!opts::StrictMode)
1932         return false;
1933 
1934       addUnknownControlFlow(BB);
1935     }
1936   }
1937 
1938   if (HasInternalLabelReference)
1939     return false;
1940 
1941   // If there's only one jump table, and one indirect jump, and no other
1942   // references, then we should be able to derive the jump table even if we
1943   // fail to match the pattern.
1944   if (HasUnknownControlFlow && NumIndirectJumps == 1 &&
1945       JumpTables.size() == 1 && LastIndirectJump &&
1946       !BC.getJumpTableContainingAddress(LastJT)->IsSplit) {
1947     LLVM_DEBUG(dbgs() << "BOLT-DEBUG: unsetting unknown control flow in "
1948                       << *this << '\n');
1949     BC.MIB->setJumpTable(*LastIndirectJump, LastJT, LastJTIndexReg, AllocId);
1950     HasUnknownControlFlow = false;
1951 
1952     LastIndirectJumpBB->updateJumpTableSuccessors();
1953   }
1954 
1955   // Validate that all data references to function offsets are claimed by
1956   // recognized jump tables. Register externally referenced blocks as entry
1957   // points.
1958   if (!opts::StrictMode && hasInternalReference()) {
1959     if (!validateExternallyReferencedOffsets())
1960       return false;
1961   }
1962 
1963   if (HasUnknownControlFlow && !BC.HasRelocations)
1964     return false;
1965 
1966   return true;
1967 }
1968 
1969 void BinaryFunction::recomputeLandingPads() {
1970   updateBBIndices(0);
1971 
1972   for (BinaryBasicBlock *BB : BasicBlocks) {
1973     BB->LandingPads.clear();
1974     BB->Throwers.clear();
1975   }
1976 
1977   for (BinaryBasicBlock *BB : BasicBlocks) {
1978     std::unordered_set<const BinaryBasicBlock *> BBLandingPads;
1979     for (MCInst &Instr : *BB) {
1980       if (!BC.MIB->isInvoke(Instr))
1981         continue;
1982 
1983       const std::optional<MCPlus::MCLandingPad> EHInfo =
1984           BC.MIB->getEHInfo(Instr);
1985       if (!EHInfo || !EHInfo->first)
1986         continue;
1987 
1988       BinaryBasicBlock *LPBlock = getBasicBlockForLabel(EHInfo->first);
1989       if (!BBLandingPads.count(LPBlock)) {
1990         BBLandingPads.insert(LPBlock);
1991         BB->LandingPads.emplace_back(LPBlock);
1992         LPBlock->Throwers.emplace_back(BB);
1993       }
1994     }
1995   }
1996 }
1997 
1998 Error BinaryFunction::buildCFG(MCPlusBuilder::AllocatorIdTy AllocatorId) {
1999   auto &MIB = BC.MIB;
2000 
2001   if (!isSimple()) {
2002     assert(!BC.HasRelocations &&
2003            "cannot process file with non-simple function in relocs mode");
2004     return createNonFatalBOLTError("");
2005   }
2006 
2007   if (CurrentState != State::Disassembled)
2008     return createNonFatalBOLTError("");
2009 
2010   assert(BasicBlocks.empty() && "basic block list should be empty");
2011   assert((Labels.find(getFirstInstructionOffset()) != Labels.end()) &&
2012          "first instruction should always have a label");
2013 
2014   // Create basic blocks in the original layout order:
2015   //
2016   //  * Every instruction with associated label marks
2017   //    the beginning of a basic block.
2018   //  * Conditional instruction marks the end of a basic block,
2019   //    except when the following instruction is an
2020   //    unconditional branch, and the unconditional branch is not
2021   //    a destination of another branch. In the latter case, the
2022   //    basic block will consist of a single unconditional branch
2023   //    (missed "double-jump" optimization).
2024   //
2025   // Created basic blocks are sorted in layout order since they are
2026   // created in the same order as instructions, and instructions are
2027   // sorted by offsets.
2028   BinaryBasicBlock *InsertBB = nullptr;
2029   BinaryBasicBlock *PrevBB = nullptr;
2030   bool IsLastInstrNop = false;
2031   // Offset of the last non-nop instruction.
2032   uint64_t LastInstrOffset = 0;
2033 
2034   auto addCFIPlaceholders = [this](uint64_t CFIOffset,
2035                                    BinaryBasicBlock *InsertBB) {
2036     for (auto FI = OffsetToCFI.lower_bound(CFIOffset),
2037               FE = OffsetToCFI.upper_bound(CFIOffset);
2038          FI != FE; ++FI) {
2039       addCFIPseudo(InsertBB, InsertBB->end(), FI->second);
2040     }
2041   };
2042 
2043   // For profiling purposes we need to save the offset of the last instruction
2044   // in the basic block.
2045   // NOTE: nops always have an Offset annotation. Annotate the last non-nop as
2046   //       older profiles ignored nops.
2047   auto updateOffset = [&](uint64_t Offset) {
2048     assert(PrevBB && PrevBB != InsertBB && "invalid previous block");
2049     MCInst *LastNonNop = nullptr;
2050     for (BinaryBasicBlock::reverse_iterator RII = PrevBB->getLastNonPseudo(),
2051                                             E = PrevBB->rend();
2052          RII != E; ++RII) {
2053       if (!BC.MIB->isPseudo(*RII) && !BC.MIB->isNoop(*RII)) {
2054         LastNonNop = &*RII;
2055         break;
2056       }
2057     }
2058     if (LastNonNop && !MIB->getOffset(*LastNonNop))
2059       MIB->setOffset(*LastNonNop, static_cast<uint32_t>(Offset));
2060   };
2061 
2062   for (auto I = Instructions.begin(), E = Instructions.end(); I != E; ++I) {
2063     const uint32_t Offset = I->first;
2064     MCInst &Instr = I->second;
2065 
2066     auto LI = Labels.find(Offset);
2067     if (LI != Labels.end()) {
2068       // Always create new BB at branch destination.
2069       PrevBB = InsertBB ? InsertBB : PrevBB;
2070       InsertBB = addBasicBlockAt(LI->first, LI->second);
2071       if (opts::PreserveBlocksAlignment && IsLastInstrNop)
2072         InsertBB->setDerivedAlignment();
2073 
2074       if (PrevBB)
2075         updateOffset(LastInstrOffset);
2076     }
2077 
2078     // Mark all nops with Offset for profile tracking purposes.
2079     if (MIB->isNoop(Instr) && !MIB->getOffset(Instr)) {
2080       // If "Offset" annotation is not present, set it and mark the nop for
2081       // deletion.
2082       MIB->setOffset(Instr, static_cast<uint32_t>(Offset));
2083       // Annotate ordinary nops, so we can safely delete them if required.
2084       MIB->addAnnotation(Instr, "NOP", static_cast<uint32_t>(1), AllocatorId);
2085     }
2086 
2087     if (!InsertBB) {
2088       // It must be a fallthrough or unreachable code. Create a new block unless
2089       // we see an unconditional branch following a conditional one. The latter
2090       // should not be a conditional tail call.
2091       assert(PrevBB && "no previous basic block for a fall through");
2092       MCInst *PrevInstr = PrevBB->getLastNonPseudoInstr();
2093       assert(PrevInstr && "no previous instruction for a fall through");
2094       if (MIB->isUnconditionalBranch(Instr) &&
2095           !MIB->isIndirectBranch(*PrevInstr) &&
2096           !MIB->isUnconditionalBranch(*PrevInstr) &&
2097           !MIB->getConditionalTailCall(*PrevInstr) &&
2098           !MIB->isReturn(*PrevInstr)) {
2099         // Temporarily restore inserter basic block.
2100         InsertBB = PrevBB;
2101       } else {
2102         MCSymbol *Label;
2103         {
2104           auto L = BC.scopeLock();
2105           Label = BC.Ctx->createNamedTempSymbol("FT");
2106         }
2107         InsertBB = addBasicBlockAt(Offset, Label);
2108         if (opts::PreserveBlocksAlignment && IsLastInstrNop)
2109           InsertBB->setDerivedAlignment();
2110         updateOffset(LastInstrOffset);
2111       }
2112     }
2113     if (Offset == getFirstInstructionOffset()) {
2114       // Add associated CFI pseudos in the first offset
2115       addCFIPlaceholders(Offset, InsertBB);
2116     }
2117 
2118     const bool IsBlockEnd = MIB->isTerminator(Instr);
2119     IsLastInstrNop = MIB->isNoop(Instr);
2120     if (!IsLastInstrNop)
2121       LastInstrOffset = Offset;
2122     InsertBB->addInstruction(std::move(Instr));
2123 
2124     // Add associated CFI instrs. We always add the CFI instruction that is
2125     // located immediately after this instruction, since the next CFI
2126     // instruction reflects the change in state caused by this instruction.
2127     auto NextInstr = std::next(I);
2128     uint64_t CFIOffset;
2129     if (NextInstr != E)
2130       CFIOffset = NextInstr->first;
2131     else
2132       CFIOffset = getSize();
2133 
2134     // Note: this potentially invalidates instruction pointers/iterators.
2135     addCFIPlaceholders(CFIOffset, InsertBB);
2136 
2137     if (IsBlockEnd) {
2138       PrevBB = InsertBB;
2139       InsertBB = nullptr;
2140     }
2141   }
2142 
2143   if (BasicBlocks.empty()) {
2144     setSimple(false);
2145     return createNonFatalBOLTError("");
2146   }
2147 
2148   // Intermediate dump.
2149   LLVM_DEBUG(print(dbgs(), "after creating basic blocks"));
2150 
2151   // TODO: handle properly calls to no-return functions,
2152   // e.g. exit(3), etc. Otherwise we'll see a false fall-through
2153   // blocks.
2154 
2155   // Remove duplicates branches. We can get a bunch of them from jump tables.
2156   // Without doing jump table value profiling we don't have a use for extra
2157   // (duplicate) branches.
2158   llvm::sort(TakenBranches);
2159   auto NewEnd = std::unique(TakenBranches.begin(), TakenBranches.end());
2160   TakenBranches.erase(NewEnd, TakenBranches.end());
2161 
2162   for (std::pair<uint32_t, uint32_t> &Branch : TakenBranches) {
2163     LLVM_DEBUG(dbgs() << "registering branch [0x"
2164                       << Twine::utohexstr(Branch.first) << "] -> [0x"
2165                       << Twine::utohexstr(Branch.second) << "]\n");
2166     BinaryBasicBlock *FromBB = getBasicBlockContainingOffset(Branch.first);
2167     BinaryBasicBlock *ToBB = getBasicBlockAtOffset(Branch.second);
2168     if (!FromBB || !ToBB) {
2169       if (!FromBB)
2170         BC.errs() << "BOLT-ERROR: cannot find BB containing the branch.\n";
2171       if (!ToBB)
2172         BC.errs()
2173             << "BOLT-ERROR: cannot find BB containing branch destination.\n";
2174       return createFatalBOLTError(BC.generateBugReportMessage(
2175           "disassembly failed - inconsistent branch found.", *this));
2176     }
2177 
2178     FromBB->addSuccessor(ToBB);
2179   }
2180 
2181   // Add fall-through branches.
2182   PrevBB = nullptr;
2183   bool IsPrevFT = false; // Is previous block a fall-through.
2184   for (BinaryBasicBlock *BB : BasicBlocks) {
2185     if (IsPrevFT)
2186       PrevBB->addSuccessor(BB);
2187 
2188     if (BB->empty()) {
2189       IsPrevFT = true;
2190       PrevBB = BB;
2191       continue;
2192     }
2193 
2194     MCInst *LastInstr = BB->getLastNonPseudoInstr();
2195     assert(LastInstr &&
2196            "should have non-pseudo instruction in non-empty block");
2197 
2198     if (BB->succ_size() == 0) {
2199       // Since there's no existing successors, we know the last instruction is
2200       // not a conditional branch. Thus if it's a terminator, it shouldn't be a
2201       // fall-through.
2202       //
2203       // Conditional tail call is a special case since we don't add a taken
2204       // branch successor for it.
2205       IsPrevFT = !MIB->isTerminator(*LastInstr) ||
2206                  MIB->getConditionalTailCall(*LastInstr);
2207     } else if (BB->succ_size() == 1) {
2208       IsPrevFT = MIB->isConditionalBranch(*LastInstr);
2209     } else {
2210       IsPrevFT = false;
2211     }
2212 
2213     PrevBB = BB;
2214   }
2215 
2216   // Assign landing pads and throwers info.
2217   recomputeLandingPads();
2218 
2219   // Assign CFI information to each BB entry.
2220   annotateCFIState();
2221 
2222   // Annotate invoke instructions with GNU_args_size data.
2223   propagateGnuArgsSizeInfo(AllocatorId);
2224 
2225   // Set the basic block layout to the original order and set end offsets.
2226   PrevBB = nullptr;
2227   for (BinaryBasicBlock *BB : BasicBlocks) {
2228     Layout.addBasicBlock(BB);
2229     if (PrevBB)
2230       PrevBB->setEndOffset(BB->getOffset());
2231     PrevBB = BB;
2232   }
2233   PrevBB->setEndOffset(getSize());
2234 
2235   Layout.updateLayoutIndices();
2236 
2237   normalizeCFIState();
2238 
2239   // Clean-up memory taken by intermediate structures.
2240   //
2241   // NB: don't clear Labels list as we may need them if we mark the function
2242   //     as non-simple later in the process of discovering extra entry points.
2243   clearList(Instructions);
2244   clearList(OffsetToCFI);
2245   clearList(TakenBranches);
2246 
2247   // Update the state.
2248   CurrentState = State::CFG;
2249 
2250   // Make any necessary adjustments for indirect branches.
2251   if (!postProcessIndirectBranches(AllocatorId)) {
2252     if (opts::Verbosity) {
2253       BC.errs() << "BOLT-WARNING: failed to post-process indirect branches for "
2254                 << *this << '\n';
2255     }
2256     // In relocation mode we want to keep processing the function but avoid
2257     // optimizing it.
2258     setSimple(false);
2259   }
2260 
2261   clearList(ExternallyReferencedOffsets);
2262   clearList(UnknownIndirectBranchOffsets);
2263 
2264   return Error::success();
2265 }
2266 
2267 void BinaryFunction::postProcessCFG() {
2268   if (isSimple() && !BasicBlocks.empty()) {
2269     // Convert conditional tail call branches to conditional branches that jump
2270     // to a tail call.
2271     removeConditionalTailCalls();
2272 
2273     postProcessProfile();
2274 
2275     // Eliminate inconsistencies between branch instructions and CFG.
2276     postProcessBranches();
2277   }
2278 
2279   calculateMacroOpFusionStats();
2280 
2281   // The final cleanup of intermediate structures.
2282   clearList(IgnoredBranches);
2283 
2284   // Remove "Offset" annotations, unless we need an address-translation table
2285   // later. This has no cost, since annotations are allocated by a bumpptr
2286   // allocator and won't be released anyway until late in the pipeline.
2287   if (!requiresAddressTranslation() && !opts::Instrument) {
2288     for (BinaryBasicBlock &BB : blocks())
2289       for (MCInst &Inst : BB)
2290         BC.MIB->clearOffset(Inst);
2291   }
2292 
2293   assert((!isSimple() || validateCFG()) &&
2294          "invalid CFG detected after post-processing");
2295 }
2296 
2297 void BinaryFunction::calculateMacroOpFusionStats() {
2298   if (!getBinaryContext().isX86())
2299     return;
2300   for (const BinaryBasicBlock &BB : blocks()) {
2301     auto II = BB.getMacroOpFusionPair();
2302     if (II == BB.end())
2303       continue;
2304 
2305     // Check offset of the second instruction.
2306     // FIXME: arch-specific.
2307     const uint32_t Offset = BC.MIB->getOffsetWithDefault(*std::next(II), 0);
2308     if (!Offset || (getAddress() + Offset) % 64)
2309       continue;
2310 
2311     LLVM_DEBUG(dbgs() << "\nmissed macro-op fusion at address 0x"
2312                       << Twine::utohexstr(getAddress() + Offset)
2313                       << " in function " << *this << "; executed "
2314                       << BB.getKnownExecutionCount() << " times.\n");
2315     ++BC.Stats.MissedMacroFusionPairs;
2316     BC.Stats.MissedMacroFusionExecCount += BB.getKnownExecutionCount();
2317   }
2318 }
2319 
2320 void BinaryFunction::removeTagsFromProfile() {
2321   for (BinaryBasicBlock *BB : BasicBlocks) {
2322     if (BB->ExecutionCount == BinaryBasicBlock::COUNT_NO_PROFILE)
2323       BB->ExecutionCount = 0;
2324     for (BinaryBasicBlock::BinaryBranchInfo &BI : BB->branch_info()) {
2325       if (BI.Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
2326           BI.MispredictedCount != BinaryBasicBlock::COUNT_NO_PROFILE)
2327         continue;
2328       BI.Count = 0;
2329       BI.MispredictedCount = 0;
2330     }
2331   }
2332 }
2333 
2334 void BinaryFunction::removeConditionalTailCalls() {
2335   // Blocks to be appended at the end.
2336   std::vector<std::unique_ptr<BinaryBasicBlock>> NewBlocks;
2337 
2338   for (auto BBI = begin(); BBI != end(); ++BBI) {
2339     BinaryBasicBlock &BB = *BBI;
2340     MCInst *CTCInstr = BB.getLastNonPseudoInstr();
2341     if (!CTCInstr)
2342       continue;
2343 
2344     std::optional<uint64_t> TargetAddressOrNone =
2345         BC.MIB->getConditionalTailCall(*CTCInstr);
2346     if (!TargetAddressOrNone)
2347       continue;
2348 
2349     // Gather all necessary information about CTC instruction before
2350     // annotations are destroyed.
2351     const int32_t CFIStateBeforeCTC = BB.getCFIStateAtInstr(CTCInstr);
2352     uint64_t CTCTakenCount = BinaryBasicBlock::COUNT_NO_PROFILE;
2353     uint64_t CTCMispredCount = BinaryBasicBlock::COUNT_NO_PROFILE;
2354     if (hasValidProfile()) {
2355       CTCTakenCount = BC.MIB->getAnnotationWithDefault<uint64_t>(
2356           *CTCInstr, "CTCTakenCount");
2357       CTCMispredCount = BC.MIB->getAnnotationWithDefault<uint64_t>(
2358           *CTCInstr, "CTCMispredCount");
2359     }
2360 
2361     // Assert that the tail call does not throw.
2362     assert(!BC.MIB->getEHInfo(*CTCInstr) &&
2363            "found tail call with associated landing pad");
2364 
2365     // Create a basic block with an unconditional tail call instruction using
2366     // the same destination.
2367     const MCSymbol *CTCTargetLabel = BC.MIB->getTargetSymbol(*CTCInstr);
2368     assert(CTCTargetLabel && "symbol expected for conditional tail call");
2369     MCInst TailCallInstr;
2370     BC.MIB->createTailCall(TailCallInstr, CTCTargetLabel, BC.Ctx.get());
2371 
2372     // Move offset from CTCInstr to TailCallInstr.
2373     if (const std::optional<uint32_t> Offset = BC.MIB->getOffset(*CTCInstr)) {
2374       BC.MIB->setOffset(TailCallInstr, *Offset);
2375       BC.MIB->clearOffset(*CTCInstr);
2376     }
2377 
2378     // Link new BBs to the original input offset of the BB where the CTC
2379     // is, so we can map samples recorded in new BBs back to the original BB
2380     // seem in the input binary (if using BAT)
2381     std::unique_ptr<BinaryBasicBlock> TailCallBB =
2382         createBasicBlock(BC.Ctx->createNamedTempSymbol("TC"));
2383     TailCallBB->setOffset(BB.getInputOffset());
2384     TailCallBB->addInstruction(TailCallInstr);
2385     TailCallBB->setCFIState(CFIStateBeforeCTC);
2386 
2387     // Add CFG edge with profile info from BB to TailCallBB.
2388     BB.addSuccessor(TailCallBB.get(), CTCTakenCount, CTCMispredCount);
2389 
2390     // Add execution count for the block.
2391     TailCallBB->setExecutionCount(CTCTakenCount);
2392 
2393     BC.MIB->convertTailCallToJmp(*CTCInstr);
2394 
2395     BC.MIB->replaceBranchTarget(*CTCInstr, TailCallBB->getLabel(),
2396                                 BC.Ctx.get());
2397 
2398     // Add basic block to the list that will be added to the end.
2399     NewBlocks.emplace_back(std::move(TailCallBB));
2400 
2401     // Swap edges as the TailCallBB corresponds to the taken branch.
2402     BB.swapConditionalSuccessors();
2403 
2404     // This branch is no longer a conditional tail call.
2405     BC.MIB->unsetConditionalTailCall(*CTCInstr);
2406   }
2407 
2408   insertBasicBlocks(std::prev(end()), std::move(NewBlocks),
2409                     /* UpdateLayout */ true,
2410                     /* UpdateCFIState */ false);
2411 }
2412 
2413 uint64_t BinaryFunction::getFunctionScore() const {
2414   if (FunctionScore != -1)
2415     return FunctionScore;
2416 
2417   if (!isSimple() || !hasValidProfile()) {
2418     FunctionScore = 0;
2419     return FunctionScore;
2420   }
2421 
2422   uint64_t TotalScore = 0ULL;
2423   for (const BinaryBasicBlock &BB : blocks()) {
2424     uint64_t BBExecCount = BB.getExecutionCount();
2425     if (BBExecCount == BinaryBasicBlock::COUNT_NO_PROFILE)
2426       continue;
2427     TotalScore += BBExecCount * BB.getNumNonPseudos();
2428   }
2429   FunctionScore = TotalScore;
2430   return FunctionScore;
2431 }
2432 
2433 void BinaryFunction::annotateCFIState() {
2434   assert(CurrentState == State::Disassembled && "unexpected function state");
2435   assert(!BasicBlocks.empty() && "basic block list should not be empty");
2436 
2437   // This is an index of the last processed CFI in FDE CFI program.
2438   uint32_t State = 0;
2439 
2440   // This is an index of RememberState CFI reflecting effective state right
2441   // after execution of RestoreState CFI.
2442   //
2443   // It differs from State iff the CFI at (State-1)
2444   // was RestoreState (modulo GNU_args_size CFIs, which are ignored).
2445   //
2446   // This allows us to generate shorter replay sequences when producing new
2447   // CFI programs.
2448   uint32_t EffectiveState = 0;
2449 
2450   // For tracking RememberState/RestoreState sequences.
2451   std::stack<uint32_t> StateStack;
2452 
2453   for (BinaryBasicBlock *BB : BasicBlocks) {
2454     BB->setCFIState(EffectiveState);
2455 
2456     for (const MCInst &Instr : *BB) {
2457       const MCCFIInstruction *CFI = getCFIFor(Instr);
2458       if (!CFI)
2459         continue;
2460 
2461       ++State;
2462 
2463       switch (CFI->getOperation()) {
2464       case MCCFIInstruction::OpRememberState:
2465         StateStack.push(EffectiveState);
2466         EffectiveState = State;
2467         break;
2468       case MCCFIInstruction::OpRestoreState:
2469         assert(!StateStack.empty() && "corrupt CFI stack");
2470         EffectiveState = StateStack.top();
2471         StateStack.pop();
2472         break;
2473       case MCCFIInstruction::OpGnuArgsSize:
2474         // OpGnuArgsSize CFIs do not affect the CFI state.
2475         break;
2476       default:
2477         // Any other CFI updates the state.
2478         EffectiveState = State;
2479         break;
2480       }
2481     }
2482   }
2483 
2484   assert(StateStack.empty() && "corrupt CFI stack");
2485 }
2486 
2487 namespace {
2488 
2489 /// Our full interpretation of a DWARF CFI machine state at a given point
2490 struct CFISnapshot {
2491   /// CFA register number and offset defining the canonical frame at this
2492   /// point, or the number of a rule (CFI state) that computes it with a
2493   /// DWARF expression. This number will be negative if it refers to a CFI
2494   /// located in the CIE instead of the FDE.
2495   uint32_t CFAReg;
2496   int32_t CFAOffset;
2497   int32_t CFARule;
2498   /// Mapping of rules (CFI states) that define the location of each
2499   /// register. If absent, no rule defining the location of such register
2500   /// was ever read. This number will be negative if it refers to a CFI
2501   /// located in the CIE instead of the FDE.
2502   DenseMap<int32_t, int32_t> RegRule;
2503 
2504   /// References to CIE, FDE and expanded instructions after a restore state
2505   const BinaryFunction::CFIInstrMapType &CIE;
2506   const BinaryFunction::CFIInstrMapType &FDE;
2507   const DenseMap<int32_t, SmallVector<int32_t, 4>> &FrameRestoreEquivalents;
2508 
2509   /// Current FDE CFI number representing the state where the snapshot is at
2510   int32_t CurState;
2511 
2512   /// Used when we don't have information about which state/rule to apply
2513   /// to recover the location of either the CFA or a specific register
2514   constexpr static int32_t UNKNOWN = std::numeric_limits<int32_t>::min();
2515 
2516 private:
2517   /// Update our snapshot by executing a single CFI
2518   void update(const MCCFIInstruction &Instr, int32_t RuleNumber) {
2519     switch (Instr.getOperation()) {
2520     case MCCFIInstruction::OpSameValue:
2521     case MCCFIInstruction::OpRelOffset:
2522     case MCCFIInstruction::OpOffset:
2523     case MCCFIInstruction::OpRestore:
2524     case MCCFIInstruction::OpUndefined:
2525     case MCCFIInstruction::OpRegister:
2526       RegRule[Instr.getRegister()] = RuleNumber;
2527       break;
2528     case MCCFIInstruction::OpDefCfaRegister:
2529       CFAReg = Instr.getRegister();
2530       CFARule = UNKNOWN;
2531 
2532       // This shouldn't happen according to the spec but GNU binutils on RISC-V
2533       // emits a DW_CFA_def_cfa_register in CIE's which leaves the offset
2534       // unspecified. Both readelf and llvm-dwarfdump interpret the offset as 0
2535       // in this case so let's do the same.
2536       if (CFAOffset == UNKNOWN)
2537         CFAOffset = 0;
2538       break;
2539     case MCCFIInstruction::OpDefCfaOffset:
2540       CFAOffset = Instr.getOffset();
2541       CFARule = UNKNOWN;
2542       break;
2543     case MCCFIInstruction::OpDefCfa:
2544       CFAReg = Instr.getRegister();
2545       CFAOffset = Instr.getOffset();
2546       CFARule = UNKNOWN;
2547       break;
2548     case MCCFIInstruction::OpEscape: {
2549       std::optional<uint8_t> Reg =
2550           readDWARFExpressionTargetReg(Instr.getValues());
2551       // Handle DW_CFA_def_cfa_expression
2552       if (!Reg) {
2553         CFARule = RuleNumber;
2554         break;
2555       }
2556       RegRule[*Reg] = RuleNumber;
2557       break;
2558     }
2559     case MCCFIInstruction::OpAdjustCfaOffset:
2560     case MCCFIInstruction::OpWindowSave:
2561     case MCCFIInstruction::OpNegateRAState:
2562     case MCCFIInstruction::OpLLVMDefAspaceCfa:
2563       llvm_unreachable("unsupported CFI opcode");
2564       break;
2565     case MCCFIInstruction::OpRememberState:
2566     case MCCFIInstruction::OpRestoreState:
2567     case MCCFIInstruction::OpGnuArgsSize:
2568       // do not affect CFI state
2569       break;
2570     }
2571   }
2572 
2573 public:
2574   /// Advance state reading FDE CFI instructions up to State number
2575   void advanceTo(int32_t State) {
2576     for (int32_t I = CurState, E = State; I != E; ++I) {
2577       const MCCFIInstruction &Instr = FDE[I];
2578       if (Instr.getOperation() != MCCFIInstruction::OpRestoreState) {
2579         update(Instr, I);
2580         continue;
2581       }
2582       // If restore state instruction, fetch the equivalent CFIs that have
2583       // the same effect of this restore. This is used to ensure remember-
2584       // restore pairs are completely removed.
2585       auto Iter = FrameRestoreEquivalents.find(I);
2586       if (Iter == FrameRestoreEquivalents.end())
2587         continue;
2588       for (int32_t RuleNumber : Iter->second)
2589         update(FDE[RuleNumber], RuleNumber);
2590     }
2591 
2592     assert(((CFAReg != (uint32_t)UNKNOWN && CFAOffset != UNKNOWN) ||
2593             CFARule != UNKNOWN) &&
2594            "CIE did not define default CFA?");
2595 
2596     CurState = State;
2597   }
2598 
2599   /// Interpret all CIE and FDE instructions up until CFI State number and
2600   /// populate this snapshot
2601   CFISnapshot(
2602       const BinaryFunction::CFIInstrMapType &CIE,
2603       const BinaryFunction::CFIInstrMapType &FDE,
2604       const DenseMap<int32_t, SmallVector<int32_t, 4>> &FrameRestoreEquivalents,
2605       int32_t State)
2606       : CIE(CIE), FDE(FDE), FrameRestoreEquivalents(FrameRestoreEquivalents) {
2607     CFAReg = UNKNOWN;
2608     CFAOffset = UNKNOWN;
2609     CFARule = UNKNOWN;
2610     CurState = 0;
2611 
2612     for (int32_t I = 0, E = CIE.size(); I != E; ++I) {
2613       const MCCFIInstruction &Instr = CIE[I];
2614       update(Instr, -I);
2615     }
2616 
2617     advanceTo(State);
2618   }
2619 };
2620 
2621 /// A CFI snapshot with the capability of checking if incremental additions to
2622 /// it are redundant. This is used to ensure we do not emit two CFI instructions
2623 /// back-to-back that are doing the same state change, or to avoid emitting a
2624 /// CFI at all when the state at that point would not be modified after that CFI
2625 struct CFISnapshotDiff : public CFISnapshot {
2626   bool RestoredCFAReg{false};
2627   bool RestoredCFAOffset{false};
2628   DenseMap<int32_t, bool> RestoredRegs;
2629 
2630   CFISnapshotDiff(const CFISnapshot &S) : CFISnapshot(S) {}
2631 
2632   CFISnapshotDiff(
2633       const BinaryFunction::CFIInstrMapType &CIE,
2634       const BinaryFunction::CFIInstrMapType &FDE,
2635       const DenseMap<int32_t, SmallVector<int32_t, 4>> &FrameRestoreEquivalents,
2636       int32_t State)
2637       : CFISnapshot(CIE, FDE, FrameRestoreEquivalents, State) {}
2638 
2639   /// Return true if applying Instr to this state is redundant and can be
2640   /// dismissed.
2641   bool isRedundant(const MCCFIInstruction &Instr) {
2642     switch (Instr.getOperation()) {
2643     case MCCFIInstruction::OpSameValue:
2644     case MCCFIInstruction::OpRelOffset:
2645     case MCCFIInstruction::OpOffset:
2646     case MCCFIInstruction::OpRestore:
2647     case MCCFIInstruction::OpUndefined:
2648     case MCCFIInstruction::OpRegister:
2649     case MCCFIInstruction::OpEscape: {
2650       uint32_t Reg;
2651       if (Instr.getOperation() != MCCFIInstruction::OpEscape) {
2652         Reg = Instr.getRegister();
2653       } else {
2654         std::optional<uint8_t> R =
2655             readDWARFExpressionTargetReg(Instr.getValues());
2656         // Handle DW_CFA_def_cfa_expression
2657         if (!R) {
2658           if (RestoredCFAReg && RestoredCFAOffset)
2659             return true;
2660           RestoredCFAReg = true;
2661           RestoredCFAOffset = true;
2662           return false;
2663         }
2664         Reg = *R;
2665       }
2666       if (RestoredRegs[Reg])
2667         return true;
2668       RestoredRegs[Reg] = true;
2669       const int32_t CurRegRule = RegRule.contains(Reg) ? RegRule[Reg] : UNKNOWN;
2670       if (CurRegRule == UNKNOWN) {
2671         if (Instr.getOperation() == MCCFIInstruction::OpRestore ||
2672             Instr.getOperation() == MCCFIInstruction::OpSameValue)
2673           return true;
2674         return false;
2675       }
2676       const MCCFIInstruction &LastDef =
2677           CurRegRule < 0 ? CIE[-CurRegRule] : FDE[CurRegRule];
2678       return LastDef == Instr;
2679     }
2680     case MCCFIInstruction::OpDefCfaRegister:
2681       if (RestoredCFAReg)
2682         return true;
2683       RestoredCFAReg = true;
2684       return CFAReg == Instr.getRegister();
2685     case MCCFIInstruction::OpDefCfaOffset:
2686       if (RestoredCFAOffset)
2687         return true;
2688       RestoredCFAOffset = true;
2689       return CFAOffset == Instr.getOffset();
2690     case MCCFIInstruction::OpDefCfa:
2691       if (RestoredCFAReg && RestoredCFAOffset)
2692         return true;
2693       RestoredCFAReg = true;
2694       RestoredCFAOffset = true;
2695       return CFAReg == Instr.getRegister() && CFAOffset == Instr.getOffset();
2696     case MCCFIInstruction::OpAdjustCfaOffset:
2697     case MCCFIInstruction::OpWindowSave:
2698     case MCCFIInstruction::OpNegateRAState:
2699     case MCCFIInstruction::OpLLVMDefAspaceCfa:
2700       llvm_unreachable("unsupported CFI opcode");
2701       return false;
2702     case MCCFIInstruction::OpRememberState:
2703     case MCCFIInstruction::OpRestoreState:
2704     case MCCFIInstruction::OpGnuArgsSize:
2705       // do not affect CFI state
2706       return true;
2707     }
2708     return false;
2709   }
2710 };
2711 
2712 } // end anonymous namespace
2713 
2714 bool BinaryFunction::replayCFIInstrs(int32_t FromState, int32_t ToState,
2715                                      BinaryBasicBlock *InBB,
2716                                      BinaryBasicBlock::iterator InsertIt) {
2717   if (FromState == ToState)
2718     return true;
2719   assert(FromState < ToState && "can only replay CFIs forward");
2720 
2721   CFISnapshotDiff CFIDiff(CIEFrameInstructions, FrameInstructions,
2722                           FrameRestoreEquivalents, FromState);
2723 
2724   std::vector<uint32_t> NewCFIs;
2725   for (int32_t CurState = FromState; CurState < ToState; ++CurState) {
2726     MCCFIInstruction *Instr = &FrameInstructions[CurState];
2727     if (Instr->getOperation() == MCCFIInstruction::OpRestoreState) {
2728       auto Iter = FrameRestoreEquivalents.find(CurState);
2729       assert(Iter != FrameRestoreEquivalents.end());
2730       NewCFIs.insert(NewCFIs.end(), Iter->second.begin(), Iter->second.end());
2731       // RestoreState / Remember will be filtered out later by CFISnapshotDiff,
2732       // so we might as well fall-through here.
2733     }
2734     NewCFIs.push_back(CurState);
2735   }
2736 
2737   // Replay instructions while avoiding duplicates
2738   for (int32_t State : llvm::reverse(NewCFIs)) {
2739     if (CFIDiff.isRedundant(FrameInstructions[State]))
2740       continue;
2741     InsertIt = addCFIPseudo(InBB, InsertIt, State);
2742   }
2743 
2744   return true;
2745 }
2746 
2747 SmallVector<int32_t, 4>
2748 BinaryFunction::unwindCFIState(int32_t FromState, int32_t ToState,
2749                                BinaryBasicBlock *InBB,
2750                                BinaryBasicBlock::iterator &InsertIt) {
2751   SmallVector<int32_t, 4> NewStates;
2752 
2753   CFISnapshot ToCFITable(CIEFrameInstructions, FrameInstructions,
2754                          FrameRestoreEquivalents, ToState);
2755   CFISnapshotDiff FromCFITable(ToCFITable);
2756   FromCFITable.advanceTo(FromState);
2757 
2758   auto undoStateDefCfa = [&]() {
2759     if (ToCFITable.CFARule == CFISnapshot::UNKNOWN) {
2760       FrameInstructions.emplace_back(MCCFIInstruction::cfiDefCfa(
2761           nullptr, ToCFITable.CFAReg, ToCFITable.CFAOffset));
2762       if (FromCFITable.isRedundant(FrameInstructions.back())) {
2763         FrameInstructions.pop_back();
2764         return;
2765       }
2766       NewStates.push_back(FrameInstructions.size() - 1);
2767       InsertIt = addCFIPseudo(InBB, InsertIt, FrameInstructions.size() - 1);
2768       ++InsertIt;
2769     } else if (ToCFITable.CFARule < 0) {
2770       if (FromCFITable.isRedundant(CIEFrameInstructions[-ToCFITable.CFARule]))
2771         return;
2772       NewStates.push_back(FrameInstructions.size());
2773       InsertIt = addCFIPseudo(InBB, InsertIt, FrameInstructions.size());
2774       ++InsertIt;
2775       FrameInstructions.emplace_back(CIEFrameInstructions[-ToCFITable.CFARule]);
2776     } else if (!FromCFITable.isRedundant(
2777                    FrameInstructions[ToCFITable.CFARule])) {
2778       NewStates.push_back(ToCFITable.CFARule);
2779       InsertIt = addCFIPseudo(InBB, InsertIt, ToCFITable.CFARule);
2780       ++InsertIt;
2781     }
2782   };
2783 
2784   auto undoState = [&](const MCCFIInstruction &Instr) {
2785     switch (Instr.getOperation()) {
2786     case MCCFIInstruction::OpRememberState:
2787     case MCCFIInstruction::OpRestoreState:
2788       break;
2789     case MCCFIInstruction::OpSameValue:
2790     case MCCFIInstruction::OpRelOffset:
2791     case MCCFIInstruction::OpOffset:
2792     case MCCFIInstruction::OpRestore:
2793     case MCCFIInstruction::OpUndefined:
2794     case MCCFIInstruction::OpEscape:
2795     case MCCFIInstruction::OpRegister: {
2796       uint32_t Reg;
2797       if (Instr.getOperation() != MCCFIInstruction::OpEscape) {
2798         Reg = Instr.getRegister();
2799       } else {
2800         std::optional<uint8_t> R =
2801             readDWARFExpressionTargetReg(Instr.getValues());
2802         // Handle DW_CFA_def_cfa_expression
2803         if (!R) {
2804           undoStateDefCfa();
2805           return;
2806         }
2807         Reg = *R;
2808       }
2809 
2810       if (!ToCFITable.RegRule.contains(Reg)) {
2811         FrameInstructions.emplace_back(
2812             MCCFIInstruction::createRestore(nullptr, Reg));
2813         if (FromCFITable.isRedundant(FrameInstructions.back())) {
2814           FrameInstructions.pop_back();
2815           break;
2816         }
2817         NewStates.push_back(FrameInstructions.size() - 1);
2818         InsertIt = addCFIPseudo(InBB, InsertIt, FrameInstructions.size() - 1);
2819         ++InsertIt;
2820         break;
2821       }
2822       const int32_t Rule = ToCFITable.RegRule[Reg];
2823       if (Rule < 0) {
2824         if (FromCFITable.isRedundant(CIEFrameInstructions[-Rule]))
2825           break;
2826         NewStates.push_back(FrameInstructions.size());
2827         InsertIt = addCFIPseudo(InBB, InsertIt, FrameInstructions.size());
2828         ++InsertIt;
2829         FrameInstructions.emplace_back(CIEFrameInstructions[-Rule]);
2830         break;
2831       }
2832       if (FromCFITable.isRedundant(FrameInstructions[Rule]))
2833         break;
2834       NewStates.push_back(Rule);
2835       InsertIt = addCFIPseudo(InBB, InsertIt, Rule);
2836       ++InsertIt;
2837       break;
2838     }
2839     case MCCFIInstruction::OpDefCfaRegister:
2840     case MCCFIInstruction::OpDefCfaOffset:
2841     case MCCFIInstruction::OpDefCfa:
2842       undoStateDefCfa();
2843       break;
2844     case MCCFIInstruction::OpAdjustCfaOffset:
2845     case MCCFIInstruction::OpWindowSave:
2846     case MCCFIInstruction::OpNegateRAState:
2847     case MCCFIInstruction::OpLLVMDefAspaceCfa:
2848       llvm_unreachable("unsupported CFI opcode");
2849       break;
2850     case MCCFIInstruction::OpGnuArgsSize:
2851       // do not affect CFI state
2852       break;
2853     }
2854   };
2855 
2856   // Undo all modifications from ToState to FromState
2857   for (int32_t I = ToState, E = FromState; I != E; ++I) {
2858     const MCCFIInstruction &Instr = FrameInstructions[I];
2859     if (Instr.getOperation() != MCCFIInstruction::OpRestoreState) {
2860       undoState(Instr);
2861       continue;
2862     }
2863     auto Iter = FrameRestoreEquivalents.find(I);
2864     if (Iter == FrameRestoreEquivalents.end())
2865       continue;
2866     for (int32_t State : Iter->second)
2867       undoState(FrameInstructions[State]);
2868   }
2869 
2870   return NewStates;
2871 }
2872 
2873 void BinaryFunction::normalizeCFIState() {
2874   // Reordering blocks with remember-restore state instructions can be specially
2875   // tricky. When rewriting the CFI, we omit remember-restore state instructions
2876   // entirely. For restore state, we build a map expanding each restore to the
2877   // equivalent unwindCFIState sequence required at that point to achieve the
2878   // same effect of the restore. All remember state are then just ignored.
2879   std::stack<int32_t> Stack;
2880   for (BinaryBasicBlock *CurBB : Layout.blocks()) {
2881     for (auto II = CurBB->begin(); II != CurBB->end(); ++II) {
2882       if (const MCCFIInstruction *CFI = getCFIFor(*II)) {
2883         if (CFI->getOperation() == MCCFIInstruction::OpRememberState) {
2884           Stack.push(II->getOperand(0).getImm());
2885           continue;
2886         }
2887         if (CFI->getOperation() == MCCFIInstruction::OpRestoreState) {
2888           const int32_t RememberState = Stack.top();
2889           const int32_t CurState = II->getOperand(0).getImm();
2890           FrameRestoreEquivalents[CurState] =
2891               unwindCFIState(CurState, RememberState, CurBB, II);
2892           Stack.pop();
2893         }
2894       }
2895     }
2896   }
2897 }
2898 
2899 bool BinaryFunction::finalizeCFIState() {
2900   LLVM_DEBUG(
2901       dbgs() << "Trying to fix CFI states for each BB after reordering.\n");
2902   LLVM_DEBUG(dbgs() << "This is the list of CFI states for each BB of " << *this
2903                     << ": ");
2904 
2905   const char *Sep = "";
2906   (void)Sep;
2907   for (FunctionFragment &FF : Layout.fragments()) {
2908     // Hot-cold border: at start of each region (with a different FDE) we need
2909     // to reset the CFI state.
2910     int32_t State = 0;
2911 
2912     for (BinaryBasicBlock *BB : FF) {
2913       const int32_t CFIStateAtExit = BB->getCFIStateAtExit();
2914 
2915       // We need to recover the correct state if it doesn't match expected
2916       // state at BB entry point.
2917       if (BB->getCFIState() < State) {
2918         // In this case, State is currently higher than what this BB expect it
2919         // to be. To solve this, we need to insert CFI instructions to undo
2920         // the effect of all CFI from BB's state to current State.
2921         auto InsertIt = BB->begin();
2922         unwindCFIState(State, BB->getCFIState(), BB, InsertIt);
2923       } else if (BB->getCFIState() > State) {
2924         // If BB's CFI state is greater than State, it means we are behind in
2925         // the state. Just emit all instructions to reach this state at the
2926         // beginning of this BB. If this sequence of instructions involve
2927         // remember state or restore state, bail out.
2928         if (!replayCFIInstrs(State, BB->getCFIState(), BB, BB->begin()))
2929           return false;
2930       }
2931 
2932       State = CFIStateAtExit;
2933       LLVM_DEBUG(dbgs() << Sep << State; Sep = ", ");
2934     }
2935   }
2936   LLVM_DEBUG(dbgs() << "\n");
2937 
2938   for (BinaryBasicBlock &BB : blocks()) {
2939     for (auto II = BB.begin(); II != BB.end();) {
2940       const MCCFIInstruction *CFI = getCFIFor(*II);
2941       if (CFI && (CFI->getOperation() == MCCFIInstruction::OpRememberState ||
2942                   CFI->getOperation() == MCCFIInstruction::OpRestoreState)) {
2943         II = BB.eraseInstruction(II);
2944       } else {
2945         ++II;
2946       }
2947     }
2948   }
2949 
2950   return true;
2951 }
2952 
2953 bool BinaryFunction::requiresAddressTranslation() const {
2954   return opts::EnableBAT || hasSDTMarker() || hasPseudoProbe();
2955 }
2956 
2957 bool BinaryFunction::requiresAddressMap() const {
2958   if (isInjected())
2959     return false;
2960 
2961   return opts::UpdateDebugSections || isMultiEntry() ||
2962          requiresAddressTranslation();
2963 }
2964 
2965 uint64_t BinaryFunction::getInstructionCount() const {
2966   uint64_t Count = 0;
2967   for (const BinaryBasicBlock &BB : blocks())
2968     Count += BB.getNumNonPseudos();
2969   return Count;
2970 }
2971 
2972 void BinaryFunction::clearDisasmState() {
2973   clearList(Instructions);
2974   clearList(IgnoredBranches);
2975   clearList(TakenBranches);
2976 
2977   if (BC.HasRelocations) {
2978     for (std::pair<const uint32_t, MCSymbol *> &LI : Labels)
2979       BC.UndefinedSymbols.insert(LI.second);
2980     for (MCSymbol *const EndLabel : FunctionEndLabels)
2981       if (EndLabel)
2982         BC.UndefinedSymbols.insert(EndLabel);
2983   }
2984 }
2985 
2986 void BinaryFunction::setTrapOnEntry() {
2987   clearDisasmState();
2988 
2989   forEachEntryPoint([&](uint64_t Offset, const MCSymbol *Label) -> bool {
2990     MCInst TrapInstr;
2991     BC.MIB->createTrap(TrapInstr);
2992     addInstruction(Offset, std::move(TrapInstr));
2993     return true;
2994   });
2995 
2996   TrapsOnEntry = true;
2997 }
2998 
2999 void BinaryFunction::setIgnored() {
3000   if (opts::processAllFunctions()) {
3001     // We can accept ignored functions before they've been disassembled.
3002     // In that case, they would still get disassembled and emited, but not
3003     // optimized.
3004     assert(CurrentState == State::Empty &&
3005            "cannot ignore non-empty functions in current mode");
3006     IsIgnored = true;
3007     return;
3008   }
3009 
3010   clearDisasmState();
3011 
3012   // Clear CFG state too.
3013   if (hasCFG()) {
3014     releaseCFG();
3015 
3016     for (BinaryBasicBlock *BB : BasicBlocks)
3017       delete BB;
3018     clearList(BasicBlocks);
3019 
3020     for (BinaryBasicBlock *BB : DeletedBasicBlocks)
3021       delete BB;
3022     clearList(DeletedBasicBlocks);
3023 
3024     Layout.clear();
3025   }
3026 
3027   CurrentState = State::Empty;
3028 
3029   IsIgnored = true;
3030   IsSimple = false;
3031   LLVM_DEBUG(dbgs() << "Ignoring " << getPrintName() << '\n');
3032 }
3033 
3034 void BinaryFunction::duplicateConstantIslands() {
3035   assert(Islands && "function expected to have constant islands");
3036 
3037   for (BinaryBasicBlock *BB : getLayout().blocks()) {
3038     if (!BB->isCold())
3039       continue;
3040 
3041     for (MCInst &Inst : *BB) {
3042       int OpNum = 0;
3043       for (MCOperand &Operand : Inst) {
3044         if (!Operand.isExpr()) {
3045           ++OpNum;
3046           continue;
3047         }
3048         const MCSymbol *Symbol = BC.MIB->getTargetSymbol(Inst, OpNum);
3049         // Check if this is an island symbol
3050         if (!Islands->Symbols.count(Symbol) &&
3051             !Islands->ProxySymbols.count(Symbol))
3052           continue;
3053 
3054         // Create cold symbol, if missing
3055         auto ISym = Islands->ColdSymbols.find(Symbol);
3056         MCSymbol *ColdSymbol;
3057         if (ISym != Islands->ColdSymbols.end()) {
3058           ColdSymbol = ISym->second;
3059         } else {
3060           ColdSymbol = BC.Ctx->getOrCreateSymbol(Symbol->getName() + ".cold");
3061           Islands->ColdSymbols[Symbol] = ColdSymbol;
3062           // Check if this is a proxy island symbol and update owner proxy map
3063           if (Islands->ProxySymbols.count(Symbol)) {
3064             BinaryFunction *Owner = Islands->ProxySymbols[Symbol];
3065             auto IProxiedSym = Owner->Islands->Proxies[this].find(Symbol);
3066             Owner->Islands->ColdProxies[this][IProxiedSym->second] = ColdSymbol;
3067           }
3068         }
3069 
3070         // Update instruction reference
3071         Operand = MCOperand::createExpr(BC.MIB->getTargetExprFor(
3072             Inst,
3073             MCSymbolRefExpr::create(ColdSymbol, MCSymbolRefExpr::VK_None,
3074                                     *BC.Ctx),
3075             *BC.Ctx, 0));
3076         ++OpNum;
3077       }
3078     }
3079   }
3080 }
3081 
3082 #ifndef MAX_PATH
3083 #define MAX_PATH 255
3084 #endif
3085 
3086 static std::string constructFilename(std::string Filename,
3087                                      std::string Annotation,
3088                                      std::string Suffix) {
3089   std::replace(Filename.begin(), Filename.end(), '/', '-');
3090   if (!Annotation.empty())
3091     Annotation.insert(0, "-");
3092   if (Filename.size() + Annotation.size() + Suffix.size() > MAX_PATH) {
3093     assert(Suffix.size() + Annotation.size() <= MAX_PATH);
3094     Filename.resize(MAX_PATH - (Suffix.size() + Annotation.size()));
3095   }
3096   Filename += Annotation;
3097   Filename += Suffix;
3098   return Filename;
3099 }
3100 
3101 static std::string formatEscapes(const std::string &Str) {
3102   std::string Result;
3103   for (unsigned I = 0; I < Str.size(); ++I) {
3104     char C = Str[I];
3105     switch (C) {
3106     case '\n':
3107       Result += "&#13;";
3108       break;
3109     case '"':
3110       break;
3111     default:
3112       Result += C;
3113       break;
3114     }
3115   }
3116   return Result;
3117 }
3118 
3119 void BinaryFunction::dumpGraph(raw_ostream &OS) const {
3120   OS << "digraph \"" << getPrintName() << "\" {\n"
3121      << "node [fontname=courier, shape=box, style=filled, colorscheme=brbg9]\n";
3122   uint64_t Offset = Address;
3123   for (BinaryBasicBlock *BB : BasicBlocks) {
3124     auto LayoutPos = find(Layout.blocks(), BB);
3125     unsigned LayoutIndex = LayoutPos - Layout.block_begin();
3126     const char *ColdStr = BB->isCold() ? " (cold)" : "";
3127     std::vector<std::string> Attrs;
3128     // Bold box for entry points
3129     if (isEntryPoint(*BB))
3130       Attrs.push_back("penwidth=2");
3131     if (BLI && BLI->getLoopFor(BB)) {
3132       // Distinguish innermost loops
3133       const BinaryLoop *Loop = BLI->getLoopFor(BB);
3134       if (Loop->isInnermost())
3135         Attrs.push_back("fillcolor=6");
3136       else // some outer loop
3137         Attrs.push_back("fillcolor=4");
3138     } else { // non-loopy code
3139       Attrs.push_back("fillcolor=5");
3140     }
3141     ListSeparator LS;
3142     OS << "\"" << BB->getName() << "\" [";
3143     for (StringRef Attr : Attrs)
3144       OS << LS << Attr;
3145     OS << "]\n";
3146     OS << format("\"%s\" [label=\"%s%s\\n(C:%lu,O:%lu,I:%u,L:%u,CFI:%u)\\n",
3147                  BB->getName().data(), BB->getName().data(), ColdStr,
3148                  BB->getKnownExecutionCount(), BB->getOffset(), getIndex(BB),
3149                  LayoutIndex, BB->getCFIState());
3150 
3151     if (opts::DotToolTipCode) {
3152       std::string Str;
3153       raw_string_ostream CS(Str);
3154       Offset = BC.printInstructions(CS, BB->begin(), BB->end(), Offset, this,
3155                                     /* PrintMCInst = */ false,
3156                                     /* PrintMemData = */ false,
3157                                     /* PrintRelocations = */ false,
3158                                     /* Endl = */ R"(\\l)");
3159       OS << formatEscapes(CS.str()) << '\n';
3160     }
3161     OS << "\"]\n";
3162 
3163     // analyzeBranch is just used to get the names of the branch
3164     // opcodes.
3165     const MCSymbol *TBB = nullptr;
3166     const MCSymbol *FBB = nullptr;
3167     MCInst *CondBranch = nullptr;
3168     MCInst *UncondBranch = nullptr;
3169     const bool Success = BB->analyzeBranch(TBB, FBB, CondBranch, UncondBranch);
3170 
3171     const MCInst *LastInstr = BB->getLastNonPseudoInstr();
3172     const bool IsJumpTable = LastInstr && BC.MIB->getJumpTable(*LastInstr);
3173 
3174     auto BI = BB->branch_info_begin();
3175     for (BinaryBasicBlock *Succ : BB->successors()) {
3176       std::string Branch;
3177       if (Success) {
3178         if (Succ == BB->getConditionalSuccessor(true)) {
3179           Branch = CondBranch ? std::string(BC.InstPrinter->getOpcodeName(
3180                                     CondBranch->getOpcode()))
3181                               : "TB";
3182         } else if (Succ == BB->getConditionalSuccessor(false)) {
3183           Branch = UncondBranch ? std::string(BC.InstPrinter->getOpcodeName(
3184                                       UncondBranch->getOpcode()))
3185                                 : "FB";
3186         } else {
3187           Branch = "FT";
3188         }
3189       }
3190       if (IsJumpTable)
3191         Branch = "JT";
3192       OS << format("\"%s\" -> \"%s\" [label=\"%s", BB->getName().data(),
3193                    Succ->getName().data(), Branch.c_str());
3194 
3195       if (BB->getExecutionCount() != COUNT_NO_PROFILE &&
3196           BI->MispredictedCount != BinaryBasicBlock::COUNT_INFERRED) {
3197         OS << "\\n(C:" << BI->Count << ",M:" << BI->MispredictedCount << ")";
3198       } else if (ExecutionCount != COUNT_NO_PROFILE &&
3199                  BI->Count != BinaryBasicBlock::COUNT_NO_PROFILE) {
3200         OS << "\\n(IC:" << BI->Count << ")";
3201       }
3202       OS << "\"]\n";
3203 
3204       ++BI;
3205     }
3206     for (BinaryBasicBlock *LP : BB->landing_pads()) {
3207       OS << format("\"%s\" -> \"%s\" [constraint=false style=dashed]\n",
3208                    BB->getName().data(), LP->getName().data());
3209     }
3210   }
3211   OS << "}\n";
3212 }
3213 
3214 void BinaryFunction::viewGraph() const {
3215   SmallString<MAX_PATH> Filename;
3216   if (std::error_code EC =
3217           sys::fs::createTemporaryFile("bolt-cfg", "dot", Filename)) {
3218     BC.errs() << "BOLT-ERROR: " << EC.message() << ", unable to create "
3219               << " bolt-cfg-XXXXX.dot temporary file.\n";
3220     return;
3221   }
3222   dumpGraphToFile(std::string(Filename));
3223   if (DisplayGraph(Filename))
3224     BC.errs() << "BOLT-ERROR: Can't display " << Filename
3225               << " with graphviz.\n";
3226   if (std::error_code EC = sys::fs::remove(Filename)) {
3227     BC.errs() << "BOLT-WARNING: " << EC.message() << ", failed to remove "
3228               << Filename << "\n";
3229   }
3230 }
3231 
3232 void BinaryFunction::dumpGraphForPass(std::string Annotation) const {
3233   if (!opts::shouldPrint(*this))
3234     return;
3235 
3236   std::string Filename = constructFilename(getPrintName(), Annotation, ".dot");
3237   if (opts::Verbosity >= 1)
3238     BC.outs() << "BOLT-INFO: dumping CFG to " << Filename << "\n";
3239   dumpGraphToFile(Filename);
3240 }
3241 
3242 void BinaryFunction::dumpGraphToFile(std::string Filename) const {
3243   std::error_code EC;
3244   raw_fd_ostream of(Filename, EC, sys::fs::OF_None);
3245   if (EC) {
3246     if (opts::Verbosity >= 1) {
3247       BC.errs() << "BOLT-WARNING: " << EC.message() << ", unable to open "
3248                 << Filename << " for output.\n";
3249     }
3250     return;
3251   }
3252   dumpGraph(of);
3253 }
3254 
3255 bool BinaryFunction::validateCFG() const {
3256   // Skip the validation of CFG after it is finalized
3257   if (CurrentState == State::CFG_Finalized)
3258     return true;
3259 
3260   for (BinaryBasicBlock *BB : BasicBlocks)
3261     if (!BB->validateSuccessorInvariants())
3262       return false;
3263 
3264   // Make sure all blocks in CFG are valid.
3265   auto validateBlock = [this](const BinaryBasicBlock *BB, StringRef Desc) {
3266     if (!BB->isValid()) {
3267       BC.errs() << "BOLT-ERROR: deleted " << Desc << " " << BB->getName()
3268                 << " detected in:\n";
3269       this->dump();
3270       return false;
3271     }
3272     return true;
3273   };
3274   for (const BinaryBasicBlock *BB : BasicBlocks) {
3275     if (!validateBlock(BB, "block"))
3276       return false;
3277     for (const BinaryBasicBlock *PredBB : BB->predecessors())
3278       if (!validateBlock(PredBB, "predecessor"))
3279         return false;
3280     for (const BinaryBasicBlock *SuccBB : BB->successors())
3281       if (!validateBlock(SuccBB, "successor"))
3282         return false;
3283     for (const BinaryBasicBlock *LP : BB->landing_pads())
3284       if (!validateBlock(LP, "landing pad"))
3285         return false;
3286     for (const BinaryBasicBlock *Thrower : BB->throwers())
3287       if (!validateBlock(Thrower, "thrower"))
3288         return false;
3289   }
3290 
3291   for (const BinaryBasicBlock *BB : BasicBlocks) {
3292     std::unordered_set<const BinaryBasicBlock *> BBLandingPads;
3293     for (const BinaryBasicBlock *LP : BB->landing_pads()) {
3294       if (BBLandingPads.count(LP)) {
3295         BC.errs() << "BOLT-ERROR: duplicate landing pad detected in"
3296                   << BB->getName() << " in function " << *this << '\n';
3297         return false;
3298       }
3299       BBLandingPads.insert(LP);
3300     }
3301 
3302     std::unordered_set<const BinaryBasicBlock *> BBThrowers;
3303     for (const BinaryBasicBlock *Thrower : BB->throwers()) {
3304       if (BBThrowers.count(Thrower)) {
3305         BC.errs() << "BOLT-ERROR: duplicate thrower detected in"
3306                   << BB->getName() << " in function " << *this << '\n';
3307         return false;
3308       }
3309       BBThrowers.insert(Thrower);
3310     }
3311 
3312     for (const BinaryBasicBlock *LPBlock : BB->landing_pads()) {
3313       if (!llvm::is_contained(LPBlock->throwers(), BB)) {
3314         BC.errs() << "BOLT-ERROR: inconsistent landing pad detected in "
3315                   << *this << ": " << BB->getName()
3316                   << " is in LandingPads but not in " << LPBlock->getName()
3317                   << " Throwers\n";
3318         return false;
3319       }
3320     }
3321     for (const BinaryBasicBlock *Thrower : BB->throwers()) {
3322       if (!llvm::is_contained(Thrower->landing_pads(), BB)) {
3323         BC.errs() << "BOLT-ERROR: inconsistent thrower detected in " << *this
3324                   << ": " << BB->getName() << " is in Throwers list but not in "
3325                   << Thrower->getName() << " LandingPads\n";
3326         return false;
3327       }
3328     }
3329   }
3330 
3331   return true;
3332 }
3333 
3334 void BinaryFunction::fixBranches() {
3335   auto &MIB = BC.MIB;
3336   MCContext *Ctx = BC.Ctx.get();
3337 
3338   for (BinaryBasicBlock *BB : BasicBlocks) {
3339     const MCSymbol *TBB = nullptr;
3340     const MCSymbol *FBB = nullptr;
3341     MCInst *CondBranch = nullptr;
3342     MCInst *UncondBranch = nullptr;
3343     if (!BB->analyzeBranch(TBB, FBB, CondBranch, UncondBranch))
3344       continue;
3345 
3346     // We will create unconditional branch with correct destination if needed.
3347     if (UncondBranch)
3348       BB->eraseInstruction(BB->findInstruction(UncondBranch));
3349 
3350     // Basic block that follows the current one in the final layout.
3351     const BinaryBasicBlock *const NextBB =
3352         Layout.getBasicBlockAfter(BB, /*IgnoreSplits=*/false);
3353 
3354     if (BB->succ_size() == 1) {
3355       // __builtin_unreachable() could create a conditional branch that
3356       // falls-through into the next function - hence the block will have only
3357       // one valid successor. Since behaviour is undefined - we replace
3358       // the conditional branch with an unconditional if required.
3359       if (CondBranch)
3360         BB->eraseInstruction(BB->findInstruction(CondBranch));
3361       if (BB->getSuccessor() == NextBB)
3362         continue;
3363       BB->addBranchInstruction(BB->getSuccessor());
3364     } else if (BB->succ_size() == 2) {
3365       assert(CondBranch && "conditional branch expected");
3366       const BinaryBasicBlock *TSuccessor = BB->getConditionalSuccessor(true);
3367       const BinaryBasicBlock *FSuccessor = BB->getConditionalSuccessor(false);
3368 
3369       // Eliminate unnecessary conditional branch.
3370       if (TSuccessor == FSuccessor) {
3371         // FIXME: at the moment, we cannot safely remove static key branches.
3372         if (MIB->isDynamicBranch(*CondBranch)) {
3373           if (opts::Verbosity) {
3374             BC.outs()
3375                 << "BOLT-INFO: unable to remove redundant dynamic branch in "
3376                 << *this << '\n';
3377           }
3378           continue;
3379         }
3380 
3381         BB->removeDuplicateConditionalSuccessor(CondBranch);
3382         if (TSuccessor != NextBB)
3383           BB->addBranchInstruction(TSuccessor);
3384         continue;
3385       }
3386 
3387       // Reverse branch condition and swap successors.
3388       auto swapSuccessors = [&]() {
3389         if (!MIB->isReversibleBranch(*CondBranch)) {
3390           if (opts::Verbosity) {
3391             BC.outs() << "BOLT-INFO: unable to swap successors in " << *this
3392                       << '\n';
3393           }
3394           return false;
3395         }
3396         std::swap(TSuccessor, FSuccessor);
3397         BB->swapConditionalSuccessors();
3398         auto L = BC.scopeLock();
3399         MIB->reverseBranchCondition(*CondBranch, TSuccessor->getLabel(), Ctx);
3400         return true;
3401       };
3402 
3403       // Check whether the next block is a "taken" target and try to swap it
3404       // with a "fall-through" target.
3405       if (TSuccessor == NextBB && swapSuccessors())
3406         continue;
3407 
3408       // Update conditional branch destination if needed.
3409       if (MIB->getTargetSymbol(*CondBranch) != TSuccessor->getLabel()) {
3410         auto L = BC.scopeLock();
3411         MIB->replaceBranchTarget(*CondBranch, TSuccessor->getLabel(), Ctx);
3412       }
3413 
3414       // No need for the unconditional branch.
3415       if (FSuccessor == NextBB)
3416         continue;
3417 
3418       if (BC.isX86()) {
3419         // We are going to generate two branches. Check if their targets are in
3420         // the same fragment as this block. If only one target is in the same
3421         // fragment, make it the destination of the conditional branch. There
3422         // is a chance it will be a short branch which takes 4 bytes fewer than
3423         // a long conditional branch. For unconditional branch, the difference
3424         // is 3 bytes.
3425         if (BB->getFragmentNum() != TSuccessor->getFragmentNum() &&
3426             BB->getFragmentNum() == FSuccessor->getFragmentNum())
3427           swapSuccessors();
3428       }
3429 
3430       BB->addBranchInstruction(FSuccessor);
3431     }
3432     // Cases where the number of successors is 0 (block ends with a
3433     // terminator) or more than 2 (switch table) don't require branch
3434     // instruction adjustments.
3435   }
3436   assert((!isSimple() || validateCFG()) &&
3437          "Invalid CFG detected after fixing branches");
3438 }
3439 
3440 void BinaryFunction::propagateGnuArgsSizeInfo(
3441     MCPlusBuilder::AllocatorIdTy AllocId) {
3442   assert(CurrentState == State::Disassembled && "unexpected function state");
3443 
3444   if (!hasEHRanges() || !usesGnuArgsSize())
3445     return;
3446 
3447   // The current value of DW_CFA_GNU_args_size affects all following
3448   // invoke instructions until the next CFI overrides it.
3449   // It is important to iterate basic blocks in the original order when
3450   // assigning the value.
3451   uint64_t CurrentGnuArgsSize = 0;
3452   for (BinaryBasicBlock *BB : BasicBlocks) {
3453     for (auto II = BB->begin(); II != BB->end();) {
3454       MCInst &Instr = *II;
3455       if (BC.MIB->isCFI(Instr)) {
3456         const MCCFIInstruction *CFI = getCFIFor(Instr);
3457         if (CFI->getOperation() == MCCFIInstruction::OpGnuArgsSize) {
3458           CurrentGnuArgsSize = CFI->getOffset();
3459           // Delete DW_CFA_GNU_args_size instructions and only regenerate
3460           // during the final code emission. The information is embedded
3461           // inside call instructions.
3462           II = BB->erasePseudoInstruction(II);
3463           continue;
3464         }
3465       } else if (BC.MIB->isInvoke(Instr)) {
3466         // Add the value of GNU_args_size as an extra operand to invokes.
3467         BC.MIB->addGnuArgsSize(Instr, CurrentGnuArgsSize);
3468       }
3469       ++II;
3470     }
3471   }
3472 }
3473 
3474 void BinaryFunction::postProcessBranches() {
3475   if (!isSimple())
3476     return;
3477   for (BinaryBasicBlock &BB : blocks()) {
3478     auto LastInstrRI = BB.getLastNonPseudo();
3479     if (BB.succ_size() == 1) {
3480       if (LastInstrRI != BB.rend() &&
3481           BC.MIB->isConditionalBranch(*LastInstrRI)) {
3482         // __builtin_unreachable() could create a conditional branch that
3483         // falls-through into the next function - hence the block will have only
3484         // one valid successor. Such behaviour is undefined and thus we remove
3485         // the conditional branch while leaving a valid successor.
3486         BB.eraseInstruction(std::prev(LastInstrRI.base()));
3487         LLVM_DEBUG(dbgs() << "BOLT-DEBUG: erasing conditional branch in "
3488                           << BB.getName() << " in function " << *this << '\n');
3489       }
3490     } else if (BB.succ_size() == 0) {
3491       // Ignore unreachable basic blocks.
3492       if (BB.pred_size() == 0 || BB.isLandingPad())
3493         continue;
3494 
3495       // If it's the basic block that does not end up with a terminator - we
3496       // insert a return instruction unless it's a call instruction.
3497       if (LastInstrRI == BB.rend()) {
3498         LLVM_DEBUG(
3499             dbgs() << "BOLT-DEBUG: at least one instruction expected in BB "
3500                    << BB.getName() << " in function " << *this << '\n');
3501         continue;
3502       }
3503       if (!BC.MIB->isTerminator(*LastInstrRI) &&
3504           !BC.MIB->isCall(*LastInstrRI)) {
3505         LLVM_DEBUG(dbgs() << "BOLT-DEBUG: adding return to basic block "
3506                           << BB.getName() << " in function " << *this << '\n');
3507         MCInst ReturnInstr;
3508         BC.MIB->createReturn(ReturnInstr);
3509         BB.addInstruction(ReturnInstr);
3510       }
3511     }
3512   }
3513   assert(validateCFG() && "invalid CFG");
3514 }
3515 
3516 MCSymbol *BinaryFunction::addEntryPointAtOffset(uint64_t Offset) {
3517   assert(Offset && "cannot add primary entry point");
3518   assert(CurrentState == State::Empty || CurrentState == State::Disassembled);
3519 
3520   const uint64_t EntryPointAddress = getAddress() + Offset;
3521   MCSymbol *LocalSymbol = getOrCreateLocalLabel(EntryPointAddress);
3522 
3523   MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(LocalSymbol);
3524   if (EntrySymbol)
3525     return EntrySymbol;
3526 
3527   if (BinaryData *EntryBD = BC.getBinaryDataAtAddress(EntryPointAddress)) {
3528     EntrySymbol = EntryBD->getSymbol();
3529   } else {
3530     EntrySymbol = BC.getOrCreateGlobalSymbol(
3531         EntryPointAddress, Twine("__ENTRY_") + getOneName() + "@");
3532   }
3533   SecondaryEntryPoints[LocalSymbol] = EntrySymbol;
3534 
3535   BC.setSymbolToFunctionMap(EntrySymbol, this);
3536 
3537   return EntrySymbol;
3538 }
3539 
3540 MCSymbol *BinaryFunction::addEntryPoint(const BinaryBasicBlock &BB) {
3541   assert(CurrentState == State::CFG &&
3542          "basic block can be added as an entry only in a function with CFG");
3543 
3544   if (&BB == BasicBlocks.front())
3545     return getSymbol();
3546 
3547   MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(BB);
3548   if (EntrySymbol)
3549     return EntrySymbol;
3550 
3551   EntrySymbol =
3552       BC.Ctx->getOrCreateSymbol("__ENTRY_" + BB.getLabel()->getName());
3553 
3554   SecondaryEntryPoints[BB.getLabel()] = EntrySymbol;
3555 
3556   BC.setSymbolToFunctionMap(EntrySymbol, this);
3557 
3558   return EntrySymbol;
3559 }
3560 
3561 MCSymbol *BinaryFunction::getSymbolForEntryID(uint64_t EntryID) {
3562   if (EntryID == 0)
3563     return getSymbol();
3564 
3565   if (!isMultiEntry())
3566     return nullptr;
3567 
3568   uint64_t NumEntries = 1;
3569   if (hasCFG()) {
3570     for (BinaryBasicBlock *BB : BasicBlocks) {
3571       MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(*BB);
3572       if (!EntrySymbol)
3573         continue;
3574       if (NumEntries == EntryID)
3575         return EntrySymbol;
3576       ++NumEntries;
3577     }
3578   } else {
3579     for (std::pair<const uint32_t, MCSymbol *> &KV : Labels) {
3580       MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(KV.second);
3581       if (!EntrySymbol)
3582         continue;
3583       if (NumEntries == EntryID)
3584         return EntrySymbol;
3585       ++NumEntries;
3586     }
3587   }
3588 
3589   return nullptr;
3590 }
3591 
3592 uint64_t BinaryFunction::getEntryIDForSymbol(const MCSymbol *Symbol) const {
3593   if (!isMultiEntry())
3594     return 0;
3595 
3596   for (const MCSymbol *FunctionSymbol : getSymbols())
3597     if (FunctionSymbol == Symbol)
3598       return 0;
3599 
3600   // Check all secondary entries available as either basic blocks or lables.
3601   uint64_t NumEntries = 1;
3602   for (const BinaryBasicBlock *BB : BasicBlocks) {
3603     MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(*BB);
3604     if (!EntrySymbol)
3605       continue;
3606     if (EntrySymbol == Symbol)
3607       return NumEntries;
3608     ++NumEntries;
3609   }
3610   NumEntries = 1;
3611   for (const std::pair<const uint32_t, MCSymbol *> &KV : Labels) {
3612     MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(KV.second);
3613     if (!EntrySymbol)
3614       continue;
3615     if (EntrySymbol == Symbol)
3616       return NumEntries;
3617     ++NumEntries;
3618   }
3619 
3620   llvm_unreachable("symbol not found");
3621 }
3622 
3623 bool BinaryFunction::forEachEntryPoint(EntryPointCallbackTy Callback) const {
3624   bool Status = Callback(0, getSymbol());
3625   if (!isMultiEntry())
3626     return Status;
3627 
3628   for (const std::pair<const uint32_t, MCSymbol *> &KV : Labels) {
3629     if (!Status)
3630       break;
3631 
3632     MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(KV.second);
3633     if (!EntrySymbol)
3634       continue;
3635 
3636     Status = Callback(KV.first, EntrySymbol);
3637   }
3638 
3639   return Status;
3640 }
3641 
3642 BinaryFunction::BasicBlockListType BinaryFunction::dfs() const {
3643   BasicBlockListType DFS;
3644   std::stack<BinaryBasicBlock *> Stack;
3645   std::set<BinaryBasicBlock *> Visited;
3646 
3647   // Push entry points to the stack in reverse order.
3648   //
3649   // NB: we rely on the original order of entries to match.
3650   SmallVector<BinaryBasicBlock *> EntryPoints;
3651   llvm::copy_if(BasicBlocks, std::back_inserter(EntryPoints),
3652           [&](const BinaryBasicBlock *const BB) { return isEntryPoint(*BB); });
3653   // Sort entry points by their offset to make sure we got them in the right
3654   // order.
3655   llvm::stable_sort(EntryPoints, [](const BinaryBasicBlock *const A,
3656                               const BinaryBasicBlock *const B) {
3657     return A->getOffset() < B->getOffset();
3658   });
3659   for (BinaryBasicBlock *const BB : reverse(EntryPoints))
3660     Stack.push(BB);
3661 
3662   while (!Stack.empty()) {
3663     BinaryBasicBlock *BB = Stack.top();
3664     Stack.pop();
3665 
3666     if (Visited.find(BB) != Visited.end())
3667       continue;
3668     Visited.insert(BB);
3669     DFS.push_back(BB);
3670 
3671     for (BinaryBasicBlock *SuccBB : BB->landing_pads()) {
3672       Stack.push(SuccBB);
3673     }
3674 
3675     const MCSymbol *TBB = nullptr;
3676     const MCSymbol *FBB = nullptr;
3677     MCInst *CondBranch = nullptr;
3678     MCInst *UncondBranch = nullptr;
3679     if (BB->analyzeBranch(TBB, FBB, CondBranch, UncondBranch) && CondBranch &&
3680         BB->succ_size() == 2) {
3681       if (BC.MIB->getCanonicalBranchCondCode(BC.MIB->getCondCode(
3682               *CondBranch)) == BC.MIB->getCondCode(*CondBranch)) {
3683         Stack.push(BB->getConditionalSuccessor(true));
3684         Stack.push(BB->getConditionalSuccessor(false));
3685       } else {
3686         Stack.push(BB->getConditionalSuccessor(false));
3687         Stack.push(BB->getConditionalSuccessor(true));
3688       }
3689     } else {
3690       for (BinaryBasicBlock *SuccBB : BB->successors()) {
3691         Stack.push(SuccBB);
3692       }
3693     }
3694   }
3695 
3696   return DFS;
3697 }
3698 
3699 size_t BinaryFunction::computeHash(bool UseDFS, HashFunction HashFunction,
3700                                    OperandHashFuncTy OperandHashFunc) const {
3701   LLVM_DEBUG({
3702     dbgs() << "BOLT-DEBUG: computeHash " << getPrintName() << ' '
3703            << (UseDFS ? "dfs" : "bin") << " order "
3704            << (HashFunction == HashFunction::StdHash ? "std::hash" : "xxh3")
3705            << '\n';
3706   });
3707 
3708   if (size() == 0)
3709     return 0;
3710 
3711   assert(hasCFG() && "function is expected to have CFG");
3712 
3713   SmallVector<const BinaryBasicBlock *, 0> Order;
3714   if (UseDFS)
3715     llvm::copy(dfs(), std::back_inserter(Order));
3716   else
3717     llvm::copy(Layout.blocks(), std::back_inserter(Order));
3718 
3719   // The hash is computed by creating a string of all instruction opcodes and
3720   // possibly their operands and then hashing that string with std::hash.
3721   std::string HashString;
3722   for (const BinaryBasicBlock *BB : Order)
3723     HashString.append(hashBlock(BC, *BB, OperandHashFunc));
3724 
3725   switch (HashFunction) {
3726   case HashFunction::StdHash:
3727     return Hash = std::hash<std::string>{}(HashString);
3728   case HashFunction::XXH3:
3729     return Hash = llvm::xxh3_64bits(HashString);
3730   }
3731   llvm_unreachable("Unhandled HashFunction");
3732 }
3733 
3734 void BinaryFunction::insertBasicBlocks(
3735     BinaryBasicBlock *Start,
3736     std::vector<std::unique_ptr<BinaryBasicBlock>> &&NewBBs,
3737     const bool UpdateLayout, const bool UpdateCFIState,
3738     const bool RecomputeLandingPads) {
3739   const int64_t StartIndex = Start ? getIndex(Start) : -1LL;
3740   const size_t NumNewBlocks = NewBBs.size();
3741 
3742   BasicBlocks.insert(BasicBlocks.begin() + (StartIndex + 1), NumNewBlocks,
3743                      nullptr);
3744 
3745   int64_t I = StartIndex + 1;
3746   for (std::unique_ptr<BinaryBasicBlock> &BB : NewBBs) {
3747     assert(!BasicBlocks[I]);
3748     BasicBlocks[I++] = BB.release();
3749   }
3750 
3751   if (RecomputeLandingPads)
3752     recomputeLandingPads();
3753   else
3754     updateBBIndices(0);
3755 
3756   if (UpdateLayout)
3757     updateLayout(Start, NumNewBlocks);
3758 
3759   if (UpdateCFIState)
3760     updateCFIState(Start, NumNewBlocks);
3761 }
3762 
3763 BinaryFunction::iterator BinaryFunction::insertBasicBlocks(
3764     BinaryFunction::iterator StartBB,
3765     std::vector<std::unique_ptr<BinaryBasicBlock>> &&NewBBs,
3766     const bool UpdateLayout, const bool UpdateCFIState,
3767     const bool RecomputeLandingPads) {
3768   const unsigned StartIndex = getIndex(&*StartBB);
3769   const size_t NumNewBlocks = NewBBs.size();
3770 
3771   BasicBlocks.insert(BasicBlocks.begin() + StartIndex + 1, NumNewBlocks,
3772                      nullptr);
3773   auto RetIter = BasicBlocks.begin() + StartIndex + 1;
3774 
3775   unsigned I = StartIndex + 1;
3776   for (std::unique_ptr<BinaryBasicBlock> &BB : NewBBs) {
3777     assert(!BasicBlocks[I]);
3778     BasicBlocks[I++] = BB.release();
3779   }
3780 
3781   if (RecomputeLandingPads)
3782     recomputeLandingPads();
3783   else
3784     updateBBIndices(0);
3785 
3786   if (UpdateLayout)
3787     updateLayout(*std::prev(RetIter), NumNewBlocks);
3788 
3789   if (UpdateCFIState)
3790     updateCFIState(*std::prev(RetIter), NumNewBlocks);
3791 
3792   return RetIter;
3793 }
3794 
3795 void BinaryFunction::updateBBIndices(const unsigned StartIndex) {
3796   for (unsigned I = StartIndex; I < BasicBlocks.size(); ++I)
3797     BasicBlocks[I]->Index = I;
3798 }
3799 
3800 void BinaryFunction::updateCFIState(BinaryBasicBlock *Start,
3801                                     const unsigned NumNewBlocks) {
3802   const int32_t CFIState = Start->getCFIStateAtExit();
3803   const unsigned StartIndex = getIndex(Start) + 1;
3804   for (unsigned I = 0; I < NumNewBlocks; ++I)
3805     BasicBlocks[StartIndex + I]->setCFIState(CFIState);
3806 }
3807 
3808 void BinaryFunction::updateLayout(BinaryBasicBlock *Start,
3809                                   const unsigned NumNewBlocks) {
3810   BasicBlockListType::iterator Begin;
3811   BasicBlockListType::iterator End;
3812 
3813   // If start not provided copy new blocks from the beginning of BasicBlocks
3814   if (!Start) {
3815     Begin = BasicBlocks.begin();
3816     End = BasicBlocks.begin() + NumNewBlocks;
3817   } else {
3818     unsigned StartIndex = getIndex(Start);
3819     Begin = std::next(BasicBlocks.begin(), StartIndex + 1);
3820     End = std::next(BasicBlocks.begin(), StartIndex + NumNewBlocks + 1);
3821   }
3822 
3823   // Insert new blocks in the layout immediately after Start.
3824   Layout.insertBasicBlocks(Start, {Begin, End});
3825   Layout.updateLayoutIndices();
3826 }
3827 
3828 bool BinaryFunction::checkForAmbiguousJumpTables() {
3829   SmallSet<uint64_t, 4> JumpTables;
3830   for (BinaryBasicBlock *&BB : BasicBlocks) {
3831     for (MCInst &Inst : *BB) {
3832       if (!BC.MIB->isIndirectBranch(Inst))
3833         continue;
3834       uint64_t JTAddress = BC.MIB->getJumpTable(Inst);
3835       if (!JTAddress)
3836         continue;
3837       // This address can be inside another jump table, but we only consider
3838       // it ambiguous when the same start address is used, not the same JT
3839       // object.
3840       if (!JumpTables.count(JTAddress)) {
3841         JumpTables.insert(JTAddress);
3842         continue;
3843       }
3844       return true;
3845     }
3846   }
3847   return false;
3848 }
3849 
3850 void BinaryFunction::disambiguateJumpTables(
3851     MCPlusBuilder::AllocatorIdTy AllocId) {
3852   assert((opts::JumpTables != JTS_BASIC && isSimple()) || !BC.HasRelocations);
3853   SmallPtrSet<JumpTable *, 4> JumpTables;
3854   for (BinaryBasicBlock *&BB : BasicBlocks) {
3855     for (MCInst &Inst : *BB) {
3856       if (!BC.MIB->isIndirectBranch(Inst))
3857         continue;
3858       JumpTable *JT = getJumpTable(Inst);
3859       if (!JT)
3860         continue;
3861       auto Iter = JumpTables.find(JT);
3862       if (Iter == JumpTables.end()) {
3863         JumpTables.insert(JT);
3864         continue;
3865       }
3866       // This instruction is an indirect jump using a jump table, but it is
3867       // using the same jump table of another jump. Try all our tricks to
3868       // extract the jump table symbol and make it point to a new, duplicated JT
3869       MCPhysReg BaseReg1;
3870       uint64_t Scale;
3871       const MCSymbol *Target;
3872       // In case we match if our first matcher, first instruction is the one to
3873       // patch
3874       MCInst *JTLoadInst = &Inst;
3875       // Try a standard indirect jump matcher, scale 8
3876       std::unique_ptr<MCPlusBuilder::MCInstMatcher> IndJmpMatcher =
3877           BC.MIB->matchIndJmp(BC.MIB->matchReg(BaseReg1),
3878                               BC.MIB->matchImm(Scale), BC.MIB->matchReg(),
3879                               /*Offset=*/BC.MIB->matchSymbol(Target));
3880       if (!IndJmpMatcher->match(
3881               *BC.MRI, *BC.MIB,
3882               MutableArrayRef<MCInst>(&*BB->begin(), &Inst + 1), -1) ||
3883           BaseReg1 != BC.MIB->getNoRegister() || Scale != 8) {
3884         MCPhysReg BaseReg2;
3885         uint64_t Offset;
3886         // Standard JT matching failed. Trying now:
3887         //     movq  "jt.2397/1"(,%rax,8), %rax
3888         //     jmpq  *%rax
3889         std::unique_ptr<MCPlusBuilder::MCInstMatcher> LoadMatcherOwner =
3890             BC.MIB->matchLoad(BC.MIB->matchReg(BaseReg1),
3891                               BC.MIB->matchImm(Scale), BC.MIB->matchReg(),
3892                               /*Offset=*/BC.MIB->matchSymbol(Target));
3893         MCPlusBuilder::MCInstMatcher *LoadMatcher = LoadMatcherOwner.get();
3894         std::unique_ptr<MCPlusBuilder::MCInstMatcher> IndJmpMatcher2 =
3895             BC.MIB->matchIndJmp(std::move(LoadMatcherOwner));
3896         if (!IndJmpMatcher2->match(
3897                 *BC.MRI, *BC.MIB,
3898                 MutableArrayRef<MCInst>(&*BB->begin(), &Inst + 1), -1) ||
3899             BaseReg1 != BC.MIB->getNoRegister() || Scale != 8) {
3900           // JT matching failed. Trying now:
3901           // PIC-style matcher, scale 4
3902           //    addq    %rdx, %rsi
3903           //    addq    %rdx, %rdi
3904           //    leaq    DATAat0x402450(%rip), %r11
3905           //    movslq  (%r11,%rdx,4), %rcx
3906           //    addq    %r11, %rcx
3907           //    jmpq    *%rcx # JUMPTABLE @0x402450
3908           std::unique_ptr<MCPlusBuilder::MCInstMatcher> PICIndJmpMatcher =
3909               BC.MIB->matchIndJmp(BC.MIB->matchAdd(
3910                   BC.MIB->matchReg(BaseReg1),
3911                   BC.MIB->matchLoad(BC.MIB->matchReg(BaseReg2),
3912                                     BC.MIB->matchImm(Scale), BC.MIB->matchReg(),
3913                                     BC.MIB->matchImm(Offset))));
3914           std::unique_ptr<MCPlusBuilder::MCInstMatcher> LEAMatcherOwner =
3915               BC.MIB->matchLoadAddr(BC.MIB->matchSymbol(Target));
3916           MCPlusBuilder::MCInstMatcher *LEAMatcher = LEAMatcherOwner.get();
3917           std::unique_ptr<MCPlusBuilder::MCInstMatcher> PICBaseAddrMatcher =
3918               BC.MIB->matchIndJmp(BC.MIB->matchAdd(std::move(LEAMatcherOwner),
3919                                                    BC.MIB->matchAnyOperand()));
3920           if (!PICIndJmpMatcher->match(
3921                   *BC.MRI, *BC.MIB,
3922                   MutableArrayRef<MCInst>(&*BB->begin(), &Inst + 1), -1) ||
3923               Scale != 4 || BaseReg1 != BaseReg2 || Offset != 0 ||
3924               !PICBaseAddrMatcher->match(
3925                   *BC.MRI, *BC.MIB,
3926                   MutableArrayRef<MCInst>(&*BB->begin(), &Inst + 1), -1)) {
3927             llvm_unreachable("Failed to extract jump table base");
3928             continue;
3929           }
3930           // Matched PIC, identify the instruction with the reference to the JT
3931           JTLoadInst = LEAMatcher->CurInst;
3932         } else {
3933           // Matched non-PIC
3934           JTLoadInst = LoadMatcher->CurInst;
3935         }
3936       }
3937 
3938       uint64_t NewJumpTableID = 0;
3939       const MCSymbol *NewJTLabel;
3940       std::tie(NewJumpTableID, NewJTLabel) =
3941           BC.duplicateJumpTable(*this, JT, Target);
3942       {
3943         auto L = BC.scopeLock();
3944         BC.MIB->replaceMemOperandDisp(*JTLoadInst, NewJTLabel, BC.Ctx.get());
3945       }
3946       // We use a unique ID with the high bit set as address for this "injected"
3947       // jump table (not originally in the input binary).
3948       BC.MIB->setJumpTable(Inst, NewJumpTableID, 0, AllocId);
3949     }
3950   }
3951 }
3952 
3953 bool BinaryFunction::replaceJumpTableEntryIn(BinaryBasicBlock *BB,
3954                                              BinaryBasicBlock *OldDest,
3955                                              BinaryBasicBlock *NewDest) {
3956   MCInst *Instr = BB->getLastNonPseudoInstr();
3957   if (!Instr || !BC.MIB->isIndirectBranch(*Instr))
3958     return false;
3959   uint64_t JTAddress = BC.MIB->getJumpTable(*Instr);
3960   assert(JTAddress && "Invalid jump table address");
3961   JumpTable *JT = getJumpTableContainingAddress(JTAddress);
3962   assert(JT && "No jump table structure for this indirect branch");
3963   bool Patched = JT->replaceDestination(JTAddress, OldDest->getLabel(),
3964                                         NewDest->getLabel());
3965   (void)Patched;
3966   assert(Patched && "Invalid entry to be replaced in jump table");
3967   return true;
3968 }
3969 
3970 BinaryBasicBlock *BinaryFunction::splitEdge(BinaryBasicBlock *From,
3971                                             BinaryBasicBlock *To) {
3972   // Create intermediate BB
3973   MCSymbol *Tmp;
3974   {
3975     auto L = BC.scopeLock();
3976     Tmp = BC.Ctx->createNamedTempSymbol("SplitEdge");
3977   }
3978   // Link new BBs to the original input offset of the From BB, so we can map
3979   // samples recorded in new BBs back to the original BB seem in the input
3980   // binary (if using BAT)
3981   std::unique_ptr<BinaryBasicBlock> NewBB = createBasicBlock(Tmp);
3982   NewBB->setOffset(From->getInputOffset());
3983   BinaryBasicBlock *NewBBPtr = NewBB.get();
3984 
3985   // Update "From" BB
3986   auto I = From->succ_begin();
3987   auto BI = From->branch_info_begin();
3988   for (; I != From->succ_end(); ++I) {
3989     if (*I == To)
3990       break;
3991     ++BI;
3992   }
3993   assert(I != From->succ_end() && "Invalid CFG edge in splitEdge!");
3994   uint64_t OrigCount = BI->Count;
3995   uint64_t OrigMispreds = BI->MispredictedCount;
3996   replaceJumpTableEntryIn(From, To, NewBBPtr);
3997   From->replaceSuccessor(To, NewBBPtr, OrigCount, OrigMispreds);
3998 
3999   NewBB->addSuccessor(To, OrigCount, OrigMispreds);
4000   NewBB->setExecutionCount(OrigCount);
4001   NewBB->setIsCold(From->isCold());
4002 
4003   // Update CFI and BB layout with new intermediate BB
4004   std::vector<std::unique_ptr<BinaryBasicBlock>> NewBBs;
4005   NewBBs.emplace_back(std::move(NewBB));
4006   insertBasicBlocks(From, std::move(NewBBs), true, true,
4007                     /*RecomputeLandingPads=*/false);
4008   return NewBBPtr;
4009 }
4010 
4011 void BinaryFunction::deleteConservativeEdges() {
4012   // Our goal is to aggressively remove edges from the CFG that we believe are
4013   // wrong. This is used for instrumentation, where it is safe to remove
4014   // fallthrough edges because we won't reorder blocks.
4015   for (auto I = BasicBlocks.begin(), E = BasicBlocks.end(); I != E; ++I) {
4016     BinaryBasicBlock *BB = *I;
4017     if (BB->succ_size() != 1 || BB->size() == 0)
4018       continue;
4019 
4020     auto NextBB = std::next(I);
4021     MCInst *Last = BB->getLastNonPseudoInstr();
4022     // Fallthrough is a landing pad? Delete this edge (as long as we don't
4023     // have a direct jump to it)
4024     if ((*BB->succ_begin())->isLandingPad() && NextBB != E &&
4025         *BB->succ_begin() == *NextBB && Last && !BC.MIB->isBranch(*Last)) {
4026       BB->removeAllSuccessors();
4027       continue;
4028     }
4029 
4030     // Look for suspicious calls at the end of BB where gcc may optimize it and
4031     // remove the jump to the epilogue when it knows the call won't return.
4032     if (!Last || !BC.MIB->isCall(*Last))
4033       continue;
4034 
4035     const MCSymbol *CalleeSymbol = BC.MIB->getTargetSymbol(*Last);
4036     if (!CalleeSymbol)
4037       continue;
4038 
4039     StringRef CalleeName = CalleeSymbol->getName();
4040     if (CalleeName != "__cxa_throw@PLT" && CalleeName != "_Unwind_Resume@PLT" &&
4041         CalleeName != "__cxa_rethrow@PLT" && CalleeName != "exit@PLT" &&
4042         CalleeName != "abort@PLT")
4043       continue;
4044 
4045     BB->removeAllSuccessors();
4046   }
4047 }
4048 
4049 bool BinaryFunction::isSymbolValidInScope(const SymbolRef &Symbol,
4050                                           uint64_t SymbolSize) const {
4051   // If this symbol is in a different section from the one where the
4052   // function symbol is, don't consider it as valid.
4053   if (!getOriginSection()->containsAddress(
4054           cantFail(Symbol.getAddress(), "cannot get symbol address")))
4055     return false;
4056 
4057   // Some symbols are tolerated inside function bodies, others are not.
4058   // The real function boundaries may not be known at this point.
4059   if (BC.isMarker(Symbol))
4060     return true;
4061 
4062   // It's okay to have a zero-sized symbol in the middle of non-zero-sized
4063   // function.
4064   if (SymbolSize == 0 && containsAddress(cantFail(Symbol.getAddress())))
4065     return true;
4066 
4067   if (cantFail(Symbol.getType()) != SymbolRef::ST_Unknown)
4068     return false;
4069 
4070   if (cantFail(Symbol.getFlags()) & SymbolRef::SF_Global)
4071     return false;
4072 
4073   return true;
4074 }
4075 
4076 void BinaryFunction::adjustExecutionCount(uint64_t Count) {
4077   if (getKnownExecutionCount() == 0 || Count == 0)
4078     return;
4079 
4080   if (ExecutionCount < Count)
4081     Count = ExecutionCount;
4082 
4083   double AdjustmentRatio = ((double)ExecutionCount - Count) / ExecutionCount;
4084   if (AdjustmentRatio < 0.0)
4085     AdjustmentRatio = 0.0;
4086 
4087   for (BinaryBasicBlock &BB : blocks())
4088     BB.adjustExecutionCount(AdjustmentRatio);
4089 
4090   ExecutionCount -= Count;
4091 }
4092 
4093 BinaryFunction::~BinaryFunction() {
4094   for (BinaryBasicBlock *BB : BasicBlocks)
4095     delete BB;
4096   for (BinaryBasicBlock *BB : DeletedBasicBlocks)
4097     delete BB;
4098 }
4099 
4100 void BinaryFunction::constructDomTree() {
4101   BDT.reset(new BinaryDominatorTree);
4102   BDT->recalculate(*this);
4103 }
4104 
4105 void BinaryFunction::calculateLoopInfo() {
4106   if (!hasDomTree())
4107     constructDomTree();
4108   // Discover loops.
4109   BLI.reset(new BinaryLoopInfo());
4110   BLI->analyze(getDomTree());
4111 
4112   // Traverse discovered loops and add depth and profile information.
4113   std::stack<BinaryLoop *> St;
4114   for (auto I = BLI->begin(), E = BLI->end(); I != E; ++I) {
4115     St.push(*I);
4116     ++BLI->OuterLoops;
4117   }
4118 
4119   while (!St.empty()) {
4120     BinaryLoop *L = St.top();
4121     St.pop();
4122     ++BLI->TotalLoops;
4123     BLI->MaximumDepth = std::max(L->getLoopDepth(), BLI->MaximumDepth);
4124 
4125     // Add nested loops in the stack.
4126     for (BinaryLoop::iterator I = L->begin(), E = L->end(); I != E; ++I)
4127       St.push(*I);
4128 
4129     // Skip if no valid profile is found.
4130     if (!hasValidProfile()) {
4131       L->EntryCount = COUNT_NO_PROFILE;
4132       L->ExitCount = COUNT_NO_PROFILE;
4133       L->TotalBackEdgeCount = COUNT_NO_PROFILE;
4134       continue;
4135     }
4136 
4137     // Compute back edge count.
4138     SmallVector<BinaryBasicBlock *, 1> Latches;
4139     L->getLoopLatches(Latches);
4140 
4141     for (BinaryBasicBlock *Latch : Latches) {
4142       auto BI = Latch->branch_info_begin();
4143       for (BinaryBasicBlock *Succ : Latch->successors()) {
4144         if (Succ == L->getHeader()) {
4145           assert(BI->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
4146                  "profile data not found");
4147           L->TotalBackEdgeCount += BI->Count;
4148         }
4149         ++BI;
4150       }
4151     }
4152 
4153     // Compute entry count.
4154     L->EntryCount = L->getHeader()->getExecutionCount() - L->TotalBackEdgeCount;
4155 
4156     // Compute exit count.
4157     SmallVector<BinaryLoop::Edge, 1> ExitEdges;
4158     L->getExitEdges(ExitEdges);
4159     for (BinaryLoop::Edge &Exit : ExitEdges) {
4160       const BinaryBasicBlock *Exiting = Exit.first;
4161       const BinaryBasicBlock *ExitTarget = Exit.second;
4162       auto BI = Exiting->branch_info_begin();
4163       for (BinaryBasicBlock *Succ : Exiting->successors()) {
4164         if (Succ == ExitTarget) {
4165           assert(BI->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
4166                  "profile data not found");
4167           L->ExitCount += BI->Count;
4168         }
4169         ++BI;
4170       }
4171     }
4172   }
4173 }
4174 
4175 void BinaryFunction::updateOutputValues(const BOLTLinker &Linker) {
4176   if (!isEmitted()) {
4177     assert(!isInjected() && "injected function should be emitted");
4178     setOutputAddress(getAddress());
4179     setOutputSize(getSize());
4180     return;
4181   }
4182 
4183   const auto SymbolInfo = Linker.lookupSymbolInfo(getSymbol()->getName());
4184   assert(SymbolInfo && "Cannot find function entry symbol");
4185   setOutputAddress(SymbolInfo->Address);
4186   setOutputSize(SymbolInfo->Size);
4187 
4188   if (BC.HasRelocations || isInjected()) {
4189     if (hasConstantIsland()) {
4190       const auto DataAddress =
4191           Linker.lookupSymbol(getFunctionConstantIslandLabel()->getName());
4192       assert(DataAddress && "Cannot find function CI symbol");
4193       setOutputDataAddress(*DataAddress);
4194       for (auto It : Islands->Offsets) {
4195         const uint64_t OldOffset = It.first;
4196         BinaryData *BD = BC.getBinaryDataAtAddress(getAddress() + OldOffset);
4197         if (!BD)
4198           continue;
4199 
4200         MCSymbol *Symbol = It.second;
4201         const auto NewAddress = Linker.lookupSymbol(Symbol->getName());
4202         assert(NewAddress && "Cannot find CI symbol");
4203         auto &Section = *getCodeSection();
4204         const auto NewOffset = *NewAddress - Section.getOutputAddress();
4205         BD->setOutputLocation(Section, NewOffset);
4206       }
4207     }
4208     if (isSplit()) {
4209       for (FunctionFragment &FF : getLayout().getSplitFragments()) {
4210         ErrorOr<BinarySection &> ColdSection =
4211             getCodeSection(FF.getFragmentNum());
4212         // If fragment is empty, cold section might not exist
4213         if (FF.empty() && ColdSection.getError())
4214           continue;
4215 
4216         const MCSymbol *ColdStartSymbol = getSymbol(FF.getFragmentNum());
4217         // If fragment is empty, symbol might have not been emitted
4218         if (FF.empty() && (!ColdStartSymbol || !ColdStartSymbol->isDefined()) &&
4219             !hasConstantIsland())
4220           continue;
4221         assert(ColdStartSymbol && ColdStartSymbol->isDefined() &&
4222                "split function should have defined cold symbol");
4223         const auto ColdStartSymbolInfo =
4224             Linker.lookupSymbolInfo(ColdStartSymbol->getName());
4225         assert(ColdStartSymbolInfo && "Cannot find cold start symbol");
4226         FF.setAddress(ColdStartSymbolInfo->Address);
4227         FF.setImageSize(ColdStartSymbolInfo->Size);
4228         if (hasConstantIsland()) {
4229           const auto DataAddress = Linker.lookupSymbol(
4230               getFunctionColdConstantIslandLabel()->getName());
4231           assert(DataAddress && "Cannot find cold CI symbol");
4232           setOutputColdDataAddress(*DataAddress);
4233         }
4234       }
4235     }
4236   }
4237 
4238   // Update basic block output ranges for the debug info, if we have
4239   // secondary entry points in the symbol table to update or if writing BAT.
4240   if (!requiresAddressMap())
4241     return;
4242 
4243   // AArch64 may have functions that only contains a constant island (no code).
4244   if (getLayout().block_empty())
4245     return;
4246 
4247   for (FunctionFragment &FF : getLayout().fragments()) {
4248     if (FF.empty())
4249       continue;
4250 
4251     const uint64_t FragmentBaseAddress =
4252         getCodeSection(isSimple() ? FF.getFragmentNum() : FragmentNum::main())
4253             ->getOutputAddress();
4254 
4255     BinaryBasicBlock *PrevBB = nullptr;
4256     for (BinaryBasicBlock *const BB : FF) {
4257       assert(BB->getLabel()->isDefined() && "symbol should be defined");
4258       if (!BC.HasRelocations) {
4259         if (BB->isSplit())
4260           assert(FragmentBaseAddress == FF.getAddress());
4261         else
4262           assert(FragmentBaseAddress == getOutputAddress());
4263         (void)FragmentBaseAddress;
4264       }
4265 
4266       // Injected functions likely will fail lookup, as they have no
4267       // input range. Just assign the BB the output address of the
4268       // function.
4269       auto MaybeBBAddress = BC.getIOAddressMap().lookup(BB->getLabel());
4270       const uint64_t BBAddress = MaybeBBAddress  ? *MaybeBBAddress
4271                                  : BB->isSplit() ? FF.getAddress()
4272                                                  : getOutputAddress();
4273       BB->setOutputStartAddress(BBAddress);
4274 
4275       if (PrevBB) {
4276         assert(PrevBB->getOutputAddressRange().first <= BBAddress &&
4277                "Bad output address for basic block.");
4278         assert((PrevBB->getOutputAddressRange().first != BBAddress ||
4279                 !hasInstructions() || !PrevBB->getNumNonPseudos()) &&
4280                "Bad output address for basic block.");
4281         PrevBB->setOutputEndAddress(BBAddress);
4282       }
4283       PrevBB = BB;
4284     }
4285 
4286     PrevBB->setOutputEndAddress(PrevBB->isSplit()
4287                                     ? FF.getAddress() + FF.getImageSize()
4288                                     : getOutputAddress() + getOutputSize());
4289   }
4290 
4291   // Reset output addresses for deleted blocks.
4292   for (BinaryBasicBlock *BB : DeletedBasicBlocks) {
4293     BB->setOutputStartAddress(0);
4294     BB->setOutputEndAddress(0);
4295   }
4296 }
4297 
4298 DebugAddressRangesVector BinaryFunction::getOutputAddressRanges() const {
4299   DebugAddressRangesVector OutputRanges;
4300 
4301   if (isFolded())
4302     return OutputRanges;
4303 
4304   if (IsFragment)
4305     return OutputRanges;
4306 
4307   OutputRanges.emplace_back(getOutputAddress(),
4308                             getOutputAddress() + getOutputSize());
4309   if (isSplit()) {
4310     assert(isEmitted() && "split function should be emitted");
4311     for (const FunctionFragment &FF : getLayout().getSplitFragments())
4312       OutputRanges.emplace_back(FF.getAddress(),
4313                                 FF.getAddress() + FF.getImageSize());
4314   }
4315 
4316   if (isSimple())
4317     return OutputRanges;
4318 
4319   for (BinaryFunction *Frag : Fragments) {
4320     assert(!Frag->isSimple() &&
4321            "fragment of non-simple function should also be non-simple");
4322     OutputRanges.emplace_back(Frag->getOutputAddress(),
4323                               Frag->getOutputAddress() + Frag->getOutputSize());
4324   }
4325 
4326   return OutputRanges;
4327 }
4328 
4329 uint64_t BinaryFunction::translateInputToOutputAddress(uint64_t Address) const {
4330   if (isFolded())
4331     return 0;
4332 
4333   // If the function hasn't changed return the same address.
4334   if (!isEmitted())
4335     return Address;
4336 
4337   if (Address < getAddress())
4338     return 0;
4339 
4340   // Check if the address is associated with an instruction that is tracked
4341   // by address translation.
4342   if (auto OutputAddress = BC.getIOAddressMap().lookup(Address))
4343     return *OutputAddress;
4344 
4345   // FIXME: #18950828 - we rely on relative offsets inside basic blocks to stay
4346   //        intact. Instead we can use pseudo instructions and/or annotations.
4347   const uint64_t Offset = Address - getAddress();
4348   const BinaryBasicBlock *BB = getBasicBlockContainingOffset(Offset);
4349   if (!BB) {
4350     // Special case for address immediately past the end of the function.
4351     if (Offset == getSize())
4352       return getOutputAddress() + getOutputSize();
4353 
4354     return 0;
4355   }
4356 
4357   return std::min(BB->getOutputAddressRange().first + Offset - BB->getOffset(),
4358                   BB->getOutputAddressRange().second);
4359 }
4360 
4361 DebugAddressRangesVector
4362 BinaryFunction::translateInputToOutputRange(DebugAddressRange InRange) const {
4363   DebugAddressRangesVector OutRanges;
4364 
4365   // The function was removed from the output. Return an empty range.
4366   if (isFolded())
4367     return OutRanges;
4368 
4369   // If the function hasn't changed return the same range.
4370   if (!isEmitted()) {
4371     OutRanges.emplace_back(InRange);
4372     return OutRanges;
4373   }
4374 
4375   if (!containsAddress(InRange.LowPC))
4376     return OutRanges;
4377 
4378   // Special case of an empty range [X, X). Some tools expect X to be updated.
4379   if (InRange.LowPC == InRange.HighPC) {
4380     if (uint64_t NewPC = translateInputToOutputAddress(InRange.LowPC))
4381       OutRanges.push_back(DebugAddressRange{NewPC, NewPC});
4382     return OutRanges;
4383   }
4384 
4385   uint64_t InputOffset = InRange.LowPC - getAddress();
4386   const uint64_t InputEndOffset =
4387       std::min(InRange.HighPC - getAddress(), getSize());
4388 
4389   auto BBI = llvm::upper_bound(BasicBlockOffsets,
4390                                BasicBlockOffset(InputOffset, nullptr),
4391                                CompareBasicBlockOffsets());
4392   assert(BBI != BasicBlockOffsets.begin());
4393 
4394   // Iterate over blocks in the input order using BasicBlockOffsets.
4395   for (--BBI; InputOffset < InputEndOffset && BBI != BasicBlockOffsets.end();
4396        InputOffset = BBI->second->getEndOffset(), ++BBI) {
4397     const BinaryBasicBlock &BB = *BBI->second;
4398     if (InputOffset < BB.getOffset() || InputOffset >= BB.getEndOffset()) {
4399       LLVM_DEBUG(
4400           dbgs() << "BOLT-DEBUG: invalid debug address range detected for "
4401                  << *this << " : [0x" << Twine::utohexstr(InRange.LowPC)
4402                  << ", 0x" << Twine::utohexstr(InRange.HighPC) << "]\n");
4403       break;
4404     }
4405 
4406     // Skip the block if it wasn't emitted.
4407     if (!BB.getOutputAddressRange().first)
4408       continue;
4409 
4410     // Find output address for an instruction with an offset greater or equal
4411     // to /p Offset. The output address should fall within the same basic
4412     // block boundaries.
4413     auto translateBlockOffset = [&](const uint64_t Offset) {
4414       const uint64_t OutAddress = BB.getOutputAddressRange().first + Offset;
4415       return std::min(OutAddress, BB.getOutputAddressRange().second);
4416     };
4417 
4418     uint64_t OutLowPC = BB.getOutputAddressRange().first;
4419     if (InputOffset > BB.getOffset())
4420       OutLowPC = translateBlockOffset(InputOffset - BB.getOffset());
4421 
4422     uint64_t OutHighPC = BB.getOutputAddressRange().second;
4423     if (InputEndOffset < BB.getEndOffset()) {
4424       assert(InputEndOffset >= BB.getOffset());
4425       OutHighPC = translateBlockOffset(InputEndOffset - BB.getOffset());
4426     }
4427 
4428     // Check if we can expand the last translated range.
4429     if (!OutRanges.empty() && OutRanges.back().HighPC == OutLowPC)
4430       OutRanges.back().HighPC = std::max(OutRanges.back().HighPC, OutHighPC);
4431     else
4432       OutRanges.emplace_back(OutLowPC, std::max(OutLowPC, OutHighPC));
4433   }
4434 
4435   LLVM_DEBUG({
4436     dbgs() << "BOLT-DEBUG: translated address range " << InRange << " -> ";
4437     for (const DebugAddressRange &R : OutRanges)
4438       dbgs() << R << ' ';
4439     dbgs() << '\n';
4440   });
4441 
4442   return OutRanges;
4443 }
4444 
4445 MCInst *BinaryFunction::getInstructionAtOffset(uint64_t Offset) {
4446   if (CurrentState == State::Disassembled) {
4447     auto II = Instructions.find(Offset);
4448     return (II == Instructions.end()) ? nullptr : &II->second;
4449   } else if (CurrentState == State::CFG) {
4450     BinaryBasicBlock *BB = getBasicBlockContainingOffset(Offset);
4451     if (!BB)
4452       return nullptr;
4453 
4454     for (MCInst &Inst : *BB) {
4455       constexpr uint32_t InvalidOffset = std::numeric_limits<uint32_t>::max();
4456       if (Offset == BC.MIB->getOffsetWithDefault(Inst, InvalidOffset))
4457         return &Inst;
4458     }
4459 
4460     if (MCInst *LastInstr = BB->getLastNonPseudoInstr()) {
4461       if (std::optional<uint32_t> Size = BC.MIB->getSize(*LastInstr)) {
4462         if (BB->getEndOffset() - Offset == Size) {
4463           return LastInstr;
4464         }
4465       }
4466     }
4467 
4468     return nullptr;
4469   } else {
4470     llvm_unreachable("invalid CFG state to use getInstructionAtOffset()");
4471   }
4472 }
4473 
4474 void BinaryFunction::printLoopInfo(raw_ostream &OS) const {
4475   if (!opts::shouldPrint(*this))
4476     return;
4477 
4478   OS << "Loop Info for Function \"" << *this << "\"";
4479   if (hasValidProfile())
4480     OS << " (count: " << getExecutionCount() << ")";
4481   OS << "\n";
4482 
4483   std::stack<BinaryLoop *> St;
4484   for (BinaryLoop *L : *BLI)
4485     St.push(L);
4486   while (!St.empty()) {
4487     BinaryLoop *L = St.top();
4488     St.pop();
4489 
4490     for (BinaryLoop *Inner : *L)
4491       St.push(Inner);
4492 
4493     if (!hasValidProfile())
4494       continue;
4495 
4496     OS << (L->getLoopDepth() > 1 ? "Nested" : "Outer")
4497        << " loop header: " << L->getHeader()->getName();
4498     OS << "\n";
4499     OS << "Loop basic blocks: ";
4500     ListSeparator LS;
4501     for (BinaryBasicBlock *BB : L->blocks())
4502       OS << LS << BB->getName();
4503     OS << "\n";
4504     if (hasValidProfile()) {
4505       OS << "Total back edge count: " << L->TotalBackEdgeCount << "\n";
4506       OS << "Loop entry count: " << L->EntryCount << "\n";
4507       OS << "Loop exit count: " << L->ExitCount << "\n";
4508       if (L->EntryCount > 0) {
4509         OS << "Average iters per entry: "
4510            << format("%.4lf", (double)L->TotalBackEdgeCount / L->EntryCount)
4511            << "\n";
4512       }
4513     }
4514     OS << "----\n";
4515   }
4516 
4517   OS << "Total number of loops: " << BLI->TotalLoops << "\n";
4518   OS << "Number of outer loops: " << BLI->OuterLoops << "\n";
4519   OS << "Maximum nested loop depth: " << BLI->MaximumDepth << "\n\n";
4520 }
4521 
4522 bool BinaryFunction::isAArch64Veneer() const {
4523   if (empty() || hasIslandsInfo())
4524     return false;
4525 
4526   BinaryBasicBlock &BB = **BasicBlocks.begin();
4527   for (MCInst &Inst : BB)
4528     if (!BC.MIB->hasAnnotation(Inst, "AArch64Veneer"))
4529       return false;
4530 
4531   for (auto I = BasicBlocks.begin() + 1, E = BasicBlocks.end(); I != E; ++I) {
4532     for (MCInst &Inst : **I)
4533       if (!BC.MIB->isNoop(Inst))
4534         return false;
4535   }
4536 
4537   return true;
4538 }
4539 
4540 void BinaryFunction::addRelocation(uint64_t Address, MCSymbol *Symbol,
4541                                    uint64_t RelType, uint64_t Addend,
4542                                    uint64_t Value) {
4543   assert(Address >= getAddress() && Address < getAddress() + getMaxSize() &&
4544          "address is outside of the function");
4545   uint64_t Offset = Address - getAddress();
4546   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: addRelocation in "
4547                     << formatv("{0}@{1:x} against {2}\n", *this, Offset,
4548                                (Symbol ? Symbol->getName() : "<undef>")));
4549   bool IsCI = BC.isAArch64() && isInConstantIsland(Address);
4550   std::map<uint64_t, Relocation> &Rels =
4551       IsCI ? Islands->Relocations : Relocations;
4552   if (BC.MIB->shouldRecordCodeRelocation(RelType))
4553     Rels[Offset] = Relocation{Offset, Symbol, RelType, Addend, Value};
4554 }
4555 
4556 } // namespace bolt
4557 } // namespace llvm
4558