xref: /llvm-project/bolt/lib/Core/BinaryFunction.cpp (revision db29f20fdd4f715553f663f21021330cb4497e00)
1 //===- bolt/Core/BinaryFunction.cpp - Low-level function ------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the BinaryFunction class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "bolt/Core/BinaryFunction.h"
14 #include "bolt/Core/BinaryBasicBlock.h"
15 #include "bolt/Core/DynoStats.h"
16 #include "bolt/Core/HashUtilities.h"
17 #include "bolt/Core/MCPlusBuilder.h"
18 #include "bolt/Utils/NameResolver.h"
19 #include "bolt/Utils/NameShortener.h"
20 #include "bolt/Utils/Utils.h"
21 #include "llvm/ADT/STLExtras.h"
22 #include "llvm/ADT/SmallSet.h"
23 #include "llvm/ADT/StringExtras.h"
24 #include "llvm/ADT/StringRef.h"
25 #include "llvm/Demangle/Demangle.h"
26 #include "llvm/MC/MCAsmInfo.h"
27 #include "llvm/MC/MCContext.h"
28 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
29 #include "llvm/MC/MCExpr.h"
30 #include "llvm/MC/MCInst.h"
31 #include "llvm/MC/MCInstPrinter.h"
32 #include "llvm/MC/MCRegisterInfo.h"
33 #include "llvm/MC/MCSymbol.h"
34 #include "llvm/Object/ObjectFile.h"
35 #include "llvm/Support/CommandLine.h"
36 #include "llvm/Support/Debug.h"
37 #include "llvm/Support/GenericDomTreeConstruction.h"
38 #include "llvm/Support/GenericLoopInfoImpl.h"
39 #include "llvm/Support/GraphWriter.h"
40 #include "llvm/Support/LEB128.h"
41 #include "llvm/Support/Regex.h"
42 #include "llvm/Support/Timer.h"
43 #include "llvm/Support/raw_ostream.h"
44 #include "llvm/Support/xxhash.h"
45 #include <functional>
46 #include <limits>
47 #include <numeric>
48 #include <string>
49 
50 #define DEBUG_TYPE "bolt"
51 
52 using namespace llvm;
53 using namespace bolt;
54 
55 namespace opts {
56 
57 extern cl::OptionCategory BoltCategory;
58 extern cl::OptionCategory BoltOptCategory;
59 
60 extern cl::opt<bool> EnableBAT;
61 extern cl::opt<bool> Instrument;
62 extern cl::opt<bool> StrictMode;
63 extern cl::opt<bool> UpdateDebugSections;
64 extern cl::opt<unsigned> Verbosity;
65 
66 extern bool processAllFunctions();
67 
68 cl::opt<bool> CheckEncoding(
69     "check-encoding",
70     cl::desc("perform verification of LLVM instruction encoding/decoding. "
71              "Every instruction in the input is decoded and re-encoded. "
72              "If the resulting bytes do not match the input, a warning message "
73              "is printed."),
74     cl::Hidden, cl::cat(BoltCategory));
75 
76 static cl::opt<bool> DotToolTipCode(
77     "dot-tooltip-code",
78     cl::desc("add basic block instructions as tool tips on nodes"), cl::Hidden,
79     cl::cat(BoltCategory));
80 
81 cl::opt<JumpTableSupportLevel>
82 JumpTables("jump-tables",
83   cl::desc("jump tables support (default=basic)"),
84   cl::init(JTS_BASIC),
85   cl::values(
86       clEnumValN(JTS_NONE, "none",
87                  "do not optimize functions with jump tables"),
88       clEnumValN(JTS_BASIC, "basic",
89                  "optimize functions with jump tables"),
90       clEnumValN(JTS_MOVE, "move",
91                  "move jump tables to a separate section"),
92       clEnumValN(JTS_SPLIT, "split",
93                  "split jump tables section into hot and cold based on "
94                  "function execution frequency"),
95       clEnumValN(JTS_AGGRESSIVE, "aggressive",
96                  "aggressively split jump tables section based on usage "
97                  "of the tables")),
98   cl::ZeroOrMore,
99   cl::cat(BoltOptCategory));
100 
101 static cl::opt<bool> NoScan(
102     "no-scan",
103     cl::desc(
104         "do not scan cold functions for external references (may result in "
105         "a slower binary)"),
106     cl::Hidden, cl::cat(BoltOptCategory));
107 
108 cl::opt<bool>
109     PreserveBlocksAlignment("preserve-blocks-alignment",
110                             cl::desc("try to preserve basic block alignment"),
111                             cl::cat(BoltOptCategory));
112 
113 static cl::opt<bool> PrintOutputAddressRange(
114     "print-output-address-range",
115     cl::desc(
116         "print output address range for each basic block in the function "
117         "when BinaryFunction::print is called"),
118     cl::Hidden, cl::cat(BoltOptCategory));
119 
120 cl::opt<bool>
121 PrintDynoStats("dyno-stats",
122   cl::desc("print execution info based on profile"),
123   cl::cat(BoltCategory));
124 
125 static cl::opt<bool>
126 PrintDynoStatsOnly("print-dyno-stats-only",
127   cl::desc("while printing functions, output dyno-stats and skip instructions"),
128   cl::init(false),
129   cl::Hidden,
130   cl::cat(BoltCategory));
131 
132 static cl::list<std::string>
133 PrintOnly("print-only",
134   cl::CommaSeparated,
135   cl::desc("list of functions to print"),
136   cl::value_desc("func1,func2,func3,..."),
137   cl::Hidden,
138   cl::cat(BoltCategory));
139 
140 cl::opt<bool>
141     TimeBuild("time-build",
142               cl::desc("print time spent constructing binary functions"),
143               cl::Hidden, cl::cat(BoltCategory));
144 
145 cl::opt<bool>
146 TrapOnAVX512("trap-avx512",
147   cl::desc("in relocation mode trap upon entry to any function that uses "
148             "AVX-512 instructions"),
149   cl::init(false),
150   cl::ZeroOrMore,
151   cl::Hidden,
152   cl::cat(BoltCategory));
153 
154 bool shouldPrint(const BinaryFunction &Function) {
155   if (Function.isIgnored())
156     return false;
157 
158   if (PrintOnly.empty())
159     return true;
160 
161   for (std::string &Name : opts::PrintOnly) {
162     if (Function.hasNameRegex(Name)) {
163       return true;
164     }
165   }
166 
167   return false;
168 }
169 
170 } // namespace opts
171 
172 namespace llvm {
173 namespace bolt {
174 
175 template <typename R> static bool emptyRange(const R &Range) {
176   return Range.begin() == Range.end();
177 }
178 
179 /// Gets debug line information for the instruction located at the given
180 /// address in the original binary. The SMLoc's pointer is used
181 /// to point to this information, which is represented by a
182 /// DebugLineTableRowRef. The returned pointer is null if no debug line
183 /// information for this instruction was found.
184 static SMLoc findDebugLineInformationForInstructionAt(
185     uint64_t Address, DWARFUnit *Unit,
186     const DWARFDebugLine::LineTable *LineTable) {
187   // We use the pointer in SMLoc to store an instance of DebugLineTableRowRef,
188   // which occupies 64 bits. Thus, we can only proceed if the struct fits into
189   // the pointer itself.
190   static_assert(
191       sizeof(decltype(SMLoc().getPointer())) >= sizeof(DebugLineTableRowRef),
192       "Cannot fit instruction debug line information into SMLoc's pointer");
193 
194   SMLoc NullResult = DebugLineTableRowRef::NULL_ROW.toSMLoc();
195   uint32_t RowIndex = LineTable->lookupAddress(
196       {Address, object::SectionedAddress::UndefSection});
197   if (RowIndex == LineTable->UnknownRowIndex)
198     return NullResult;
199 
200   assert(RowIndex < LineTable->Rows.size() &&
201          "Line Table lookup returned invalid index.");
202 
203   decltype(SMLoc().getPointer()) Ptr;
204   DebugLineTableRowRef *InstructionLocation =
205       reinterpret_cast<DebugLineTableRowRef *>(&Ptr);
206 
207   InstructionLocation->DwCompileUnitIndex = Unit->getOffset();
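  // Note: the row index is stored 1-based so that a zero value can still
  // represent an invalid row (NULL_ROW).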
208   InstructionLocation->RowIndex = RowIndex + 1;
209 
210   return SMLoc::getFromPointer(Ptr);
211 }
212 
213 static std::string buildSectionName(StringRef Prefix, StringRef Name,
214                                     const BinaryContext &BC) {
215   if (BC.isELF())
216     return (Prefix + Name).str();
217   static NameShortener NS;
218   return (Prefix + Twine(NS.getID(Name))).str();
219 }
220 
221 static raw_ostream &operator<<(raw_ostream &OS,
222                                const BinaryFunction::State State) {
223   switch (State) {
224   case BinaryFunction::State::Empty:         OS << "empty"; break;
225   case BinaryFunction::State::Disassembled:  OS << "disassembled"; break;
226   case BinaryFunction::State::CFG:           OS << "CFG constructed"; break;
227   case BinaryFunction::State::CFG_Finalized: OS << "CFG finalized"; break;
228   case BinaryFunction::State::EmittedCFG:    OS << "emitted with CFG"; break;
229   case BinaryFunction::State::Emitted:       OS << "emitted"; break;
230   }
231 
232   return OS;
233 }
234 
235 std::string BinaryFunction::buildCodeSectionName(StringRef Name,
236                                                  const BinaryContext &BC) {
237   return buildSectionName(BC.isELF() ? ".local.text." : ".l.text.", Name, BC);
238 }
239 
240 std::string BinaryFunction::buildColdCodeSectionName(StringRef Name,
241                                                      const BinaryContext &BC) {
242   return buildSectionName(BC.isELF() ? ".local.cold.text." : ".l.c.text.", Name,
243                           BC);
244 }
245 
246 uint64_t BinaryFunction::Count = 0;
247 
248 std::optional<StringRef>
249 BinaryFunction::hasNameRegex(const StringRef Name) const {
250   const std::string RegexName = (Twine("^") + StringRef(Name) + "$").str();
251   Regex MatchName(RegexName);
252   return forEachName(
253       [&MatchName](StringRef Name) { return MatchName.match(Name); });
254 }
255 
256 std::optional<StringRef>
257 BinaryFunction::hasRestoredNameRegex(const StringRef Name) const {
258   const std::string RegexName = (Twine("^") + StringRef(Name) + "$").str();
259   Regex MatchName(RegexName);
260   return forEachName([&MatchName](StringRef Name) {
261     return MatchName.match(NameResolver::restore(Name));
262   });
263 }
264 
265 std::string BinaryFunction::getDemangledName() const {
266   StringRef MangledName = NameResolver::restore(getOneName());
267   return demangle(MangledName.str());
268 }
269 
270 BinaryBasicBlock *
271 BinaryFunction::getBasicBlockContainingOffset(uint64_t Offset) {
272   if (Offset > Size)
273     return nullptr;
274 
275   if (BasicBlockOffsets.empty())
276     return nullptr;
277 
278   /*
279    * This is commented out because it makes BOLT too slow.
280    * assert(std::is_sorted(BasicBlockOffsets.begin(),
281    *                       BasicBlockOffsets.end(),
282    *                       CompareBasicBlockOffsets()));
283    */
284   auto I =
285       llvm::upper_bound(BasicBlockOffsets, BasicBlockOffset(Offset, nullptr),
286                         CompareBasicBlockOffsets());
287   assert(I != BasicBlockOffsets.begin() && "first basic block not at offset 0");
288   --I;
289   BinaryBasicBlock *BB = I->second;
290   return (Offset < BB->getOffset() + BB->getOriginalSize()) ? BB : nullptr;
291 }
292 
293 void BinaryFunction::markUnreachableBlocks() {
294   std::stack<BinaryBasicBlock *> Stack;
295 
296   for (BinaryBasicBlock &BB : blocks())
297     BB.markValid(false);
298 
299   // Add all entries and landing pads as roots.
300   for (BinaryBasicBlock *BB : BasicBlocks) {
301     if (isEntryPoint(*BB) || BB->isLandingPad()) {
302       Stack.push(BB);
303       BB->markValid(true);
304       continue;
305     }
306     // FIXME:
307     // Also mark BBs with indirect jumps as reachable, since we do not
308     // support removing unused jump tables yet (GH-issue20).
309     for (const MCInst &Inst : *BB) {
310       if (BC.MIB->getJumpTable(Inst)) {
311         Stack.push(BB);
312         BB->markValid(true);
313         break;
314       }
315     }
316   }
317 
318   // Determine reachable BBs from the entry point
319   while (!Stack.empty()) {
320     BinaryBasicBlock *BB = Stack.top();
321     Stack.pop();
322     for (BinaryBasicBlock *Succ : BB->successors()) {
323       if (Succ->isValid())
324         continue;
325       Succ->markValid(true);
326       Stack.push(Succ);
327     }
328   }
329 }
330 
331 // Any unnecessary fallthrough jumps revealed after calling eraseInvalidBBs
332 // will be cleaned up by fixBranches().
333 std::pair<unsigned, uint64_t>
334 BinaryFunction::eraseInvalidBBs(const MCCodeEmitter *Emitter) {
335   DenseSet<const BinaryBasicBlock *> InvalidBBs;
336   unsigned Count = 0;
337   uint64_t Bytes = 0;
338   for (BinaryBasicBlock *const BB : BasicBlocks) {
339     if (!BB->isValid()) {
340       assert(!isEntryPoint(*BB) && "all entry blocks must be valid");
341       InvalidBBs.insert(BB);
342       ++Count;
343       Bytes += BC.computeCodeSize(BB->begin(), BB->end(), Emitter);
344     }
345   }
346 
347   Layout.eraseBasicBlocks(InvalidBBs);
348 
349   BasicBlockListType NewBasicBlocks;
350   for (auto I = BasicBlocks.begin(), E = BasicBlocks.end(); I != E; ++I) {
351     BinaryBasicBlock *BB = *I;
352     if (InvalidBBs.contains(BB)) {
353       // Make sure the block is removed from its successors' predecessor lists.
354       BB->removeAllSuccessors();
355       DeletedBasicBlocks.push_back(BB);
356     } else {
357       NewBasicBlocks.push_back(BB);
358     }
359   }
360   BasicBlocks = std::move(NewBasicBlocks);
361 
362   assert(BasicBlocks.size() == Layout.block_size());
363 
364   // Update CFG state if needed
365   if (Count > 0)
366     recomputeLandingPads();
367 
368   return std::make_pair(Count, Bytes);
369 }
370 
371 bool BinaryFunction::isForwardCall(const MCSymbol *CalleeSymbol) const {
372   // This function should work properly before and after function reordering.
373   // In order to accomplish this, we use the function index (if it is valid).
374   // If the function indices are not valid, we fall back to the original
375   // addresses.  This should be ok because the functions without valid indices
376   // should have been ordered with a stable sort.
377   const BinaryFunction *CalleeBF = BC.getFunctionForSymbol(CalleeSymbol);
378   if (CalleeBF) {
379     if (CalleeBF->isInjected())
380       return true;
381 
382     if (hasValidIndex() && CalleeBF->hasValidIndex()) {
383       return getIndex() < CalleeBF->getIndex();
384     } else if (hasValidIndex() && !CalleeBF->hasValidIndex()) {
385       return true;
386     } else if (!hasValidIndex() && CalleeBF->hasValidIndex()) {
387       return false;
388     } else {
389       return getAddress() < CalleeBF->getAddress();
390     }
391   } else {
392     // Absolute symbol.
393     ErrorOr<uint64_t> CalleeAddressOrError = BC.getSymbolValue(*CalleeSymbol);
394     assert(CalleeAddressOrError && "unregistered symbol found");
395     return *CalleeAddressOrError > getAddress();
396   }
397 }
398 
399 void BinaryFunction::dump() const {
400   // getDynoStats calls FunctionLayout::updateLayoutIndices and
401   // BasicBlock::analyzeBranch. The former cannot be const but should be
402   // removed; the latter should be made const but seems to require refactoring.
403   // Forcing all callers to have a non-const reference to BinaryFunction just
404   // to call dump() is not ideal either. Adding this const_cast is currently
405   // the best solution. It is safe because BinaryFunction itself is not
406   // modified; only BinaryBasicBlocks are actually modified (if at all), and
407   // we have mutable pointers to those regardless of whether this function is
408   // const-qualified or not.
409   const_cast<BinaryFunction &>(*this).print(dbgs(), "");
410 }
411 
412 void BinaryFunction::print(raw_ostream &OS, std::string Annotation) {
413   if (!opts::shouldPrint(*this))
414     return;
415 
416   StringRef SectionName =
417       OriginSection ? OriginSection->getName() : "<no origin section>";
418   OS << "Binary Function \"" << *this << "\" " << Annotation << " {";
419   std::vector<StringRef> AllNames = getNames();
420   if (AllNames.size() > 1) {
421     OS << "\n  All names   : ";
422     const char *Sep = "";
423     for (const StringRef &Name : AllNames) {
424       OS << Sep << Name;
425       Sep = "\n                ";
426     }
427   }
428   OS << "\n  Number      : " << FunctionNumber;
429   OS << "\n  State       : " << CurrentState;
430   OS << "\n  Address     : 0x" << Twine::utohexstr(Address);
431   OS << "\n  Size        : 0x" << Twine::utohexstr(Size);
432   OS << "\n  MaxSize     : 0x" << Twine::utohexstr(MaxSize);
433   OS << "\n  Offset      : 0x" << Twine::utohexstr(getFileOffset());
434   OS << "\n  Section     : " << SectionName;
435   OS << "\n  Orc Section : " << getCodeSectionName();
436   OS << "\n  LSDA        : 0x" << Twine::utohexstr(getLSDAAddress());
437   OS << "\n  IsSimple    : " << IsSimple;
438   OS << "\n  IsMultiEntry: " << isMultiEntry();
439   OS << "\n  IsSplit     : " << isSplit();
440   OS << "\n  BB Count    : " << size();
441 
442   if (HasUnknownControlFlow)
443     OS << "\n  Unknown CF  : true";
444   if (getPersonalityFunction())
445     OS << "\n  Personality : " << getPersonalityFunction()->getName();
446   if (IsFragment)
447     OS << "\n  IsFragment  : true";
448   if (isFolded())
449     OS << "\n  FoldedInto  : " << *getFoldedIntoFunction();
450   for (BinaryFunction *ParentFragment : ParentFragments)
451     OS << "\n  Parent      : " << *ParentFragment;
452   if (!Fragments.empty()) {
453     OS << "\n  Fragments   : ";
454     ListSeparator LS;
455     for (BinaryFunction *Frag : Fragments)
456       OS << LS << *Frag;
457   }
458   if (hasCFG())
459     OS << "\n  Hash        : " << Twine::utohexstr(computeHash());
460   if (isMultiEntry()) {
461     OS << "\n  Secondary Entry Points : ";
462     ListSeparator LS;
463     for (const auto &KV : SecondaryEntryPoints)
464       OS << LS << KV.second->getName();
465   }
466   if (FrameInstructions.size())
467     OS << "\n  CFI Instrs  : " << FrameInstructions.size();
468   if (!Layout.block_empty()) {
469     OS << "\n  BB Layout   : ";
470     ListSeparator LS;
471     for (const BinaryBasicBlock *BB : Layout.blocks())
472       OS << LS << BB->getName();
473   }
474   if (getImageAddress())
475     OS << "\n  Image       : 0x" << Twine::utohexstr(getImageAddress());
476   if (ExecutionCount != COUNT_NO_PROFILE) {
477     OS << "\n  Exec Count  : " << ExecutionCount;
478     OS << "\n  Branch Count: " << RawBranchCount;
479     OS << "\n  Profile Acc : " << format("%.1f%%", ProfileMatchRatio * 100.0f);
480   }
481 
482   if (opts::PrintDynoStats && !getLayout().block_empty()) {
483     OS << '\n';
484     DynoStats dynoStats = getDynoStats(*this);
485     OS << dynoStats;
486   }
487 
488   OS << "\n}\n";
489 
490   if (opts::PrintDynoStatsOnly || !BC.InstPrinter)
491     return;
492 
493   // Offset of the instruction in function.
494   uint64_t Offset = 0;
495 
496   if (BasicBlocks.empty() && !Instructions.empty()) {
497     // Print before CFG was built.
498     for (const std::pair<const uint32_t, MCInst> &II : Instructions) {
499       Offset = II.first;
500 
501       // Print label if exists at this offset.
502       auto LI = Labels.find(Offset);
503       if (LI != Labels.end()) {
504         if (const MCSymbol *EntrySymbol =
505                 getSecondaryEntryPointSymbol(LI->second))
506           OS << EntrySymbol->getName() << " (Entry Point):\n";
507         OS << LI->second->getName() << ":\n";
508       }
509 
510       BC.printInstruction(OS, II.second, Offset, this);
511     }
512   }
513 
514   StringRef SplitPointMsg = "";
515   for (const FunctionFragment &FF : Layout.fragments()) {
516     OS << SplitPointMsg;
517     SplitPointMsg = "-------   HOT-COLD SPLIT POINT   -------\n\n";
518     for (const BinaryBasicBlock *BB : FF) {
519       OS << BB->getName() << " (" << BB->size()
520          << " instructions, align : " << BB->getAlignment() << ")\n";
521 
522       if (opts::PrintOutputAddressRange)
523         OS << formatv("  Output Address Range: [{0:x}, {1:x}) ({2} bytes)\n",
524                       BB->getOutputAddressRange().first,
525                       BB->getOutputAddressRange().second, BB->getOutputSize());
526 
527       if (isEntryPoint(*BB)) {
528         if (MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(*BB))
529           OS << "  Secondary Entry Point: " << EntrySymbol->getName() << '\n';
530         else
531           OS << "  Entry Point\n";
532       }
533 
534       if (BB->isLandingPad())
535         OS << "  Landing Pad\n";
536 
537       uint64_t BBExecCount = BB->getExecutionCount();
538       if (hasValidProfile()) {
539         OS << "  Exec Count : ";
540         if (BB->getExecutionCount() != BinaryBasicBlock::COUNT_NO_PROFILE)
541           OS << BBExecCount << '\n';
542         else
543           OS << "<unknown>\n";
544       }
545       if (BB->getCFIState() >= 0)
546         OS << "  CFI State : " << BB->getCFIState() << '\n';
547       if (opts::EnableBAT) {
548         OS << "  Input offset: 0x" << Twine::utohexstr(BB->getInputOffset())
549            << "\n";
550       }
551       if (!BB->pred_empty()) {
552         OS << "  Predecessors: ";
553         ListSeparator LS;
554         for (BinaryBasicBlock *Pred : BB->predecessors())
555           OS << LS << Pred->getName();
556         OS << '\n';
557       }
558       if (!BB->throw_empty()) {
559         OS << "  Throwers: ";
560         ListSeparator LS;
561         for (BinaryBasicBlock *Throw : BB->throwers())
562           OS << LS << Throw->getName();
563         OS << '\n';
564       }
565 
566       Offset = alignTo(Offset, BB->getAlignment());
567 
568       // Note: offsets are imprecise since this is happening prior to
569       // relaxation.
570       Offset = BC.printInstructions(OS, BB->begin(), BB->end(), Offset, this);
571 
572       if (!BB->succ_empty()) {
573         OS << "  Successors: ";
574         // For more than 2 successors, sort them based on frequency.
575         std::vector<uint64_t> Indices(BB->succ_size());
576         std::iota(Indices.begin(), Indices.end(), 0);
577         if (BB->succ_size() > 2 && BB->getKnownExecutionCount()) {
578           llvm::stable_sort(Indices, [&](const uint64_t A, const uint64_t B) {
579             return BB->BranchInfo[B] < BB->BranchInfo[A];
580           });
581         }
582         ListSeparator LS;
583         for (unsigned I = 0; I < Indices.size(); ++I) {
584           BinaryBasicBlock *Succ = BB->Successors[Indices[I]];
585           const BinaryBasicBlock::BinaryBranchInfo &BI =
586               BB->BranchInfo[Indices[I]];
587           OS << LS << Succ->getName();
588           if (ExecutionCount != COUNT_NO_PROFILE &&
589               BI.MispredictedCount != BinaryBasicBlock::COUNT_INFERRED) {
590             OS << " (mispreds: " << BI.MispredictedCount
591                << ", count: " << BI.Count << ")";
592           } else if (ExecutionCount != COUNT_NO_PROFILE &&
593                      BI.Count != BinaryBasicBlock::COUNT_NO_PROFILE) {
594             OS << " (inferred count: " << BI.Count << ")";
595           }
596         }
597         OS << '\n';
598       }
599 
600       if (!BB->lp_empty()) {
601         OS << "  Landing Pads: ";
602         ListSeparator LS;
603         for (BinaryBasicBlock *LP : BB->landing_pads()) {
604           OS << LS << LP->getName();
605           if (ExecutionCount != COUNT_NO_PROFILE) {
606             OS << " (count: " << LP->getExecutionCount() << ")";
607           }
608         }
609         OS << '\n';
610       }
611 
612       // In CFG_Finalized state we can miscalculate CFI state at exit.
613       if (CurrentState == State::CFG) {
614         const int32_t CFIStateAtExit = BB->getCFIStateAtExit();
615         if (CFIStateAtExit >= 0)
616           OS << "  CFI State: " << CFIStateAtExit << '\n';
617       }
618 
619       OS << '\n';
620     }
621   }
622 
623   // Dump new exception ranges for the function.
624   if (!CallSites.empty()) {
625     OS << "EH table:\n";
626     for (const FunctionFragment &FF : getLayout().fragments()) {
627       for (const auto &FCSI : getCallSites(FF.getFragmentNum())) {
628         const CallSite &CSI = FCSI.second;
629         OS << "  [" << *CSI.Start << ", " << *CSI.End << ") landing pad : ";
630         if (CSI.LP)
631           OS << *CSI.LP;
632         else
633           OS << "0";
634         OS << ", action : " << CSI.Action << '\n';
635       }
636     }
637     OS << '\n';
638   }
639 
640   // Print all jump tables.
641   for (const std::pair<const uint64_t, JumpTable *> &JTI : JumpTables)
642     JTI.second->print(OS);
643 
644   OS << "DWARF CFI Instructions:\n";
645   if (OffsetToCFI.size()) {
646     // Pre-buildCFG information
647     for (const std::pair<const uint32_t, uint32_t> &Elmt : OffsetToCFI) {
648       OS << format("    %08x:\t", Elmt.first);
649       assert(Elmt.second < FrameInstructions.size() && "Incorrect CFI offset");
650       BinaryContext::printCFI(OS, FrameInstructions[Elmt.second]);
651       OS << "\n";
652     }
653   } else {
654     // Post-buildCFG information
655     for (uint32_t I = 0, E = FrameInstructions.size(); I != E; ++I) {
656       const MCCFIInstruction &CFI = FrameInstructions[I];
657       OS << format("    %d:\t", I);
658       BinaryContext::printCFI(OS, CFI);
659       OS << "\n";
660     }
661   }
662   if (FrameInstructions.empty())
663     OS << "    <empty>\n";
664 
665   OS << "End of Function \"" << *this << "\"\n\n";
666 }
667 
668 void BinaryFunction::printRelocations(raw_ostream &OS, uint64_t Offset,
669                                       uint64_t Size) const {
670   const char *Sep = " # Relocs: ";
671 
672   auto RI = Relocations.lower_bound(Offset);
673   while (RI != Relocations.end() && RI->first < Offset + Size) {
674     OS << Sep << "(R: " << RI->second << ")";
675     Sep = ", ";
676     ++RI;
677   }
678 }
679 
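/// A DW_CFA_expression or DW_CFA_val_expression CFI escape consists of the
/// opcode byte, a ULEB128-encoded register number, and the DWARF expression
/// bytes. Rebuild the escape blob with the register number replaced by
/// \p NewReg.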
680 static std::string mutateDWARFExpressionTargetReg(const MCCFIInstruction &Instr,
681                                                   MCPhysReg NewReg) {
682   StringRef ExprBytes = Instr.getValues();
683   assert(ExprBytes.size() > 1 && "DWARF expression CFI is too short");
684   uint8_t Opcode = ExprBytes[0];
685   assert((Opcode == dwarf::DW_CFA_expression ||
686           Opcode == dwarf::DW_CFA_val_expression) &&
687          "invalid DWARF expression CFI");
688   (void)Opcode;
689   const uint8_t *const Start =
690       reinterpret_cast<const uint8_t *>(ExprBytes.drop_front(1).data());
691   const uint8_t *const End =
692       reinterpret_cast<const uint8_t *>(Start + ExprBytes.size() - 1);
693   unsigned Size = 0;
694   decodeULEB128(Start, &Size, End);
695   assert(Size > 0 && "Invalid reg encoding for DWARF expression CFI");
696   SmallString<8> Tmp;
697   raw_svector_ostream OSE(Tmp);
698   encodeULEB128(NewReg, OSE);
699   return Twine(ExprBytes.slice(0, 1))
700       .concat(OSE.str())
701       .concat(ExprBytes.drop_front(1 + Size))
702       .str();
703 }
704 
705 void BinaryFunction::mutateCFIRegisterFor(const MCInst &Instr,
706                                           MCPhysReg NewReg) {
707   const MCCFIInstruction *OldCFI = getCFIFor(Instr);
708   assert(OldCFI && "invalid CFI instr");
709   switch (OldCFI->getOperation()) {
710   default:
711     llvm_unreachable("Unexpected instruction");
712   case MCCFIInstruction::OpDefCfa:
713     setCFIFor(Instr, MCCFIInstruction::cfiDefCfa(nullptr, NewReg,
714                                                  OldCFI->getOffset()));
715     break;
716   case MCCFIInstruction::OpDefCfaRegister:
717     setCFIFor(Instr, MCCFIInstruction::createDefCfaRegister(nullptr, NewReg));
718     break;
719   case MCCFIInstruction::OpOffset:
720     setCFIFor(Instr, MCCFIInstruction::createOffset(nullptr, NewReg,
721                                                     OldCFI->getOffset()));
722     break;
723   case MCCFIInstruction::OpRegister:
724     setCFIFor(Instr, MCCFIInstruction::createRegister(nullptr, NewReg,
725                                                       OldCFI->getRegister2()));
726     break;
727   case MCCFIInstruction::OpSameValue:
728     setCFIFor(Instr, MCCFIInstruction::createSameValue(nullptr, NewReg));
729     break;
730   case MCCFIInstruction::OpEscape:
731     setCFIFor(Instr,
732               MCCFIInstruction::createEscape(
733                   nullptr,
734                   StringRef(mutateDWARFExpressionTargetReg(*OldCFI, NewReg))));
735     break;
736   case MCCFIInstruction::OpRestore:
737     setCFIFor(Instr, MCCFIInstruction::createRestore(nullptr, NewReg));
738     break;
739   case MCCFIInstruction::OpUndefined:
740     setCFIFor(Instr, MCCFIInstruction::createUndefined(nullptr, NewReg));
741     break;
742   }
743 }
744 
745 const MCCFIInstruction *BinaryFunction::mutateCFIOffsetFor(const MCInst &Instr,
746                                                            int64_t NewOffset) {
747   const MCCFIInstruction *OldCFI = getCFIFor(Instr);
748   assert(OldCFI && "invalid CFI instr");
749   switch (OldCFI->getOperation()) {
750   default:
751     llvm_unreachable("Unexpected instruction");
752   case MCCFIInstruction::OpDefCfaOffset:
753     setCFIFor(Instr, MCCFIInstruction::cfiDefCfaOffset(nullptr, NewOffset));
754     break;
755   case MCCFIInstruction::OpAdjustCfaOffset:
756     setCFIFor(Instr,
757               MCCFIInstruction::createAdjustCfaOffset(nullptr, NewOffset));
758     break;
759   case MCCFIInstruction::OpDefCfa:
760     setCFIFor(Instr, MCCFIInstruction::cfiDefCfa(nullptr, OldCFI->getRegister(),
761                                                  NewOffset));
762     break;
763   case MCCFIInstruction::OpOffset:
764     setCFIFor(Instr, MCCFIInstruction::createOffset(
765                          nullptr, OldCFI->getRegister(), NewOffset));
766     break;
767   }
768   return getCFIFor(Instr);
769 }
770 
771 IndirectBranchType
772 BinaryFunction::processIndirectBranch(MCInst &Instruction, unsigned Size,
773                                       uint64_t Offset,
774                                       uint64_t &TargetAddress) {
775   const unsigned PtrSize = BC.AsmInfo->getCodePointerSize();
776 
777   // The instruction referencing memory used by the branch instruction.
778   // It could be the branch instruction itself or one of the instructions
779   // setting the value of the register used by the branch.
780   MCInst *MemLocInstr;
781 
782   // Address of the table referenced by MemLocInstr. Could be either an
783   // array of function pointers, or a jump table.
784   uint64_t ArrayStart = 0;
785 
786   unsigned BaseRegNum, IndexRegNum;
787   int64_t DispValue;
788   const MCExpr *DispExpr;
789 
790   // On AArch64, identify the instruction adding the PC-relative offset to
791   // jump table entries so that we can decode them correctly.
792   MCInst *PCRelBaseInstr;
793   uint64_t PCRelAddr = 0;
794 
795   auto Begin = Instructions.begin();
796   if (BC.isAArch64()) {
797     PreserveNops = BC.HasRelocations;
798     // Start at the last label as an approximation of the current basic block.
799     // This is a heuristic, since the full set of labels has yet to be
800     // determined.
801     for (const uint32_t Offset :
802          llvm::make_first_range(llvm::reverse(Labels))) {
803       auto II = Instructions.find(Offset);
804       if (II != Instructions.end()) {
805         Begin = II;
806         break;
807       }
808     }
809   }
810 
811   IndirectBranchType BranchType = BC.MIB->analyzeIndirectBranch(
812       Instruction, Begin, Instructions.end(), PtrSize, MemLocInstr, BaseRegNum,
813       IndexRegNum, DispValue, DispExpr, PCRelBaseInstr);
814 
815   if (BranchType == IndirectBranchType::UNKNOWN && !MemLocInstr)
816     return BranchType;
817 
818   if (MemLocInstr != &Instruction)
819     IndexRegNum = BC.MIB->getNoRegister();
820 
821   if (BC.isAArch64()) {
822     const MCSymbol *Sym = BC.MIB->getTargetSymbol(*PCRelBaseInstr, 1);
823     assert(Sym && "Symbol extraction failed");
824     ErrorOr<uint64_t> SymValueOrError = BC.getSymbolValue(*Sym);
825     if (SymValueOrError) {
826       PCRelAddr = *SymValueOrError;
827     } else {
828       for (std::pair<const uint32_t, MCSymbol *> &Elmt : Labels) {
829         if (Elmt.second == Sym) {
830           PCRelAddr = Elmt.first + getAddress();
831           break;
832         }
833       }
834     }
835     uint64_t InstrAddr = 0;
836     for (auto II = Instructions.rbegin(); II != Instructions.rend(); ++II) {
837       if (&II->second == PCRelBaseInstr) {
838         InstrAddr = II->first + getAddress();
839         break;
840       }
841     }
842     assert(InstrAddr != 0 && "instruction not found");
843     // We do this to avoid spurious references to code locations outside this
844     // function (for example, if the indirect jump lives in the last basic
845     // block of the function, it will create a reference to the next function).
846     // This replaces a symbol reference with an immediate.
847     BC.MIB->replaceMemOperandDisp(*PCRelBaseInstr,
848                                   MCOperand::createImm(PCRelAddr - InstrAddr));
849     // FIXME: Disable full jump table processing for AArch64 until we have a
850     // proper way of determining the jump table limits.
851     return IndirectBranchType::UNKNOWN;
852   }
853 
854   // RIP-relative addressing should have been converted to symbol form by now
855   // in processed instructions (but not in the jump instruction itself).
856   if (DispExpr) {
857     const MCSymbol *TargetSym;
858     uint64_t TargetOffset;
859     std::tie(TargetSym, TargetOffset) = BC.MIB->getTargetSymbolInfo(DispExpr);
860     ErrorOr<uint64_t> SymValueOrError = BC.getSymbolValue(*TargetSym);
861     assert(SymValueOrError && "global symbol needs a value");
862     ArrayStart = *SymValueOrError + TargetOffset;
863     BaseRegNum = BC.MIB->getNoRegister();
864     if (BC.isAArch64()) {
865       ArrayStart &= ~0xFFFULL;
866       ArrayStart += DispValue & 0xFFFULL;
867     }
868   } else {
869     ArrayStart = static_cast<uint64_t>(DispValue);
870   }
871 
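  // For PC-relative (e.g. RIP-based) addressing, the displacement is relative
  // to the address of the next instruction, hence the addition of this
  // instruction's address and size.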
872   if (BaseRegNum == BC.MRI->getProgramCounter())
873     ArrayStart += getAddress() + Offset + Size;
874 
875   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: addressed memory is 0x"
876                     << Twine::utohexstr(ArrayStart) << '\n');
877 
878   ErrorOr<BinarySection &> Section = BC.getSectionForAddress(ArrayStart);
879   if (!Section) {
880     // No section, so this is possibly an absolute address. Since we don't
881     // allow internal function addresses to escape the function scope, we
882     // consider it a tail call.
883     if (opts::Verbosity >= 1) {
884       BC.errs() << "BOLT-WARNING: no section for address 0x"
885                 << Twine::utohexstr(ArrayStart) << " referenced from function "
886                 << *this << '\n';
887     }
888     return IndirectBranchType::POSSIBLE_TAIL_CALL;
889   }
890   if (Section->isVirtual()) {
891     // The contents are filled at runtime.
892     return IndirectBranchType::POSSIBLE_TAIL_CALL;
893   }
894 
895   if (BranchType == IndirectBranchType::POSSIBLE_FIXED_BRANCH) {
896     ErrorOr<uint64_t> Value = BC.getPointerAtAddress(ArrayStart);
897     if (!Value)
898       return IndirectBranchType::UNKNOWN;
899 
900     if (BC.getSectionForAddress(ArrayStart)->isWritable())
901       return IndirectBranchType::UNKNOWN;
902 
903     BC.outs() << "BOLT-INFO: fixed indirect branch detected in " << *this
904               << " at 0x" << Twine::utohexstr(getAddress() + Offset)
905               << " referencing data at 0x" << Twine::utohexstr(ArrayStart)
906               << " the destination value is 0x" << Twine::utohexstr(*Value)
907               << '\n';
908 
909     TargetAddress = *Value;
910     return BranchType;
911   }
912 
913   // Check if there's already a jump table registered at this address.
914   MemoryContentsType MemType;
915   if (JumpTable *JT = BC.getJumpTableContainingAddress(ArrayStart)) {
916     switch (JT->Type) {
917     case JumpTable::JTT_NORMAL:
918       MemType = MemoryContentsType::POSSIBLE_JUMP_TABLE;
919       break;
920     case JumpTable::JTT_PIC:
921       MemType = MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE;
922       break;
923     }
924   } else {
925     MemType = BC.analyzeMemoryAt(ArrayStart, *this);
926   }
927 
928   // Check that jump table type in instruction pattern matches memory contents.
929   JumpTable::JumpTableType JTType;
930   if (BranchType == IndirectBranchType::POSSIBLE_PIC_JUMP_TABLE) {
931     if (MemType != MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE)
932       return IndirectBranchType::UNKNOWN;
933     JTType = JumpTable::JTT_PIC;
934   } else {
935     if (MemType == MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE)
936       return IndirectBranchType::UNKNOWN;
937 
938     if (MemType == MemoryContentsType::UNKNOWN)
939       return IndirectBranchType::POSSIBLE_TAIL_CALL;
940 
941     BranchType = IndirectBranchType::POSSIBLE_JUMP_TABLE;
942     JTType = JumpTable::JTT_NORMAL;
943   }
944 
945   // Convert the instruction into a jump table branch.
946   const MCSymbol *JTLabel = BC.getOrCreateJumpTable(*this, ArrayStart, JTType);
947   BC.MIB->replaceMemOperandDisp(*MemLocInstr, JTLabel, BC.Ctx.get());
948   BC.MIB->setJumpTable(Instruction, ArrayStart, IndexRegNum);
949 
950   JTSites.emplace_back(Offset, ArrayStart);
951 
952   return BranchType;
953 }
954 
955 MCSymbol *BinaryFunction::getOrCreateLocalLabel(uint64_t Address,
956                                                 bool CreatePastEnd) {
957   const uint64_t Offset = Address - getAddress();
958 
959   if ((Offset == getSize()) && CreatePastEnd)
960     return getFunctionEndLabel();
961 
962   auto LI = Labels.find(Offset);
963   if (LI != Labels.end())
964     return LI->second;
965 
966   // For AArch64, check if this address is part of a constant island.
967   if (BC.isAArch64()) {
968     if (MCSymbol *IslandSym = getOrCreateIslandAccess(Address))
969       return IslandSym;
970   }
971 
972   MCSymbol *Label = BC.Ctx->createNamedTempSymbol();
973   Labels[Offset] = Label;
974 
975   return Label;
976 }
977 
978 ErrorOr<ArrayRef<uint8_t>> BinaryFunction::getData() const {
979   BinarySection &Section = *getOriginSection();
980   assert(Section.containsRange(getAddress(), getMaxSize()) &&
981          "wrong section for function");
982 
983   if (!Section.isText() || Section.isVirtual() || !Section.getSize())
984     return std::make_error_code(std::errc::bad_address);
985 
986   StringRef SectionContents = Section.getContents();
987 
988   assert(SectionContents.size() == Section.getSize() &&
989          "section size mismatch");
990 
991   // Function offset from the section start.
992   uint64_t Offset = getAddress() - Section.getAddress();
993   auto *Bytes = reinterpret_cast<const uint8_t *>(SectionContents.data());
994   return ArrayRef<uint8_t>(Bytes + Offset, getMaxSize());
995 }
996 
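/// Return the number of bytes of data (constant island) starting at
/// \p Offset, or zero if no data is recorded as starting at that offset.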
997 size_t BinaryFunction::getSizeOfDataInCodeAt(uint64_t Offset) const {
998   if (!Islands)
999     return 0;
1000 
1001   if (!llvm::is_contained(Islands->DataOffsets, Offset))
1002     return 0;
1003 
1004   auto Iter = Islands->CodeOffsets.upper_bound(Offset);
1005   if (Iter != Islands->CodeOffsets.end())
1006     return *Iter - Offset;
1007   return getSize() - Offset;
1008 }
1009 
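/// Return true if all bytes from \p Offset up to the next data island (or the
/// end of the function) are zeros.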
1010 bool BinaryFunction::isZeroPaddingAt(uint64_t Offset) const {
1011   ArrayRef<uint8_t> FunctionData = *getData();
1012   uint64_t EndOfCode = getSize();
1013   if (Islands) {
1014     auto Iter = Islands->DataOffsets.upper_bound(Offset);
1015     if (Iter != Islands->DataOffsets.end())
1016       EndOfCode = *Iter;
1017   }
1018   for (uint64_t I = Offset; I < EndOfCode; ++I)
1019     if (FunctionData[I] != 0)
1020       return false;
1021 
1022   return true;
1023 }
1024 
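/// Rewrite the PC-relative memory operand of \p Instruction (located at
/// \p Address with the given \p Size) as a symbol plus offset, creating the
/// referenced symbol if necessary.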
1025 Error BinaryFunction::handlePCRelOperand(MCInst &Instruction, uint64_t Address,
1026                                          uint64_t Size) {
1027   auto &MIB = BC.MIB;
1028   uint64_t TargetAddress = 0;
1029   if (!MIB->evaluateMemOperandTarget(Instruction, TargetAddress, Address,
1030                                      Size)) {
1031     std::string Msg;
1032     raw_string_ostream SS(Msg);
1033     SS << "BOLT-ERROR: PC-relative operand can't be evaluated:\n";
1034     BC.InstPrinter->printInst(&Instruction, 0, "", *BC.STI, SS);
1035     SS << '\n';
1036     Instruction.dump_pretty(SS, BC.InstPrinter.get());
1037     SS << '\n';
1038     SS << "BOLT-ERROR: cannot handle PC-relative operand at 0x"
1039        << Twine::utohexstr(Address) << ". Skipping function " << *this << ".\n";
1040     if (BC.HasRelocations)
1041       return createFatalBOLTError(Msg);
1042     IsSimple = false;
1043     return createNonFatalBOLTError(Msg);
1044   }
1045   if (TargetAddress == 0 && opts::Verbosity >= 1) {
1046     BC.outs() << "BOLT-INFO: PC-relative operand is zero in function " << *this
1047               << '\n';
1048   }
1049 
1050   const MCSymbol *TargetSymbol;
1051   uint64_t TargetOffset;
1052   std::tie(TargetSymbol, TargetOffset) =
1053       BC.handleAddressRef(TargetAddress, *this, /*IsPCRel*/ true);
1054 
1055   bool ReplaceSuccess = MIB->replaceMemOperandDisp(
1056       Instruction, TargetSymbol, static_cast<int64_t>(TargetOffset), &*BC.Ctx);
1057   (void)ReplaceSuccess;
1058   assert(ReplaceSuccess && "Failed to replace mem operand with symbol+off.");
1059   return Error::success();
1060 }
1061 
1062 MCSymbol *BinaryFunction::handleExternalReference(MCInst &Instruction,
1063                                                   uint64_t Size,
1064                                                   uint64_t Offset,
1065                                                   uint64_t TargetAddress,
1066                                                   bool &IsCall) {
1067   auto &MIB = BC.MIB;
1068 
1069   const uint64_t AbsoluteInstrAddr = getAddress() + Offset;
1070   BC.addInterproceduralReference(this, TargetAddress);
1071   if (opts::Verbosity >= 2 && !IsCall && Size == 2 && !BC.HasRelocations) {
1072     BC.errs() << "BOLT-WARNING: relaxed tail call detected at 0x"
1073               << Twine::utohexstr(AbsoluteInstrAddr) << " in function " << *this
1074               << ". Code size will be increased.\n";
1075   }
1076 
1077   assert(!MIB->isTailCall(Instruction) &&
1078          "synthetic tail call instruction found");
1079 
1080   // This is a call regardless of the opcode.
1081   // Assign the proper opcode to tail calls, so that they can be
1082   // treated as calls.
1083   if (!IsCall) {
1084     if (!MIB->convertJmpToTailCall(Instruction)) {
1085       assert(MIB->isConditionalBranch(Instruction) &&
1086              "unknown tail call instruction");
1087       if (opts::Verbosity >= 2) {
1088         BC.errs() << "BOLT-WARNING: conditional tail call detected in "
1089                   << "function " << *this << " at 0x"
1090                   << Twine::utohexstr(AbsoluteInstrAddr) << ".\n";
1091       }
1092     }
1093     IsCall = true;
1094   }
1095 
1096   if (opts::Verbosity >= 2 && TargetAddress == 0) {
1097     // We actually see calls to address 0 in the presence of weak
1098     // symbols originating from libraries. This code is never meant
1099     // to be executed.
1100     BC.outs() << "BOLT-INFO: Function " << *this
1101               << " has a call to address zero.\n";
1102   }
1103 
1104   return BC.getOrCreateGlobalSymbol(TargetAddress, "FUNCat");
1105 }
1106 
1107 void BinaryFunction::handleIndirectBranch(MCInst &Instruction, uint64_t Size,
1108                                           uint64_t Offset) {
1109   auto &MIB = BC.MIB;
1110   uint64_t IndirectTarget = 0;
1111   IndirectBranchType Result =
1112       processIndirectBranch(Instruction, Size, Offset, IndirectTarget);
1113   switch (Result) {
1114   default:
1115     llvm_unreachable("unexpected result");
1116   case IndirectBranchType::POSSIBLE_TAIL_CALL: {
1117     bool Result = MIB->convertJmpToTailCall(Instruction);
1118     (void)Result;
1119     assert(Result);
1120     break;
1121   }
1122   case IndirectBranchType::POSSIBLE_JUMP_TABLE:
1123   case IndirectBranchType::POSSIBLE_PIC_JUMP_TABLE:
1124     if (opts::JumpTables == JTS_NONE)
1125       IsSimple = false;
1126     break;
1127   case IndirectBranchType::POSSIBLE_FIXED_BRANCH: {
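    // The branch target was read from a fixed memory location. If the target
    // lies within this function, rewrite the branch as a direct jump and
    // register the target as an entry point; otherwise treat it as a tail
    // call.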
1128     if (containsAddress(IndirectTarget)) {
1129       const MCSymbol *TargetSymbol = getOrCreateLocalLabel(IndirectTarget);
1130       Instruction.clear();
1131       MIB->createUncondBranch(Instruction, TargetSymbol, BC.Ctx.get());
1132       TakenBranches.emplace_back(Offset, IndirectTarget - getAddress());
1133       addEntryPointAtOffset(IndirectTarget - getAddress());
1134     } else {
1135       MIB->convertJmpToTailCall(Instruction);
1136       BC.addInterproceduralReference(this, IndirectTarget);
1137     }
1138     break;
1139   }
1140   case IndirectBranchType::UNKNOWN:
1141     // Keep processing. We'll do more checks and fixes in
1142     // postProcessIndirectBranches().
1143     UnknownIndirectBranchOffsets.emplace(Offset);
1144     break;
1145   }
1146 }
1147 
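/// Check if the indirect call is the terminating branch of a linker-generated
/// veneer. If so, annotate the veneer's instructions and register its
/// ADRP/ADD pair against the real target address.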
1148 void BinaryFunction::handleAArch64IndirectCall(MCInst &Instruction,
1149                                                const uint64_t Offset) {
1150   auto &MIB = BC.MIB;
1151   const uint64_t AbsoluteInstrAddr = getAddress() + Offset;
1152   MCInst *TargetHiBits, *TargetLowBits;
1153   uint64_t TargetAddress, Count;
1154   Count = MIB->matchLinkerVeneer(Instructions.begin(), Instructions.end(),
1155                                  AbsoluteInstrAddr, Instruction, TargetHiBits,
1156                                  TargetLowBits, TargetAddress);
1157   if (Count) {
1158     MIB->addAnnotation(Instruction, "AArch64Veneer", true);
1159     --Count;
1160     for (auto It = std::prev(Instructions.end()); Count != 0;
1161          It = std::prev(It), --Count) {
1162       MIB->addAnnotation(It->second, "AArch64Veneer", true);
1163     }
1164 
1165     BC.addAdrpAddRelocAArch64(*this, *TargetLowBits, *TargetHiBits,
1166                               TargetAddress);
1167   }
1168 }
1169 
1170 std::optional<MCInst>
1171 BinaryFunction::disassembleInstructionAtOffset(uint64_t Offset) const {
1172   assert(CurrentState == State::Empty && "Function should not be disassembled");
1173   assert(Offset < MaxSize && "Invalid offset");
1174   ErrorOr<ArrayRef<unsigned char>> FunctionData = getData();
1175   assert(FunctionData && "Cannot get function as data");
1176   MCInst Instr;
1177   uint64_t InstrSize = 0;
1178   const uint64_t InstrAddress = getAddress() + Offset;
1179   if (BC.DisAsm->getInstruction(Instr, InstrSize, FunctionData->slice(Offset),
1180                                 InstrAddress, nulls()))
1181     return Instr;
1182   return std::nullopt;
1183 }
1184 
1185 Error BinaryFunction::disassemble() {
1186   NamedRegionTimer T("disassemble", "Disassemble function", "buildfuncs",
1187                      "Build Binary Functions", opts::TimeBuild);
1188   ErrorOr<ArrayRef<uint8_t>> ErrorOrFunctionData = getData();
1189   assert(ErrorOrFunctionData && "function data is not available");
1190   ArrayRef<uint8_t> FunctionData = *ErrorOrFunctionData;
1191   assert(FunctionData.size() == getMaxSize() &&
1192          "function size does not match raw data size");
1193 
1194   auto &Ctx = BC.Ctx;
1195   auto &MIB = BC.MIB;
1196 
1197   BC.SymbolicDisAsm->setSymbolizer(MIB->createTargetSymbolizer(*this));
1198 
1199   // Insert a label at the beginning of the function. This will be our first
1200   // basic block.
1201   Labels[0] = Ctx->createNamedTempSymbol("BB0");
1202 
1203   // Map offsets in the function to a label that should always point to the
1204   // corresponding instruction. This is used for labels that shouldn't point to
1205   // the start of a basic block but always to a specific instruction. This is
1206   // used, for example, on RISC-V where %pcrel_lo relocations point to the
1207   // corresponding %pcrel_hi.
1208   LabelsMapType InstructionLabels;
1209 
1210   uint64_t Size = 0; // instruction size
1211   for (uint64_t Offset = 0; Offset < getSize(); Offset += Size) {
1212     MCInst Instruction;
1213     const uint64_t AbsoluteInstrAddr = getAddress() + Offset;
1214 
1215     // Check for data inside code and ignore it
1216     if (const size_t DataInCodeSize = getSizeOfDataInCodeAt(Offset)) {
1217       Size = DataInCodeSize;
1218       continue;
1219     }
1220 
1221     if (!BC.SymbolicDisAsm->getInstruction(Instruction, Size,
1222                                            FunctionData.slice(Offset),
1223                                            AbsoluteInstrAddr, nulls())) {
1224       // Functions with "soft" boundaries, e.g. coming from assembly source,
1225       // can be padded with zero-value bytes at the end.
1226       if (isZeroPaddingAt(Offset))
1227         break;
1228 
1229       BC.errs()
1230           << "BOLT-WARNING: unable to disassemble instruction at offset 0x"
1231           << Twine::utohexstr(Offset) << " (address 0x"
1232           << Twine::utohexstr(AbsoluteInstrAddr) << ") in function " << *this
1233           << '\n';
1234       // Some AVX-512 instructions could not be disassembled at all.
1235       if (BC.HasRelocations && opts::TrapOnAVX512 && BC.isX86()) {
1236         setTrapOnEntry();
1237         BC.TrappedFunctions.push_back(this);
1238       } else {
1239         setIgnored();
1240       }
1241 
1242       break;
1243     }
1244 
1245     // Check integrity of LLVM assembler/disassembler.
1246     if (opts::CheckEncoding && !BC.MIB->isBranch(Instruction) &&
1247         !BC.MIB->isCall(Instruction) && !BC.MIB->isNoop(Instruction)) {
1248       if (!BC.validateInstructionEncoding(FunctionData.slice(Offset, Size))) {
1249         BC.errs() << "BOLT-WARNING: mismatching LLVM encoding detected in "
1250                   << "function " << *this << " for instruction :\n";
1251         BC.printInstruction(BC.errs(), Instruction, AbsoluteInstrAddr);
1252         BC.errs() << '\n';
1253       }
1254     }
1255 
1256     // Special handling for AVX-512 instructions.
1257     if (MIB->hasEVEXEncoding(Instruction)) {
1258       if (BC.HasRelocations && opts::TrapOnAVX512) {
1259         setTrapOnEntry();
1260         BC.TrappedFunctions.push_back(this);
1261         break;
1262       }
1263 
1264       if (!BC.validateInstructionEncoding(FunctionData.slice(Offset, Size))) {
1265         BC.errs() << "BOLT-WARNING: internal assembler/disassembler error "
1266                      "detected for AVX512 instruction:\n";
1267         BC.printInstruction(BC.errs(), Instruction, AbsoluteInstrAddr);
1268         BC.errs() << " in function " << *this << '\n';
1269         setIgnored();
1270         break;
1271       }
1272     }
1273 
1274     if (MIB->isBranch(Instruction) || MIB->isCall(Instruction)) {
1275       uint64_t TargetAddress = 0;
1276       if (MIB->evaluateBranch(Instruction, AbsoluteInstrAddr, Size,
1277                               TargetAddress)) {
1278         // Check if the target is within the same function. Otherwise it's
1279         // a call, possibly a tail call.
1280         //
1281         // If the target *is* the function address it could be either a branch
1282         // or a recursive call.
1283         bool IsCall = MIB->isCall(Instruction);
1284         const bool IsCondBranch = MIB->isConditionalBranch(Instruction);
1285         MCSymbol *TargetSymbol = nullptr;
1286 
1287         if (BC.MIB->isUnsupportedBranch(Instruction)) {
1288           setIgnored();
1289           if (BinaryFunction *TargetFunc =
1290                   BC.getBinaryFunctionContainingAddress(TargetAddress))
1291             TargetFunc->setIgnored();
1292         }
1293 
1294         if (IsCall && containsAddress(TargetAddress)) {
1295           if (TargetAddress == getAddress()) {
1296             // Recursive call.
1297             TargetSymbol = getSymbol();
1298           } else {
1299             if (BC.isX86()) {
1300               // Dangerous old-style x86 PIC code. We may need to freeze this
1301               // function, so preserve the function as is for now.
1302               PreserveNops = true;
1303             } else {
1304               BC.errs() << "BOLT-WARNING: internal call detected at 0x"
1305                         << Twine::utohexstr(AbsoluteInstrAddr)
1306                         << " in function " << *this << ". Skipping.\n";
1307               IsSimple = false;
1308             }
1309           }
1310         }
1311 
1312         if (!TargetSymbol) {
1313           // Create either local label or external symbol.
1314           if (containsAddress(TargetAddress)) {
1315             TargetSymbol = getOrCreateLocalLabel(TargetAddress);
1316           } else {
1317             if (TargetAddress == getAddress() + getSize() &&
1318                 TargetAddress < getAddress() + getMaxSize() &&
1319                 !(BC.isAArch64() &&
1320                   BC.handleAArch64Veneer(TargetAddress, /*MatchOnly*/ true))) {
1321               // Result of __builtin_unreachable().
1322               LLVM_DEBUG(dbgs() << "BOLT-DEBUG: jump past end detected at 0x"
1323                                 << Twine::utohexstr(AbsoluteInstrAddr)
1324                                 << " in function " << *this
1325                                 << " : replacing with nop.\n");
1326               BC.MIB->createNoop(Instruction);
1327               if (IsCondBranch) {
1328                 // Register branch offset for profile validation.
1329                 IgnoredBranches.emplace_back(Offset, Offset + Size);
1330               }
1331               goto add_instruction;
1332             }
1333             // May update Instruction and IsCall
1334             TargetSymbol = handleExternalReference(Instruction, Size, Offset,
1335                                                    TargetAddress, IsCall);
1336           }
1337         }
1338 
1339         if (!IsCall) {
1340           // Add taken branch info.
1341           TakenBranches.emplace_back(Offset, TargetAddress - getAddress());
1342         }
1343         BC.MIB->replaceBranchTarget(Instruction, TargetSymbol, &*Ctx);
1344 
1345         // Mark CTC.
1346         if (IsCondBranch && IsCall)
1347           MIB->setConditionalTailCall(Instruction, TargetAddress);
1348       } else {
1349         // Could not evaluate branch. Should be an indirect call or an
1350         // indirect branch. Bail out on the latter case.
1351         if (MIB->isIndirectBranch(Instruction))
1352           handleIndirectBranch(Instruction, Size, Offset);
1353         // Indirect call. We only need to fix it if the operand is RIP-relative.
1354         if (IsSimple && MIB->hasPCRelOperand(Instruction)) {
1355           if (auto NewE = handleErrors(
1356                   handlePCRelOperand(Instruction, AbsoluteInstrAddr, Size),
1357                   [&](const BOLTError &E) -> Error {
1358                     if (E.isFatal())
1359                       return Error(std::make_unique<BOLTError>(std::move(E)));
1360                     if (!E.getMessage().empty())
1361                       E.log(BC.errs());
1362                     return Error::success();
1363                   })) {
1364             return Error(std::move(NewE));
1365           }
1366         }
1367 
1368         if (BC.isAArch64())
1369           handleAArch64IndirectCall(Instruction, Offset);
1370       }
1371     } else if (BC.isAArch64() || BC.isRISCV()) {
1372       // Check if there's a relocation associated with this instruction.
1373       bool UsedReloc = false;
1374       for (auto Itr = Relocations.lower_bound(Offset),
1375                 ItrE = Relocations.lower_bound(Offset + Size);
1376            Itr != ItrE; ++Itr) {
1377         const Relocation &Relocation = Itr->second;
1378         MCSymbol *Symbol = Relocation.Symbol;
1379 
1380         if (Relocation::isInstructionReference(Relocation.Type)) {
1381           uint64_t RefOffset = Relocation.Value - getAddress();
1382           LabelsMapType::iterator LI = InstructionLabels.find(RefOffset);
1383 
1384           if (LI == InstructionLabels.end()) {
1385             Symbol = BC.Ctx->createNamedTempSymbol();
1386             InstructionLabels.emplace(RefOffset, Symbol);
1387           } else {
1388             Symbol = LI->second;
1389           }
1390         }
1391 
1392         int64_t Value = Relocation.Value;
1393         const bool Result = BC.MIB->replaceImmWithSymbolRef(
1394             Instruction, Symbol, Relocation.Addend, Ctx.get(), Value,
1395             Relocation.Type);
1396         (void)Result;
1397         assert(Result && "cannot replace immediate with relocation");
1398 
1399         // For AArch64, if we replaced an immediate with a symbol from a
1400         // relocation, we mark it so we do not try to further process a
1401         // PC-relative operand. All we need is the symbol.
1402         UsedReloc = true;
1403       }
1404 
1405       if (!BC.isRISCV() && MIB->hasPCRelOperand(Instruction) && !UsedReloc) {
1406         if (auto NewE = handleErrors(
1407                 handlePCRelOperand(Instruction, AbsoluteInstrAddr, Size),
1408                 [&](const BOLTError &E) -> Error {
1409                   if (E.isFatal())
1410                     return Error(std::make_unique<BOLTError>(std::move(E)));
1411                   if (!E.getMessage().empty())
1412                     E.log(BC.errs());
1413                   return Error::success();
1414                 }))
1415           return Error(std::move(NewE));
1416       }
1417     }
1418 
1419 add_instruction:
1420     if (getDWARFLineTable()) {
1421       Instruction.setLoc(findDebugLineInformationForInstructionAt(
1422           AbsoluteInstrAddr, getDWARFUnit(), getDWARFLineTable()));
1423     }
1424 
1425     // Record offset of the instruction for profile matching.
1426     if (BC.keepOffsetForInstruction(Instruction))
1427       MIB->setOffset(Instruction, static_cast<uint32_t>(Offset));
1428 
1429     if (BC.isX86() && BC.MIB->isNoop(Instruction)) {
1430       // NOTE: disassembly loses the correct size information for noops on x86.
1431       //       E.g. nopw 0x0(%rax,%rax,1) is 9 bytes, but re-encoded it's only
1432       //       5 bytes. Preserve the size info using annotations.
1433       MIB->setSize(Instruction, Size);
1434     }
1435 
1436     addInstruction(Offset, std::move(Instruction));
1437   }
1438 
1439   for (auto [Offset, Label] : InstructionLabels) {
1440     InstrMapType::iterator II = Instructions.find(Offset);
1441     assert(II != Instructions.end() && "reference to non-existing instruction");
1442 
1443     BC.MIB->setInstLabel(II->second, Label);
1444   }
1445 
1446   // Reset symbolizer for the disassembler.
1447   BC.SymbolicDisAsm->setSymbolizer(nullptr);
1448 
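  // If the function body does not begin at offset zero (e.g. it starts with
  // data in code), make sure the first real instruction still gets a label so
  // that buildCFG() can start a basic block there.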
1449   if (uint64_t Offset = getFirstInstructionOffset())
1450     Labels[Offset] = BC.Ctx->createNamedTempSymbol();
1451 
1452   clearList(Relocations);
1453 
1454   if (!IsSimple) {
1455     clearList(Instructions);
1456     return createNonFatalBOLTError("");
1457   }
1458 
1459   updateState(State::Disassembled);
1460 
1461   return Error::success();
1462 }
1463 
1464 MCSymbol *BinaryFunction::registerBranch(uint64_t Src, uint64_t Dst) {
1465   assert(CurrentState == State::Disassembled &&
1466          "Cannot register branch unless function is in disassembled state.");
1467   assert(containsAddress(Src) && containsAddress(Dst) &&
1468          "Cannot register external branch.");
1469   MCSymbol *Target = getOrCreateLocalLabel(Dst);
1470   TakenBranches.emplace_back(Src - getAddress(), Dst - getAddress());
1471   return Target;
1472 }
1473 
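// Lightweight alternative to full CFG construction: scan the function for
// references to code that BOLT may move (calls, branches, and other fixups),
// create secondary entry points where needed, and queue relocations against
// the original code so the function itself can be left in place unmodified.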
1474 bool BinaryFunction::scanExternalRefs() {
1475   bool Success = true;
1476   bool DisassemblyFailed = false;
1477 
1478   // Ignore pseudo functions.
1479   if (isPseudo())
1480     return Success;
1481 
1482   if (opts::NoScan) {
1483     clearList(Relocations);
1484     clearList(ExternallyReferencedOffsets);
1485 
1486     return false;
1487   }
1488 
1489   // List of external references for this function.
1490   std::vector<Relocation> FunctionRelocations;
1491 
1492   static BinaryContext::IndependentCodeEmitter Emitter =
1493       BC.createIndependentMCCodeEmitter();
1494 
1495   ErrorOr<ArrayRef<uint8_t>> ErrorOrFunctionData = getData();
1496   assert(ErrorOrFunctionData && "function data is not available");
1497   ArrayRef<uint8_t> FunctionData = *ErrorOrFunctionData;
1498   assert(FunctionData.size() == getMaxSize() &&
1499          "function size does not match raw data size");
1500 
1501   BC.SymbolicDisAsm->setSymbolizer(
1502       BC.MIB->createTargetSymbolizer(*this, /*CreateSymbols*/ false));
1503 
1504   // Disassemble contents of the function. Detect code entry points and create
1505   // relocations for references to code that will be moved.
1506   uint64_t Size = 0; // instruction size
1507   for (uint64_t Offset = 0; Offset < getSize(); Offset += Size) {
1508     // Check for data inside code and ignore it
1509     if (const size_t DataInCodeSize = getSizeOfDataInCodeAt(Offset)) {
1510       Size = DataInCodeSize;
1511       continue;
1512     }
1513 
1514     const uint64_t AbsoluteInstrAddr = getAddress() + Offset;
1515     MCInst Instruction;
1516     if (!BC.SymbolicDisAsm->getInstruction(Instruction, Size,
1517                                            FunctionData.slice(Offset),
1518                                            AbsoluteInstrAddr, nulls())) {
1519       if (opts::Verbosity >= 1 && !isZeroPaddingAt(Offset)) {
1520         BC.errs()
1521             << "BOLT-WARNING: unable to disassemble instruction at offset 0x"
1522             << Twine::utohexstr(Offset) << " (address 0x"
1523             << Twine::utohexstr(AbsoluteInstrAddr) << ") in function " << *this
1524             << '\n';
1525       }
1526       Success = false;
1527       DisassemblyFailed = true;
1528       break;
1529     }
1530 
1531     // Return true if we can skip handling the Target function reference.
1532     auto ignoreFunctionRef = [&](const BinaryFunction &Target) {
1533       if (&Target == this)
1534         return true;
1535 
1536       // Note that later we may decide not to emit Target function. In that
1537       // case, we conservatively create references that will be ignored or
1538       // resolved to the same function.
1539       if (!BC.shouldEmit(Target))
1540         return true;
1541 
1542       return false;
1543     };
1544 
1545     // Return true if we can ignore reference to the symbol.
1546     auto ignoreReference = [&](const MCSymbol *TargetSymbol) {
1547       if (!TargetSymbol)
1548         return true;
1549 
1550       if (BC.forceSymbolRelocations(TargetSymbol->getName()))
1551         return false;
1552 
1553       BinaryFunction *TargetFunction = BC.getFunctionForSymbol(TargetSymbol);
1554       if (!TargetFunction)
1555         return true;
1556 
1557       return ignoreFunctionRef(*TargetFunction);
1558     };
1559 
1560     // Handle calls and branches separately as symbolization doesn't work for
1561     // them yet.
1562     MCSymbol *BranchTargetSymbol = nullptr;
1563     if (BC.MIB->isCall(Instruction) || BC.MIB->isBranch(Instruction)) {
1564       uint64_t TargetAddress = 0;
1565       BC.MIB->evaluateBranch(Instruction, AbsoluteInstrAddr, Size,
1566                              TargetAddress);
1567 
1568       // Create an entry point at reference address if needed.
1569       BinaryFunction *TargetFunction =
1570           BC.getBinaryFunctionContainingAddress(TargetAddress);
1571 
1572       if (!TargetFunction || ignoreFunctionRef(*TargetFunction))
1573         continue;
1574 
1575       const uint64_t FunctionOffset =
1576           TargetAddress - TargetFunction->getAddress();
1577       BranchTargetSymbol =
1578           FunctionOffset ? TargetFunction->addEntryPointAtOffset(FunctionOffset)
1579                          : TargetFunction->getSymbol();
1580     }
1581 
1582     // Can't find more references. Not creating relocations since we are not
1583     // moving code.
1584     if (!BC.HasRelocations)
1585       continue;
1586 
1587     if (BranchTargetSymbol) {
1588       BC.MIB->replaceBranchTarget(Instruction, BranchTargetSymbol,
1589                                   Emitter.LocalCtx.get());
1590     } else if (!llvm::any_of(Instruction,
1591                              [](const MCOperand &Op) { return Op.isExpr(); })) {
1592       // Skip assembly if the instruction may not have any symbolic operands.
1593       continue;
1594     }
1595 
1596     // Emit the instruction using temp emitter and generate relocations.
1597     SmallString<256> Code;
1598     SmallVector<MCFixup, 4> Fixups;
1599     Emitter.MCE->encodeInstruction(Instruction, Code, Fixups, *BC.STI);
1600 
1601     // Create relocation for every fixup.
1602     for (const MCFixup &Fixup : Fixups) {
1603       std::optional<Relocation> Rel = BC.MIB->createRelocation(Fixup, *BC.MAB);
1604       if (!Rel) {
1605         Success = false;
1606         continue;
1607       }
1608 
1609       if (ignoreReference(Rel->Symbol))
1610         continue;
1611 
1612       if (Relocation::getSizeForType(Rel->Type) < 4) {
1613         // If the instruction uses a short form, then we might not be able
1614         // to handle the rewrite without relaxation, and hence cannot reliably
1615         // create an external reference relocation.
1616         Success = false;
1617         continue;
1618       }
1619       Rel->Offset += getAddress() - getOriginSection()->getAddress() + Offset;
1620       FunctionRelocations.push_back(*Rel);
1621     }
1622 
1623     if (!Success)
1624       break;
1625   }
1626 
1627   // Reset symbolizer for the disassembler.
1628   BC.SymbolicDisAsm->setSymbolizer(nullptr);
1629 
1630   // Add relocations unless disassembly failed for this function.
1631   if (!DisassemblyFailed)
1632     for (Relocation &Rel : FunctionRelocations)
1633       getOriginSection()->addPendingRelocation(Rel);
1634 
1635   // Inform BinaryContext that this function's symbols will not be defined and
1636   // relocations should not be created against them.
1637   if (BC.HasRelocations) {
1638     for (std::pair<const uint32_t, MCSymbol *> &LI : Labels)
1639       BC.UndefinedSymbols.insert(LI.second);
1640     for (MCSymbol *const EndLabel : FunctionEndLabels)
1641       if (EndLabel)
1642         BC.UndefinedSymbols.insert(EndLabel);
1643   }
1644 
1645   clearList(Relocations);
1646   clearList(ExternallyReferencedOffsets);
1647 
1648   if (Success && BC.HasRelocations)
1649     HasExternalRefRelocations = true;
1650 
1651   if (opts::Verbosity >= 1 && !Success)
1652     BC.outs() << "BOLT-INFO: failed to scan refs for " << *this << '\n';
1653 
1654   return Success;
1655 }
1656 
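// Sanity-check secondary entry points discovered during disassembly: an entry
// point that does not fall on an instruction boundary indicates that the
// disassembly cannot be trusted, so the function is marked non-simple (and
// ignored in relocation mode).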
1657 void BinaryFunction::postProcessEntryPoints() {
1658   if (!isSimple())
1659     return;
1660 
1661   for (auto &KV : Labels) {
1662     MCSymbol *Label = KV.second;
1663     if (!getSecondaryEntryPointSymbol(Label))
1664       continue;
1665 
1666     // In non-relocation mode there's potentially an external undetectable
1667     // reference to the entry point and hence we cannot move this entry
1668     // point. Optimizing without moving could be difficult.
1669     if (!BC.HasRelocations)
1670       setSimple(false);
1671 
1672     const uint32_t Offset = KV.first;
1673 
1674     // If we are at Offset 0 and there is no instruction associated with it,
1675     // this means this is an empty function. Just ignore. If we find an
1676     // instruction at this offset, this entry point is valid.
1677     if (!Offset || getInstructionAtOffset(Offset))
1678       continue;
1679 
1680     // On AArch64 there are legitimate reasons to have references past the
1681     // end of the function, e.g. jump tables.
1682     if (BC.isAArch64() && Offset == getSize())
1683       continue;
1684 
1685     BC.errs() << "BOLT-WARNING: reference in the middle of instruction "
1686                  "detected in function "
1687               << *this << " at offset 0x" << Twine::utohexstr(Offset) << '\n';
1688     if (BC.HasRelocations)
1689       setIgnored();
1690     setSimple(false);
1691     return;
1692   }
1693 }
1694 
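// Finalize jump tables discovered during disassembly: force -jump-tables=move
// for PIC tables, materialize labels (or secondary entry points) for every
// entry, convert jump table sites into taken branches, and, in strict mode,
// conservatively connect unknown indirect branches to all externally
// referenced offsets.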
1695 void BinaryFunction::postProcessJumpTables() {
1696   // Create labels for all entries.
1697   for (auto &JTI : JumpTables) {
1698     JumpTable &JT = *JTI.second;
1699     if (JT.Type == JumpTable::JTT_PIC && opts::JumpTables == JTS_BASIC) {
1700       opts::JumpTables = JTS_MOVE;
1701       BC.outs() << "BOLT-INFO: forcing -jump-tables=move as PIC jump table was "
1702                    "detected in function "
1703                 << *this << '\n';
1704     }
1705     const uint64_t BDSize =
1706         BC.getBinaryDataAtAddress(JT.getAddress())->getSize();
1707     if (!BDSize) {
1708       BC.setBinaryDataSize(JT.getAddress(), JT.getSize());
1709     } else {
1710       assert(BDSize >= JT.getSize() &&
1711              "jump table cannot be larger than the containing object");
1712     }
1713     if (!JT.Entries.empty())
1714       continue;
1715 
1716     bool HasOneParent = (JT.Parents.size() == 1);
1717     for (uint64_t EntryAddress : JT.EntriesAsAddress) {
1718       // __builtin_unreachable() does not belong to any function, so it
1719       // needs to be handled separately.
1720       bool IsBuiltinUnreachable =
1721           llvm::any_of(JT.Parents, [&](const BinaryFunction *Parent) {
1722             return EntryAddress == Parent->getAddress() + Parent->getSize();
1723           });
1724       if (IsBuiltinUnreachable) {
1725         MCSymbol *Label = getOrCreateLocalLabel(EntryAddress, true);
1726         JT.Entries.push_back(Label);
1727         continue;
1728       }
1729       // Create a local label for targets that cannot be reached by other
1730       // fragments. Otherwise, create a secondary entry point in the target
1731       // function.
1732       BinaryFunction *TargetBF =
1733           BC.getBinaryFunctionContainingAddress(EntryAddress);
1734       MCSymbol *Label;
1735       if (HasOneParent && TargetBF == this) {
1736         Label = getOrCreateLocalLabel(EntryAddress, true);
1737       } else {
1738         const uint64_t Offset = EntryAddress - TargetBF->getAddress();
1739         Label = Offset ? TargetBF->addEntryPointAtOffset(Offset)
1740                        : TargetBF->getSymbol();
1741       }
1742       JT.Entries.push_back(Label);
1743     }
1744   }
1745 
1746   // Add TakenBranches from JumpTables.
1747   //
1748   // We want to do it after initial processing since we don't know jump tables'
1749   // boundaries until we process them all.
1750   for (auto &JTSite : JTSites) {
1751     const uint64_t JTSiteOffset = JTSite.first;
1752     const uint64_t JTAddress = JTSite.second;
1753     const JumpTable *JT = getJumpTableContainingAddress(JTAddress);
1754     assert(JT && "cannot find jump table for address");
1755 
1756     uint64_t EntryOffset = JTAddress - JT->getAddress();
1757     while (EntryOffset < JT->getSize()) {
1758       uint64_t EntryAddress = JT->EntriesAsAddress[EntryOffset / JT->EntrySize];
1759       uint64_t TargetOffset = EntryAddress - getAddress();
1760       if (TargetOffset < getSize()) {
1761         TakenBranches.emplace_back(JTSiteOffset, TargetOffset);
1762 
1763         if (opts::StrictMode)
1764           registerReferencedOffset(TargetOffset);
1765       }
1766 
1767       EntryOffset += JT->EntrySize;
1768 
1769       // A label at the next entry means the end of this jump table.
1770       if (JT->Labels.count(EntryOffset))
1771         break;
1772     }
1773   }
1774   clearList(JTSites);
1775 
1776   // Conservatively populate all possible destinations for unknown indirect
1777   // branches.
1778   if (opts::StrictMode && hasInternalReference()) {
1779     for (uint64_t Offset : UnknownIndirectBranchOffsets) {
1780       for (uint64_t PossibleDestination : ExternallyReferencedOffsets) {
1781         // Ignore __builtin_unreachable().
1782         if (PossibleDestination == getSize())
1783           continue;
1784         TakenBranches.emplace_back(Offset, PossibleDestination);
1785       }
1786     }
1787   }
1788 }
1789 
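// Check that every data-to-code reference into this function is claimed by a
// recognized jump table. Unclaimed references to known basic blocks become
// extra entry points; references that do not land on a block boundary mark
// the function as ignored. Returns true when no unclaimed references exist.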
1790 bool BinaryFunction::validateExternallyReferencedOffsets() {
1791   SmallPtrSet<MCSymbol *, 4> JTTargets;
1792   for (const JumpTable *JT : llvm::make_second_range(JumpTables))
1793     JTTargets.insert(JT->Entries.begin(), JT->Entries.end());
1794 
1795   bool HasUnclaimedReference = false;
1796   for (uint64_t Destination : ExternallyReferencedOffsets) {
1797     // Ignore __builtin_unreachable().
1798     if (Destination == getSize())
1799       continue;
1800     // Ignore constant islands
1801     if (isInConstantIsland(Destination + getAddress()))
1802       continue;
1803 
1804     if (BinaryBasicBlock *BB = getBasicBlockAtOffset(Destination)) {
1805       // Check if the externally referenced offset is a recognized jump table
1806       // target.
1807       if (JTTargets.contains(BB->getLabel()))
1808         continue;
1809 
1810       if (opts::Verbosity >= 1) {
1811         BC.errs() << "BOLT-WARNING: unclaimed data to code reference (possibly "
1812                   << "an unrecognized jump table entry) to " << BB->getName()
1813                   << " in " << *this << "\n";
1814       }
1815       auto L = BC.scopeLock();
1816       addEntryPoint(*BB);
1817     } else {
1818       BC.errs() << "BOLT-WARNING: unknown data to code reference to offset "
1819                 << Twine::utohexstr(Destination) << " in " << *this << "\n";
1820       setIgnored();
1821     }
1822     HasUnclaimedReference = true;
1823   }
1824   return !HasUnclaimedReference;
1825 }
1826 
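// Now that basic block boundaries are known, classify every indirect branch
// as a tail call, a recovered jump table dispatch, or unknown control flow.
// Returns false when the resulting CFG cannot be trusted.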
1827 bool BinaryFunction::postProcessIndirectBranches(
1828     MCPlusBuilder::AllocatorIdTy AllocId) {
1829   auto addUnknownControlFlow = [&](BinaryBasicBlock &BB) {
1830     LLVM_DEBUG(dbgs() << "BOLT-DEBUG: adding unknown control flow in " << *this
1831                       << " for " << BB.getName() << "\n");
1832     HasUnknownControlFlow = true;
1833     BB.removeAllSuccessors();
1834     for (uint64_t PossibleDestination : ExternallyReferencedOffsets)
1835       if (BinaryBasicBlock *SuccBB = getBasicBlockAtOffset(PossibleDestination))
1836         BB.addSuccessor(SuccBB);
1837   };
1838 
1839   uint64_t NumIndirectJumps = 0;
1840   MCInst *LastIndirectJump = nullptr;
1841   BinaryBasicBlock *LastIndirectJumpBB = nullptr;
1842   uint64_t LastJT = 0;
1843   uint16_t LastJTIndexReg = BC.MIB->getNoRegister();
1844   for (BinaryBasicBlock &BB : blocks()) {
1845     for (BinaryBasicBlock::iterator II = BB.begin(); II != BB.end(); ++II) {
1846       MCInst &Instr = *II;
1847       if (!BC.MIB->isIndirectBranch(Instr))
1848         continue;
1849 
1850       // If there's an indirect branch in a single-block function,
1851       // it must be a tail call.
1852       if (BasicBlocks.size() == 1) {
1853         BC.MIB->convertJmpToTailCall(Instr);
1854         return true;
1855       }
1856 
1857       ++NumIndirectJumps;
1858 
1859       if (opts::StrictMode && !hasInternalReference()) {
1860         BC.MIB->convertJmpToTailCall(Instr);
1861         break;
1862       }
1863 
1864       // Validate the tail call or jump table assumptions now that we know
1865       // basic block boundaries.
1866       if (BC.MIB->isTailCall(Instr) || BC.MIB->getJumpTable(Instr)) {
1867         const unsigned PtrSize = BC.AsmInfo->getCodePointerSize();
1868         MCInst *MemLocInstr;
1869         unsigned BaseRegNum, IndexRegNum;
1870         int64_t DispValue;
1871         const MCExpr *DispExpr;
1872         MCInst *PCRelBaseInstr;
1873         IndirectBranchType Type = BC.MIB->analyzeIndirectBranch(
1874             Instr, BB.begin(), II, PtrSize, MemLocInstr, BaseRegNum,
1875             IndexRegNum, DispValue, DispExpr, PCRelBaseInstr);
1876         if (Type != IndirectBranchType::UNKNOWN || MemLocInstr != nullptr)
1877           continue;
1878 
1879         if (!opts::StrictMode)
1880           return false;
1881 
1882         if (BC.MIB->isTailCall(Instr)) {
1883           BC.MIB->convertTailCallToJmp(Instr);
1884         } else {
1885           LastIndirectJump = &Instr;
1886           LastIndirectJumpBB = &BB;
1887           LastJT = BC.MIB->getJumpTable(Instr);
1888           LastJTIndexReg = BC.MIB->getJumpTableIndexReg(Instr);
1889           BC.MIB->unsetJumpTable(Instr);
1890 
1891           JumpTable *JT = BC.getJumpTableContainingAddress(LastJT);
1892           if (JT->Type == JumpTable::JTT_NORMAL) {
1893             // Invalidating the jump table may also invalidate other jump table
1894             // boundaries. Until we have/need support for this, mark the
1895             // function as non-simple.
1896             LLVM_DEBUG(dbgs() << "BOLT-DEBUG: rejected jump table reference "
1897                               << JT->getName() << " in " << *this << '\n');
1898             return false;
1899           }
1900         }
1901 
1902         addUnknownControlFlow(BB);
1903         continue;
1904       }
1905 
1906       // If this block contains epilogue code and has an indirect branch,
1907       // then most likely it's a tail call. Otherwise, we cannot tell for sure
1908       // what it is and conservatively reject the function's CFG.
1909       bool IsEpilogue = llvm::any_of(BB, [&](const MCInst &Instr) {
1910         return BC.MIB->isLeave(Instr) || BC.MIB->isPop(Instr);
1911       });
1912       if (IsEpilogue) {
1913         BC.MIB->convertJmpToTailCall(Instr);
1914         BB.removeAllSuccessors();
1915         continue;
1916       }
1917 
1918       if (opts::Verbosity >= 2) {
1919         BC.outs() << "BOLT-INFO: rejected potential indirect tail call in "
1920                   << "function " << *this << " in basic block " << BB.getName()
1921                   << ".\n";
1922         LLVM_DEBUG(BC.printInstructions(dbgs(), BB.begin(), BB.end(),
1923                                         BB.getOffset(), this, true));
1924       }
1925 
1926       if (!opts::StrictMode)
1927         return false;
1928 
1929       addUnknownControlFlow(BB);
1930     }
1931   }
1932 
1933   if (HasInternalLabelReference)
1934     return false;
1935 
1936   // If there's only one jump table, and one indirect jump, and no other
1937   // references, then we should be able to derive the jump table even if we
1938   // fail to match the pattern.
1939   if (HasUnknownControlFlow && NumIndirectJumps == 1 &&
1940       JumpTables.size() == 1 && LastIndirectJump &&
1941       !BC.getJumpTableContainingAddress(LastJT)->IsSplit) {
1942     LLVM_DEBUG(dbgs() << "BOLT-DEBUG: unsetting unknown control flow in "
1943                       << *this << '\n');
1944     BC.MIB->setJumpTable(*LastIndirectJump, LastJT, LastJTIndexReg, AllocId);
1945     HasUnknownControlFlow = false;
1946 
1947     LastIndirectJumpBB->updateJumpTableSuccessors();
1948   }
1949 
1950   // Validate that all data references to function offsets are claimed by
1951   // recognized jump tables. Register externally referenced blocks as entry
1952   // points.
1953   if (!opts::StrictMode && hasInternalReference()) {
1954     if (!validateExternallyReferencedOffsets())
1955       return false;
1956   }
1957 
1958   if (HasUnknownControlFlow && !BC.HasRelocations)
1959     return false;
1960 
1961   return true;
1962 }
1963 
1964 void BinaryFunction::recomputeLandingPads() {
1965   updateBBIndices(0);
1966 
1967   for (BinaryBasicBlock *BB : BasicBlocks) {
1968     BB->LandingPads.clear();
1969     BB->Throwers.clear();
1970   }
1971 
1972   for (BinaryBasicBlock *BB : BasicBlocks) {
1973     std::unordered_set<const BinaryBasicBlock *> BBLandingPads;
1974     for (MCInst &Instr : *BB) {
1975       if (!BC.MIB->isInvoke(Instr))
1976         continue;
1977 
1978       const std::optional<MCPlus::MCLandingPad> EHInfo =
1979           BC.MIB->getEHInfo(Instr);
1980       if (!EHInfo || !EHInfo->first)
1981         continue;
1982 
1983       BinaryBasicBlock *LPBlock = getBasicBlockForLabel(EHInfo->first);
1984       if (!BBLandingPads.count(LPBlock)) {
1985         BBLandingPads.insert(LPBlock);
1986         BB->LandingPads.emplace_back(LPBlock);
1987         LPBlock->Throwers.emplace_back(BB);
1988       }
1989     }
1990   }
1991 }
1992 
1993 Error BinaryFunction::buildCFG(MCPlusBuilder::AllocatorIdTy AllocatorId) {
1994   auto &MIB = BC.MIB;
1995 
1996   if (!isSimple()) {
1997     assert(!BC.HasRelocations &&
1998            "cannot process file with non-simple function in relocs mode");
1999     return createNonFatalBOLTError("");
2000   }
2001 
2002   if (CurrentState != State::Disassembled)
2003     return createNonFatalBOLTError("");
2004 
2005   assert(BasicBlocks.empty() && "basic block list should be empty");
2006   assert((Labels.find(getFirstInstructionOffset()) != Labels.end()) &&
2007          "first instruction should always have a label");
2008 
2009   // Create basic blocks in the original layout order:
2010   //
2011   //  * Every instruction with associated label marks
2012   //    the beginning of a basic block.
2013   //  * Conditional instruction marks the end of a basic block,
2014   //    except when the following instruction is an
2015   //    unconditional branch, and the unconditional branch is not
2016   //    a destination of another branch. In the latter case, the
2017   //    basic block will consist of a single unconditional branch
2018   //    (missed "double-jump" optimization).
2019   //
2020   // Created basic blocks are sorted in layout order since they are
2021   // created in the same order as instructions, and instructions are
2022   // sorted by offsets.
2023   BinaryBasicBlock *InsertBB = nullptr;
2024   BinaryBasicBlock *PrevBB = nullptr;
2025   bool IsLastInstrNop = false;
2026   // Offset of the last non-nop instruction.
2027   uint64_t LastInstrOffset = 0;
2028 
2029   auto addCFIPlaceholders = [this](uint64_t CFIOffset,
2030                                    BinaryBasicBlock *InsertBB) {
2031     for (auto FI = OffsetToCFI.lower_bound(CFIOffset),
2032               FE = OffsetToCFI.upper_bound(CFIOffset);
2033          FI != FE; ++FI) {
2034       addCFIPseudo(InsertBB, InsertBB->end(), FI->second);
2035     }
2036   };
2037 
2038   // For profiling purposes we need to save the offset of the last instruction
2039   // in the basic block.
2040   // NOTE: nops always have an Offset annotation. Annotate the last non-nop
2041   //       instruction too, since older profiles ignored nops.
2042   auto updateOffset = [&](uint64_t Offset) {
2043     assert(PrevBB && PrevBB != InsertBB && "invalid previous block");
2044     MCInst *LastNonNop = nullptr;
2045     for (BinaryBasicBlock::reverse_iterator RII = PrevBB->getLastNonPseudo(),
2046                                             E = PrevBB->rend();
2047          RII != E; ++RII) {
2048       if (!BC.MIB->isPseudo(*RII) && !BC.MIB->isNoop(*RII)) {
2049         LastNonNop = &*RII;
2050         break;
2051       }
2052     }
2053     if (LastNonNop && !MIB->getOffset(*LastNonNop))
2054       MIB->setOffset(*LastNonNop, static_cast<uint32_t>(Offset));
2055   };
2056 
2057   for (auto I = Instructions.begin(), E = Instructions.end(); I != E; ++I) {
2058     const uint32_t Offset = I->first;
2059     MCInst &Instr = I->second;
2060 
2061     auto LI = Labels.find(Offset);
2062     if (LI != Labels.end()) {
2063       // Always create new BB at branch destination.
2064       PrevBB = InsertBB ? InsertBB : PrevBB;
2065       InsertBB = addBasicBlockAt(LI->first, LI->second);
2066       if (opts::PreserveBlocksAlignment && IsLastInstrNop)
2067         InsertBB->setDerivedAlignment();
2068 
2069       if (PrevBB)
2070         updateOffset(LastInstrOffset);
2071     }
2072 
2073     // Mark all nops with Offset for profile tracking purposes.
2074     if (MIB->isNoop(Instr) && !MIB->getOffset(Instr)) {
2075       // If "Offset" annotation is not present, set it and mark the nop for
2076       // deletion.
2077       MIB->setOffset(Instr, static_cast<uint32_t>(Offset));
2078       // Annotate ordinary nops, so we can safely delete them if required.
2079       MIB->addAnnotation(Instr, "NOP", static_cast<uint32_t>(1), AllocatorId);
2080     }
2081 
2082     if (!InsertBB) {
2083       // It must be a fallthrough or unreachable code. Create a new block unless
2084       // we see an unconditional branch following a conditional one. The latter
2085       // should not be a conditional tail call.
2086       assert(PrevBB && "no previous basic block for a fall through");
2087       MCInst *PrevInstr = PrevBB->getLastNonPseudoInstr();
2088       assert(PrevInstr && "no previous instruction for a fall through");
2089       if (MIB->isUnconditionalBranch(Instr) &&
2090           !MIB->isIndirectBranch(*PrevInstr) &&
2091           !MIB->isUnconditionalBranch(*PrevInstr) &&
2092           !MIB->getConditionalTailCall(*PrevInstr) &&
2093           !MIB->isReturn(*PrevInstr)) {
2094         // Temporarily restore inserter basic block.
2095         InsertBB = PrevBB;
2096       } else {
2097         MCSymbol *Label;
2098         {
2099           auto L = BC.scopeLock();
2100           Label = BC.Ctx->createNamedTempSymbol("FT");
2101         }
2102         InsertBB = addBasicBlockAt(Offset, Label);
2103         if (opts::PreserveBlocksAlignment && IsLastInstrNop)
2104           InsertBB->setDerivedAlignment();
2105         updateOffset(LastInstrOffset);
2106       }
2107     }
2108     if (Offset == getFirstInstructionOffset()) {
2109       // Add associated CFI pseudos at the first offset.
2110       addCFIPlaceholders(Offset, InsertBB);
2111     }
2112 
2113     const bool IsBlockEnd = MIB->isTerminator(Instr);
2114     IsLastInstrNop = MIB->isNoop(Instr);
2115     if (!IsLastInstrNop)
2116       LastInstrOffset = Offset;
2117     InsertBB->addInstruction(std::move(Instr));
2118 
2119     // Add associated CFI instrs. We always add the CFI instruction that is
2120     // located immediately after this instruction, since the next CFI
2121     // instruction reflects the change in state caused by this instruction.
2122     auto NextInstr = std::next(I);
2123     uint64_t CFIOffset;
2124     if (NextInstr != E)
2125       CFIOffset = NextInstr->first;
2126     else
2127       CFIOffset = getSize();
2128 
2129     // Note: this potentially invalidates instruction pointers/iterators.
2130     addCFIPlaceholders(CFIOffset, InsertBB);
2131 
2132     if (IsBlockEnd) {
2133       PrevBB = InsertBB;
2134       InsertBB = nullptr;
2135     }
2136   }
2137 
2138   if (BasicBlocks.empty()) {
2139     setSimple(false);
2140     return createNonFatalBOLTError("");
2141   }
2142 
2143   // Intermediate dump.
2144   LLVM_DEBUG(print(dbgs(), "after creating basic blocks"));
2145 
2146   // TODO: properly handle calls to no-return functions,
2147   // e.g. exit(3), etc. Otherwise we'll see false fall-through
2148   // blocks.
2149 
2150   // Remove duplicate branches. We can get a bunch of them from jump tables.
2151   // Without doing jump table value profiling we don't have a use for extra
2152   // (duplicate) branches.
2153   llvm::sort(TakenBranches);
2154   auto NewEnd = std::unique(TakenBranches.begin(), TakenBranches.end());
2155   TakenBranches.erase(NewEnd, TakenBranches.end());
2156 
2157   for (std::pair<uint32_t, uint32_t> &Branch : TakenBranches) {
2158     LLVM_DEBUG(dbgs() << "registering branch [0x"
2159                       << Twine::utohexstr(Branch.first) << "] -> [0x"
2160                       << Twine::utohexstr(Branch.second) << "]\n");
2161     BinaryBasicBlock *FromBB = getBasicBlockContainingOffset(Branch.first);
2162     BinaryBasicBlock *ToBB = getBasicBlockAtOffset(Branch.second);
2163     if (!FromBB || !ToBB) {
2164       if (!FromBB)
2165         BC.errs() << "BOLT-ERROR: cannot find BB containing the branch.\n";
2166       if (!ToBB)
2167         BC.errs()
2168             << "BOLT-ERROR: cannot find BB containing branch destination.\n";
2169       return createFatalBOLTError(BC.generateBugReportMessage(
2170           "disassembly failed - inconsistent branch found.", *this));
2171     }
2172 
2173     FromBB->addSuccessor(ToBB);
2174   }
2175 
2176   // Add fall-through branches.
2177   PrevBB = nullptr;
2178   bool IsPrevFT = false; // Is previous block a fall-through.
2179   for (BinaryBasicBlock *BB : BasicBlocks) {
2180     if (IsPrevFT)
2181       PrevBB->addSuccessor(BB);
2182 
2183     if (BB->empty()) {
2184       IsPrevFT = true;
2185       PrevBB = BB;
2186       continue;
2187     }
2188 
2189     MCInst *LastInstr = BB->getLastNonPseudoInstr();
2190     assert(LastInstr &&
2191            "should have non-pseudo instruction in non-empty block");
2192 
2193     if (BB->succ_size() == 0) {
2194       // Since there are no existing successors, we know the last instruction is
2195       // not a conditional branch. Thus if it's a terminator, it shouldn't be a
2196       // fall-through.
2197       //
2198       // Conditional tail call is a special case since we don't add a taken
2199       // branch successor for it.
2200       IsPrevFT = !MIB->isTerminator(*LastInstr) ||
2201                  MIB->getConditionalTailCall(*LastInstr);
2202     } else if (BB->succ_size() == 1) {
2203       IsPrevFT = MIB->isConditionalBranch(*LastInstr);
2204     } else {
2205       IsPrevFT = false;
2206     }
2207 
2208     PrevBB = BB;
2209   }
2210 
2211   // Assign landing pads and throwers info.
2212   recomputeLandingPads();
2213 
2214   // Assign CFI information to each BB entry.
2215   annotateCFIState();
2216 
2217   // Annotate invoke instructions with GNU_args_size data.
2218   propagateGnuArgsSizeInfo(AllocatorId);
2219 
2220   // Set the basic block layout to the original order and set end offsets.
2221   PrevBB = nullptr;
2222   for (BinaryBasicBlock *BB : BasicBlocks) {
2223     Layout.addBasicBlock(BB);
2224     if (PrevBB)
2225       PrevBB->setEndOffset(BB->getOffset());
2226     PrevBB = BB;
2227   }
2228   PrevBB->setEndOffset(getSize());
2229 
2230   Layout.updateLayoutIndices();
2231 
2232   normalizeCFIState();
2233 
2234   // Clean-up memory taken by intermediate structures.
2235   //
2236   // NB: don't clear Labels list as we may need them if we mark the function
2237   //     as non-simple later in the process of discovering extra entry points.
2238   clearList(Instructions);
2239   clearList(OffsetToCFI);
2240   clearList(TakenBranches);
2241 
2242   // Update the state.
2243   CurrentState = State::CFG;
2244 
2245   // Make any necessary adjustments for indirect branches.
2246   if (!postProcessIndirectBranches(AllocatorId)) {
2247     if (opts::Verbosity) {
2248       BC.errs() << "BOLT-WARNING: failed to post-process indirect branches for "
2249                 << *this << '\n';
2250     }
2251     // In relocation mode we want to keep processing the function but avoid
2252     // optimizing it.
2253     setSimple(false);
2254   }
2255 
2256   clearList(ExternallyReferencedOffsets);
2257   clearList(UnknownIndirectBranchOffsets);
2258 
2259   return Error::success();
2260 }
2261 
2262 void BinaryFunction::postProcessCFG() {
2263   if (isSimple() && !BasicBlocks.empty()) {
2264     // Convert conditional tail call branches to conditional branches that jump
2265     // to a tail call.
2266     removeConditionalTailCalls();
2267 
2268     postProcessProfile();
2269 
2270     // Eliminate inconsistencies between branch instructions and CFG.
2271     postProcessBranches();
2272   }
2273 
2274   calculateMacroOpFusionStats();
2275 
2276   // The final cleanup of intermediate structures.
2277   clearList(IgnoredBranches);
2278 
2279   // Remove "Offset" annotations, unless we need an address-translation table
2280   // later. This has no cost, since annotations are allocated by a bump-pointer
2281   // allocator and won't be released anyway until late in the pipeline.
2282   if (!requiresAddressTranslation() && !opts::Instrument) {
2283     for (BinaryBasicBlock &BB : blocks())
2284       for (MCInst &Inst : BB)
2285         BC.MIB->clearOffset(Inst);
2286   }
2287 
2288   assert((!isSimple() || validateCFG()) &&
2289          "invalid CFG detected after post-processing");
2290 }
2291 
2292 void BinaryFunction::calculateMacroOpFusionStats() {
2293   if (!getBinaryContext().isX86())
2294     return;
2295   for (const BinaryBasicBlock &BB : blocks()) {
2296     auto II = BB.getMacroOpFusionPair();
2297     if (II == BB.end())
2298       continue;
2299 
2300     // Check offset of the second instruction.
2301     // FIXME: arch-specific.
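    // A fusible pair whose second instruction starts exactly on a 64-byte
    // boundary is split across a cache line and will typically not be
    // macro-fused by x86 cores, so only such pairs are counted as missed.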
2302     const uint32_t Offset = BC.MIB->getOffsetWithDefault(*std::next(II), 0);
2303     if (!Offset || (getAddress() + Offset) % 64)
2304       continue;
2305 
2306     LLVM_DEBUG(dbgs() << "\nmissed macro-op fusion at address 0x"
2307                       << Twine::utohexstr(getAddress() + Offset)
2308                       << " in function " << *this << "; executed "
2309                       << BB.getKnownExecutionCount() << " times.\n");
2310     ++BC.Stats.MissedMacroFusionPairs;
2311     BC.Stats.MissedMacroFusionExecCount += BB.getKnownExecutionCount();
2312   }
2313 }
2314 
2315 void BinaryFunction::removeTagsFromProfile() {
2316   for (BinaryBasicBlock *BB : BasicBlocks) {
2317     if (BB->ExecutionCount == BinaryBasicBlock::COUNT_NO_PROFILE)
2318       BB->ExecutionCount = 0;
2319     for (BinaryBasicBlock::BinaryBranchInfo &BI : BB->branch_info()) {
2320       if (BI.Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
2321           BI.MispredictedCount != BinaryBasicBlock::COUNT_NO_PROFILE)
2322         continue;
2323       BI.Count = 0;
2324       BI.MispredictedCount = 0;
2325     }
2326   }
2327 }
2328 
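// Lower each conditional tail call (CTC) into a conditional branch to a new
// trampoline block holding an unconditional tail call, transferring the CTC
// taken/mispredict counts onto the new edge and block.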
2329 void BinaryFunction::removeConditionalTailCalls() {
2330   // Blocks to be appended at the end.
2331   std::vector<std::unique_ptr<BinaryBasicBlock>> NewBlocks;
2332 
2333   for (auto BBI = begin(); BBI != end(); ++BBI) {
2334     BinaryBasicBlock &BB = *BBI;
2335     MCInst *CTCInstr = BB.getLastNonPseudoInstr();
2336     if (!CTCInstr)
2337       continue;
2338 
2339     std::optional<uint64_t> TargetAddressOrNone =
2340         BC.MIB->getConditionalTailCall(*CTCInstr);
2341     if (!TargetAddressOrNone)
2342       continue;
2343 
2344     // Gather all necessary information about CTC instruction before
2345     // annotations are destroyed.
2346     const int32_t CFIStateBeforeCTC = BB.getCFIStateAtInstr(CTCInstr);
2347     uint64_t CTCTakenCount = BinaryBasicBlock::COUNT_NO_PROFILE;
2348     uint64_t CTCMispredCount = BinaryBasicBlock::COUNT_NO_PROFILE;
2349     if (hasValidProfile()) {
2350       CTCTakenCount = BC.MIB->getAnnotationWithDefault<uint64_t>(
2351           *CTCInstr, "CTCTakenCount");
2352       CTCMispredCount = BC.MIB->getAnnotationWithDefault<uint64_t>(
2353           *CTCInstr, "CTCMispredCount");
2354     }
2355 
2356     // Assert that the tail call does not throw.
2357     assert(!BC.MIB->getEHInfo(*CTCInstr) &&
2358            "found tail call with associated landing pad");
2359 
2360     // Create a basic block with an unconditional tail call instruction using
2361     // the same destination.
2362     const MCSymbol *CTCTargetLabel = BC.MIB->getTargetSymbol(*CTCInstr);
2363     assert(CTCTargetLabel && "symbol expected for conditional tail call");
2364     MCInst TailCallInstr;
2365     BC.MIB->createTailCall(TailCallInstr, CTCTargetLabel, BC.Ctx.get());
2366 
2367     // Move offset from CTCInstr to TailCallInstr.
2368     if (const std::optional<uint32_t> Offset = BC.MIB->getOffset(*CTCInstr)) {
2369       BC.MIB->setOffset(TailCallInstr, *Offset);
2370       BC.MIB->clearOffset(*CTCInstr);
2371     }
2372 
2373     // Link new BBs to the original input offset of the BB where the CTC
2374     // is, so we can map samples recorded in new BBs back to the original BB
2375     // seen in the input binary (if using BAT).
2376     std::unique_ptr<BinaryBasicBlock> TailCallBB =
2377         createBasicBlock(BC.Ctx->createNamedTempSymbol("TC"));
2378     TailCallBB->setOffset(BB.getInputOffset());
2379     TailCallBB->addInstruction(TailCallInstr);
2380     TailCallBB->setCFIState(CFIStateBeforeCTC);
2381 
2382     // Add CFG edge with profile info from BB to TailCallBB.
2383     BB.addSuccessor(TailCallBB.get(), CTCTakenCount, CTCMispredCount);
2384 
2385     // Add execution count for the block.
2386     TailCallBB->setExecutionCount(CTCTakenCount);
2387 
2388     BC.MIB->convertTailCallToJmp(*CTCInstr);
2389 
2390     BC.MIB->replaceBranchTarget(*CTCInstr, TailCallBB->getLabel(),
2391                                 BC.Ctx.get());
2392 
2393     // Add basic block to the list that will be added to the end.
2394     NewBlocks.emplace_back(std::move(TailCallBB));
2395 
2396     // Swap edges as the TailCallBB corresponds to the taken branch.
2397     BB.swapConditionalSuccessors();
2398 
2399     // This branch is no longer a conditional tail call.
2400     BC.MIB->unsetConditionalTailCall(*CTCInstr);
2401   }
2402 
2403   insertBasicBlocks(std::prev(end()), std::move(NewBlocks),
2404                     /* UpdateLayout */ true,
2405                     /* UpdateCFIState */ false);
2406 }
2407 
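// The score approximates the dynamic instruction count: each block contributes
// its execution count multiplied by its number of non-pseudo instructions.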
2408 uint64_t BinaryFunction::getFunctionScore() const {
2409   if (FunctionScore != -1)
2410     return FunctionScore;
2411 
2412   if (!isSimple() || !hasValidProfile()) {
2413     FunctionScore = 0;
2414     return FunctionScore;
2415   }
2416 
2417   uint64_t TotalScore = 0ULL;
2418   for (const BinaryBasicBlock &BB : blocks()) {
2419     uint64_t BBExecCount = BB.getExecutionCount();
2420     if (BBExecCount == BinaryBasicBlock::COUNT_NO_PROFILE)
2421       continue;
2422     TotalScore += BBExecCount * BB.getNumNonPseudos();
2423   }
2424   FunctionScore = TotalScore;
2425   return FunctionScore;
2426 }
2427 
2428 void BinaryFunction::annotateCFIState() {
2429   assert(CurrentState == State::Disassembled && "unexpected function state");
2430   assert(!BasicBlocks.empty() && "basic block list should not be empty");
2431 
2432   // This is an index of the last processed CFI in the FDE CFI program.
2433   uint32_t State = 0;
2434 
2435   // This is an index of RememberState CFI reflecting effective state right
2436   // after execution of RestoreState CFI.
2437   //
2438   // It differs from State iff the CFI at (State-1)
2439   // was RestoreState (modulo GNU_args_size CFIs, which are ignored).
2440   //
2441   // This allows us to generate shorter replay sequences when producing new
2442   // CFI programs.
2443   uint32_t EffectiveState = 0;
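  // Illustrative trace: for an FDE program [def_cfa, remember_state,
  // offset(r15), restore_state], State ends up as 4 after the final CFI while
  // EffectiveState rolls back to 1, the state that was in effect when
  // remember_state executed.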
2444 
2445   // For tracking RememberState/RestoreState sequences.
2446   std::stack<uint32_t> StateStack;
2447 
2448   for (BinaryBasicBlock *BB : BasicBlocks) {
2449     BB->setCFIState(EffectiveState);
2450 
2451     for (const MCInst &Instr : *BB) {
2452       const MCCFIInstruction *CFI = getCFIFor(Instr);
2453       if (!CFI)
2454         continue;
2455 
2456       ++State;
2457 
2458       switch (CFI->getOperation()) {
2459       case MCCFIInstruction::OpRememberState:
2460         StateStack.push(EffectiveState);
2461         EffectiveState = State;
2462         break;
2463       case MCCFIInstruction::OpRestoreState:
2464         assert(!StateStack.empty() && "corrupt CFI stack");
2465         EffectiveState = StateStack.top();
2466         StateStack.pop();
2467         break;
2468       case MCCFIInstruction::OpGnuArgsSize:
2469         // OpGnuArgsSize CFIs do not affect the CFI state.
2470         break;
2471       default:
2472         // Any other CFI updates the state.
2473         EffectiveState = State;
2474         break;
2475       }
2476     }
2477   }
2478 
2479   assert(StateStack.empty() && "corrupt CFI stack");
2480 }
2481 
2482 namespace {
2483 
2484 /// Our full interpretation of a DWARF CFI machine state at a given point
2485 struct CFISnapshot {
2486   /// CFA register number and offset defining the canonical frame at this
2487   /// point, or the number of a rule (CFI state) that computes it with a
2488   /// DWARF expression. This number will be negative if it refers to a CFI
2489   /// located in the CIE instead of the FDE.
2490   uint32_t CFAReg;
2491   int32_t CFAOffset;
2492   int32_t CFARule;
2493   /// Mapping of rules (CFI states) that define the location of each
2494   /// register. If absent, no rule defining the location of such register
2495   /// was ever read. This number will be negative if it refers to a CFI
2496   /// located in the CIE instead of the FDE.
2497   DenseMap<int32_t, int32_t> RegRule;
2498 
2499   /// References to CIE, FDE and expanded instructions after a restore state
2500   const BinaryFunction::CFIInstrMapType &CIE;
2501   const BinaryFunction::CFIInstrMapType &FDE;
2502   const DenseMap<int32_t, SmallVector<int32_t, 4>> &FrameRestoreEquivalents;
2503 
2504   /// Current FDE CFI number representing the state this snapshot is at.
2505   int32_t CurState;
2506 
2507   /// Used when we don't have information about which state/rule to apply
2508   /// to recover the location of either the CFA or a specific register
2509   constexpr static int32_t UNKNOWN = std::numeric_limits<int32_t>::min();
2510 
2511 private:
2512   /// Update our snapshot by executing a single CFI
2513   void update(const MCCFIInstruction &Instr, int32_t RuleNumber) {
2514     switch (Instr.getOperation()) {
2515     case MCCFIInstruction::OpSameValue:
2516     case MCCFIInstruction::OpRelOffset:
2517     case MCCFIInstruction::OpOffset:
2518     case MCCFIInstruction::OpRestore:
2519     case MCCFIInstruction::OpUndefined:
2520     case MCCFIInstruction::OpRegister:
2521       RegRule[Instr.getRegister()] = RuleNumber;
2522       break;
2523     case MCCFIInstruction::OpDefCfaRegister:
2524       CFAReg = Instr.getRegister();
2525       CFARule = UNKNOWN;
2526 
2527       // This shouldn't happen according to the spec but GNU binutils on RISC-V
2528       // emits a DW_CFA_def_cfa_register in CIEs, which leaves the offset
2529       // unspecified. Both readelf and llvm-dwarfdump interpret the offset as 0
2530       // in this case so let's do the same.
2531       if (CFAOffset == UNKNOWN)
2532         CFAOffset = 0;
2533       break;
2534     case MCCFIInstruction::OpDefCfaOffset:
2535       CFAOffset = Instr.getOffset();
2536       CFARule = UNKNOWN;
2537       break;
2538     case MCCFIInstruction::OpDefCfa:
2539       CFAReg = Instr.getRegister();
2540       CFAOffset = Instr.getOffset();
2541       CFARule = UNKNOWN;
2542       break;
2543     case MCCFIInstruction::OpEscape: {
2544       std::optional<uint8_t> Reg =
2545           readDWARFExpressionTargetReg(Instr.getValues());
2546       // Handle DW_CFA_def_cfa_expression
2547       if (!Reg) {
2548         CFARule = RuleNumber;
2549         break;
2550       }
2551       RegRule[*Reg] = RuleNumber;
2552       break;
2553     }
2554     case MCCFIInstruction::OpAdjustCfaOffset:
2555     case MCCFIInstruction::OpWindowSave:
2556     case MCCFIInstruction::OpNegateRAState:
2557     case MCCFIInstruction::OpLLVMDefAspaceCfa:
2558       llvm_unreachable("unsupported CFI opcode");
2559       break;
2560     case MCCFIInstruction::OpRememberState:
2561     case MCCFIInstruction::OpRestoreState:
2562     case MCCFIInstruction::OpGnuArgsSize:
2563       // do not affect CFI state
2564       break;
2565     }
2566   }
2567 
2568 public:
2569   /// Advance state reading FDE CFI instructions up to State number
2570   void advanceTo(int32_t State) {
2571     for (int32_t I = CurState, E = State; I != E; ++I) {
2572       const MCCFIInstruction &Instr = FDE[I];
2573       if (Instr.getOperation() != MCCFIInstruction::OpRestoreState) {
2574         update(Instr, I);
2575         continue;
2576       }
2577       // If restore state instruction, fetch the equivalent CFIs that have
2578       // the same effect of this restore. This is used to ensure remember-
2579       // restore pairs are completely removed.
2580       auto Iter = FrameRestoreEquivalents.find(I);
2581       if (Iter == FrameRestoreEquivalents.end())
2582         continue;
2583       for (int32_t RuleNumber : Iter->second)
2584         update(FDE[RuleNumber], RuleNumber);
2585     }
2586 
2587     assert(((CFAReg != (uint32_t)UNKNOWN && CFAOffset != UNKNOWN) ||
2588             CFARule != UNKNOWN) &&
2589            "CIE did not define default CFA?");
2590 
2591     CurState = State;
2592   }
2593 
2594   /// Interpret all CIE and FDE instructions up until CFI State number and
2595   /// populate this snapshot
2596   CFISnapshot(
2597       const BinaryFunction::CFIInstrMapType &CIE,
2598       const BinaryFunction::CFIInstrMapType &FDE,
2599       const DenseMap<int32_t, SmallVector<int32_t, 4>> &FrameRestoreEquivalents,
2600       int32_t State)
2601       : CIE(CIE), FDE(FDE), FrameRestoreEquivalents(FrameRestoreEquivalents) {
2602     CFAReg = UNKNOWN;
2603     CFAOffset = UNKNOWN;
2604     CFARule = UNKNOWN;
2605     CurState = 0;
2606 
2607     for (int32_t I = 0, E = CIE.size(); I != E; ++I) {
2608       const MCCFIInstruction &Instr = CIE[I];
2609       update(Instr, -I);
2610     }
2611 
2612     advanceTo(State);
2613   }
2614 };
2615 
2616 /// A CFI snapshot with the capability of checking if incremental additions to
2617 /// it are redundant. This is used to ensure we do not emit two CFI instructions
2618 /// back-to-back that are doing the same state change, or to avoid emitting a
2619 /// CFI at all when the state at that point would not be modified after that CFI
2620 struct CFISnapshotDiff : public CFISnapshot {
2621   bool RestoredCFAReg{false};
2622   bool RestoredCFAOffset{false};
2623   DenseMap<int32_t, bool> RestoredRegs;
2624 
2625   CFISnapshotDiff(const CFISnapshot &S) : CFISnapshot(S) {}
2626 
2627   CFISnapshotDiff(
2628       const BinaryFunction::CFIInstrMapType &CIE,
2629       const BinaryFunction::CFIInstrMapType &FDE,
2630       const DenseMap<int32_t, SmallVector<int32_t, 4>> &FrameRestoreEquivalents,
2631       int32_t State)
2632       : CFISnapshot(CIE, FDE, FrameRestoreEquivalents, State) {}
2633 
2634   /// Return true if applying Instr to this state is redundant and can be
2635   /// dismissed.
2636   bool isRedundant(const MCCFIInstruction &Instr) {
2637     switch (Instr.getOperation()) {
2638     case MCCFIInstruction::OpSameValue:
2639     case MCCFIInstruction::OpRelOffset:
2640     case MCCFIInstruction::OpOffset:
2641     case MCCFIInstruction::OpRestore:
2642     case MCCFIInstruction::OpUndefined:
2643     case MCCFIInstruction::OpRegister:
2644     case MCCFIInstruction::OpEscape: {
2645       uint32_t Reg;
2646       if (Instr.getOperation() != MCCFIInstruction::OpEscape) {
2647         Reg = Instr.getRegister();
2648       } else {
2649         std::optional<uint8_t> R =
2650             readDWARFExpressionTargetReg(Instr.getValues());
2651         // Handle DW_CFA_def_cfa_expression
2652         if (!R) {
2653           if (RestoredCFAReg && RestoredCFAOffset)
2654             return true;
2655           RestoredCFAReg = true;
2656           RestoredCFAOffset = true;
2657           return false;
2658         }
2659         Reg = *R;
2660       }
2661       if (RestoredRegs[Reg])
2662         return true;
2663       RestoredRegs[Reg] = true;
2664       const int32_t CurRegRule = RegRule.contains(Reg) ? RegRule[Reg] : UNKNOWN;
2665       if (CurRegRule == UNKNOWN) {
2666         if (Instr.getOperation() == MCCFIInstruction::OpRestore ||
2667             Instr.getOperation() == MCCFIInstruction::OpSameValue)
2668           return true;
2669         return false;
2670       }
2671       const MCCFIInstruction &LastDef =
2672           CurRegRule < 0 ? CIE[-CurRegRule] : FDE[CurRegRule];
2673       return LastDef == Instr;
2674     }
2675     case MCCFIInstruction::OpDefCfaRegister:
2676       if (RestoredCFAReg)
2677         return true;
2678       RestoredCFAReg = true;
2679       return CFAReg == Instr.getRegister();
2680     case MCCFIInstruction::OpDefCfaOffset:
2681       if (RestoredCFAOffset)
2682         return true;
2683       RestoredCFAOffset = true;
2684       return CFAOffset == Instr.getOffset();
2685     case MCCFIInstruction::OpDefCfa:
2686       if (RestoredCFAReg && RestoredCFAOffset)
2687         return true;
2688       RestoredCFAReg = true;
2689       RestoredCFAOffset = true;
2690       return CFAReg == Instr.getRegister() && CFAOffset == Instr.getOffset();
2691     case MCCFIInstruction::OpAdjustCfaOffset:
2692     case MCCFIInstruction::OpWindowSave:
2693     case MCCFIInstruction::OpNegateRAState:
2694     case MCCFIInstruction::OpLLVMDefAspaceCfa:
2695       llvm_unreachable("unsupported CFI opcode");
2696       return false;
2697     case MCCFIInstruction::OpRememberState:
2698     case MCCFIInstruction::OpRestoreState:
2699     case MCCFIInstruction::OpGnuArgsSize:
2700       // do not affect CFI state
2701       return true;
2702     }
2703     return false;
2704   }
2705 };
2706 
2707 } // end anonymous namespace
2708 
2709 bool BinaryFunction::replayCFIInstrs(int32_t FromState, int32_t ToState,
2710                                      BinaryBasicBlock *InBB,
2711                                      BinaryBasicBlock::iterator InsertIt) {
2712   if (FromState == ToState)
2713     return true;
2714   assert(FromState < ToState && "can only replay CFIs forward");
2715 
2716   CFISnapshotDiff CFIDiff(CIEFrameInstructions, FrameInstructions,
2717                           FrameRestoreEquivalents, FromState);
2718 
2719   std::vector<uint32_t> NewCFIs;
2720   for (int32_t CurState = FromState; CurState < ToState; ++CurState) {
2721     MCCFIInstruction *Instr = &FrameInstructions[CurState];
2722     if (Instr->getOperation() == MCCFIInstruction::OpRestoreState) {
2723       auto Iter = FrameRestoreEquivalents.find(CurState);
2724       assert(Iter != FrameRestoreEquivalents.end());
2725       NewCFIs.insert(NewCFIs.end(), Iter->second.begin(), Iter->second.end());
2726       // RestoreState / Remember will be filtered out later by CFISnapshotDiff,
2727       // so we might as well fall-through here.
2728     }
2729     NewCFIs.push_back(CurState);
2730   }
2731 
2732   // Replay instructions while avoiding duplicates
2733   for (int32_t State : llvm::reverse(NewCFIs)) {
2734     if (CFIDiff.isRedundant(FrameInstructions[State]))
2735       continue;
2736     InsertIt = addCFIPseudo(InBB, InsertIt, State);
2737   }
2738 
2739   return true;
2740 }
2741 
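// Emit at InsertIt the CFIs needed to roll the unwinding state back from
// FromState to ToState without relying on DWARF remember/restore opcodes,
// and return the FrameInstructions indices of the CFIs that were inserted.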
2742 SmallVector<int32_t, 4>
2743 BinaryFunction::unwindCFIState(int32_t FromState, int32_t ToState,
2744                                BinaryBasicBlock *InBB,
2745                                BinaryBasicBlock::iterator &InsertIt) {
2746   SmallVector<int32_t, 4> NewStates;
2747 
2748   CFISnapshot ToCFITable(CIEFrameInstructions, FrameInstructions,
2749                          FrameRestoreEquivalents, ToState);
2750   CFISnapshotDiff FromCFITable(ToCFITable);
2751   FromCFITable.advanceTo(FromState);
2752 
2753   auto undoStateDefCfa = [&]() {
2754     if (ToCFITable.CFARule == CFISnapshot::UNKNOWN) {
2755       FrameInstructions.emplace_back(MCCFIInstruction::cfiDefCfa(
2756           nullptr, ToCFITable.CFAReg, ToCFITable.CFAOffset));
2757       if (FromCFITable.isRedundant(FrameInstructions.back())) {
2758         FrameInstructions.pop_back();
2759         return;
2760       }
2761       NewStates.push_back(FrameInstructions.size() - 1);
2762       InsertIt = addCFIPseudo(InBB, InsertIt, FrameInstructions.size() - 1);
2763       ++InsertIt;
2764     } else if (ToCFITable.CFARule < 0) {
2765       if (FromCFITable.isRedundant(CIEFrameInstructions[-ToCFITable.CFARule]))
2766         return;
2767       NewStates.push_back(FrameInstructions.size());
2768       InsertIt = addCFIPseudo(InBB, InsertIt, FrameInstructions.size());
2769       ++InsertIt;
2770       FrameInstructions.emplace_back(CIEFrameInstructions[-ToCFITable.CFARule]);
2771     } else if (!FromCFITable.isRedundant(
2772                    FrameInstructions[ToCFITable.CFARule])) {
2773       NewStates.push_back(ToCFITable.CFARule);
2774       InsertIt = addCFIPseudo(InBB, InsertIt, ToCFITable.CFARule);
2775       ++InsertIt;
2776     }
2777   };
2778 
2779   auto undoState = [&](const MCCFIInstruction &Instr) {
2780     switch (Instr.getOperation()) {
2781     case MCCFIInstruction::OpRememberState:
2782     case MCCFIInstruction::OpRestoreState:
2783       break;
2784     case MCCFIInstruction::OpSameValue:
2785     case MCCFIInstruction::OpRelOffset:
2786     case MCCFIInstruction::OpOffset:
2787     case MCCFIInstruction::OpRestore:
2788     case MCCFIInstruction::OpUndefined:
2789     case MCCFIInstruction::OpEscape:
2790     case MCCFIInstruction::OpRegister: {
2791       uint32_t Reg;
2792       if (Instr.getOperation() != MCCFIInstruction::OpEscape) {
2793         Reg = Instr.getRegister();
2794       } else {
2795         std::optional<uint8_t> R =
2796             readDWARFExpressionTargetReg(Instr.getValues());
2797         // Handle DW_CFA_def_cfa_expression
2798         if (!R) {
2799           undoStateDefCfa();
2800           return;
2801         }
2802         Reg = *R;
2803       }
2804 
2805       if (!ToCFITable.RegRule.contains(Reg)) {
2806         FrameInstructions.emplace_back(
2807             MCCFIInstruction::createRestore(nullptr, Reg));
2808         if (FromCFITable.isRedundant(FrameInstructions.back())) {
2809           FrameInstructions.pop_back();
2810           break;
2811         }
2812         NewStates.push_back(FrameInstructions.size() - 1);
2813         InsertIt = addCFIPseudo(InBB, InsertIt, FrameInstructions.size() - 1);
2814         ++InsertIt;
2815         break;
2816       }
2817       const int32_t Rule = ToCFITable.RegRule[Reg];
2818       if (Rule < 0) {
2819         if (FromCFITable.isRedundant(CIEFrameInstructions[-Rule]))
2820           break;
2821         NewStates.push_back(FrameInstructions.size());
2822         InsertIt = addCFIPseudo(InBB, InsertIt, FrameInstructions.size());
2823         ++InsertIt;
2824         FrameInstructions.emplace_back(CIEFrameInstructions[-Rule]);
2825         break;
2826       }
2827       if (FromCFITable.isRedundant(FrameInstructions[Rule]))
2828         break;
2829       NewStates.push_back(Rule);
2830       InsertIt = addCFIPseudo(InBB, InsertIt, Rule);
2831       ++InsertIt;
2832       break;
2833     }
2834     case MCCFIInstruction::OpDefCfaRegister:
2835     case MCCFIInstruction::OpDefCfaOffset:
2836     case MCCFIInstruction::OpDefCfa:
2837       undoStateDefCfa();
2838       break;
2839     case MCCFIInstruction::OpAdjustCfaOffset:
2840     case MCCFIInstruction::OpWindowSave:
2841     case MCCFIInstruction::OpNegateRAState:
2842     case MCCFIInstruction::OpLLVMDefAspaceCfa:
2843       llvm_unreachable("unsupported CFI opcode");
2844       break;
2845     case MCCFIInstruction::OpGnuArgsSize:
2846       // does not affect the CFI state
2847       break;
2848     }
2849   };
2850 
2851   // Undo all modifications from ToState to FromState
2852   for (int32_t I = ToState, E = FromState; I != E; ++I) {
2853     const MCCFIInstruction &Instr = FrameInstructions[I];
2854     if (Instr.getOperation() != MCCFIInstruction::OpRestoreState) {
2855       undoState(Instr);
2856       continue;
2857     }
2858     auto Iter = FrameRestoreEquivalents.find(I);
2859     if (Iter == FrameRestoreEquivalents.end())
2860       continue;
2861     for (int32_t State : Iter->second)
2862       undoState(FrameInstructions[State]);
2863   }
2864 
2865   return NewStates;
2866 }
2867 
2868 void BinaryFunction::normalizeCFIState() {
2869   // Reordering blocks with remember-restore state instructions can be especially
2870   // tricky. When rewriting the CFI, we omit remember-restore state instructions
2871   // entirely. For each restore state, we build a map expanding the restore into
2872   // the equivalent unwindCFIState sequence required at that point to achieve the
2873   // same effect as the restore. All remember state instructions are then ignored.
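  //
  // Illustrative example (hypothetical input): given the CFI stream
  //   state N:   .cfi_remember_state
  //   state N+1: .cfi_def_cfa_offset 16
  //   state N+2: .cfi_restore_state
  // the restore at state N+2 is mapped in FrameRestoreEquivalents to the
  // sequence produced by unwindCFIState(N+2, N, ...), and both remember and
  // restore pseudo instructions are removed later in finalizeCFIState().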
2874   std::stack<int32_t> Stack;
2875   for (BinaryBasicBlock *CurBB : Layout.blocks()) {
2876     for (auto II = CurBB->begin(); II != CurBB->end(); ++II) {
2877       if (const MCCFIInstruction *CFI = getCFIFor(*II)) {
2878         if (CFI->getOperation() == MCCFIInstruction::OpRememberState) {
2879           Stack.push(II->getOperand(0).getImm());
2880           continue;
2881         }
2882         if (CFI->getOperation() == MCCFIInstruction::OpRestoreState) {
2883           const int32_t RememberState = Stack.top();
2884           const int32_t CurState = II->getOperand(0).getImm();
2885           FrameRestoreEquivalents[CurState] =
2886               unwindCFIState(CurState, RememberState, CurBB, II);
2887           Stack.pop();
2888         }
2889       }
2890     }
2891   }
2892 }
2893 
2894 bool BinaryFunction::finalizeCFIState() {
2895   LLVM_DEBUG(
2896       dbgs() << "Trying to fix CFI states for each BB after reordering.\n");
2897   LLVM_DEBUG(dbgs() << "This is the list of CFI states for each BB of " << *this
2898                     << ": ");
2899 
2900   const char *Sep = "";
2901   (void)Sep;
2902   for (FunctionFragment &FF : Layout.fragments()) {
2903     // Hot-cold border: at the start of each region (with a different FDE) we
2904     // need to reset the CFI state.
2905     int32_t State = 0;
2906 
2907     for (BinaryBasicBlock *BB : FF) {
2908       const int32_t CFIStateAtExit = BB->getCFIStateAtExit();
2909 
2910       // We need to recover the correct state if it doesn't match the expected
2911       // state at the BB entry point.
2912       if (BB->getCFIState() < State) {
2913         // In this case, State is currently higher than what this BB expects it
2914         // to be. To solve this, we need to insert CFI instructions to undo
2915         // the effect of all CFIs from BB's state to the current State.
2916         auto InsertIt = BB->begin();
2917         unwindCFIState(State, BB->getCFIState(), BB, InsertIt);
2918       } else if (BB->getCFIState() > State) {
2919         // If BB's CFI state is greater than State, it means we are behind in
2920         // the state. Just emit all instructions to reach this state at the
2921         // beginning of this BB. If this sequence of instructions involves
2922         // remember state or restore state, bail out.
2923         if (!replayCFIInstrs(State, BB->getCFIState(), BB, BB->begin()))
2924           return false;
2925       }
2926 
2927       State = CFIStateAtExit;
2928       LLVM_DEBUG(dbgs() << Sep << State; Sep = ", ");
2929     }
2930   }
2931   LLVM_DEBUG(dbgs() << "\n");
2932 
2933   for (BinaryBasicBlock &BB : blocks()) {
2934     for (auto II = BB.begin(); II != BB.end();) {
2935       const MCCFIInstruction *CFI = getCFIFor(*II);
2936       if (CFI && (CFI->getOperation() == MCCFIInstruction::OpRememberState ||
2937                   CFI->getOperation() == MCCFIInstruction::OpRestoreState)) {
2938         II = BB.eraseInstruction(II);
2939       } else {
2940         ++II;
2941       }
2942     }
2943   }
2944 
2945   return true;
2946 }
2947 
2948 bool BinaryFunction::requiresAddressTranslation() const {
2949   return opts::EnableBAT || hasSDTMarker() || hasPseudoProbe();
2950 }
2951 
2952 bool BinaryFunction::requiresAddressMap() const {
2953   if (isInjected())
2954     return false;
2955 
2956   return opts::UpdateDebugSections || isMultiEntry() ||
2957          requiresAddressTranslation();
2958 }
2959 
2960 uint64_t BinaryFunction::getInstructionCount() const {
2961   uint64_t Count = 0;
2962   for (const BinaryBasicBlock &BB : blocks())
2963     Count += BB.getNumNonPseudos();
2964   return Count;
2965 }
2966 
2967 void BinaryFunction::clearDisasmState() {
2968   clearList(Instructions);
2969   clearList(IgnoredBranches);
2970   clearList(TakenBranches);
2971 
2972   if (BC.HasRelocations) {
2973     for (std::pair<const uint32_t, MCSymbol *> &LI : Labels)
2974       BC.UndefinedSymbols.insert(LI.second);
2975     for (MCSymbol *const EndLabel : FunctionEndLabels)
2976       if (EndLabel)
2977         BC.UndefinedSymbols.insert(EndLabel);
2978   }
2979 }
2980 
2981 void BinaryFunction::setTrapOnEntry() {
2982   clearDisasmState();
2983 
2984   forEachEntryPoint([&](uint64_t Offset, const MCSymbol *Label) -> bool {
2985     MCInst TrapInstr;
2986     BC.MIB->createTrap(TrapInstr);
2987     addInstruction(Offset, std::move(TrapInstr));
2988     return true;
2989   });
2990 
2991   TrapsOnEntry = true;
2992 }
2993 
2994 void BinaryFunction::setIgnored() {
2995   if (opts::processAllFunctions()) {
2996     // We can accept ignored functions before they've been disassembled.
2997     // In that case, they would still get disassembled and emitted, but not
2998     // optimized.
2999     assert(CurrentState == State::Empty &&
3000            "cannot ignore non-empty functions in current mode");
3001     IsIgnored = true;
3002     return;
3003   }
3004 
3005   clearDisasmState();
3006 
3007   // Clear CFG state too.
3008   if (hasCFG()) {
3009     releaseCFG();
3010 
3011     for (BinaryBasicBlock *BB : BasicBlocks)
3012       delete BB;
3013     clearList(BasicBlocks);
3014 
3015     for (BinaryBasicBlock *BB : DeletedBasicBlocks)
3016       delete BB;
3017     clearList(DeletedBasicBlocks);
3018 
3019     Layout.clear();
3020   }
3021 
3022   CurrentState = State::Empty;
3023 
3024   IsIgnored = true;
3025   IsSimple = false;
3026   LLVM_DEBUG(dbgs() << "Ignoring " << getPrintName() << '\n');
3027 }
3028 
3029 void BinaryFunction::duplicateConstantIslands() {
3030   assert(Islands && "function expected to have constant islands");
3031 
3032   for (BinaryBasicBlock *BB : getLayout().blocks()) {
3033     if (!BB->isCold())
3034       continue;
3035 
3036     for (MCInst &Inst : *BB) {
3037       int OpNum = 0;
3038       for (MCOperand &Operand : Inst) {
3039         if (!Operand.isExpr()) {
3040           ++OpNum;
3041           continue;
3042         }
3043         const MCSymbol *Symbol = BC.MIB->getTargetSymbol(Inst, OpNum);
3044         // Check if this is an island symbol
3045         if (!Islands->Symbols.count(Symbol) &&
3046             !Islands->ProxySymbols.count(Symbol))
3047           continue;
3048 
3049         // Create cold symbol, if missing
3050         auto ISym = Islands->ColdSymbols.find(Symbol);
3051         MCSymbol *ColdSymbol;
3052         if (ISym != Islands->ColdSymbols.end()) {
3053           ColdSymbol = ISym->second;
3054         } else {
3055           ColdSymbol = BC.Ctx->getOrCreateSymbol(Symbol->getName() + ".cold");
3056           Islands->ColdSymbols[Symbol] = ColdSymbol;
3057           // Check if this is a proxy island symbol and update owner proxy map
3058           if (Islands->ProxySymbols.count(Symbol)) {
3059             BinaryFunction *Owner = Islands->ProxySymbols[Symbol];
3060             auto IProxiedSym = Owner->Islands->Proxies[this].find(Symbol);
3061             Owner->Islands->ColdProxies[this][IProxiedSym->second] = ColdSymbol;
3062           }
3063         }
3064 
3065         // Update instruction reference
3066         Operand = MCOperand::createExpr(BC.MIB->getTargetExprFor(
3067             Inst,
3068             MCSymbolRefExpr::create(ColdSymbol, MCSymbolRefExpr::VK_None,
3069                                     *BC.Ctx),
3070             *BC.Ctx, 0));
3071         ++OpNum;
3072       }
3073     }
3074   }
3075 }
3076 
3077 #ifndef MAX_PATH
3078 #define MAX_PATH 255
3079 #endif
3080 
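// Illustrative usage: constructFilename("foo/bar", "icf", ".dot") produces
// "foo-bar-icf.dot". If the combined length would exceed MAX_PATH, the base
// filename is truncated first while the annotation and suffix are preserved.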
3081 static std::string constructFilename(std::string Filename,
3082                                      std::string Annotation,
3083                                      std::string Suffix) {
3084   std::replace(Filename.begin(), Filename.end(), '/', '-');
3085   if (!Annotation.empty())
3086     Annotation.insert(0, "-");
3087   if (Filename.size() + Annotation.size() + Suffix.size() > MAX_PATH) {
3088     assert(Suffix.size() + Annotation.size() <= MAX_PATH);
3089     Filename.resize(MAX_PATH - (Suffix.size() + Annotation.size()));
3090   }
3091   Filename += Annotation;
3092   Filename += Suffix;
3093   return Filename;
3094 }
3095 
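// Escape a string for use inside a dot graph label: newlines are replaced with
// "&#13;" and double quotes are dropped. Illustrative: the input a"b\nc
// becomes ab&#13;c.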
3096 static std::string formatEscapes(const std::string &Str) {
3097   std::string Result;
3098   for (unsigned I = 0; I < Str.size(); ++I) {
3099     char C = Str[I];
3100     switch (C) {
3101     case '\n':
3102       Result += "&#13;";
3103       break;
3104     case '"':
3105       break;
3106     default:
3107       Result += C;
3108       break;
3109     }
3110   }
3111   return Result;
3112 }
3113 
3114 void BinaryFunction::dumpGraph(raw_ostream &OS) const {
3115   OS << "digraph \"" << getPrintName() << "\" {\n"
3116      << "node [fontname=courier, shape=box, style=filled, colorscheme=brbg9]\n";
3117   uint64_t Offset = Address;
3118   for (BinaryBasicBlock *BB : BasicBlocks) {
3119     auto LayoutPos = find(Layout.blocks(), BB);
3120     unsigned LayoutIndex = LayoutPos - Layout.block_begin();
3121     const char *ColdStr = BB->isCold() ? " (cold)" : "";
3122     std::vector<std::string> Attrs;
3123     // Bold box for entry points
3124     if (isEntryPoint(*BB))
3125       Attrs.push_back("penwidth=2");
3126     if (BLI && BLI->getLoopFor(BB)) {
3127       // Distinguish innermost loops
3128       const BinaryLoop *Loop = BLI->getLoopFor(BB);
3129       if (Loop->isInnermost())
3130         Attrs.push_back("fillcolor=6");
3131       else // some outer loop
3132         Attrs.push_back("fillcolor=4");
3133     } else { // non-loopy code
3134       Attrs.push_back("fillcolor=5");
3135     }
3136     ListSeparator LS;
3137     OS << "\"" << BB->getName() << "\" [";
3138     for (StringRef Attr : Attrs)
3139       OS << LS << Attr;
3140     OS << "]\n";
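    // Node label legend: C = execution count, O = input offset, I = basic
    // block index, L = layout index, CFI = CFI state at block entry.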
3141     OS << format("\"%s\" [label=\"%s%s\\n(C:%lu,O:%lu,I:%u,L:%u,CFI:%u)\\n",
3142                  BB->getName().data(), BB->getName().data(), ColdStr,
3143                  BB->getKnownExecutionCount(), BB->getOffset(), getIndex(BB),
3144                  LayoutIndex, BB->getCFIState());
3145 
3146     if (opts::DotToolTipCode) {
3147       std::string Str;
3148       raw_string_ostream CS(Str);
3149       Offset = BC.printInstructions(CS, BB->begin(), BB->end(), Offset, this,
3150                                     /* PrintMCInst = */ false,
3151                                     /* PrintMemData = */ false,
3152                                     /* PrintRelocations = */ false,
3153                                     /* Endl = */ R"(\\l)");
3154       OS << formatEscapes(CS.str()) << '\n';
3155     }
3156     OS << "\"]\n";
3157 
3158     // analyzeBranch is just used to get the names of the branch
3159     // opcodes.
3160     const MCSymbol *TBB = nullptr;
3161     const MCSymbol *FBB = nullptr;
3162     MCInst *CondBranch = nullptr;
3163     MCInst *UncondBranch = nullptr;
3164     const bool Success = BB->analyzeBranch(TBB, FBB, CondBranch, UncondBranch);
3165 
3166     const MCInst *LastInstr = BB->getLastNonPseudoInstr();
3167     const bool IsJumpTable = LastInstr && BC.MIB->getJumpTable(*LastInstr);
3168 
3169     auto BI = BB->branch_info_begin();
3170     for (BinaryBasicBlock *Succ : BB->successors()) {
3171       std::string Branch;
3172       if (Success) {
3173         if (Succ == BB->getConditionalSuccessor(true)) {
3174           Branch = CondBranch ? std::string(BC.InstPrinter->getOpcodeName(
3175                                     CondBranch->getOpcode()))
3176                               : "TB";
3177         } else if (Succ == BB->getConditionalSuccessor(false)) {
3178           Branch = UncondBranch ? std::string(BC.InstPrinter->getOpcodeName(
3179                                       UncondBranch->getOpcode()))
3180                                 : "FB";
3181         } else {
3182           Branch = "FT";
3183         }
3184       }
3185       if (IsJumpTable)
3186         Branch = "JT";
3187       OS << format("\"%s\" -> \"%s\" [label=\"%s", BB->getName().data(),
3188                    Succ->getName().data(), Branch.c_str());
3189 
3190       if (BB->getExecutionCount() != COUNT_NO_PROFILE &&
3191           BI->MispredictedCount != BinaryBasicBlock::COUNT_INFERRED) {
3192         OS << "\\n(C:" << BI->Count << ",M:" << BI->MispredictedCount << ")";
3193       } else if (ExecutionCount != COUNT_NO_PROFILE &&
3194                  BI->Count != BinaryBasicBlock::COUNT_NO_PROFILE) {
3195         OS << "\\n(IC:" << BI->Count << ")";
3196       }
3197       OS << "\"]\n";
3198 
3199       ++BI;
3200     }
3201     for (BinaryBasicBlock *LP : BB->landing_pads()) {
3202       OS << format("\"%s\" -> \"%s\" [constraint=false style=dashed]\n",
3203                    BB->getName().data(), LP->getName().data());
3204     }
3205   }
3206   OS << "}\n";
3207 }
3208 
3209 void BinaryFunction::viewGraph() const {
3210   SmallString<MAX_PATH> Filename;
3211   if (std::error_code EC =
3212           sys::fs::createTemporaryFile("bolt-cfg", "dot", Filename)) {
3213     BC.errs() << "BOLT-ERROR: " << EC.message() << ", unable to create "
3214               << "bolt-cfg-XXXXX.dot temporary file.\n";
3215     return;
3216   }
3217   dumpGraphToFile(std::string(Filename));
3218   if (DisplayGraph(Filename))
3219     BC.errs() << "BOLT-ERROR: Can't display " << Filename
3220               << " with graphviz.\n";
3221   if (std::error_code EC = sys::fs::remove(Filename)) {
3222     BC.errs() << "BOLT-WARNING: " << EC.message() << ", failed to remove "
3223               << Filename << "\n";
3224   }
3225 }
3226 
3227 void BinaryFunction::dumpGraphForPass(std::string Annotation) const {
3228   if (!opts::shouldPrint(*this))
3229     return;
3230 
3231   std::string Filename = constructFilename(getPrintName(), Annotation, ".dot");
3232   if (opts::Verbosity >= 1)
3233     BC.outs() << "BOLT-INFO: dumping CFG to " << Filename << "\n";
3234   dumpGraphToFile(Filename);
3235 }
3236 
3237 void BinaryFunction::dumpGraphToFile(std::string Filename) const {
3238   std::error_code EC;
3239   raw_fd_ostream of(Filename, EC, sys::fs::OF_None);
3240   if (EC) {
3241     if (opts::Verbosity >= 1) {
3242       BC.errs() << "BOLT-WARNING: " << EC.message() << ", unable to open "
3243                 << Filename << " for output.\n";
3244     }
3245     return;
3246   }
3247   dumpGraph(of);
3248 }
3249 
3250 bool BinaryFunction::validateCFG() const {
3251   // Skip the validation of CFG after it is finalized
3252   if (CurrentState == State::CFG_Finalized)
3253     return true;
3254 
3255   bool Valid = true;
3256   for (BinaryBasicBlock *BB : BasicBlocks)
3257     Valid &= BB->validateSuccessorInvariants();
3258 
3259   if (!Valid)
3260     return Valid;
3261 
3262   // Make sure all blocks in CFG are valid.
3263   auto validateBlock = [this](const BinaryBasicBlock *BB, StringRef Desc) {
3264     if (!BB->isValid()) {
3265       BC.errs() << "BOLT-ERROR: deleted " << Desc << " " << BB->getName()
3266                 << " detected in:\n";
3267       this->dump();
3268       return false;
3269     }
3270     return true;
3271   };
3272   for (const BinaryBasicBlock *BB : BasicBlocks) {
3273     if (!validateBlock(BB, "block"))
3274       return false;
3275     for (const BinaryBasicBlock *PredBB : BB->predecessors())
3276       if (!validateBlock(PredBB, "predecessor"))
3277         return false;
3278     for (const BinaryBasicBlock *SuccBB : BB->successors())
3279       if (!validateBlock(SuccBB, "successor"))
3280         return false;
3281     for (const BinaryBasicBlock *LP : BB->landing_pads())
3282       if (!validateBlock(LP, "landing pad"))
3283         return false;
3284     for (const BinaryBasicBlock *Thrower : BB->throwers())
3285       if (!validateBlock(Thrower, "thrower"))
3286         return false;
3287   }
3288 
3289   for (const BinaryBasicBlock *BB : BasicBlocks) {
3290     std::unordered_set<const BinaryBasicBlock *> BBLandingPads;
3291     for (const BinaryBasicBlock *LP : BB->landing_pads()) {
3292       if (BBLandingPads.count(LP)) {
3293         BC.errs() << "BOLT-ERROR: duplicate landing pad detected in "
3294                   << BB->getName() << " in function " << *this << '\n';
3295         return false;
3296       }
3297       BBLandingPads.insert(LP);
3298     }
3299 
3300     std::unordered_set<const BinaryBasicBlock *> BBThrowers;
3301     for (const BinaryBasicBlock *Thrower : BB->throwers()) {
3302       if (BBThrowers.count(Thrower)) {
3303         BC.errs() << "BOLT-ERROR: duplicate thrower detected in "
3304                   << BB->getName() << " in function " << *this << '\n';
3305         return false;
3306       }
3307       BBThrowers.insert(Thrower);
3308     }
3309 
3310     for (const BinaryBasicBlock *LPBlock : BB->landing_pads()) {
3311       if (!llvm::is_contained(LPBlock->throwers(), BB)) {
3312         BC.errs() << "BOLT-ERROR: inconsistent landing pad detected in "
3313                   << *this << ": " << BB->getName()
3314                   << " is in LandingPads but not in " << LPBlock->getName()
3315                   << " Throwers\n";
3316         return false;
3317       }
3318     }
3319     for (const BinaryBasicBlock *Thrower : BB->throwers()) {
3320       if (!llvm::is_contained(Thrower->landing_pads(), BB)) {
3321         BC.errs() << "BOLT-ERROR: inconsistent thrower detected in " << *this
3322                   << ": " << BB->getName() << " is in Throwers list but not in "
3323                   << Thrower->getName() << " LandingPads\n";
3324         return false;
3325       }
3326     }
3327   }
3328 
3329   return Valid;
3330 }
3331 
3332 void BinaryFunction::fixBranches() {
3333   auto &MIB = BC.MIB;
3334   MCContext *Ctx = BC.Ctx.get();
3335 
3336   for (BinaryBasicBlock *BB : BasicBlocks) {
3337     const MCSymbol *TBB = nullptr;
3338     const MCSymbol *FBB = nullptr;
3339     MCInst *CondBranch = nullptr;
3340     MCInst *UncondBranch = nullptr;
3341     if (!BB->analyzeBranch(TBB, FBB, CondBranch, UncondBranch))
3342       continue;
3343 
3344     // We will create an unconditional branch with the correct destination if needed.
3345     if (UncondBranch)
3346       BB->eraseInstruction(BB->findInstruction(UncondBranch));
3347 
3348     // Basic block that follows the current one in the final layout.
3349     const BinaryBasicBlock *const NextBB =
3350         Layout.getBasicBlockAfter(BB, /*IgnoreSplits=*/false);
3351 
3352     if (BB->succ_size() == 1) {
3353       // __builtin_unreachable() could create a conditional branch that
3354       // falls through into the next function - hence the block will have only
3355       // one valid successor. Since the behaviour is undefined, we replace
3356       // the conditional branch with an unconditional one if required.
3357       if (CondBranch)
3358         BB->eraseInstruction(BB->findInstruction(CondBranch));
3359       if (BB->getSuccessor() == NextBB)
3360         continue;
3361       BB->addBranchInstruction(BB->getSuccessor());
3362     } else if (BB->succ_size() == 2) {
3363       assert(CondBranch && "conditional branch expected");
3364       const BinaryBasicBlock *TSuccessor = BB->getConditionalSuccessor(true);
3365       const BinaryBasicBlock *FSuccessor = BB->getConditionalSuccessor(false);
3366 
3367       // Eliminate unnecessary conditional branch.
3368       if (TSuccessor == FSuccessor) {
3369         // FIXME: at the moment, we cannot safely remove static key branches.
3370         if (MIB->isDynamicBranch(*CondBranch)) {
3371           if (opts::Verbosity) {
3372             BC.outs()
3373                 << "BOLT-INFO: unable to remove redundant dynamic branch in "
3374                 << *this << '\n';
3375           }
3376           continue;
3377         }
3378 
3379         BB->removeDuplicateConditionalSuccessor(CondBranch);
3380         if (TSuccessor != NextBB)
3381           BB->addBranchInstruction(TSuccessor);
3382         continue;
3383       }
3384 
3385       // Reverse branch condition and swap successors.
3386       auto swapSuccessors = [&]() {
3387         if (MIB->isUnsupportedBranch(*CondBranch)) {
3388           if (opts::Verbosity) {
3389             BC.outs() << "BOLT-INFO: unable to swap successors in " << *this
3390                       << '\n';
3391           }
3392           return false;
3393         }
3394         std::swap(TSuccessor, FSuccessor);
3395         BB->swapConditionalSuccessors();
3396         auto L = BC.scopeLock();
3397         MIB->reverseBranchCondition(*CondBranch, TSuccessor->getLabel(), Ctx);
3398         return true;
3399       };
3400 
3401       // Check whether the next block is a "taken" target and try to swap it
3402       // with a "fall-through" target.
3403       if (TSuccessor == NextBB && swapSuccessors())
3404         continue;
3405 
3406       // Update conditional branch destination if needed.
3407       if (MIB->getTargetSymbol(*CondBranch) != TSuccessor->getLabel()) {
3408         auto L = BC.scopeLock();
3409         MIB->replaceBranchTarget(*CondBranch, TSuccessor->getLabel(), Ctx);
3410       }
3411 
3412       // No need for the unconditional branch.
3413       if (FSuccessor == NextBB)
3414         continue;
3415 
3416       if (BC.isX86()) {
3417         // We are going to generate two branches. Check if their targets are in
3418         // the same fragment as this block. If only one target is in the same
3419         // fragment, make it the destination of the conditional branch. There
3420         // is a chance it will be a short branch which takes 4 bytes fewer than
3421         // a long conditional branch. For an unconditional branch, the
3422         // difference is 3 bytes.
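        // On x86-64, for instance, a short Jcc rel8 is 2 bytes versus 6 bytes
        // for Jcc rel32, and a short JMP rel8 is 2 bytes versus 5 bytes for
        // JMP rel32.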
3423         if (BB->getFragmentNum() != TSuccessor->getFragmentNum() &&
3424             BB->getFragmentNum() == FSuccessor->getFragmentNum())
3425           swapSuccessors();
3426       }
3427 
3428       BB->addBranchInstruction(FSuccessor);
3429     }
3430     // Cases where the number of successors is 0 (block ends with a
3431     // terminator) or more than 2 (switch table) don't require branch
3432     // instruction adjustments.
3433   }
3434   assert((!isSimple() || validateCFG()) &&
3435          "Invalid CFG detected after fixing branches");
3436 }
3437 
3438 void BinaryFunction::propagateGnuArgsSizeInfo(
3439     MCPlusBuilder::AllocatorIdTy AllocId) {
3440   assert(CurrentState == State::Disassembled && "unexpected function state");
3441 
3442   if (!hasEHRanges() || !usesGnuArgsSize())
3443     return;
3444 
3445   // The current value of DW_CFA_GNU_args_size affects all following
3446   // invoke instructions until the next CFI overrides it.
3447   // It is important to iterate basic blocks in the original order when
3448   // assigning the value.
3449   uint64_t CurrentGnuArgsSize = 0;
3450   for (BinaryBasicBlock *BB : BasicBlocks) {
3451     for (auto II = BB->begin(); II != BB->end();) {
3452       MCInst &Instr = *II;
3453       if (BC.MIB->isCFI(Instr)) {
3454         const MCCFIInstruction *CFI = getCFIFor(Instr);
3455         if (CFI->getOperation() == MCCFIInstruction::OpGnuArgsSize) {
3456           CurrentGnuArgsSize = CFI->getOffset();
3457           // Delete DW_CFA_GNU_args_size instructions and only regenerate them
3458           // during the final code emission. The information is embedded
3459           // inside call instructions.
3460           II = BB->erasePseudoInstruction(II);
3461           continue;
3462         }
3463       } else if (BC.MIB->isInvoke(Instr)) {
3464         // Add the value of GNU_args_size as an extra operand to invokes.
3465         BC.MIB->addGnuArgsSize(Instr, CurrentGnuArgsSize);
3466       }
3467       ++II;
3468     }
3469   }
3470 }
3471 
3472 void BinaryFunction::postProcessBranches() {
3473   if (!isSimple())
3474     return;
3475   for (BinaryBasicBlock &BB : blocks()) {
3476     auto LastInstrRI = BB.getLastNonPseudo();
3477     if (BB.succ_size() == 1) {
3478       if (LastInstrRI != BB.rend() &&
3479           BC.MIB->isConditionalBranch(*LastInstrRI)) {
3480         // __builtin_unreachable() could create a conditional branch that
3481         // falls through into the next function - hence the block will have
3482         // only one valid successor. Such behaviour is undefined and thus we
3483         // remove the conditional branch while leaving a valid successor.
3484         BB.eraseInstruction(std::prev(LastInstrRI.base()));
3485         LLVM_DEBUG(dbgs() << "BOLT-DEBUG: erasing conditional branch in "
3486                           << BB.getName() << " in function " << *this << '\n');
3487       }
3488     } else if (BB.succ_size() == 0) {
3489       // Ignore unreachable basic blocks.
3490       if (BB.pred_size() == 0 || BB.isLandingPad())
3491         continue;
3492 
3493       // If the basic block does not end with a terminator, we insert a return
3494       // instruction unless the last instruction is a call.
3495       if (LastInstrRI == BB.rend()) {
3496         LLVM_DEBUG(
3497             dbgs() << "BOLT-DEBUG: at least one instruction expected in BB "
3498                    << BB.getName() << " in function " << *this << '\n');
3499         continue;
3500       }
3501       if (!BC.MIB->isTerminator(*LastInstrRI) &&
3502           !BC.MIB->isCall(*LastInstrRI)) {
3503         LLVM_DEBUG(dbgs() << "BOLT-DEBUG: adding return to basic block "
3504                           << BB.getName() << " in function " << *this << '\n');
3505         MCInst ReturnInstr;
3506         BC.MIB->createReturn(ReturnInstr);
3507         BB.addInstruction(ReturnInstr);
3508       }
3509     }
3510   }
3511   assert(validateCFG() && "invalid CFG");
3512 }
3513 
3514 MCSymbol *BinaryFunction::addEntryPointAtOffset(uint64_t Offset) {
3515   assert(Offset && "cannot add primary entry point");
3516   assert(CurrentState == State::Empty || CurrentState == State::Disassembled);
3517 
3518   const uint64_t EntryPointAddress = getAddress() + Offset;
3519   MCSymbol *LocalSymbol = getOrCreateLocalLabel(EntryPointAddress);
3520 
3521   MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(LocalSymbol);
3522   if (EntrySymbol)
3523     return EntrySymbol;
3524 
3525   if (BinaryData *EntryBD = BC.getBinaryDataAtAddress(EntryPointAddress)) {
3526     EntrySymbol = EntryBD->getSymbol();
3527   } else {
3528     EntrySymbol = BC.getOrCreateGlobalSymbol(
3529         EntryPointAddress, Twine("__ENTRY_") + getOneName() + "@");
3530   }
3531   SecondaryEntryPoints[LocalSymbol] = EntrySymbol;
3532 
3533   BC.setSymbolToFunctionMap(EntrySymbol, this);
3534 
3535   return EntrySymbol;
3536 }
3537 
3538 MCSymbol *BinaryFunction::addEntryPoint(const BinaryBasicBlock &BB) {
3539   assert(CurrentState == State::CFG &&
3540          "basic block can be added as an entry only in a function with CFG");
3541 
3542   if (&BB == BasicBlocks.front())
3543     return getSymbol();
3544 
3545   MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(BB);
3546   if (EntrySymbol)
3547     return EntrySymbol;
3548 
3549   EntrySymbol =
3550       BC.Ctx->getOrCreateSymbol("__ENTRY_" + BB.getLabel()->getName());
3551 
3552   SecondaryEntryPoints[BB.getLabel()] = EntrySymbol;
3553 
3554   BC.setSymbolToFunctionMap(EntrySymbol, this);
3555 
3556   return EntrySymbol;
3557 }
3558 
3559 MCSymbol *BinaryFunction::getSymbolForEntryID(uint64_t EntryID) {
3560   if (EntryID == 0)
3561     return getSymbol();
3562 
3563   if (!isMultiEntry())
3564     return nullptr;
3565 
3566   uint64_t NumEntries = 1;
3567   if (hasCFG()) {
3568     for (BinaryBasicBlock *BB : BasicBlocks) {
3569       MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(*BB);
3570       if (!EntrySymbol)
3571         continue;
3572       if (NumEntries == EntryID)
3573         return EntrySymbol;
3574       ++NumEntries;
3575     }
3576   } else {
3577     for (std::pair<const uint32_t, MCSymbol *> &KV : Labels) {
3578       MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(KV.second);
3579       if (!EntrySymbol)
3580         continue;
3581       if (NumEntries == EntryID)
3582         return EntrySymbol;
3583       ++NumEntries;
3584     }
3585   }
3586 
3587   return nullptr;
3588 }
3589 
3590 uint64_t BinaryFunction::getEntryIDForSymbol(const MCSymbol *Symbol) const {
3591   if (!isMultiEntry())
3592     return 0;
3593 
3594   for (const MCSymbol *FunctionSymbol : getSymbols())
3595     if (FunctionSymbol == Symbol)
3596       return 0;
3597 
3598   // Check all secondary entries available as either basic blocks or labels.
3599   uint64_t NumEntries = 1;
3600   for (const BinaryBasicBlock *BB : BasicBlocks) {
3601     MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(*BB);
3602     if (!EntrySymbol)
3603       continue;
3604     if (EntrySymbol == Symbol)
3605       return NumEntries;
3606     ++NumEntries;
3607   }
3608   NumEntries = 1;
3609   for (const std::pair<const uint32_t, MCSymbol *> &KV : Labels) {
3610     MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(KV.second);
3611     if (!EntrySymbol)
3612       continue;
3613     if (EntrySymbol == Symbol)
3614       return NumEntries;
3615     ++NumEntries;
3616   }
3617 
3618   llvm_unreachable("symbol not found");
3619 }
3620 
3621 bool BinaryFunction::forEachEntryPoint(EntryPointCallbackTy Callback) const {
3622   bool Status = Callback(0, getSymbol());
3623   if (!isMultiEntry())
3624     return Status;
3625 
3626   for (const std::pair<const uint32_t, MCSymbol *> &KV : Labels) {
3627     if (!Status)
3628       break;
3629 
3630     MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(KV.second);
3631     if (!EntrySymbol)
3632       continue;
3633 
3634     Status = Callback(KV.first, EntrySymbol);
3635   }
3636 
3637   return Status;
3638 }
3639 
3640 BinaryFunction::BasicBlockListType BinaryFunction::dfs() const {
3641   BasicBlockListType DFS;
3642   unsigned Index = 0;
3643   std::stack<BinaryBasicBlock *> Stack;
3644 
3645   // Push entry points to the stack in reverse order.
3646   //
3647   // NB: we rely on the original order of entries to match.
3648   SmallVector<BinaryBasicBlock *> EntryPoints;
3649   llvm::copy_if(BasicBlocks, std::back_inserter(EntryPoints),
3650           [&](const BinaryBasicBlock *const BB) { return isEntryPoint(*BB); });
3651   // Sort entry points by their offset to make sure we got them in the right
3652   // order.
3653   llvm::stable_sort(EntryPoints, [](const BinaryBasicBlock *const A,
3654                               const BinaryBasicBlock *const B) {
3655     return A->getOffset() < B->getOffset();
3656   });
3657   for (BinaryBasicBlock *const BB : reverse(EntryPoints))
3658     Stack.push(BB);
3659 
3660   for (BinaryBasicBlock &BB : blocks())
3661     BB.setLayoutIndex(BinaryBasicBlock::InvalidIndex);
3662 
3663   while (!Stack.empty()) {
3664     BinaryBasicBlock *BB = Stack.top();
3665     Stack.pop();
3666 
3667     if (BB->getLayoutIndex() != BinaryBasicBlock::InvalidIndex)
3668       continue;
3669 
3670     BB->setLayoutIndex(Index++);
3671     DFS.push_back(BB);
3672 
3673     for (BinaryBasicBlock *SuccBB : BB->landing_pads()) {
3674       Stack.push(SuccBB);
3675     }
3676 
3677     const MCSymbol *TBB = nullptr;
3678     const MCSymbol *FBB = nullptr;
3679     MCInst *CondBranch = nullptr;
3680     MCInst *UncondBranch = nullptr;
3681     if (BB->analyzeBranch(TBB, FBB, CondBranch, UncondBranch) && CondBranch &&
3682         BB->succ_size() == 2) {
3683       if (BC.MIB->getCanonicalBranchCondCode(BC.MIB->getCondCode(
3684               *CondBranch)) == BC.MIB->getCondCode(*CondBranch)) {
3685         Stack.push(BB->getConditionalSuccessor(true));
3686         Stack.push(BB->getConditionalSuccessor(false));
3687       } else {
3688         Stack.push(BB->getConditionalSuccessor(false));
3689         Stack.push(BB->getConditionalSuccessor(true));
3690       }
3691     } else {
3692       for (BinaryBasicBlock *SuccBB : BB->successors()) {
3693         Stack.push(SuccBB);
3694       }
3695     }
3696   }
3697 
3698   return DFS;
3699 }
3700 
3701 size_t BinaryFunction::computeHash(bool UseDFS, HashFunction HashFunction,
3702                                    OperandHashFuncTy OperandHashFunc) const {
3703   if (size() == 0)
3704     return 0;
3705 
3706   assert(hasCFG() && "function is expected to have CFG");
3707 
3708   SmallVector<const BinaryBasicBlock *, 0> Order;
3709   if (UseDFS)
3710     llvm::copy(dfs(), std::back_inserter(Order));
3711   else
3712     llvm::copy(Layout.blocks(), std::back_inserter(Order));
3713 
3714   // The hash is computed by creating a string of all instruction opcodes and
3715   // possibly their operands, then hashing that string with HashFunction.
3716   std::string HashString;
3717   for (const BinaryBasicBlock *BB : Order)
3718     HashString.append(hashBlock(BC, *BB, OperandHashFunc));
3719 
3720   switch (HashFunction) {
3721   case HashFunction::StdHash:
3722     return Hash = std::hash<std::string>{}(HashString);
3723   case HashFunction::XXH3:
3724     return Hash = llvm::xxh3_64bits(HashString);
3725   }
3726   llvm_unreachable("Unhandled HashFunction");
3727 }
3728 
3729 void BinaryFunction::insertBasicBlocks(
3730     BinaryBasicBlock *Start,
3731     std::vector<std::unique_ptr<BinaryBasicBlock>> &&NewBBs,
3732     const bool UpdateLayout, const bool UpdateCFIState,
3733     const bool RecomputeLandingPads) {
3734   const int64_t StartIndex = Start ? getIndex(Start) : -1LL;
3735   const size_t NumNewBlocks = NewBBs.size();
3736 
3737   BasicBlocks.insert(BasicBlocks.begin() + (StartIndex + 1), NumNewBlocks,
3738                      nullptr);
3739 
3740   int64_t I = StartIndex + 1;
3741   for (std::unique_ptr<BinaryBasicBlock> &BB : NewBBs) {
3742     assert(!BasicBlocks[I]);
3743     BasicBlocks[I++] = BB.release();
3744   }
3745 
3746   if (RecomputeLandingPads)
3747     recomputeLandingPads();
3748   else
3749     updateBBIndices(0);
3750 
3751   if (UpdateLayout)
3752     updateLayout(Start, NumNewBlocks);
3753 
3754   if (UpdateCFIState)
3755     updateCFIState(Start, NumNewBlocks);
3756 }
3757 
3758 BinaryFunction::iterator BinaryFunction::insertBasicBlocks(
3759     BinaryFunction::iterator StartBB,
3760     std::vector<std::unique_ptr<BinaryBasicBlock>> &&NewBBs,
3761     const bool UpdateLayout, const bool UpdateCFIState,
3762     const bool RecomputeLandingPads) {
3763   const unsigned StartIndex = getIndex(&*StartBB);
3764   const size_t NumNewBlocks = NewBBs.size();
3765 
3766   BasicBlocks.insert(BasicBlocks.begin() + StartIndex + 1, NumNewBlocks,
3767                      nullptr);
3768   auto RetIter = BasicBlocks.begin() + StartIndex + 1;
3769 
3770   unsigned I = StartIndex + 1;
3771   for (std::unique_ptr<BinaryBasicBlock> &BB : NewBBs) {
3772     assert(!BasicBlocks[I]);
3773     BasicBlocks[I++] = BB.release();
3774   }
3775 
3776   if (RecomputeLandingPads)
3777     recomputeLandingPads();
3778   else
3779     updateBBIndices(0);
3780 
3781   if (UpdateLayout)
3782     updateLayout(*std::prev(RetIter), NumNewBlocks);
3783 
3784   if (UpdateCFIState)
3785     updateCFIState(*std::prev(RetIter), NumNewBlocks);
3786 
3787   return RetIter;
3788 }
3789 
3790 void BinaryFunction::updateBBIndices(const unsigned StartIndex) {
3791   for (unsigned I = StartIndex; I < BasicBlocks.size(); ++I)
3792     BasicBlocks[I]->Index = I;
3793 }
3794 
3795 void BinaryFunction::updateCFIState(BinaryBasicBlock *Start,
3796                                     const unsigned NumNewBlocks) {
3797   const int32_t CFIState = Start->getCFIStateAtExit();
3798   const unsigned StartIndex = getIndex(Start) + 1;
3799   for (unsigned I = 0; I < NumNewBlocks; ++I)
3800     BasicBlocks[StartIndex + I]->setCFIState(CFIState);
3801 }
3802 
3803 void BinaryFunction::updateLayout(BinaryBasicBlock *Start,
3804                                   const unsigned NumNewBlocks) {
3805   BasicBlockListType::iterator Begin;
3806   BasicBlockListType::iterator End;
3807 
3808   // If Start is not provided, copy new blocks from the beginning of BasicBlocks.
3809   if (!Start) {
3810     Begin = BasicBlocks.begin();
3811     End = BasicBlocks.begin() + NumNewBlocks;
3812   } else {
3813     unsigned StartIndex = getIndex(Start);
3814     Begin = std::next(BasicBlocks.begin(), StartIndex + 1);
3815     End = std::next(BasicBlocks.begin(), StartIndex + NumNewBlocks + 1);
3816   }
3817 
3818   // Insert new blocks in the layout immediately after Start.
3819   Layout.insertBasicBlocks(Start, {Begin, End});
3820   Layout.updateLayoutIndices();
3821 }
3822 
3823 bool BinaryFunction::checkForAmbiguousJumpTables() {
3824   SmallSet<uint64_t, 4> JumpTables;
3825   for (BinaryBasicBlock *&BB : BasicBlocks) {
3826     for (MCInst &Inst : *BB) {
3827       if (!BC.MIB->isIndirectBranch(Inst))
3828         continue;
3829       uint64_t JTAddress = BC.MIB->getJumpTable(Inst);
3830       if (!JTAddress)
3831         continue;
3832       // This address can be inside another jump table, but we only consider
3833       // it ambiguous when the same start address is used, not the same JT
3834       // object.
3835       if (!JumpTables.count(JTAddress)) {
3836         JumpTables.insert(JTAddress);
3837         continue;
3838       }
3839       return true;
3840     }
3841   }
3842   return false;
3843 }
3844 
3845 void BinaryFunction::disambiguateJumpTables(
3846     MCPlusBuilder::AllocatorIdTy AllocId) {
3847   assert((opts::JumpTables != JTS_BASIC && isSimple()) || !BC.HasRelocations);
3848   SmallPtrSet<JumpTable *, 4> JumpTables;
3849   for (BinaryBasicBlock *&BB : BasicBlocks) {
3850     for (MCInst &Inst : *BB) {
3851       if (!BC.MIB->isIndirectBranch(Inst))
3852         continue;
3853       JumpTable *JT = getJumpTable(Inst);
3854       if (!JT)
3855         continue;
3856       auto Iter = JumpTables.find(JT);
3857       if (Iter == JumpTables.end()) {
3858         JumpTables.insert(JT);
3859         continue;
3860       }
3861       // This instruction is an indirect jump using a jump table, but it is
3862       // using the same jump table as another jump. Try all our tricks to
3863       // extract the jump table symbol and make it point to a new, duplicated JT.
3864       MCPhysReg BaseReg1;
3865       uint64_t Scale;
3866       const MCSymbol *Target;
3867       // If we match with our first matcher, the first instruction is the one
3868       // to patch.
3869       MCInst *JTLoadInst = &Inst;
3870       // Try a standard indirect jump matcher, scale 8
3871       std::unique_ptr<MCPlusBuilder::MCInstMatcher> IndJmpMatcher =
3872           BC.MIB->matchIndJmp(BC.MIB->matchReg(BaseReg1),
3873                               BC.MIB->matchImm(Scale), BC.MIB->matchReg(),
3874                               /*Offset=*/BC.MIB->matchSymbol(Target));
3875       if (!IndJmpMatcher->match(
3876               *BC.MRI, *BC.MIB,
3877               MutableArrayRef<MCInst>(&*BB->begin(), &Inst + 1), -1) ||
3878           BaseReg1 != BC.MIB->getNoRegister() || Scale != 8) {
3879         MCPhysReg BaseReg2;
3880         uint64_t Offset;
3881         // Standard JT matching failed. Trying now:
3882         //     movq  "jt.2397/1"(,%rax,8), %rax
3883         //     jmpq  *%rax
3884         std::unique_ptr<MCPlusBuilder::MCInstMatcher> LoadMatcherOwner =
3885             BC.MIB->matchLoad(BC.MIB->matchReg(BaseReg1),
3886                               BC.MIB->matchImm(Scale), BC.MIB->matchReg(),
3887                               /*Offset=*/BC.MIB->matchSymbol(Target));
3888         MCPlusBuilder::MCInstMatcher *LoadMatcher = LoadMatcherOwner.get();
3889         std::unique_ptr<MCPlusBuilder::MCInstMatcher> IndJmpMatcher2 =
3890             BC.MIB->matchIndJmp(std::move(LoadMatcherOwner));
3891         if (!IndJmpMatcher2->match(
3892                 *BC.MRI, *BC.MIB,
3893                 MutableArrayRef<MCInst>(&*BB->begin(), &Inst + 1), -1) ||
3894             BaseReg1 != BC.MIB->getNoRegister() || Scale != 8) {
3895           // JT matching failed. Trying now:
3896           // PIC-style matcher, scale 4
3897           //    addq    %rdx, %rsi
3898           //    addq    %rdx, %rdi
3899           //    leaq    DATAat0x402450(%rip), %r11
3900           //    movslq  (%r11,%rdx,4), %rcx
3901           //    addq    %r11, %rcx
3902           //    jmpq    *%rcx # JUMPTABLE @0x402450
3903           std::unique_ptr<MCPlusBuilder::MCInstMatcher> PICIndJmpMatcher =
3904               BC.MIB->matchIndJmp(BC.MIB->matchAdd(
3905                   BC.MIB->matchReg(BaseReg1),
3906                   BC.MIB->matchLoad(BC.MIB->matchReg(BaseReg2),
3907                                     BC.MIB->matchImm(Scale), BC.MIB->matchReg(),
3908                                     BC.MIB->matchImm(Offset))));
3909           std::unique_ptr<MCPlusBuilder::MCInstMatcher> LEAMatcherOwner =
3910               BC.MIB->matchLoadAddr(BC.MIB->matchSymbol(Target));
3911           MCPlusBuilder::MCInstMatcher *LEAMatcher = LEAMatcherOwner.get();
3912           std::unique_ptr<MCPlusBuilder::MCInstMatcher> PICBaseAddrMatcher =
3913               BC.MIB->matchIndJmp(BC.MIB->matchAdd(std::move(LEAMatcherOwner),
3914                                                    BC.MIB->matchAnyOperand()));
3915           if (!PICIndJmpMatcher->match(
3916                   *BC.MRI, *BC.MIB,
3917                   MutableArrayRef<MCInst>(&*BB->begin(), &Inst + 1), -1) ||
3918               Scale != 4 || BaseReg1 != BaseReg2 || Offset != 0 ||
3919               !PICBaseAddrMatcher->match(
3920                   *BC.MRI, *BC.MIB,
3921                   MutableArrayRef<MCInst>(&*BB->begin(), &Inst + 1), -1)) {
3922             llvm_unreachable("Failed to extract jump table base");
3923             continue;
3924           }
3925           // Matched PIC, identify the instruction with the reference to the JT
3926           JTLoadInst = LEAMatcher->CurInst;
3927         } else {
3928           // Matched non-PIC
3929           JTLoadInst = LoadMatcher->CurInst;
3930         }
3931       }
3932 
3933       uint64_t NewJumpTableID = 0;
3934       const MCSymbol *NewJTLabel;
3935       std::tie(NewJumpTableID, NewJTLabel) =
3936           BC.duplicateJumpTable(*this, JT, Target);
3937       {
3938         auto L = BC.scopeLock();
3939         BC.MIB->replaceMemOperandDisp(*JTLoadInst, NewJTLabel, BC.Ctx.get());
3940       }
3941       // We use a unique ID with the high bit set as address for this "injected"
3942       // jump table (not originally in the input binary).
3943       BC.MIB->setJumpTable(Inst, NewJumpTableID, 0, AllocId);
3944     }
3945   }
3946 }
3947 
3948 bool BinaryFunction::replaceJumpTableEntryIn(BinaryBasicBlock *BB,
3949                                              BinaryBasicBlock *OldDest,
3950                                              BinaryBasicBlock *NewDest) {
3951   MCInst *Instr = BB->getLastNonPseudoInstr();
3952   if (!Instr || !BC.MIB->isIndirectBranch(*Instr))
3953     return false;
3954   uint64_t JTAddress = BC.MIB->getJumpTable(*Instr);
3955   assert(JTAddress && "Invalid jump table address");
3956   JumpTable *JT = getJumpTableContainingAddress(JTAddress);
3957   assert(JT && "No jump table structure for this indirect branch");
3958   bool Patched = JT->replaceDestination(JTAddress, OldDest->getLabel(),
3959                                         NewDest->getLabel());
3960   (void)Patched;
3961   assert(Patched && "Invalid entry to be replaced in jump table");
3962   return true;
3963 }
3964 
3965 BinaryBasicBlock *BinaryFunction::splitEdge(BinaryBasicBlock *From,
3966                                             BinaryBasicBlock *To) {
3967   // Create intermediate BB
3968   MCSymbol *Tmp;
3969   {
3970     auto L = BC.scopeLock();
3971     Tmp = BC.Ctx->createNamedTempSymbol("SplitEdge");
3972   }
3973   // Link new BBs to the original input offset of the From BB, so we can map
3974   // samples recorded in new BBs back to the original BB seen in the input
3975   // binary (if using BAT).
3976   std::unique_ptr<BinaryBasicBlock> NewBB = createBasicBlock(Tmp);
3977   NewBB->setOffset(From->getInputOffset());
3978   BinaryBasicBlock *NewBBPtr = NewBB.get();
3979 
3980   // Update "From" BB
3981   auto I = From->succ_begin();
3982   auto BI = From->branch_info_begin();
3983   for (; I != From->succ_end(); ++I) {
3984     if (*I == To)
3985       break;
3986     ++BI;
3987   }
3988   assert(I != From->succ_end() && "Invalid CFG edge in splitEdge!");
3989   uint64_t OrigCount = BI->Count;
3990   uint64_t OrigMispreds = BI->MispredictedCount;
3991   replaceJumpTableEntryIn(From, To, NewBBPtr);
3992   From->replaceSuccessor(To, NewBBPtr, OrigCount, OrigMispreds);
3993 
3994   NewBB->addSuccessor(To, OrigCount, OrigMispreds);
3995   NewBB->setExecutionCount(OrigCount);
3996   NewBB->setIsCold(From->isCold());
3997 
3998   // Update CFI and BB layout with new intermediate BB
3999   std::vector<std::unique_ptr<BinaryBasicBlock>> NewBBs;
4000   NewBBs.emplace_back(std::move(NewBB));
4001   insertBasicBlocks(From, std::move(NewBBs), true, true,
4002                     /*RecomputeLandingPads=*/false);
4003   return NewBBPtr;
4004 }
4005 
4006 void BinaryFunction::deleteConservativeEdges() {
4007   // Our goal is to aggressively remove edges from the CFG that we believe are
4008   // wrong. This is used for instrumentation, where it is safe to remove
4009   // fallthrough edges because we won't reorder blocks.
4010   for (auto I = BasicBlocks.begin(), E = BasicBlocks.end(); I != E; ++I) {
4011     BinaryBasicBlock *BB = *I;
4012     if (BB->succ_size() != 1 || BB->size() == 0)
4013       continue;
4014 
4015     auto NextBB = std::next(I);
4016     MCInst *Last = BB->getLastNonPseudoInstr();
4017     // Fallthrough is a landing pad? Delete this edge (as long as we don't
4018     // have a direct jump to it)
4019     if ((*BB->succ_begin())->isLandingPad() && NextBB != E &&
4020         *BB->succ_begin() == *NextBB && Last && !BC.MIB->isBranch(*Last)) {
4021       BB->removeAllSuccessors();
4022       continue;
4023     }
4024 
4025     // Look for suspicious calls at the end of BB where gcc may optimize it and
4026     // remove the jump to the epilogue when it knows the call won't return.
4027     if (!Last || !BC.MIB->isCall(*Last))
4028       continue;
4029 
4030     const MCSymbol *CalleeSymbol = BC.MIB->getTargetSymbol(*Last);
4031     if (!CalleeSymbol)
4032       continue;
4033 
4034     StringRef CalleeName = CalleeSymbol->getName();
4035     if (CalleeName != "__cxa_throw@PLT" && CalleeName != "_Unwind_Resume@PLT" &&
4036         CalleeName != "__cxa_rethrow@PLT" && CalleeName != "exit@PLT" &&
4037         CalleeName != "abort@PLT")
4038       continue;
4039 
4040     BB->removeAllSuccessors();
4041   }
4042 }
4043 
4044 bool BinaryFunction::isSymbolValidInScope(const SymbolRef &Symbol,
4045                                           uint64_t SymbolSize) const {
4046   // If this symbol is in a different section from the one where the
4047   // function symbol is, don't consider it valid.
4048   if (!getOriginSection()->containsAddress(
4049           cantFail(Symbol.getAddress(), "cannot get symbol address")))
4050     return false;
4051 
4052   // Some symbols are tolerated inside function bodies, others are not.
4053   // The real function boundaries may not be known at this point.
4054   if (BC.isMarker(Symbol))
4055     return true;
4056 
4057   // It's okay to have a zero-sized symbol in the middle of non-zero-sized
4058   // It's okay to have a zero-sized symbol in the middle of a non-zero-sized
4059   // function.
4060     return true;
4061 
4062   if (cantFail(Symbol.getType()) != SymbolRef::ST_Unknown)
4063     return false;
4064 
4065   if (cantFail(Symbol.getFlags()) & SymbolRef::SF_Global)
4066     return false;
4067 
4068   return true;
4069 }
4070 
4071 void BinaryFunction::adjustExecutionCount(uint64_t Count) {
4072   if (getKnownExecutionCount() == 0 || Count == 0)
4073     return;
4074 
4075   if (ExecutionCount < Count)
4076     Count = ExecutionCount;
4077 
4078   double AdjustmentRatio = ((double)ExecutionCount - Count) / ExecutionCount;
4079   if (AdjustmentRatio < 0.0)
4080     AdjustmentRatio = 0.0;
4081 
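  // For example, with ExecutionCount == 100 and Count == 30, the ratio is 0.7:
  // each block's profile count is scaled by that ratio and the function count
  // drops to 70.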
4082   for (BinaryBasicBlock &BB : blocks())
4083     BB.adjustExecutionCount(AdjustmentRatio);
4084 
4085   ExecutionCount -= Count;
4086 }
4087 
4088 BinaryFunction::~BinaryFunction() {
4089   for (BinaryBasicBlock *BB : BasicBlocks)
4090     delete BB;
4091   for (BinaryBasicBlock *BB : DeletedBasicBlocks)
4092     delete BB;
4093 }
4094 
4095 void BinaryFunction::constructDomTree() {
4096   BDT.reset(new BinaryDominatorTree);
4097   BDT->recalculate(*this);
4098 }
4099 
4100 void BinaryFunction::calculateLoopInfo() {
4101   if (!hasDomTree())
4102     constructDomTree();
4103   // Discover loops.
4104   BLI.reset(new BinaryLoopInfo());
4105   BLI->analyze(getDomTree());
4106 
4107   // Traverse discovered loops and add depth and profile information.
4108   std::stack<BinaryLoop *> St;
4109   for (auto I = BLI->begin(), E = BLI->end(); I != E; ++I) {
4110     St.push(*I);
4111     ++BLI->OuterLoops;
4112   }
4113 
4114   while (!St.empty()) {
4115     BinaryLoop *L = St.top();
4116     St.pop();
4117     ++BLI->TotalLoops;
4118     BLI->MaximumDepth = std::max(L->getLoopDepth(), BLI->MaximumDepth);
4119 
4120     // Add nested loops in the stack.
4121     for (BinaryLoop::iterator I = L->begin(), E = L->end(); I != E; ++I)
4122       St.push(*I);
4123 
4124     // Skip if no valid profile is found.
4125     if (!hasValidProfile()) {
4126       L->EntryCount = COUNT_NO_PROFILE;
4127       L->ExitCount = COUNT_NO_PROFILE;
4128       L->TotalBackEdgeCount = COUNT_NO_PROFILE;
4129       continue;
4130     }
4131 
4132     // Compute back edge count.
4133     SmallVector<BinaryBasicBlock *, 1> Latches;
4134     L->getLoopLatches(Latches);
4135 
4136     for (BinaryBasicBlock *Latch : Latches) {
4137       auto BI = Latch->branch_info_begin();
4138       for (BinaryBasicBlock *Succ : Latch->successors()) {
4139         if (Succ == L->getHeader()) {
4140           assert(BI->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
4141                  "profile data not found");
4142           L->TotalBackEdgeCount += BI->Count;
4143         }
4144         ++BI;
4145       }
4146     }
4147 
4148     // Compute entry count.
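    // The entry count is the number of times the loop is entered from outside:
    // header executions minus executions arriving via back edges.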
4149     L->EntryCount = L->getHeader()->getExecutionCount() - L->TotalBackEdgeCount;
4150 
4151     // Compute exit count.
4152     SmallVector<BinaryLoop::Edge, 1> ExitEdges;
4153     L->getExitEdges(ExitEdges);
4154     for (BinaryLoop::Edge &Exit : ExitEdges) {
4155       const BinaryBasicBlock *Exiting = Exit.first;
4156       const BinaryBasicBlock *ExitTarget = Exit.second;
4157       auto BI = Exiting->branch_info_begin();
4158       for (BinaryBasicBlock *Succ : Exiting->successors()) {
4159         if (Succ == ExitTarget) {
4160           assert(BI->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
4161                  "profile data not found");
4162           L->ExitCount += BI->Count;
4163         }
4164         ++BI;
4165       }
4166     }
4167   }
4168 }
4169 
4170 void BinaryFunction::updateOutputValues(const BOLTLinker &Linker) {
4171   if (!isEmitted()) {
4172     assert(!isInjected() && "injected function should be emitted");
4173     setOutputAddress(getAddress());
4174     setOutputSize(getSize());
4175     return;
4176   }
4177 
4178   const auto SymbolInfo = Linker.lookupSymbolInfo(getSymbol()->getName());
4179   assert(SymbolInfo && "Cannot find function entry symbol");
4180   setOutputAddress(SymbolInfo->Address);
4181   setOutputSize(SymbolInfo->Size);
4182 
4183   if (BC.HasRelocations || isInjected()) {
4184     if (hasConstantIsland()) {
4185       const auto DataAddress =
4186           Linker.lookupSymbol(getFunctionConstantIslandLabel()->getName());
4187       assert(DataAddress && "Cannot find function CI symbol");
4188       setOutputDataAddress(*DataAddress);
4189       for (auto It : Islands->Offsets) {
4190         const uint64_t OldOffset = It.first;
4191         BinaryData *BD = BC.getBinaryDataAtAddress(getAddress() + OldOffset);
4192         if (!BD)
4193           continue;
4194 
4195         MCSymbol *Symbol = It.second;
4196         const auto NewAddress = Linker.lookupSymbol(Symbol->getName());
4197         assert(NewAddress && "Cannot find CI symbol");
4198         auto &Section = *getCodeSection();
4199         const auto NewOffset = *NewAddress - Section.getOutputAddress();
4200         BD->setOutputLocation(Section, NewOffset);
4201       }
4202     }
4203     if (isSplit()) {
4204       for (FunctionFragment &FF : getLayout().getSplitFragments()) {
4205         ErrorOr<BinarySection &> ColdSection =
4206             getCodeSection(FF.getFragmentNum());
4207         // If fragment is empty, cold section might not exist
4208         if (FF.empty() && ColdSection.getError())
4209           continue;
4210 
4211         const MCSymbol *ColdStartSymbol = getSymbol(FF.getFragmentNum());
4212         // If fragment is empty, symbol might have not been emitted
4213         // If fragment is empty, symbol might not have been emitted
4214             !hasConstantIsland())
4215           continue;
4216         assert(ColdStartSymbol && ColdStartSymbol->isDefined() &&
4217                "split function should have defined cold symbol");
4218         const auto ColdStartSymbolInfo =
4219             Linker.lookupSymbolInfo(ColdStartSymbol->getName());
4220         assert(ColdStartSymbolInfo && "Cannot find cold start symbol");
4221         FF.setAddress(ColdStartSymbolInfo->Address);
4222         FF.setImageSize(ColdStartSymbolInfo->Size);
4223         if (hasConstantIsland()) {
4224           const auto DataAddress = Linker.lookupSymbol(
4225               getFunctionColdConstantIslandLabel()->getName());
4226           assert(DataAddress && "Cannot find cold CI symbol");
4227           setOutputColdDataAddress(*DataAddress);
4228         }
4229       }
4230     }
4231   }
4232 
4233   // Update basic block output ranges for the debug info, if we have
4234   // secondary entry points in the symbol table to update or if writing BAT.
4235   if (!requiresAddressMap())
4236     return;
4237 
4238   // AArch64 may have functions that only contain a constant island (no code).
4239   if (getLayout().block_empty())
4240     return;
4241 
4242   for (FunctionFragment &FF : getLayout().fragments()) {
4243     if (FF.empty())
4244       continue;
4245 
4246     const uint64_t FragmentBaseAddress =
4247         getCodeSection(isSimple() ? FF.getFragmentNum() : FragmentNum::main())
4248             ->getOutputAddress();
4249 
4250     BinaryBasicBlock *PrevBB = nullptr;
4251     for (BinaryBasicBlock *const BB : FF) {
4252       assert(BB->getLabel()->isDefined() && "symbol should be defined");
4253       if (!BC.HasRelocations) {
4254         if (BB->isSplit())
4255           assert(FragmentBaseAddress == FF.getAddress());
4256         else
4257           assert(FragmentBaseAddress == getOutputAddress());
4258         (void)FragmentBaseAddress;
4259       }
4260 
4261       // Injected functions will likely fail the lookup, as they have no
4262       // input range. Just assign the BB the output address of the
4263       // function.
4264       auto MaybeBBAddress = BC.getIOAddressMap().lookup(BB->getLabel());
4265       const uint64_t BBAddress = MaybeBBAddress  ? *MaybeBBAddress
4266                                  : BB->isSplit() ? FF.getAddress()
4267                                                  : getOutputAddress();
4268       BB->setOutputStartAddress(BBAddress);
4269 
4270       if (PrevBB) {
4271         assert(PrevBB->getOutputAddressRange().first <= BBAddress &&
4272                "Bad output address for basic block.");
4273         assert((PrevBB->getOutputAddressRange().first != BBAddress ||
4274                 !hasInstructions() || !PrevBB->getNumNonPseudos()) &&
4275                "Bad output address for basic block.");
4276         PrevBB->setOutputEndAddress(BBAddress);
4277       }
4278       PrevBB = BB;
4279     }
4280 
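         // The last block of the fragment ends where the fragment itself ends
         // (for split fragments) or where the function output ends.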
4281     PrevBB->setOutputEndAddress(PrevBB->isSplit()
4282                                     ? FF.getAddress() + FF.getImageSize()
4283                                     : getOutputAddress() + getOutputSize());
4284   }
4285 
4286   // Reset output addresses for deleted blocks.
4287   for (BinaryBasicBlock *BB : DeletedBasicBlocks) {
4288     BB->setOutputStartAddress(0);
4289     BB->setOutputEndAddress(0);
4290   }
4291 }
4292 
4293 DebugAddressRangesVector BinaryFunction::getOutputAddressRanges() const {
4294   DebugAddressRangesVector OutputRanges;
4295 
4296   if (isFolded())
4297     return OutputRanges;
4298 
4299   if (IsFragment)
4300     return OutputRanges;
4301 
4302   OutputRanges.emplace_back(getOutputAddress(),
4303                             getOutputAddress() + getOutputSize());
4304   if (isSplit()) {
4305     assert(isEmitted() && "split function should be emitted");
4306     for (const FunctionFragment &FF : getLayout().getSplitFragments())
4307       OutputRanges.emplace_back(FF.getAddress(),
4308                                 FF.getAddress() + FF.getImageSize());
4309   }
4310 
4311   if (isSimple())
4312     return OutputRanges;
4313 
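       // For non-simple functions, also include the output ranges of fragments
       // registered as separate functions.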
4314   for (BinaryFunction *Frag : Fragments) {
4315     assert(!Frag->isSimple() &&
4316            "fragment of non-simple function should also be non-simple");
4317     OutputRanges.emplace_back(Frag->getOutputAddress(),
4318                               Frag->getOutputAddress() + Frag->getOutputSize());
4319   }
4320 
4321   return OutputRanges;
4322 }
4323 
4324 uint64_t BinaryFunction::translateInputToOutputAddress(uint64_t Address) const {
4325   if (isFolded())
4326     return 0;
4327 
4328   // If the function hasn't changed, return the same address.
4329   if (!isEmitted())
4330     return Address;
4331 
4332   if (Address < getAddress())
4333     return 0;
4334 
4335   // Check if the address is associated with an instruction that is tracked
4336   // by address translation.
4337   if (auto OutputAddress = BC.getIOAddressMap().lookup(Address))
4338     return *OutputAddress;
4339 
4340   // FIXME: #18950828 - we rely on relative offsets inside basic blocks to stay
4341   //        intact. Instead we can use pseudo instructions and/or annotations.
4342   const uint64_t Offset = Address - getAddress();
4343   const BinaryBasicBlock *BB = getBasicBlockContainingOffset(Offset);
4344   if (!BB) {
4345     // Special case for an address immediately past the end of the function.
4346     if (Offset == getSize())
4347       return getOutputAddress() + getOutputSize();
4348 
4349     return 0;
4350   }
4351 
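       // Assume the offset within the basic block is preserved in the output, but
       // never return an address past the block's output end.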
4352   return std::min(BB->getOutputAddressRange().first + Offset - BB->getOffset(),
4353                   BB->getOutputAddressRange().second);
4354 }
4355 
4356 DebugAddressRangesVector
4357 BinaryFunction::translateInputToOutputRange(DebugAddressRange InRange) const {
4358   DebugAddressRangesVector OutRanges;
4359 
4360   // The function was removed from the output. Return an empty range.
4361   if (isFolded())
4362     return OutRanges;
4363 
4364   // If the function hasn't changed, return the same range.
4365   if (!isEmitted()) {
4366     OutRanges.emplace_back(InRange);
4367     return OutRanges;
4368   }
4369 
4370   if (!containsAddress(InRange.LowPC))
4371     return OutRanges;
4372 
4373   // Special case of an empty range [X, X). Some tools expect X to be updated.
4374   if (InRange.LowPC == InRange.HighPC) {
4375     if (uint64_t NewPC = translateInputToOutputAddress(InRange.LowPC))
4376       OutRanges.push_back(DebugAddressRange{NewPC, NewPC});
4377     return OutRanges;
4378   }
4379 
4380   uint64_t InputOffset = InRange.LowPC - getAddress();
4381   const uint64_t InputEndOffset =
4382       std::min(InRange.HighPC - getAddress(), getSize());
4383 
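       // Find the first basic block starting strictly after InputOffset; the block
       // containing the offset is the one immediately preceding it.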
4384   auto BBI = llvm::upper_bound(BasicBlockOffsets,
4385                                BasicBlockOffset(InputOffset, nullptr),
4386                                CompareBasicBlockOffsets());
4387   assert(BBI != BasicBlockOffsets.begin());
4388 
4389   // Iterate over blocks in the input order using BasicBlockOffsets.
4390   for (--BBI; InputOffset < InputEndOffset && BBI != BasicBlockOffsets.end();
4391        InputOffset = BBI->second->getEndOffset(), ++BBI) {
4392     const BinaryBasicBlock &BB = *BBI->second;
4393     if (InputOffset < BB.getOffset() || InputOffset >= BB.getEndOffset()) {
4394       LLVM_DEBUG(
4395           dbgs() << "BOLT-DEBUG: invalid debug address range detected for "
4396                  << *this << " : [0x" << Twine::utohexstr(InRange.LowPC)
4397                  << ", 0x" << Twine::utohexstr(InRange.HighPC) << "]\n");
4398       break;
4399     }
4400 
4401     // Skip the block if it wasn't emitted.
4402     if (!BB.getOutputAddressRange().first)
4403       continue;
4404 
4405     // Find the output address for an instruction with an offset greater than
4406     // or equal to \p Offset. The output address should fall within the
4407     // boundaries of the same basic block.
4408     auto translateBlockOffset = [&](const uint64_t Offset) {
4409       const uint64_t OutAddress = BB.getOutputAddressRange().first + Offset;
4410       return std::min(OutAddress, BB.getOutputAddressRange().second);
4411     };
4412 
4413     uint64_t OutLowPC = BB.getOutputAddressRange().first;
4414     if (InputOffset > BB.getOffset())
4415       OutLowPC = translateBlockOffset(InputOffset - BB.getOffset());
4416 
4417     uint64_t OutHighPC = BB.getOutputAddressRange().second;
4418     if (InputEndOffset < BB.getEndOffset()) {
4419       assert(InputEndOffset >= BB.getOffset());
4420       OutHighPC = translateBlockOffset(InputEndOffset - BB.getOffset());
4421     }
4422 
4423     // Check if we can expand the last translated range.
4424     if (!OutRanges.empty() && OutRanges.back().HighPC == OutLowPC)
4425       OutRanges.back().HighPC = std::max(OutRanges.back().HighPC, OutHighPC);
4426     else
4427       OutRanges.emplace_back(OutLowPC, std::max(OutLowPC, OutHighPC));
4428   }
4429 
4430   LLVM_DEBUG({
4431     dbgs() << "BOLT-DEBUG: translated address range " << InRange << " -> ";
4432     for (const DebugAddressRange &R : OutRanges)
4433       dbgs() << R << ' ';
4434     dbgs() << '\n';
4435   });
4436 
4437   return OutRanges;
4438 }
4439 
4440 MCInst *BinaryFunction::getInstructionAtOffset(uint64_t Offset) {
4441   if (CurrentState == State::Disassembled) {
4442     auto II = Instructions.find(Offset);
4443     return (II == Instructions.end()) ? nullptr : &II->second;
4444   } else if (CurrentState == State::CFG) {
4445     BinaryBasicBlock *BB = getBasicBlockContainingOffset(Offset);
4446     if (!BB)
4447       return nullptr;
4448 
4449     for (MCInst &Inst : *BB) {
4450       constexpr uint32_t InvalidOffset = std::numeric_limits<uint32_t>::max();
4451       if (Offset == BC.MIB->getOffsetWithDefault(Inst, InvalidOffset))
4452         return &Inst;
4453     }
4454 
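         // Fall back to the last non-pseudo instruction if the requested offset
         // matches its start computed from the block end offset and the
         // instruction size.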
4455     if (MCInst *LastInstr = BB->getLastNonPseudoInstr()) {
4456       if (std::optional<uint32_t> Size = BC.MIB->getSize(*LastInstr)) {
4457         if (BB->getEndOffset() - Offset == Size) {
4458           return LastInstr;
4459         }
4460       }
4461     }
4462 
4463     return nullptr;
4464   } else {
4465     llvm_unreachable("invalid CFG state to use getInstructionAtOffset()");
4466   }
4467 }
4468 
4469 void BinaryFunction::printLoopInfo(raw_ostream &OS) const {
4470   if (!opts::shouldPrint(*this))
4471     return;
4472 
4473   OS << "Loop Info for Function \"" << *this << "\"";
4474   if (hasValidProfile())
4475     OS << " (count: " << getExecutionCount() << ")";
4476   OS << "\n";
4477 
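       // Traverse the loop tree depth-first using an explicit work stack.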
4478   std::stack<BinaryLoop *> St;
4479   for (BinaryLoop *L : *BLI)
4480     St.push(L);
4481   while (!St.empty()) {
4482     BinaryLoop *L = St.top();
4483     St.pop();
4484 
4485     for (BinaryLoop *Inner : *L)
4486       St.push(Inner);
4487 
4488     if (!hasValidProfile())
4489       continue;
4490 
4491     OS << (L->getLoopDepth() > 1 ? "Nested" : "Outer")
4492        << " loop header: " << L->getHeader()->getName();
4493     OS << "\n";
4494     OS << "Loop basic blocks: ";
4495     ListSeparator LS;
4496     for (BinaryBasicBlock *BB : L->blocks())
4497       OS << LS << BB->getName();
4498     OS << "\n";
4499     if (hasValidProfile()) {
4500       OS << "Total back edge count: " << L->TotalBackEdgeCount << "\n";
4501       OS << "Loop entry count: " << L->EntryCount << "\n";
4502       OS << "Loop exit count: " << L->ExitCount << "\n";
4503       if (L->EntryCount > 0) {
4504         OS << "Average iters per entry: "
4505            << format("%.4lf", (double)L->TotalBackEdgeCount / L->EntryCount)
4506            << "\n";
4507       }
4508     }
4509     OS << "----\n";
4510   }
4511 
4512   OS << "Total number of loops: " << BLI->TotalLoops << "\n";
4513   OS << "Number of outer loops: " << BLI->OuterLoops << "\n";
4514   OS << "Maximum nested loop depth: " << BLI->MaximumDepth << "\n\n";
4515 }
4516 
4517 bool BinaryFunction::isAArch64Veneer() const {
4518   if (empty() || hasIslandsInfo())
4519     return false;
4520 
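       // Every instruction in the first block must be marked as a veneer
       // instruction; any remaining blocks may only contain padding nops.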
4521   BinaryBasicBlock &BB = **BasicBlocks.begin();
4522   for (MCInst &Inst : BB)
4523     if (!BC.MIB->hasAnnotation(Inst, "AArch64Veneer"))
4524       return false;
4525 
4526   for (auto I = BasicBlocks.begin() + 1, E = BasicBlocks.end(); I != E; ++I) {
4527     for (MCInst &Inst : **I)
4528       if (!BC.MIB->isNoop(Inst))
4529         return false;
4530   }
4531 
4532   return true;
4533 }
4534 
4535 void BinaryFunction::addRelocation(uint64_t Address, MCSymbol *Symbol,
4536                                    uint64_t RelType, uint64_t Addend,
4537                                    uint64_t Value) {
4538   assert(Address >= getAddress() && Address < getAddress() + getMaxSize() &&
4539          "address is outside of the function");
4540   uint64_t Offset = Address - getAddress();
4541   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: addRelocation in "
4542                     << formatv("{0}@{1:x} against {2}\n", *this, Offset,
4543                                (Symbol ? Symbol->getName() : "<undef>")));
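       // Relocations landing in AArch64 constant islands are tracked separately
       // from code relocations.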
4544   bool IsCI = BC.isAArch64() && isInConstantIsland(Address);
4545   std::map<uint64_t, Relocation> &Rels =
4546       IsCI ? Islands->Relocations : Relocations;
4547   if (BC.MIB->shouldRecordCodeRelocation(RelType))
4548     Rels[Offset] = Relocation{Offset, Symbol, RelType, Addend, Value};
4549 }
4550 
4551 } // namespace bolt
4552 } // namespace llvm
4553