xref: /llvm-project/bolt/lib/Core/BinaryFunction.cpp (revision b7944f7c042925adef4e4a5ed587d7ffca8e3168)
1 //===- bolt/Core/BinaryFunction.cpp - Low-level function ------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the BinaryFunction class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "bolt/Core/BinaryFunction.h"
14 #include "bolt/Core/BinaryBasicBlock.h"
15 #include "bolt/Core/BinaryDomTree.h"
16 #include "bolt/Core/DynoStats.h"
17 #include "bolt/Core/HashUtilities.h"
18 #include "bolt/Core/MCPlusBuilder.h"
19 #include "bolt/Utils/NameResolver.h"
20 #include "bolt/Utils/NameShortener.h"
21 #include "bolt/Utils/Utils.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/SmallSet.h"
24 #include "llvm/ADT/StringExtras.h"
25 #include "llvm/ADT/StringRef.h"
26 #include "llvm/Demangle/Demangle.h"
27 #include "llvm/MC/MCAsmInfo.h"
28 #include "llvm/MC/MCContext.h"
29 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
30 #include "llvm/MC/MCExpr.h"
31 #include "llvm/MC/MCInst.h"
32 #include "llvm/MC/MCInstPrinter.h"
33 #include "llvm/MC/MCRegisterInfo.h"
34 #include "llvm/MC/MCSymbol.h"
35 #include "llvm/Object/ObjectFile.h"
36 #include "llvm/Support/CommandLine.h"
37 #include "llvm/Support/Debug.h"
38 #include "llvm/Support/GraphWriter.h"
39 #include "llvm/Support/LEB128.h"
40 #include "llvm/Support/Regex.h"
41 #include "llvm/Support/Timer.h"
42 #include "llvm/Support/raw_ostream.h"
43 #include <functional>
44 #include <limits>
45 #include <numeric>
46 #include <string>
47 
48 #define DEBUG_TYPE "bolt"
49 
50 using namespace llvm;
51 using namespace bolt;
52 
namespace opts {

// Option categories defined in other BOLT components.
extern cl::OptionCategory BoltCategory;
extern cl::OptionCategory BoltOptCategory;
extern cl::OptionCategory BoltRelocCategory;

// Options owned by other translation units but consulted here.
extern cl::opt<bool> EnableBAT;
extern cl::opt<bool> Instrument;
extern cl::opt<bool> StrictMode;
extern cl::opt<bool> UpdateDebugSections;
extern cl::opt<unsigned> Verbosity;

extern bool processAllFunctions();

// Non-static options below are referenced from other BOLT translation units.
cl::opt<bool> CheckEncoding(
    "check-encoding",
    cl::desc("perform verification of LLVM instruction encoding/decoding. "
             "Every instruction in the input is decoded and re-encoded. "
             "If the resulting bytes do not match the input, a warning message "
             "is printed."),
    cl::Hidden, cl::cat(BoltCategory));

static cl::opt<bool> DotToolTipCode(
    "dot-tooltip-code",
    cl::desc("add basic block instructions as tool tips on nodes"), cl::Hidden,
    cl::cat(BoltCategory));

cl::opt<JumpTableSupportLevel>
JumpTables("jump-tables",
  cl::desc("jump tables support (default=basic)"),
  cl::init(JTS_BASIC),
  cl::values(
      clEnumValN(JTS_NONE, "none",
                 "do not optimize functions with jump tables"),
      clEnumValN(JTS_BASIC, "basic",
                 "optimize functions with jump tables"),
      clEnumValN(JTS_MOVE, "move",
                 "move jump tables to a separate section"),
      clEnumValN(JTS_SPLIT, "split",
                 "split jump tables section into hot and cold based on "
                 "function execution frequency"),
      clEnumValN(JTS_AGGRESSIVE, "aggressive",
                 "aggressively split jump tables section based on usage "
                 "of the tables")),
  cl::ZeroOrMore,
  cl::cat(BoltOptCategory));

static cl::opt<bool> NoScan(
    "no-scan",
    cl::desc(
        "do not scan cold functions for external references (may result in "
        "slower binary)"),
    cl::Hidden, cl::cat(BoltOptCategory));

cl::opt<bool>
    PreserveBlocksAlignment("preserve-blocks-alignment",
                            cl::desc("try to preserve basic block alignment"),
                            cl::cat(BoltOptCategory));

cl::opt<bool>
PrintDynoStats("dyno-stats",
  cl::desc("print execution info based on profile"),
  cl::cat(BoltCategory));

static cl::opt<bool>
PrintDynoStatsOnly("print-dyno-stats-only",
  cl::desc("while printing functions output dyno-stats and skip instructions"),
  cl::init(false),
  cl::Hidden,
  cl::cat(BoltCategory));

// Comma-separated list of function name patterns; see shouldPrint() below.
static cl::list<std::string>
PrintOnly("print-only",
  cl::CommaSeparated,
  cl::desc("list of functions to print"),
  cl::value_desc("func1,func2,func3,..."),
  cl::Hidden,
  cl::cat(BoltCategory));

cl::opt<bool>
    TimeBuild("time-build",
              cl::desc("print time spent constructing binary functions"),
              cl::Hidden, cl::cat(BoltCategory));

cl::opt<bool>
TrapOnAVX512("trap-avx512",
  cl::desc("in relocation mode trap upon entry to any function that uses "
            "AVX-512 instructions"),
  cl::init(false),
  cl::ZeroOrMore,
  cl::Hidden,
  cl::cat(BoltCategory));
145 
146 bool shouldPrint(const BinaryFunction &Function) {
147   if (Function.isIgnored())
148     return false;
149 
150   if (PrintOnly.empty())
151     return true;
152 
153   for (std::string &Name : opts::PrintOnly) {
154     if (Function.hasNameRegex(Name)) {
155       return true;
156     }
157   }
158 
159   return false;
160 }
161 
162 } // namespace opts
163 
164 namespace llvm {
165 namespace bolt {
166 
/// Return true if \p Range yields no elements, i.e. begin equals end.
template <typename R> static bool emptyRange(const R &Range) {
  auto It = Range.begin();
  auto EndIt = Range.end();
  return It == EndIt;
}
170 
/// Gets debug line information for the instruction located at the given
/// address in the original binary. The SMLoc's pointer is used
/// to point to this information, which is represented by a
/// DebugLineTableRowRef. The returned pointer is null if no debug line
/// information for this instruction was found.
static SMLoc findDebugLineInformationForInstructionAt(
    uint64_t Address, DWARFUnit *Unit,
    const DWARFDebugLine::LineTable *LineTable) {
  // We use the pointer in SMLoc to store an instance of DebugLineTableRowRef,
  // which occupies 64 bits. Thus, we can only proceed if the struct fits into
  // the pointer itself.
  static_assert(
      sizeof(decltype(SMLoc().getPointer())) >= sizeof(DebugLineTableRowRef),
      "Cannot fit instruction debug line information into SMLoc's pointer");

  SMLoc NullResult = DebugLineTableRowRef::NULL_ROW.toSMLoc();
  uint32_t RowIndex = LineTable->lookupAddress(
      {Address, object::SectionedAddress::UndefSection});
  if (RowIndex == LineTable->UnknownRowIndex)
    return NullResult;

  assert(RowIndex < LineTable->Rows.size() &&
         "Line Table lookup returned invalid index.");

  // Pack the (unit offset, row) pair into pointer-sized storage by writing
  // the struct's fields through an aliasing pointer into Ptr.
  decltype(SMLoc().getPointer()) Ptr;
  DebugLineTableRowRef *InstructionLocation =
      reinterpret_cast<DebugLineTableRowRef *>(&Ptr);

  InstructionLocation->DwCompileUnitIndex = Unit->getOffset();
  // RowIndex is stored biased by one. NOTE(review): presumably so index 0 can
  // serve as the NULL_ROW sentinel -- confirm against DebugLineTableRowRef.
  InstructionLocation->RowIndex = RowIndex + 1;

  return SMLoc::getFromPointer(Ptr);
}
204 
205 static std::string buildSectionName(StringRef Prefix, StringRef Name,
206                                     const BinaryContext &BC) {
207   if (BC.isELF())
208     return (Prefix + Name).str();
209   static NameShortener NS;
210   return (Prefix + Twine(NS.getID(Name))).str();
211 }
212 
213 static raw_ostream &operator<<(raw_ostream &OS,
214                                const BinaryFunction::State State) {
215   switch (State) {
216   case BinaryFunction::State::Empty:         OS << "empty"; break;
217   case BinaryFunction::State::Disassembled:  OS << "disassembled"; break;
218   case BinaryFunction::State::CFG:           OS << "CFG constructed"; break;
219   case BinaryFunction::State::CFG_Finalized: OS << "CFG finalized"; break;
220   case BinaryFunction::State::EmittedCFG:    OS << "emitted with CFG"; break;
221   case BinaryFunction::State::Emitted:       OS << "emitted"; break;
222   }
223 
224   return OS;
225 }
226 
227 std::string BinaryFunction::buildCodeSectionName(StringRef Name,
228                                                  const BinaryContext &BC) {
229   return buildSectionName(BC.isELF() ? ".local.text." : ".l.text.", Name, BC);
230 }
231 
232 std::string BinaryFunction::buildColdCodeSectionName(StringRef Name,
233                                                      const BinaryContext &BC) {
234   return buildSectionName(BC.isELF() ? ".local.cold.text." : ".l.c.text.", Name,
235                           BC);
236 }
237 
238 uint64_t BinaryFunction::Count = 0;
239 
240 std::optional<StringRef>
241 BinaryFunction::hasNameRegex(const StringRef Name) const {
242   const std::string RegexName = (Twine("^") + StringRef(Name) + "$").str();
243   Regex MatchName(RegexName);
244   return forEachName(
245       [&MatchName](StringRef Name) { return MatchName.match(Name); });
246 }
247 
248 std::optional<StringRef>
249 BinaryFunction::hasRestoredNameRegex(const StringRef Name) const {
250   const std::string RegexName = (Twine("^") + StringRef(Name) + "$").str();
251   Regex MatchName(RegexName);
252   return forEachName([&MatchName](StringRef Name) {
253     return MatchName.match(NameResolver::restore(Name));
254   });
255 }
256 
257 std::string BinaryFunction::getDemangledName() const {
258   StringRef MangledName = NameResolver::restore(getOneName());
259   return demangle(MangledName.str());
260 }
261 
/// Return the basic block containing input \p Offset, or nullptr if the
/// offset lies past the function or in a gap between blocks.
BinaryBasicBlock *
BinaryFunction::getBasicBlockContainingOffset(uint64_t Offset) {
  if (Offset > Size)
    return nullptr;

  if (BasicBlockOffsets.empty())
    return nullptr;

  /*
   * This is commented out because it makes BOLT too slow.
   * assert(std::is_sorted(BasicBlockOffsets.begin(),
   *                       BasicBlockOffsets.end(),
   *                       CompareBasicBlockOffsets())));
   */
  // Binary-search for the first block starting after Offset, then step back
  // to the candidate block that may contain it.
  auto I =
      llvm::upper_bound(BasicBlockOffsets, BasicBlockOffset(Offset, nullptr),
                        CompareBasicBlockOffsets());
  assert(I != BasicBlockOffsets.begin() && "first basic block not at offset 0");
  --I;
  BinaryBasicBlock *BB = I->second;
  // The offset may still fall past the candidate block's original end.
  return (Offset < BB->getOffset() + BB->getOriginalSize()) ? BB : nullptr;
}
284 
285 void BinaryFunction::markUnreachableBlocks() {
286   std::stack<BinaryBasicBlock *> Stack;
287 
288   for (BinaryBasicBlock &BB : blocks())
289     BB.markValid(false);
290 
291   // Add all entries and landing pads as roots.
292   for (BinaryBasicBlock *BB : BasicBlocks) {
293     if (isEntryPoint(*BB) || BB->isLandingPad()) {
294       Stack.push(BB);
295       BB->markValid(true);
296       continue;
297     }
298     // FIXME:
299     // Also mark BBs with indirect jumps as reachable, since we do not
300     // support removing unused jump tables yet (GH-issue20).
301     for (const MCInst &Inst : *BB) {
302       if (BC.MIB->getJumpTable(Inst)) {
303         Stack.push(BB);
304         BB->markValid(true);
305         break;
306       }
307     }
308   }
309 
310   // Determine reachable BBs from the entry point
311   while (!Stack.empty()) {
312     BinaryBasicBlock *BB = Stack.top();
313     Stack.pop();
314     for (BinaryBasicBlock *Succ : BB->successors()) {
315       if (Succ->isValid())
316         continue;
317       Succ->markValid(true);
318       Stack.push(Succ);
319     }
320   }
321 }
322 
// Any unnecessary fallthrough jumps revealed after calling eraseInvalidBBs
// will be cleaned up by fixBranches().
/// Remove all basic blocks previously marked invalid (e.g. by
/// markUnreachableBlocks()). Returns the number of erased blocks and the
/// total code size in bytes that they contained.
std::pair<unsigned, uint64_t> BinaryFunction::eraseInvalidBBs() {
  DenseSet<const BinaryBasicBlock *> InvalidBBs;
  unsigned Count = 0;
  uint64_t Bytes = 0;
  // Collect the invalid blocks and tally their code size.
  for (BinaryBasicBlock *const BB : BasicBlocks) {
    if (!BB->isValid()) {
      assert(!isEntryPoint(*BB) && "all entry blocks must be valid");
      InvalidBBs.insert(BB);
      ++Count;
      Bytes += BC.computeCodeSize(BB->begin(), BB->end());
    }
  }

  Layout.eraseBasicBlocks(InvalidBBs);

  // Rebuild the block list, detaching each erased block from the CFG before
  // moving it to the deleted list.
  BasicBlockListType NewBasicBlocks;
  for (auto I = BasicBlocks.begin(), E = BasicBlocks.end(); I != E; ++I) {
    BinaryBasicBlock *BB = *I;
    if (InvalidBBs.contains(BB)) {
      // Make sure the block is removed from the list of predecessors.
      BB->removeAllSuccessors();
      DeletedBasicBlocks.push_back(BB);
    } else {
      NewBasicBlocks.push_back(BB);
    }
  }
  BasicBlocks = std::move(NewBasicBlocks);

  assert(BasicBlocks.size() == Layout.block_size());

  // Update CFG state if needed
  if (Count > 0)
    recomputeLandingPads();

  return std::make_pair(Count, Bytes);
}
361 
362 bool BinaryFunction::isForwardCall(const MCSymbol *CalleeSymbol) const {
363   // This function should work properly before and after function reordering.
364   // In order to accomplish this, we use the function index (if it is valid).
365   // If the function indices are not valid, we fall back to the original
366   // addresses.  This should be ok because the functions without valid indices
367   // should have been ordered with a stable sort.
368   const BinaryFunction *CalleeBF = BC.getFunctionForSymbol(CalleeSymbol);
369   if (CalleeBF) {
370     if (CalleeBF->isInjected())
371       return true;
372 
373     if (hasValidIndex() && CalleeBF->hasValidIndex()) {
374       return getIndex() < CalleeBF->getIndex();
375     } else if (hasValidIndex() && !CalleeBF->hasValidIndex()) {
376       return true;
377     } else if (!hasValidIndex() && CalleeBF->hasValidIndex()) {
378       return false;
379     } else {
380       return getAddress() < CalleeBF->getAddress();
381     }
382   } else {
383     // Absolute symbol.
384     ErrorOr<uint64_t> CalleeAddressOrError = BC.getSymbolValue(*CalleeSymbol);
385     assert(CalleeAddressOrError && "unregistered symbol found");
386     return *CalleeAddressOrError > getAddress();
387   }
388 }
389 
/// Dump the function to the debug stream; see print() for the format.
void BinaryFunction::dump() const {
  // getDynoStats calls FunctionLayout::updateLayoutIndices and
  // BasicBlock::analyzeBranch. The former cannot be const, but should be
  // removed, the latter should be made const, but seems to require refactoring.
  // Forcing all callers to have a non-const reference to BinaryFunction to call
  // dump non-const however is not ideal either. Adding this const_cast is right
  // now the best solution. It is safe, because BinaryFunction itself is not
  // modified. Only BinaryBasicBlocks are actually modified (if it all) and we
  // have mutable pointers to those regardless whether this function is
  // const-qualified or not.
  const_cast<BinaryFunction &>(*this).print(dbgs(), "");
}
402 
/// Print a human-readable dump of the function to \p OS: a header of
/// attributes, the instructions (grouped by basic block once the CFG exists),
/// exception ranges, jump tables, and CFI instructions. \p Annotation is
/// appended to the header line. Honors the -print-only filter.
void BinaryFunction::print(raw_ostream &OS, std::string Annotation) {
  if (!opts::shouldPrint(*this))
    return;

  // Header: general function attributes.
  StringRef SectionName =
      OriginSection ? OriginSection->getName() : "<no origin section>";
  OS << "Binary Function \"" << *this << "\" " << Annotation << " {";
  std::vector<StringRef> AllNames = getNames();
  if (AllNames.size() > 1) {
    OS << "\n  All names   : ";
    const char *Sep = "";
    for (const StringRef &Name : AllNames) {
      OS << Sep << Name;
      Sep = "\n                ";
    }
  }
  OS << "\n  Number      : " << FunctionNumber;
  OS << "\n  State       : " << CurrentState;
  OS << "\n  Address     : 0x" << Twine::utohexstr(Address);
  OS << "\n  Size        : 0x" << Twine::utohexstr(Size);
  OS << "\n  MaxSize     : 0x" << Twine::utohexstr(MaxSize);
  OS << "\n  Offset      : 0x" << Twine::utohexstr(getFileOffset());
  OS << "\n  Section     : " << SectionName;
  OS << "\n  Orc Section : " << getCodeSectionName();
  OS << "\n  LSDA        : 0x" << Twine::utohexstr(getLSDAAddress());
  OS << "\n  IsSimple    : " << IsSimple;
  OS << "\n  IsMultiEntry: " << isMultiEntry();
  OS << "\n  IsSplit     : " << isSplit();
  OS << "\n  BB Count    : " << size();

  // Optional attributes, printed only when set.
  if (HasFixedIndirectBranch)
    OS << "\n  HasFixedIndirectBranch : true";
  if (HasUnknownControlFlow)
    OS << "\n  Unknown CF  : true";
  if (getPersonalityFunction())
    OS << "\n  Personality : " << getPersonalityFunction()->getName();
  if (IsFragment)
    OS << "\n  IsFragment  : true";
  if (isFolded())
    OS << "\n  FoldedInto  : " << *getFoldedIntoFunction();
  for (BinaryFunction *ParentFragment : ParentFragments)
    OS << "\n  Parent      : " << *ParentFragment;
  if (!Fragments.empty()) {
    OS << "\n  Fragments   : ";
    ListSeparator LS;
    for (BinaryFunction *Frag : Fragments)
      OS << LS << *Frag;
  }
  if (hasCFG())
    OS << "\n  Hash        : " << Twine::utohexstr(computeHash());
  if (isMultiEntry()) {
    OS << "\n  Secondary Entry Points : ";
    ListSeparator LS;
    for (const auto &KV : SecondaryEntryPoints)
      OS << LS << KV.second->getName();
  }
  if (FrameInstructions.size())
    OS << "\n  CFI Instrs  : " << FrameInstructions.size();
  if (!Layout.block_empty()) {
    OS << "\n  BB Layout   : ";
    ListSeparator LS;
    for (const BinaryBasicBlock *BB : Layout.blocks())
      OS << LS << BB->getName();
  }
  if (getImageAddress())
    OS << "\n  Image       : 0x" << Twine::utohexstr(getImageAddress());
  if (ExecutionCount != COUNT_NO_PROFILE) {
    OS << "\n  Exec Count  : " << ExecutionCount;
    OS << "\n  Branch Count: " << RawBranchCount;
    OS << "\n  Profile Acc : " << format("%.1f%%", ProfileMatchRatio * 100.0f);
  }

  if (opts::PrintDynoStats && !getLayout().block_empty()) {
    OS << '\n';
    DynoStats dynoStats = getDynoStats(*this);
    OS << dynoStats;
  }

  OS << "\n}\n";

  // Stop here unless instruction-level output was requested and is possible.
  if (opts::PrintDynoStatsOnly || !BC.InstPrinter)
    return;

  // Offset of the instruction in function.
  uint64_t Offset = 0;

  if (BasicBlocks.empty() && !Instructions.empty()) {
    // Print before CFG was built.
    for (const std::pair<const uint32_t, MCInst> &II : Instructions) {
      Offset = II.first;

      // Print label if exists at this offset.
      auto LI = Labels.find(Offset);
      if (LI != Labels.end()) {
        if (const MCSymbol *EntrySymbol =
                getSecondaryEntryPointSymbol(LI->second))
          OS << EntrySymbol->getName() << " (Entry Point):\n";
        OS << LI->second->getName() << ":\n";
      }

      BC.printInstruction(OS, II.second, Offset, this);
    }
  }

  // Print basic blocks in layout order, one fragment at a time.
  StringRef SplitPointMsg = "";
  for (const FunctionFragment &FF : Layout.fragments()) {
    OS << SplitPointMsg;
    SplitPointMsg = "-------   HOT-COLD SPLIT POINT   -------\n\n";
    for (const BinaryBasicBlock *BB : FF) {
      OS << BB->getName() << " (" << BB->size()
         << " instructions, align : " << BB->getAlignment() << ")\n";

      if (isEntryPoint(*BB)) {
        if (MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(*BB))
          OS << "  Secondary Entry Point: " << EntrySymbol->getName() << '\n';
        else
          OS << "  Entry Point\n";
      }

      if (BB->isLandingPad())
        OS << "  Landing Pad\n";

      uint64_t BBExecCount = BB->getExecutionCount();
      if (hasValidProfile()) {
        OS << "  Exec Count : ";
        if (BB->getExecutionCount() != BinaryBasicBlock::COUNT_NO_PROFILE)
          OS << BBExecCount << '\n';
        else
          OS << "<unknown>\n";
      }
      if (BB->getCFIState() >= 0)
        OS << "  CFI State : " << BB->getCFIState() << '\n';
      if (opts::EnableBAT) {
        OS << "  Input offset: " << Twine::utohexstr(BB->getInputOffset())
           << "\n";
      }
      if (!BB->pred_empty()) {
        OS << "  Predecessors: ";
        ListSeparator LS;
        for (BinaryBasicBlock *Pred : BB->predecessors())
          OS << LS << Pred->getName();
        OS << '\n';
      }
      if (!BB->throw_empty()) {
        OS << "  Throwers: ";
        ListSeparator LS;
        for (BinaryBasicBlock *Throw : BB->throwers())
          OS << LS << Throw->getName();
        OS << '\n';
      }

      Offset = alignTo(Offset, BB->getAlignment());

      // Note: offsets are imprecise since this is happening prior to
      // relaxation.
      Offset = BC.printInstructions(OS, BB->begin(), BB->end(), Offset, this);

      if (!BB->succ_empty()) {
        OS << "  Successors: ";
        // For more than 2 successors, sort them based on frequency.
        std::vector<uint64_t> Indices(BB->succ_size());
        std::iota(Indices.begin(), Indices.end(), 0);
        if (BB->succ_size() > 2 && BB->getKnownExecutionCount()) {
          llvm::stable_sort(Indices, [&](const uint64_t A, const uint64_t B) {
            return BB->BranchInfo[B] < BB->BranchInfo[A];
          });
        }
        ListSeparator LS;
        for (unsigned I = 0; I < Indices.size(); ++I) {
          BinaryBasicBlock *Succ = BB->Successors[Indices[I]];
          const BinaryBasicBlock::BinaryBranchInfo &BI =
              BB->BranchInfo[Indices[I]];
          OS << LS << Succ->getName();
          if (ExecutionCount != COUNT_NO_PROFILE &&
              BI.MispredictedCount != BinaryBasicBlock::COUNT_INFERRED) {
            OS << " (mispreds: " << BI.MispredictedCount
               << ", count: " << BI.Count << ")";
          } else if (ExecutionCount != COUNT_NO_PROFILE &&
                     BI.Count != BinaryBasicBlock::COUNT_NO_PROFILE) {
            OS << " (inferred count: " << BI.Count << ")";
          }
        }
        OS << '\n';
      }

      if (!BB->lp_empty()) {
        OS << "  Landing Pads: ";
        ListSeparator LS;
        for (BinaryBasicBlock *LP : BB->landing_pads()) {
          OS << LS << LP->getName();
          if (ExecutionCount != COUNT_NO_PROFILE) {
            OS << " (count: " << LP->getExecutionCount() << ")";
          }
        }
        OS << '\n';
      }

      // In CFG_Finalized state we can miscalculate CFI state at exit.
      if (CurrentState == State::CFG) {
        const int32_t CFIStateAtExit = BB->getCFIStateAtExit();
        if (CFIStateAtExit >= 0)
          OS << "  CFI State: " << CFIStateAtExit << '\n';
      }

      OS << '\n';
    }
  }

  // Dump new exception ranges for the function.
  if (!CallSites.empty()) {
    OS << "EH table:\n";
    for (const FunctionFragment &FF : getLayout().fragments()) {
      for (const auto &FCSI : getCallSites(FF.getFragmentNum())) {
        const CallSite &CSI = FCSI.second;
        OS << "  [" << *CSI.Start << ", " << *CSI.End << ") landing pad : ";
        if (CSI.LP)
          OS << *CSI.LP;
        else
          OS << "0";
        OS << ", action : " << CSI.Action << '\n';
      }
    }
    OS << '\n';
  }

  // Print all jump tables.
  for (const std::pair<const uint64_t, JumpTable *> &JTI : JumpTables)
    JTI.second->print(OS);

  OS << "DWARF CFI Instructions:\n";
  if (OffsetToCFI.size()) {
    // Pre-buildCFG information
    for (const std::pair<const uint32_t, uint32_t> &Elmt : OffsetToCFI) {
      OS << format("    %08x:\t", Elmt.first);
      assert(Elmt.second < FrameInstructions.size() && "Incorrect CFI offset");
      BinaryContext::printCFI(OS, FrameInstructions[Elmt.second]);
      OS << "\n";
    }
  } else {
    // Post-buildCFG information
    for (uint32_t I = 0, E = FrameInstructions.size(); I != E; ++I) {
      const MCCFIInstruction &CFI = FrameInstructions[I];
      OS << format("    %d:\t", I);
      BinaryContext::printCFI(OS, CFI);
      OS << "\n";
    }
  }
  if (FrameInstructions.empty())
    OS << "    <empty>\n";

  OS << "End of Function \"" << *this << "\"\n\n";
}
655 
656 void BinaryFunction::printRelocations(raw_ostream &OS, uint64_t Offset,
657                                       uint64_t Size) const {
658   const char *Sep = " # Relocs: ";
659 
660   auto RI = Relocations.lower_bound(Offset);
661   while (RI != Relocations.end() && RI->first < Offset + Size) {
662     OS << Sep << "(R: " << RI->second << ")";
663     Sep = ", ";
664     ++RI;
665   }
666 }
667 
/// Rewrite the target register of a DW_CFA_expression /
/// DW_CFA_val_expression CFI blob to \p NewReg and return the rebuilt raw
/// expression bytes.
static std::string mutateDWARFExpressionTargetReg(const MCCFIInstruction &Instr,
                                                  MCPhysReg NewReg) {
  StringRef ExprBytes = Instr.getValues();
  assert(ExprBytes.size() > 1 && "DWARF expression CFI is too short");
  uint8_t Opcode = ExprBytes[0];
  assert((Opcode == dwarf::DW_CFA_expression ||
          Opcode == dwarf::DW_CFA_val_expression) &&
         "invalid DWARF expression CFI");
  (void)Opcode;
  // The register number follows the opcode byte as a ULEB128; measure its
  // encoded size so it can be spliced out.
  const uint8_t *const Start =
      reinterpret_cast<const uint8_t *>(ExprBytes.drop_front(1).data());
  const uint8_t *const End =
      reinterpret_cast<const uint8_t *>(Start + ExprBytes.size() - 1);
  unsigned Size = 0;
  decodeULEB128(Start, &Size, End);
  assert(Size > 0 && "Invalid reg encoding for DWARF expression CFI");
  // Encode the replacement register as ULEB128.
  SmallString<8> Tmp;
  raw_svector_ostream OSE(Tmp);
  encodeULEB128(NewReg, OSE);
  // Splice: opcode byte + new register bytes + remainder of the expression.
  return Twine(ExprBytes.slice(0, 1))
      .concat(OSE.str())
      .concat(ExprBytes.drop_front(1 + Size))
      .str();
}
692 
693 void BinaryFunction::mutateCFIRegisterFor(const MCInst &Instr,
694                                           MCPhysReg NewReg) {
695   const MCCFIInstruction *OldCFI = getCFIFor(Instr);
696   assert(OldCFI && "invalid CFI instr");
697   switch (OldCFI->getOperation()) {
698   default:
699     llvm_unreachable("Unexpected instruction");
700   case MCCFIInstruction::OpDefCfa:
701     setCFIFor(Instr, MCCFIInstruction::cfiDefCfa(nullptr, NewReg,
702                                                  OldCFI->getOffset()));
703     break;
704   case MCCFIInstruction::OpDefCfaRegister:
705     setCFIFor(Instr, MCCFIInstruction::createDefCfaRegister(nullptr, NewReg));
706     break;
707   case MCCFIInstruction::OpOffset:
708     setCFIFor(Instr, MCCFIInstruction::createOffset(nullptr, NewReg,
709                                                     OldCFI->getOffset()));
710     break;
711   case MCCFIInstruction::OpRegister:
712     setCFIFor(Instr, MCCFIInstruction::createRegister(nullptr, NewReg,
713                                                       OldCFI->getRegister2()));
714     break;
715   case MCCFIInstruction::OpSameValue:
716     setCFIFor(Instr, MCCFIInstruction::createSameValue(nullptr, NewReg));
717     break;
718   case MCCFIInstruction::OpEscape:
719     setCFIFor(Instr,
720               MCCFIInstruction::createEscape(
721                   nullptr,
722                   StringRef(mutateDWARFExpressionTargetReg(*OldCFI, NewReg))));
723     break;
724   case MCCFIInstruction::OpRestore:
725     setCFIFor(Instr, MCCFIInstruction::createRestore(nullptr, NewReg));
726     break;
727   case MCCFIInstruction::OpUndefined:
728     setCFIFor(Instr, MCCFIInstruction::createUndefined(nullptr, NewReg));
729     break;
730   }
731 }
732 
733 const MCCFIInstruction *BinaryFunction::mutateCFIOffsetFor(const MCInst &Instr,
734                                                            int64_t NewOffset) {
735   const MCCFIInstruction *OldCFI = getCFIFor(Instr);
736   assert(OldCFI && "invalid CFI instr");
737   switch (OldCFI->getOperation()) {
738   default:
739     llvm_unreachable("Unexpected instruction");
740   case MCCFIInstruction::OpDefCfaOffset:
741     setCFIFor(Instr, MCCFIInstruction::cfiDefCfaOffset(nullptr, NewOffset));
742     break;
743   case MCCFIInstruction::OpAdjustCfaOffset:
744     setCFIFor(Instr,
745               MCCFIInstruction::createAdjustCfaOffset(nullptr, NewOffset));
746     break;
747   case MCCFIInstruction::OpDefCfa:
748     setCFIFor(Instr, MCCFIInstruction::cfiDefCfa(nullptr, OldCFI->getRegister(),
749                                                  NewOffset));
750     break;
751   case MCCFIInstruction::OpOffset:
752     setCFIFor(Instr, MCCFIInstruction::createOffset(
753                          nullptr, OldCFI->getRegister(), NewOffset));
754     break;
755   }
756   return getCFIFor(Instr);
757 }
758 
759 IndirectBranchType
760 BinaryFunction::processIndirectBranch(MCInst &Instruction, unsigned Size,
761                                       uint64_t Offset,
762                                       uint64_t &TargetAddress) {
763   const unsigned PtrSize = BC.AsmInfo->getCodePointerSize();
764 
765   // The instruction referencing memory used by the branch instruction.
766   // It could be the branch instruction itself or one of the instructions
767   // setting the value of the register used by the branch.
768   MCInst *MemLocInstr;
769 
770   // Address of the table referenced by MemLocInstr. Could be either an
771   // array of function pointers, or a jump table.
772   uint64_t ArrayStart = 0;
773 
774   unsigned BaseRegNum, IndexRegNum;
775   int64_t DispValue;
776   const MCExpr *DispExpr;
777 
778   // In AArch, identify the instruction adding the PC-relative offset to
779   // jump table entries to correctly decode it.
780   MCInst *PCRelBaseInstr;
781   uint64_t PCRelAddr = 0;
782 
783   auto Begin = Instructions.begin();
784   if (BC.isAArch64()) {
785     PreserveNops = BC.HasRelocations;
786     // Start at the last label as an approximation of the current basic block.
787     // This is a heuristic, since the full set of labels have yet to be
788     // determined
789     for (const uint32_t Offset :
790          llvm::make_first_range(llvm::reverse(Labels))) {
791       auto II = Instructions.find(Offset);
792       if (II != Instructions.end()) {
793         Begin = II;
794         break;
795       }
796     }
797   }
798 
799   IndirectBranchType BranchType = BC.MIB->analyzeIndirectBranch(
800       Instruction, Begin, Instructions.end(), PtrSize, MemLocInstr, BaseRegNum,
801       IndexRegNum, DispValue, DispExpr, PCRelBaseInstr);
802 
803   if (BranchType == IndirectBranchType::UNKNOWN && !MemLocInstr)
804     return BranchType;
805 
806   if (MemLocInstr != &Instruction)
807     IndexRegNum = BC.MIB->getNoRegister();
808 
809   if (BC.isAArch64()) {
810     const MCSymbol *Sym = BC.MIB->getTargetSymbol(*PCRelBaseInstr, 1);
811     assert(Sym && "Symbol extraction failed");
812     ErrorOr<uint64_t> SymValueOrError = BC.getSymbolValue(*Sym);
813     if (SymValueOrError) {
814       PCRelAddr = *SymValueOrError;
815     } else {
816       for (std::pair<const uint32_t, MCSymbol *> &Elmt : Labels) {
817         if (Elmt.second == Sym) {
818           PCRelAddr = Elmt.first + getAddress();
819           break;
820         }
821       }
822     }
823     uint64_t InstrAddr = 0;
824     for (auto II = Instructions.rbegin(); II != Instructions.rend(); ++II) {
825       if (&II->second == PCRelBaseInstr) {
826         InstrAddr = II->first + getAddress();
827         break;
828       }
829     }
830     assert(InstrAddr != 0 && "instruction not found");
831     // We do this to avoid spurious references to code locations outside this
832     // function (for example, if the indirect jump lives in the last basic
833     // block of the function, it will create a reference to the next function).
834     // This replaces a symbol reference with an immediate.
835     BC.MIB->replaceMemOperandDisp(*PCRelBaseInstr,
836                                   MCOperand::createImm(PCRelAddr - InstrAddr));
837     // FIXME: Disable full jump table processing for AArch64 until we have a
838     // proper way of determining the jump table limits.
839     return IndirectBranchType::UNKNOWN;
840   }
841 
842   // RIP-relative addressing should be converted to symbol form by now
843   // in processed instructions (but not in jump).
844   if (DispExpr) {
845     const MCSymbol *TargetSym;
846     uint64_t TargetOffset;
847     std::tie(TargetSym, TargetOffset) = BC.MIB->getTargetSymbolInfo(DispExpr);
848     ErrorOr<uint64_t> SymValueOrError = BC.getSymbolValue(*TargetSym);
849     assert(SymValueOrError && "global symbol needs a value");
850     ArrayStart = *SymValueOrError + TargetOffset;
851     BaseRegNum = BC.MIB->getNoRegister();
852     if (BC.isAArch64()) {
853       ArrayStart &= ~0xFFFULL;
854       ArrayStart += DispValue & 0xFFFULL;
855     }
856   } else {
857     ArrayStart = static_cast<uint64_t>(DispValue);
858   }
859 
860   if (BaseRegNum == BC.MRI->getProgramCounter())
861     ArrayStart += getAddress() + Offset + Size;
862 
863   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: addressed memory is 0x"
864                     << Twine::utohexstr(ArrayStart) << '\n');
865 
866   ErrorOr<BinarySection &> Section = BC.getSectionForAddress(ArrayStart);
867   if (!Section) {
868     // No section - possibly an absolute address. Since we don't allow
869     // internal function addresses to escape the function scope - we
870     // consider it a tail call.
871     if (opts::Verbosity >= 1) {
872       errs() << "BOLT-WARNING: no section for address 0x"
873              << Twine::utohexstr(ArrayStart) << " referenced from function "
874              << *this << '\n';
875     }
876     return IndirectBranchType::POSSIBLE_TAIL_CALL;
877   }
878   if (Section->isVirtual()) {
879     // The contents are filled at runtime.
880     return IndirectBranchType::POSSIBLE_TAIL_CALL;
881   }
882 
883   if (BranchType == IndirectBranchType::POSSIBLE_FIXED_BRANCH) {
884     ErrorOr<uint64_t> Value = BC.getPointerAtAddress(ArrayStart);
885     if (!Value)
886       return IndirectBranchType::UNKNOWN;
887 
888     if (BC.getSectionForAddress(ArrayStart)->isWritable())
889       return IndirectBranchType::UNKNOWN;
890 
891     outs() << "BOLT-INFO: fixed indirect branch detected in " << *this
892            << " at 0x" << Twine::utohexstr(getAddress() + Offset)
893            << " referencing data at 0x" << Twine::utohexstr(ArrayStart)
894            << " the destination value is 0x" << Twine::utohexstr(*Value)
895            << '\n';
896 
897     TargetAddress = *Value;
898     return BranchType;
899   }
900 
901   // Check if there's already a jump table registered at this address.
902   MemoryContentsType MemType;
903   if (JumpTable *JT = BC.getJumpTableContainingAddress(ArrayStart)) {
904     switch (JT->Type) {
905     case JumpTable::JTT_NORMAL:
906       MemType = MemoryContentsType::POSSIBLE_JUMP_TABLE;
907       break;
908     case JumpTable::JTT_PIC:
909       MemType = MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE;
910       break;
911     }
912   } else {
913     MemType = BC.analyzeMemoryAt(ArrayStart, *this);
914   }
915 
916   // Check that jump table type in instruction pattern matches memory contents.
917   JumpTable::JumpTableType JTType;
918   if (BranchType == IndirectBranchType::POSSIBLE_PIC_JUMP_TABLE) {
919     if (MemType != MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE)
920       return IndirectBranchType::UNKNOWN;
921     JTType = JumpTable::JTT_PIC;
922   } else {
923     if (MemType == MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE)
924       return IndirectBranchType::UNKNOWN;
925 
926     if (MemType == MemoryContentsType::UNKNOWN)
927       return IndirectBranchType::POSSIBLE_TAIL_CALL;
928 
929     BranchType = IndirectBranchType::POSSIBLE_JUMP_TABLE;
930     JTType = JumpTable::JTT_NORMAL;
931   }
932 
933   // Convert the instruction into jump table branch.
934   const MCSymbol *JTLabel = BC.getOrCreateJumpTable(*this, ArrayStart, JTType);
935   BC.MIB->replaceMemOperandDisp(*MemLocInstr, JTLabel, BC.Ctx.get());
936   BC.MIB->setJumpTable(Instruction, ArrayStart, IndexRegNum);
937 
938   JTSites.emplace_back(Offset, ArrayStart);
939 
940   return BranchType;
941 }
942 
943 MCSymbol *BinaryFunction::getOrCreateLocalLabel(uint64_t Address,
944                                                 bool CreatePastEnd) {
945   const uint64_t Offset = Address - getAddress();
946 
947   if ((Offset == getSize()) && CreatePastEnd)
948     return getFunctionEndLabel();
949 
950   auto LI = Labels.find(Offset);
951   if (LI != Labels.end())
952     return LI->second;
953 
954   // For AArch64, check if this address is part of a constant island.
955   if (BC.isAArch64()) {
956     if (MCSymbol *IslandSym = getOrCreateIslandAccess(Address))
957       return IslandSym;
958   }
959 
960   MCSymbol *Label = BC.Ctx->createNamedTempSymbol();
961   Labels[Offset] = Label;
962 
963   return Label;
964 }
965 
966 ErrorOr<ArrayRef<uint8_t>> BinaryFunction::getData() const {
967   BinarySection &Section = *getOriginSection();
968   assert(Section.containsRange(getAddress(), getMaxSize()) &&
969          "wrong section for function");
970 
971   if (!Section.isText() || Section.isVirtual() || !Section.getSize())
972     return std::make_error_code(std::errc::bad_address);
973 
974   StringRef SectionContents = Section.getContents();
975 
976   assert(SectionContents.size() == Section.getSize() &&
977          "section size mismatch");
978 
979   // Function offset from the section start.
980   uint64_t Offset = getAddress() - Section.getAddress();
981   auto *Bytes = reinterpret_cast<const uint8_t *>(SectionContents.data());
982   return ArrayRef<uint8_t>(Bytes + Offset, getMaxSize());
983 }
984 
985 size_t BinaryFunction::getSizeOfDataInCodeAt(uint64_t Offset) const {
986   if (!Islands)
987     return 0;
988 
989   if (!llvm::is_contained(Islands->DataOffsets, Offset))
990     return 0;
991 
992   auto Iter = Islands->CodeOffsets.upper_bound(Offset);
993   if (Iter != Islands->CodeOffsets.end())
994     return *Iter - Offset;
995   return getSize() - Offset;
996 }
997 
998 bool BinaryFunction::isZeroPaddingAt(uint64_t Offset) const {
999   ArrayRef<uint8_t> FunctionData = *getData();
1000   uint64_t EndOfCode = getSize();
1001   if (Islands) {
1002     auto Iter = Islands->DataOffsets.upper_bound(Offset);
1003     if (Iter != Islands->DataOffsets.end())
1004       EndOfCode = *Iter;
1005   }
1006   for (uint64_t I = Offset; I < EndOfCode; ++I)
1007     if (FunctionData[I] != 0)
1008       return false;
1009 
1010   return true;
1011 }
1012 
1013 void BinaryFunction::handlePCRelOperand(MCInst &Instruction, uint64_t Address,
1014                                         uint64_t Size) {
1015   auto &MIB = BC.MIB;
1016   uint64_t TargetAddress = 0;
1017   if (!MIB->evaluateMemOperandTarget(Instruction, TargetAddress, Address,
1018                                      Size)) {
1019     errs() << "BOLT-ERROR: PC-relative operand can't be evaluated:\n";
1020     BC.InstPrinter->printInst(&Instruction, 0, "", *BC.STI, errs());
1021     errs() << '\n';
1022     Instruction.dump_pretty(errs(), BC.InstPrinter.get());
1023     errs() << '\n';
1024     errs() << "BOLT-ERROR: cannot handle PC-relative operand at 0x"
1025            << Twine::utohexstr(Address) << ". Skipping function " << *this
1026            << ".\n";
1027     if (BC.HasRelocations)
1028       exit(1);
1029     IsSimple = false;
1030     return;
1031   }
1032   if (TargetAddress == 0 && opts::Verbosity >= 1) {
1033     outs() << "BOLT-INFO: PC-relative operand is zero in function " << *this
1034            << '\n';
1035   }
1036 
1037   const MCSymbol *TargetSymbol;
1038   uint64_t TargetOffset;
1039   std::tie(TargetSymbol, TargetOffset) =
1040       BC.handleAddressRef(TargetAddress, *this, /*IsPCRel*/ true);
1041 
1042   bool ReplaceSuccess = MIB->replaceMemOperandDisp(
1043       Instruction, TargetSymbol, static_cast<int64_t>(TargetOffset), &*BC.Ctx);
1044   (void)ReplaceSuccess;
1045   assert(ReplaceSuccess && "Failed to replace mem operand with symbol+off.");
1046 }
1047 
1048 MCSymbol *BinaryFunction::handleExternalReference(MCInst &Instruction,
1049                                                   uint64_t Size,
1050                                                   uint64_t Offset,
1051                                                   uint64_t TargetAddress,
1052                                                   bool &IsCall) {
1053   auto &MIB = BC.MIB;
1054 
1055   const uint64_t AbsoluteInstrAddr = getAddress() + Offset;
1056   BC.addInterproceduralReference(this, TargetAddress);
1057   if (opts::Verbosity >= 2 && !IsCall && Size == 2 && !BC.HasRelocations) {
1058     errs() << "BOLT-WARNING: relaxed tail call detected at 0x"
1059            << Twine::utohexstr(AbsoluteInstrAddr) << " in function " << *this
1060            << ". Code size will be increased.\n";
1061   }
1062 
1063   assert(!MIB->isTailCall(Instruction) &&
1064          "synthetic tail call instruction found");
1065 
1066   // This is a call regardless of the opcode.
1067   // Assign proper opcode for tail calls, so that they could be
1068   // treated as calls.
1069   if (!IsCall) {
1070     if (!MIB->convertJmpToTailCall(Instruction)) {
1071       assert(MIB->isConditionalBranch(Instruction) &&
1072              "unknown tail call instruction");
1073       if (opts::Verbosity >= 2) {
1074         errs() << "BOLT-WARNING: conditional tail call detected in "
1075                << "function " << *this << " at 0x"
1076                << Twine::utohexstr(AbsoluteInstrAddr) << ".\n";
1077       }
1078     }
1079     IsCall = true;
1080   }
1081 
1082   if (opts::Verbosity >= 2 && TargetAddress == 0) {
1083     // We actually see calls to address 0 in presence of weak
1084     // symbols originating from libraries. This code is never meant
1085     // to be executed.
1086     outs() << "BOLT-INFO: Function " << *this
1087            << " has a call to address zero.\n";
1088   }
1089 
1090   return BC.getOrCreateGlobalSymbol(TargetAddress, "FUNCat");
1091 }
1092 
1093 void BinaryFunction::handleIndirectBranch(MCInst &Instruction, uint64_t Size,
1094                                           uint64_t Offset) {
1095   auto &MIB = BC.MIB;
1096   uint64_t IndirectTarget = 0;
1097   IndirectBranchType Result =
1098       processIndirectBranch(Instruction, Size, Offset, IndirectTarget);
1099   switch (Result) {
1100   default:
1101     llvm_unreachable("unexpected result");
1102   case IndirectBranchType::POSSIBLE_TAIL_CALL: {
1103     bool Result = MIB->convertJmpToTailCall(Instruction);
1104     (void)Result;
1105     assert(Result);
1106     break;
1107   }
1108   case IndirectBranchType::POSSIBLE_JUMP_TABLE:
1109   case IndirectBranchType::POSSIBLE_PIC_JUMP_TABLE:
1110     if (opts::JumpTables == JTS_NONE)
1111       IsSimple = false;
1112     break;
1113   case IndirectBranchType::POSSIBLE_FIXED_BRANCH: {
1114     if (containsAddress(IndirectTarget)) {
1115       const MCSymbol *TargetSymbol = getOrCreateLocalLabel(IndirectTarget);
1116       Instruction.clear();
1117       MIB->createUncondBranch(Instruction, TargetSymbol, BC.Ctx.get());
1118       TakenBranches.emplace_back(Offset, IndirectTarget - getAddress());
1119       HasFixedIndirectBranch = true;
1120     } else {
1121       MIB->convertJmpToTailCall(Instruction);
1122       BC.addInterproceduralReference(this, IndirectTarget);
1123     }
1124     break;
1125   }
1126   case IndirectBranchType::UNKNOWN:
1127     // Keep processing. We'll do more checks and fixes in
1128     // postProcessIndirectBranches().
1129     UnknownIndirectBranchOffsets.emplace(Offset);
1130     break;
1131   }
1132 }
1133 
// Detect and annotate linker-inserted veneers that end with this indirect
// call/branch. On a match, the matched instructions are tagged with the
// "AArch64Veneer" annotation and a relocation is registered for the
// address-materializing pair.
void BinaryFunction::handleAArch64IndirectCall(MCInst &Instruction,
                                               const uint64_t Offset) {
  auto &MIB = BC.MIB;
  const uint64_t AbsoluteInstrAddr = getAddress() + Offset;
  // Outputs of matchLinkerVeneer(): the two instructions materializing the
  // target address (presumably an ADRP/ADD pair — see
  // addAdrpAddRelocAArch64 below) and the resolved target address.
  MCInst *TargetHiBits, *TargetLowBits;
  uint64_t TargetAddress, Count;
  // Count is the number of instructions in the matched sequence; zero means
  // no veneer pattern was found.
  Count = MIB->matchLinkerVeneer(Instructions.begin(), Instructions.end(),
                                 AbsoluteInstrAddr, Instruction, TargetHiBits,
                                 TargetLowBits, TargetAddress);
  if (Count) {
    MIB->addAnnotation(Instruction, "AArch64Veneer", true);
    // Annotate the remaining (Count - 1) matched instructions; they are the
    // last entries of the Instructions map (the current Instruction itself
    // has not been added to the map yet).
    --Count;
    for (auto It = std::prev(Instructions.end()); Count != 0;
         It = std::prev(It), --Count) {
      MIB->addAnnotation(It->second, "AArch64Veneer", true);
    }

    // Register the address-materializing pair so relocations are created
    // for the veneer target.
    BC.addAdrpAddRelocAArch64(*this, *TargetLowBits, *TargetHiBits,
                              TargetAddress);
  }
}
1155 
// Disassemble the function body, symbolize code references, and populate the
// Instructions map. Returns true on success; on failure the function is
// marked non-simple and its instructions are cleared.
bool BinaryFunction::disassemble() {
  NamedRegionTimer T("disassemble", "Disassemble function", "buildfuncs",
                     "Build Binary Functions", opts::TimeBuild);
  ErrorOr<ArrayRef<uint8_t>> ErrorOrFunctionData = getData();
  assert(ErrorOrFunctionData && "function data is not available");
  ArrayRef<uint8_t> FunctionData = *ErrorOrFunctionData;
  assert(FunctionData.size() == getMaxSize() &&
         "function size does not match raw data size");

  auto &Ctx = BC.Ctx;
  auto &MIB = BC.MIB;

  // Install a target-specific symbolizer so operands are symbolized during
  // disassembly. Reset at the end of this function.
  BC.SymbolicDisAsm->setSymbolizer(MIB->createTargetSymbolizer(*this));

  // Insert a label at the beginning of the function. This will be our first
  // basic block.
  Labels[0] = Ctx->createNamedTempSymbol("BB0");

  // Map offsets in the function to a label that should always point to the
  // corresponding instruction. This is used for labels that shouldn't point to
  // the start of a basic block but always to a specific instruction. This is
  // used, for example, on RISC-V where %pcrel_lo relocations point to the
  // corresponding %pcrel_hi.
  LabelsMapType InstructionLabels;

  uint64_t Size = 0; // instruction size
  for (uint64_t Offset = 0; Offset < getSize(); Offset += Size) {
    MCInst Instruction;
    const uint64_t AbsoluteInstrAddr = getAddress() + Offset;

    // Check for data inside code and ignore it
    if (const size_t DataInCodeSize = getSizeOfDataInCodeAt(Offset)) {
      Size = DataInCodeSize;
      continue;
    }

    if (!BC.SymbolicDisAsm->getInstruction(Instruction, Size,
                                           FunctionData.slice(Offset),
                                           AbsoluteInstrAddr, nulls())) {
      // Functions with "soft" boundaries, e.g. coming from assembly source,
      // can have 0-byte padding at the end.
      if (isZeroPaddingAt(Offset))
        break;

      errs() << "BOLT-WARNING: unable to disassemble instruction at offset 0x"
             << Twine::utohexstr(Offset) << " (address 0x"
             << Twine::utohexstr(AbsoluteInstrAddr) << ") in function " << *this
             << '\n';
      // Some AVX-512 instructions could not be disassembled at all.
      if (BC.HasRelocations && opts::TrapOnAVX512 && BC.isX86()) {
        setTrapOnEntry();
        BC.TrappedFunctions.push_back(this);
      } else {
        setIgnored();
      }

      break;
    }

    // Check integrity of LLVM assembler/disassembler.
    if (opts::CheckEncoding && !BC.MIB->isBranch(Instruction) &&
        !BC.MIB->isCall(Instruction) && !BC.MIB->isNoop(Instruction)) {
      if (!BC.validateInstructionEncoding(FunctionData.slice(Offset, Size))) {
        errs() << "BOLT-WARNING: mismatching LLVM encoding detected in "
               << "function " << *this << " for instruction :\n";
        BC.printInstruction(errs(), Instruction, AbsoluteInstrAddr);
        errs() << '\n';
      }
    }

    // Special handling for AVX-512 instructions.
    if (MIB->hasEVEXEncoding(Instruction)) {
      if (BC.HasRelocations && opts::TrapOnAVX512) {
        setTrapOnEntry();
        BC.TrappedFunctions.push_back(this);
        break;
      }

      if (!BC.validateInstructionEncoding(FunctionData.slice(Offset, Size))) {
        errs() << "BOLT-WARNING: internal assembler/disassembler error "
                  "detected for AVX512 instruction:\n";
        BC.printInstruction(errs(), Instruction, AbsoluteInstrAddr);
        errs() << " in function " << *this << '\n';
        setIgnored();
        break;
      }
    }

    if (MIB->isBranch(Instruction) || MIB->isCall(Instruction)) {
      uint64_t TargetAddress = 0;
      if (MIB->evaluateBranch(Instruction, AbsoluteInstrAddr, Size,
                              TargetAddress)) {
        // Check if the target is within the same function. Otherwise it's
        // a call, possibly a tail call.
        //
        // If the target *is* the function address it could be either a branch
        // or a recursive call.
        bool IsCall = MIB->isCall(Instruction);
        const bool IsCondBranch = MIB->isConditionalBranch(Instruction);
        MCSymbol *TargetSymbol = nullptr;

        // An unsupported branch poisons both this function and its target.
        if (BC.MIB->isUnsupportedBranch(Instruction)) {
          setIgnored();
          if (BinaryFunction *TargetFunc =
                  BC.getBinaryFunctionContainingAddress(TargetAddress))
            TargetFunc->setIgnored();
        }

        if (IsCall && containsAddress(TargetAddress)) {
          if (TargetAddress == getAddress()) {
            // Recursive call.
            TargetSymbol = getSymbol();
          } else {
            if (BC.isX86()) {
              // Dangerous old-style x86 PIC code. We may need to freeze this
              // function, so preserve the function as is for now.
              PreserveNops = true;
            } else {
              errs() << "BOLT-WARNING: internal call detected at 0x"
                     << Twine::utohexstr(AbsoluteInstrAddr) << " in function "
                     << *this << ". Skipping.\n";
              IsSimple = false;
            }
          }
        }

        if (!TargetSymbol) {
          // Create either local label or external symbol.
          if (containsAddress(TargetAddress)) {
            TargetSymbol = getOrCreateLocalLabel(TargetAddress);
          } else {
            if (TargetAddress == getAddress() + getSize() &&
                TargetAddress < getAddress() + getMaxSize() &&
                !(BC.isAArch64() &&
                  BC.handleAArch64Veneer(TargetAddress, /*MatchOnly*/ true))) {
              // Result of __builtin_unreachable().
              LLVM_DEBUG(dbgs() << "BOLT-DEBUG: jump past end detected at 0x"
                                << Twine::utohexstr(AbsoluteInstrAddr)
                                << " in function " << *this
                                << " : replacing with nop.\n");
              BC.MIB->createNoop(Instruction);
              if (IsCondBranch) {
                // Register branch offset for profile validation.
                IgnoredBranches.emplace_back(Offset, Offset + Size);
              }
              goto add_instruction;
            }
            // May update Instruction and IsCall
            TargetSymbol = handleExternalReference(Instruction, Size, Offset,
                                                   TargetAddress, IsCall);
          }
        }

        if (!IsCall) {
          // Add taken branch info.
          TakenBranches.emplace_back(Offset, TargetAddress - getAddress());
        }
        BC.MIB->replaceBranchTarget(Instruction, TargetSymbol, &*Ctx);

        // Mark CTC.
        if (IsCondBranch && IsCall)
          MIB->setConditionalTailCall(Instruction, TargetAddress);
      } else {
        // Could not evaluate branch. Should be an indirect call or an
        // indirect branch. Bail out on the latter case.
        if (MIB->isIndirectBranch(Instruction))
          handleIndirectBranch(Instruction, Size, Offset);
        // Indirect call. We only need to fix it if the operand is RIP-relative.
        if (IsSimple && MIB->hasPCRelOperand(Instruction))
          handlePCRelOperand(Instruction, AbsoluteInstrAddr, Size);

        if (BC.isAArch64())
          handleAArch64IndirectCall(Instruction, Offset);
      }
    } else if (BC.isAArch64() || BC.isRISCV()) {
      // Check if there's a relocation associated with this instruction.
      bool UsedReloc = false;
      for (auto Itr = Relocations.lower_bound(Offset),
                ItrE = Relocations.lower_bound(Offset + Size);
           Itr != ItrE; ++Itr) {
        const Relocation &Relocation = Itr->second;
        MCSymbol *Symbol = Relocation.Symbol;

        // Instruction references get a per-instruction label (see
        // InstructionLabels above) rather than a basic-block label.
        if (Relocation::isInstructionReference(Relocation.Type)) {
          uint64_t RefOffset = Relocation.Value - getAddress();
          LabelsMapType::iterator LI = InstructionLabels.find(RefOffset);

          if (LI == InstructionLabels.end()) {
            Symbol = BC.Ctx->createNamedTempSymbol();
            InstructionLabels.emplace(RefOffset, Symbol);
          } else {
            Symbol = LI->second;
          }
        }

        int64_t Value = Relocation.Value;
        const bool Result = BC.MIB->replaceImmWithSymbolRef(
            Instruction, Symbol, Relocation.Addend, Ctx.get(), Value,
            Relocation.Type);
        (void)Result;
        assert(Result && "cannot replace immediate with relocation");

        // For aarch64, if we replaced an immediate with a symbol from a
        // relocation, we mark it so we do not try to further process a
        // pc-relative operand. All we need is the symbol.
        UsedReloc = true;
      }

      if (!BC.isRISCV() && MIB->hasPCRelOperand(Instruction) && !UsedReloc)
        handlePCRelOperand(Instruction, AbsoluteInstrAddr, Size);
    }

add_instruction:
    if (getDWARFLineTable()) {
      Instruction.setLoc(findDebugLineInformationForInstructionAt(
          AbsoluteInstrAddr, getDWARFUnit(), getDWARFLineTable()));
    }

    // Record offset of the instruction for profile matching.
    if (BC.keepOffsetForInstruction(Instruction))
      MIB->setOffset(Instruction, static_cast<uint32_t>(Offset));

    if (BC.MIB->isNoop(Instruction)) {
      // NOTE: disassembly loses the correct size information for noops.
      //       E.g. nopw 0x0(%rax,%rax,1) is 9 bytes, but re-encoded it's only
      //       5 bytes. Preserve the size info using annotations.
      MIB->addAnnotation(Instruction, "Size", static_cast<uint32_t>(Size));
    }

    addInstruction(Offset, std::move(Instruction));
  }

  // Attach the accumulated per-instruction labels now that all instructions
  // are in place.
  for (auto [Offset, Label] : InstructionLabels) {
    InstrMapType::iterator II = Instructions.find(Offset);
    assert(II != Instructions.end() && "reference to non-existing instruction");

    BC.MIB->setLabel(II->second, Label);
  }

  // Reset symbolizer for the disassembler.
  BC.SymbolicDisAsm->setSymbolizer(nullptr);

  // If the first instruction does not start at offset zero, label it so the
  // leading bytes and the real entry are distinguishable.
  if (uint64_t Offset = getFirstInstructionOffset())
    Labels[Offset] = BC.Ctx->createNamedTempSymbol();

  clearList(Relocations);

  if (!IsSimple) {
    clearList(Instructions);
    return false;
  }

  updateState(State::Disassembled);

  return true;
}
1412 
// Scan the function for references to code that will be moved, without fully
// processing the function: detect secondary entry points in targets and
// (in relocation mode) create pending relocations for external references.
// Returns true if the scan covered all references successfully.
bool BinaryFunction::scanExternalRefs() {
  bool Success = true;
  bool DisassemblyFailed = false;

  // Ignore pseudo functions.
  if (isPseudo())
    return Success;

  if (opts::NoScan) {
    clearList(Relocations);
    clearList(ExternallyReferencedOffsets);

    return false;
  }

  // List of external references for this function.
  std::vector<Relocation> FunctionRelocations;

  // Shared across calls: one temporary emitter is enough for scanning.
  static BinaryContext::IndependentCodeEmitter Emitter =
      BC.createIndependentMCCodeEmitter();

  ErrorOr<ArrayRef<uint8_t>> ErrorOrFunctionData = getData();
  assert(ErrorOrFunctionData && "function data is not available");
  ArrayRef<uint8_t> FunctionData = *ErrorOrFunctionData;
  assert(FunctionData.size() == getMaxSize() &&
         "function size does not match raw data size");

  // Symbolize operands but do not create new symbols while scanning.
  BC.SymbolicDisAsm->setSymbolizer(
      BC.MIB->createTargetSymbolizer(*this, /*CreateSymbols*/ false));

  // Disassemble contents of the function. Detect code entry points and create
  // relocations for references to code that will be moved.
  uint64_t Size = 0; // instruction size
  for (uint64_t Offset = 0; Offset < getSize(); Offset += Size) {
    // Check for data inside code and ignore it
    if (const size_t DataInCodeSize = getSizeOfDataInCodeAt(Offset)) {
      Size = DataInCodeSize;
      continue;
    }

    const uint64_t AbsoluteInstrAddr = getAddress() + Offset;
    MCInst Instruction;
    if (!BC.SymbolicDisAsm->getInstruction(Instruction, Size,
                                           FunctionData.slice(Offset),
                                           AbsoluteInstrAddr, nulls())) {
      if (opts::Verbosity >= 1 && !isZeroPaddingAt(Offset)) {
        errs() << "BOLT-WARNING: unable to disassemble instruction at offset 0x"
               << Twine::utohexstr(Offset) << " (address 0x"
               << Twine::utohexstr(AbsoluteInstrAddr) << ") in function "
               << *this << '\n';
      }
      Success = false;
      DisassemblyFailed = true;
      break;
    }

    // Return true if we can skip handling the Target function reference.
    auto ignoreFunctionRef = [&](const BinaryFunction &Target) {
      if (&Target == this)
        return true;

      // Note that later we may decide not to emit Target function. In that
      // case, we conservatively create references that will be ignored or
      // resolved to the same function.
      if (!BC.shouldEmit(Target))
        return true;

      return false;
    };

    // Return true if we can ignore reference to the symbol.
    auto ignoreReference = [&](const MCSymbol *TargetSymbol) {
      if (!TargetSymbol)
        return true;

      if (BC.forceSymbolRelocations(TargetSymbol->getName()))
        return false;

      BinaryFunction *TargetFunction = BC.getFunctionForSymbol(TargetSymbol);
      if (!TargetFunction)
        return true;

      return ignoreFunctionRef(*TargetFunction);
    };

    // Handle calls and branches separately as symbolization doesn't work for
    // them yet.
    MCSymbol *BranchTargetSymbol = nullptr;
    if (BC.MIB->isCall(Instruction) || BC.MIB->isBranch(Instruction)) {
      uint64_t TargetAddress = 0;
      BC.MIB->evaluateBranch(Instruction, AbsoluteInstrAddr, Size,
                             TargetAddress);

      // Create an entry point at reference address if needed.
      BinaryFunction *TargetFunction =
          BC.getBinaryFunctionContainingAddress(TargetAddress);

      if (!TargetFunction || ignoreFunctionRef(*TargetFunction))
        continue;

      const uint64_t FunctionOffset =
          TargetAddress - TargetFunction->getAddress();
      BranchTargetSymbol =
          FunctionOffset ? TargetFunction->addEntryPointAtOffset(FunctionOffset)
                         : TargetFunction->getSymbol();
    }

    // Can't find more references. Not creating relocations since we are not
    // moving code.
    if (!BC.HasRelocations)
      continue;

    if (BranchTargetSymbol) {
      BC.MIB->replaceBranchTarget(Instruction, BranchTargetSymbol,
                                  Emitter.LocalCtx.get());
    } else if (!llvm::any_of(Instruction,
                             [](const MCOperand &Op) { return Op.isExpr(); })) {
      // Skip assembly if the instruction may not have any symbolic operands.
      continue;
    }

    // Emit the instruction using temp emitter and generate relocations.
    SmallString<256> Code;
    SmallVector<MCFixup, 4> Fixups;
    Emitter.MCE->encodeInstruction(Instruction, Code, Fixups, *BC.STI);

    // Create relocation for every fixup.
    for (const MCFixup &Fixup : Fixups) {
      std::optional<Relocation> Rel = BC.MIB->createRelocation(Fixup, *BC.MAB);
      if (!Rel) {
        Success = false;
        continue;
      }

      if (ignoreReference(Rel->Symbol))
        continue;

      if (Relocation::getSizeForType(Rel->Type) < 4) {
        // If the instruction uses a short form, then we might not be able
        // to handle the rewrite without relaxation, and hence cannot reliably
        // create an external reference relocation.
        Success = false;
        continue;
      }
      // Convert instruction-relative fixup offset to a section offset.
      Rel->Offset += getAddress() - getOriginSection()->getAddress() + Offset;
      FunctionRelocations.push_back(*Rel);
    }

    if (!Success)
      break;
  }

  // Reset symbolizer for the disassembler.
  BC.SymbolicDisAsm->setSymbolizer(nullptr);

  // Add relocations unless disassembly failed for this function.
  if (!DisassemblyFailed)
    for (Relocation &Rel : FunctionRelocations)
      getOriginSection()->addPendingRelocation(Rel);

  // Inform BinaryContext that this function symbols will not be defined and
  // relocations should not be created against them.
  if (BC.HasRelocations) {
    for (std::pair<const uint32_t, MCSymbol *> &LI : Labels)
      BC.UndefinedSymbols.insert(LI.second);
    for (MCSymbol *const EndLabel : FunctionEndLabels)
      if (EndLabel)
        BC.UndefinedSymbols.insert(EndLabel);
  }

  clearList(Relocations);
  clearList(ExternallyReferencedOffsets);

  if (Success && BC.HasRelocations)
    HasExternalRefRelocations = true;

  if (opts::Verbosity >= 1 && !Success)
    outs() << "BOLT-INFO: failed to scan refs for  " << *this << '\n';

  return Success;
}
1594 
1595 void BinaryFunction::postProcessEntryPoints() {
1596   if (!isSimple())
1597     return;
1598 
1599   for (auto &KV : Labels) {
1600     MCSymbol *Label = KV.second;
1601     if (!getSecondaryEntryPointSymbol(Label))
1602       continue;
1603 
1604     // In non-relocation mode there's potentially an external undetectable
1605     // reference to the entry point and hence we cannot move this entry
1606     // point. Optimizing without moving could be difficult.
1607     if (!BC.HasRelocations)
1608       setSimple(false);
1609 
1610     const uint32_t Offset = KV.first;
1611 
1612     // If we are at Offset 0 and there is no instruction associated with it,
1613     // this means this is an empty function. Just ignore. If we find an
1614     // instruction at this offset, this entry point is valid.
1615     if (!Offset || getInstructionAtOffset(Offset))
1616       continue;
1617 
1618     // On AArch64 there are legitimate reasons to have references past the
1619     // end of the function, e.g. jump tables.
1620     if (BC.isAArch64() && Offset == getSize())
1621       continue;
1622 
1623     errs() << "BOLT-WARNING: reference in the middle of instruction "
1624               "detected in function "
1625            << *this << " at offset 0x" << Twine::utohexstr(Offset) << '\n';
1626     if (BC.HasRelocations)
1627       setIgnored();
1628     setSimple(false);
1629     return;
1630   }
1631 }
1632 
void BinaryFunction::postProcessJumpTables() {
  // Create labels for all entries.
  for (auto &JTI : JumpTables) {
    JumpTable &JT = *JTI.second;
    // PIC jump tables cannot be updated in place, so force "move" mode when
    // one is encountered. Note: this mutates the global option for the rest
    // of the run.
    if (JT.Type == JumpTable::JTT_PIC && opts::JumpTables == JTS_BASIC) {
      opts::JumpTables = JTS_MOVE;
      outs() << "BOLT-INFO: forcing -jump-tables=move as PIC jump table was "
                "detected in function "
             << *this << '\n';
    }
    // Propagate the jump table size to the containing binary data object if
    // it does not have one yet.
    const uint64_t BDSize =
        BC.getBinaryDataAtAddress(JT.getAddress())->getSize();
    if (!BDSize) {
      BC.setBinaryDataSize(JT.getAddress(), JT.getSize());
    } else {
      assert(BDSize >= JT.getSize() &&
             "jump table cannot be larger than the containing object");
    }
    // Entries were already materialized as symbols - nothing more to do.
    if (!JT.Entries.empty())
      continue;

    bool HasOneParent = (JT.Parents.size() == 1);
    for (uint64_t EntryAddress : JT.EntriesAsAddress) {
      // builtin_unreachable does not belong to any function
      // Need to handle separately
      bool IsBuiltinUnreachable =
          llvm::any_of(JT.Parents, [&](const BinaryFunction *Parent) {
            return EntryAddress == Parent->getAddress() + Parent->getSize();
          });
      if (IsBuiltinUnreachable) {
        MCSymbol *Label = getOrCreateLocalLabel(EntryAddress, true);
        JT.Entries.push_back(Label);
        continue;
      }
      // Create a local label for targets that cannot be reached by other
      // fragments. Otherwise, create a secondary entry point in the target
      // function.
      // NOTE(review): TargetBF is dereferenced below without a null check;
      // presumably every non-unreachable entry address lies inside a known
      // function - TODO confirm.
      BinaryFunction *TargetBF =
          BC.getBinaryFunctionContainingAddress(EntryAddress);
      MCSymbol *Label;
      if (HasOneParent && TargetBF == this) {
        Label = getOrCreateLocalLabel(EntryAddress, true);
      } else {
        const uint64_t Offset = EntryAddress - TargetBF->getAddress();
        Label = Offset ? TargetBF->addEntryPointAtOffset(Offset)
                       : TargetBF->getSymbol();
      }
      JT.Entries.push_back(Label);
    }
  }

  // Add TakenBranches from JumpTables.
  //
  // We want to do it after initial processing since we don't know jump tables'
  // boundaries until we process them all.
  for (auto &JTSite : JTSites) {
    const uint64_t JTSiteOffset = JTSite.first;
    const uint64_t JTAddress = JTSite.second;
    const JumpTable *JT = getJumpTableContainingAddress(JTAddress);
    assert(JT && "cannot find jump table for address");

    // Walk entries starting at the site's first entry until the end of the
    // table or until a label marks the start of the next table.
    uint64_t EntryOffset = JTAddress - JT->getAddress();
    while (EntryOffset < JT->getSize()) {
      uint64_t EntryAddress = JT->EntriesAsAddress[EntryOffset / JT->EntrySize];
      uint64_t TargetOffset = EntryAddress - getAddress();
      // Only record targets that fall inside this function.
      if (TargetOffset < getSize()) {
        TakenBranches.emplace_back(JTSiteOffset, TargetOffset);

        if (opts::StrictMode)
          registerReferencedOffset(TargetOffset);
      }

      EntryOffset += JT->EntrySize;

      // A label at the next entry means the end of this jump table.
      if (JT->Labels.count(EntryOffset))
        break;
    }
  }
  clearList(JTSites);

  // Conservatively populate all possible destinations for unknown indirect
  // branches.
  if (opts::StrictMode && hasInternalReference()) {
    for (uint64_t Offset : UnknownIndirectBranchOffsets) {
      for (uint64_t PossibleDestination : ExternallyReferencedOffsets) {
        // Ignore __builtin_unreachable().
        if (PossibleDestination == getSize())
          continue;
        TakenBranches.emplace_back(Offset, PossibleDestination);
      }
    }
  }

  // Remove duplicate branches. We can get a bunch of them from jump tables.
  // Without doing jump table value profiling we don't have use for extra
  // (duplicate) branches.
  llvm::sort(TakenBranches);
  auto NewEnd = std::unique(TakenBranches.begin(), TakenBranches.end());
  TakenBranches.erase(NewEnd, TakenBranches.end());
}
1734 
1735 bool BinaryFunction::validateExternallyReferencedOffsets() {
1736   SmallPtrSet<MCSymbol *, 4> JTTargets;
1737   for (const JumpTable *JT : llvm::make_second_range(JumpTables))
1738     JTTargets.insert(JT->Entries.begin(), JT->Entries.end());
1739 
1740   bool HasUnclaimedReference = false;
1741   for (uint64_t Destination : ExternallyReferencedOffsets) {
1742     // Ignore __builtin_unreachable().
1743     if (Destination == getSize())
1744       continue;
1745     // Ignore constant islands
1746     if (isInConstantIsland(Destination + getAddress()))
1747       continue;
1748 
1749     if (BinaryBasicBlock *BB = getBasicBlockAtOffset(Destination)) {
1750       // Check if the externally referenced offset is a recognized jump table
1751       // target.
1752       if (JTTargets.contains(BB->getLabel()))
1753         continue;
1754 
1755       if (opts::Verbosity >= 1) {
1756         errs() << "BOLT-WARNING: unclaimed data to code reference (possibly "
1757                << "an unrecognized jump table entry) to " << BB->getName()
1758                << " in " << *this << "\n";
1759       }
1760       auto L = BC.scopeLock();
1761       addEntryPoint(*BB);
1762     } else {
1763       errs() << "BOLT-WARNING: unknown data to code reference to offset "
1764              << Twine::utohexstr(Destination) << " in " << *this << "\n";
1765       setIgnored();
1766     }
1767     HasUnclaimedReference = true;
1768   }
1769   return !HasUnclaimedReference;
1770 }
1771 
1772 bool BinaryFunction::postProcessIndirectBranches(
1773     MCPlusBuilder::AllocatorIdTy AllocId) {
1774   auto addUnknownControlFlow = [&](BinaryBasicBlock &BB) {
1775     LLVM_DEBUG(dbgs() << "BOLT-DEBUG: adding unknown control flow in " << *this
1776                       << " for " << BB.getName() << "\n");
1777     HasUnknownControlFlow = true;
1778     BB.removeAllSuccessors();
1779     for (uint64_t PossibleDestination : ExternallyReferencedOffsets)
1780       if (BinaryBasicBlock *SuccBB = getBasicBlockAtOffset(PossibleDestination))
1781         BB.addSuccessor(SuccBB);
1782   };
1783 
1784   uint64_t NumIndirectJumps = 0;
1785   MCInst *LastIndirectJump = nullptr;
1786   BinaryBasicBlock *LastIndirectJumpBB = nullptr;
1787   uint64_t LastJT = 0;
1788   uint16_t LastJTIndexReg = BC.MIB->getNoRegister();
1789   for (BinaryBasicBlock &BB : blocks()) {
1790     for (BinaryBasicBlock::iterator II = BB.begin(); II != BB.end(); ++II) {
1791       MCInst &Instr = *II;
1792       if (!BC.MIB->isIndirectBranch(Instr))
1793         continue;
1794 
1795       // If there's an indirect branch in a single-block function -
1796       // it must be a tail call.
1797       if (BasicBlocks.size() == 1) {
1798         BC.MIB->convertJmpToTailCall(Instr);
1799         return true;
1800       }
1801 
1802       ++NumIndirectJumps;
1803 
1804       if (opts::StrictMode && !hasInternalReference()) {
1805         BC.MIB->convertJmpToTailCall(Instr);
1806         break;
1807       }
1808 
1809       // Validate the tail call or jump table assumptions now that we know
1810       // basic block boundaries.
1811       if (BC.MIB->isTailCall(Instr) || BC.MIB->getJumpTable(Instr)) {
1812         const unsigned PtrSize = BC.AsmInfo->getCodePointerSize();
1813         MCInst *MemLocInstr;
1814         unsigned BaseRegNum, IndexRegNum;
1815         int64_t DispValue;
1816         const MCExpr *DispExpr;
1817         MCInst *PCRelBaseInstr;
1818         IndirectBranchType Type = BC.MIB->analyzeIndirectBranch(
1819             Instr, BB.begin(), II, PtrSize, MemLocInstr, BaseRegNum,
1820             IndexRegNum, DispValue, DispExpr, PCRelBaseInstr);
1821         if (Type != IndirectBranchType::UNKNOWN || MemLocInstr != nullptr)
1822           continue;
1823 
1824         if (!opts::StrictMode)
1825           return false;
1826 
1827         if (BC.MIB->isTailCall(Instr)) {
1828           BC.MIB->convertTailCallToJmp(Instr);
1829         } else {
1830           LastIndirectJump = &Instr;
1831           LastIndirectJumpBB = &BB;
1832           LastJT = BC.MIB->getJumpTable(Instr);
1833           LastJTIndexReg = BC.MIB->getJumpTableIndexReg(Instr);
1834           BC.MIB->unsetJumpTable(Instr);
1835 
1836           JumpTable *JT = BC.getJumpTableContainingAddress(LastJT);
1837           if (JT->Type == JumpTable::JTT_NORMAL) {
1838             // Invalidating the jump table may also invalidate other jump table
1839             // boundaries. Until we have/need a support for this, mark the
1840             // function as non-simple.
1841             LLVM_DEBUG(dbgs() << "BOLT-DEBUG: rejected jump table reference"
1842                               << JT->getName() << " in " << *this << '\n');
1843             return false;
1844           }
1845         }
1846 
1847         addUnknownControlFlow(BB);
1848         continue;
1849       }
1850 
1851       // If this block contains an epilogue code and has an indirect branch,
1852       // then most likely it's a tail call. Otherwise, we cannot tell for sure
1853       // what it is and conservatively reject the function's CFG.
1854       bool IsEpilogue = llvm::any_of(BB, [&](const MCInst &Instr) {
1855         return BC.MIB->isLeave(Instr) || BC.MIB->isPop(Instr);
1856       });
1857       if (IsEpilogue) {
1858         BC.MIB->convertJmpToTailCall(Instr);
1859         BB.removeAllSuccessors();
1860         continue;
1861       }
1862 
1863       if (opts::Verbosity >= 2) {
1864         outs() << "BOLT-INFO: rejected potential indirect tail call in "
1865                << "function " << *this << " in basic block " << BB.getName()
1866                << ".\n";
1867         LLVM_DEBUG(BC.printInstructions(dbgs(), BB.begin(), BB.end(),
1868                                         BB.getOffset(), this, true));
1869       }
1870 
1871       if (!opts::StrictMode)
1872         return false;
1873 
1874       addUnknownControlFlow(BB);
1875     }
1876   }
1877 
1878   if (HasInternalLabelReference)
1879     return false;
1880 
1881   // If there's only one jump table, and one indirect jump, and no other
1882   // references, then we should be able to derive the jump table even if we
1883   // fail to match the pattern.
1884   if (HasUnknownControlFlow && NumIndirectJumps == 1 &&
1885       JumpTables.size() == 1 && LastIndirectJump &&
1886       !BC.getJumpTableContainingAddress(LastJT)->IsSplit) {
1887     LLVM_DEBUG(dbgs() << "BOLT-DEBUG: unsetting unknown control flow in "
1888                       << *this << '\n');
1889     BC.MIB->setJumpTable(*LastIndirectJump, LastJT, LastJTIndexReg, AllocId);
1890     HasUnknownControlFlow = false;
1891 
1892     LastIndirectJumpBB->updateJumpTableSuccessors();
1893   }
1894 
1895   if (HasFixedIndirectBranch)
1896     return false;
1897 
1898   // Validate that all data references to function offsets are claimed by
1899   // recognized jump tables. Register externally referenced blocks as entry
1900   // points.
1901   if (!opts::StrictMode && hasInternalReference()) {
1902     if (!validateExternallyReferencedOffsets())
1903       return false;
1904   }
1905 
1906   if (HasUnknownControlFlow && !BC.HasRelocations)
1907     return false;
1908 
1909   return true;
1910 }
1911 
1912 void BinaryFunction::recomputeLandingPads() {
1913   updateBBIndices(0);
1914 
1915   for (BinaryBasicBlock *BB : BasicBlocks) {
1916     BB->LandingPads.clear();
1917     BB->Throwers.clear();
1918   }
1919 
1920   for (BinaryBasicBlock *BB : BasicBlocks) {
1921     std::unordered_set<const BinaryBasicBlock *> BBLandingPads;
1922     for (MCInst &Instr : *BB) {
1923       if (!BC.MIB->isInvoke(Instr))
1924         continue;
1925 
1926       const std::optional<MCPlus::MCLandingPad> EHInfo =
1927           BC.MIB->getEHInfo(Instr);
1928       if (!EHInfo || !EHInfo->first)
1929         continue;
1930 
1931       BinaryBasicBlock *LPBlock = getBasicBlockForLabel(EHInfo->first);
1932       if (!BBLandingPads.count(LPBlock)) {
1933         BBLandingPads.insert(LPBlock);
1934         BB->LandingPads.emplace_back(LPBlock);
1935         LPBlock->Throwers.emplace_back(BB);
1936       }
1937     }
1938   }
1939 }
1940 
bool BinaryFunction::buildCFG(MCPlusBuilder::AllocatorIdTy AllocatorId) {
  auto &MIB = BC.MIB;

  // Non-simple functions are not processed; in relocation mode they should
  // not have reached this point at all.
  if (!isSimple()) {
    assert(!BC.HasRelocations &&
           "cannot process file with non-simple function in relocs mode");
    return false;
  }

  // The CFG is built exactly once, right after disassembly.
  if (CurrentState != State::Disassembled)
    return false;

  assert(BasicBlocks.empty() && "basic block list should be empty");
  assert((Labels.find(getFirstInstructionOffset()) != Labels.end()) &&
         "first instruction should always have a label");

  // Create basic blocks in the original layout order:
  //
  //  * Every instruction with associated label marks
  //    the beginning of a basic block.
  //  * Conditional instruction marks the end of a basic block,
  //    except when the following instruction is an
  //    unconditional branch, and the unconditional branch is not
  //    a destination of another branch. In the latter case, the
  //    basic block will consist of a single unconditional branch
  //    (missed "double-jump" optimization).
  //
  // Created basic blocks are sorted in layout order since they are
  // created in the same order as instructions, and instructions are
  // sorted by offsets.
  BinaryBasicBlock *InsertBB = nullptr;
  BinaryBasicBlock *PrevBB = nullptr;
  bool IsLastInstrNop = false;
  // Offset of the last non-nop instruction.
  uint64_t LastInstrOffset = 0;

  // Append all CFI pseudo instructions associated with CFIOffset to the end
  // of the block under construction.
  auto addCFIPlaceholders = [this](uint64_t CFIOffset,
                                   BinaryBasicBlock *InsertBB) {
    for (auto FI = OffsetToCFI.lower_bound(CFIOffset),
              FE = OffsetToCFI.upper_bound(CFIOffset);
         FI != FE; ++FI) {
      addCFIPseudo(InsertBB, InsertBB->end(), FI->second);
    }
  };

  // For profiling purposes we need to save the offset of the last instruction
  // in the basic block.
  // NOTE: nops always have an Offset annotation. Annotate the last non-nop as
  //       older profiles ignored nops.
  auto updateOffset = [&](uint64_t Offset) {
    assert(PrevBB && PrevBB != InsertBB && "invalid previous block");
    MCInst *LastNonNop = nullptr;
    for (BinaryBasicBlock::reverse_iterator RII = PrevBB->getLastNonPseudo(),
                                            E = PrevBB->rend();
         RII != E; ++RII) {
      if (!BC.MIB->isPseudo(*RII) && !BC.MIB->isNoop(*RII)) {
        LastNonNop = &*RII;
        break;
      }
    }
    if (LastNonNop && !MIB->getOffset(*LastNonNop))
      MIB->setOffset(*LastNonNop, static_cast<uint32_t>(Offset), AllocatorId);
  };

  // Main pass: distribute disassembled instructions into basic blocks.
  for (auto I = Instructions.begin(), E = Instructions.end(); I != E; ++I) {
    const uint32_t Offset = I->first;
    MCInst &Instr = I->second;

    auto LI = Labels.find(Offset);
    if (LI != Labels.end()) {
      // Always create new BB at branch destination.
      PrevBB = InsertBB ? InsertBB : PrevBB;
      InsertBB = addBasicBlockAt(LI->first, LI->second);
      if (opts::PreserveBlocksAlignment && IsLastInstrNop)
        InsertBB->setDerivedAlignment();

      if (PrevBB)
        updateOffset(LastInstrOffset);
    }

    // Mark all nops with Offset for profile tracking purposes.
    if (MIB->isNoop(Instr) && !MIB->getOffset(Instr)) {
      // If "Offset" annotation is not present, set it and mark the nop for
      // deletion.
      MIB->setOffset(Instr, static_cast<uint32_t>(Offset), AllocatorId);
      // Annotate ordinary nops, so we can safely delete them if required.
      MIB->addAnnotation(Instr, "NOP", static_cast<uint32_t>(1), AllocatorId);
    }

    if (!InsertBB) {
      // It must be a fallthrough or unreachable code. Create a new block unless
      // we see an unconditional branch following a conditional one. The latter
      // should not be a conditional tail call.
      assert(PrevBB && "no previous basic block for a fall through");
      MCInst *PrevInstr = PrevBB->getLastNonPseudoInstr();
      assert(PrevInstr && "no previous instruction for a fall through");
      if (MIB->isUnconditionalBranch(Instr) &&
          !MIB->isIndirectBranch(*PrevInstr) &&
          !MIB->isUnconditionalBranch(*PrevInstr) &&
          !MIB->getConditionalTailCall(*PrevInstr) &&
          !MIB->isReturn(*PrevInstr)) {
        // Temporarily restore inserter basic block.
        InsertBB = PrevBB;
      } else {
        // Start a new fall-through ("FT") block.
        MCSymbol *Label;
        {
          auto L = BC.scopeLock();
          Label = BC.Ctx->createNamedTempSymbol("FT");
        }
        InsertBB = addBasicBlockAt(Offset, Label);
        if (opts::PreserveBlocksAlignment && IsLastInstrNop)
          InsertBB->setDerivedAlignment();
        updateOffset(LastInstrOffset);
      }
    }
    if (Offset == getFirstInstructionOffset()) {
      // Add associated CFI pseudos in the first offset
      addCFIPlaceholders(Offset, InsertBB);
    }

    const bool IsBlockEnd = MIB->isTerminator(Instr);
    IsLastInstrNop = MIB->isNoop(Instr);
    if (!IsLastInstrNop)
      LastInstrOffset = Offset;
    InsertBB->addInstruction(std::move(Instr));

    // Add associated CFI instrs. We always add the CFI instruction that is
    // located immediately after this instruction, since the next CFI
    // instruction reflects the change in state caused by this instruction.
    auto NextInstr = std::next(I);
    uint64_t CFIOffset;
    if (NextInstr != E)
      CFIOffset = NextInstr->first;
    else
      CFIOffset = getSize();

    // Note: this potentially invalidates instruction pointers/iterators.
    addCFIPlaceholders(CFIOffset, InsertBB);

    if (IsBlockEnd) {
      PrevBB = InsertBB;
      InsertBB = nullptr;
    }
  }

  if (BasicBlocks.empty()) {
    setSimple(false);
    return false;
  }

  // Intermediate dump.
  LLVM_DEBUG(print(dbgs(), "after creating basic blocks"));

  // TODO: handle properly calls to no-return functions,
  // e.g. exit(3), etc. Otherwise we'll see a false fall-through
  // blocks.

  // Add CFG edges for all recorded taken branches.
  for (std::pair<uint32_t, uint32_t> &Branch : TakenBranches) {
    LLVM_DEBUG(dbgs() << "registering branch [0x"
                      << Twine::utohexstr(Branch.first) << "] -> [0x"
                      << Twine::utohexstr(Branch.second) << "]\n");
    BinaryBasicBlock *FromBB = getBasicBlockContainingOffset(Branch.first);
    BinaryBasicBlock *ToBB = getBasicBlockAtOffset(Branch.second);
    if (!FromBB || !ToBB) {
      if (!FromBB)
        errs() << "BOLT-ERROR: cannot find BB containing the branch.\n";
      if (!ToBB)
        errs() << "BOLT-ERROR: cannot find BB containing branch destination.\n";
      BC.exitWithBugReport("disassembly failed - inconsistent branch found.",
                           *this);
    }

    FromBB->addSuccessor(ToBB);
  }

  // Add fall-through branches.
  PrevBB = nullptr;
  bool IsPrevFT = false; // Is previous block a fall-through.
  for (BinaryBasicBlock *BB : BasicBlocks) {
    if (IsPrevFT)
      PrevBB->addSuccessor(BB);

    if (BB->empty()) {
      IsPrevFT = true;
      PrevBB = BB;
      continue;
    }

    MCInst *LastInstr = BB->getLastNonPseudoInstr();
    assert(LastInstr &&
           "should have non-pseudo instruction in non-empty block");

    if (BB->succ_size() == 0) {
      // Since there's no existing successors, we know the last instruction is
      // not a conditional branch. Thus if it's a terminator, it shouldn't be a
      // fall-through.
      //
      // Conditional tail call is a special case since we don't add a taken
      // branch successor for it.
      IsPrevFT = !MIB->isTerminator(*LastInstr) ||
                 MIB->getConditionalTailCall(*LastInstr);
    } else if (BB->succ_size() == 1) {
      IsPrevFT = MIB->isConditionalBranch(*LastInstr);
    } else {
      IsPrevFT = false;
    }

    PrevBB = BB;
  }

  // Assign landing pads and throwers info.
  recomputeLandingPads();

  // Assign CFI information to each BB entry.
  annotateCFIState();

  // Annotate invoke instructions with GNU_args_size data.
  propagateGnuArgsSizeInfo(AllocatorId);

  // Set the basic block layout to the original order and set end offsets.
  PrevBB = nullptr;
  for (BinaryBasicBlock *BB : BasicBlocks) {
    Layout.addBasicBlock(BB);
    if (PrevBB)
      PrevBB->setEndOffset(BB->getOffset());
    PrevBB = BB;
  }
  PrevBB->setEndOffset(getSize());

  Layout.updateLayoutIndices();

  normalizeCFIState();

  // Clean-up memory taken by intermediate structures.
  //
  // NB: don't clear Labels list as we may need them if we mark the function
  //     as non-simple later in the process of discovering extra entry points.
  clearList(Instructions);
  clearList(OffsetToCFI);
  clearList(TakenBranches);

  // Update the state.
  CurrentState = State::CFG;

  // Make any necessary adjustments for indirect branches.
  if (!postProcessIndirectBranches(AllocatorId)) {
    if (opts::Verbosity) {
      errs() << "BOLT-WARNING: failed to post-process indirect branches for "
             << *this << '\n';
    }
    // In relocation mode we want to keep processing the function but avoid
    // optimizing it.
    setSimple(false);
  }

  clearList(ExternallyReferencedOffsets);
  clearList(UnknownIndirectBranchOffsets);

  return true;
}
2201 
2202 void BinaryFunction::postProcessCFG() {
2203   if (isSimple() && !BasicBlocks.empty()) {
2204     // Convert conditional tail call branches to conditional branches that jump
2205     // to a tail call.
2206     removeConditionalTailCalls();
2207 
2208     postProcessProfile();
2209 
2210     // Eliminate inconsistencies between branch instructions and CFG.
2211     postProcessBranches();
2212   }
2213 
2214   calculateMacroOpFusionStats();
2215 
2216   // The final cleanup of intermediate structures.
2217   clearList(IgnoredBranches);
2218 
2219   // Remove "Offset" annotations, unless we need an address-translation table
2220   // later. This has no cost, since annotations are allocated by a bumpptr
2221   // allocator and won't be released anyway until late in the pipeline.
2222   if (!requiresAddressTranslation() && !opts::Instrument) {
2223     for (BinaryBasicBlock &BB : blocks())
2224       for (MCInst &Inst : BB)
2225         BC.MIB->clearOffset(Inst);
2226   }
2227 
2228   assert((!isSimple() || validateCFG()) &&
2229          "invalid CFG detected after post-processing");
2230 }
2231 
2232 void BinaryFunction::calculateMacroOpFusionStats() {
2233   if (!getBinaryContext().isX86())
2234     return;
2235   for (const BinaryBasicBlock &BB : blocks()) {
2236     auto II = BB.getMacroOpFusionPair();
2237     if (II == BB.end())
2238       continue;
2239 
2240     // Check offset of the second instruction.
2241     // FIXME: arch-specific.
2242     const uint32_t Offset = BC.MIB->getOffsetWithDefault(*std::next(II), 0);
2243     if (!Offset || (getAddress() + Offset) % 64)
2244       continue;
2245 
2246     LLVM_DEBUG(dbgs() << "\nmissed macro-op fusion at address 0x"
2247                       << Twine::utohexstr(getAddress() + Offset)
2248                       << " in function " << *this << "; executed "
2249                       << BB.getKnownExecutionCount() << " times.\n");
2250     ++BC.Stats.MissedMacroFusionPairs;
2251     BC.Stats.MissedMacroFusionExecCount += BB.getKnownExecutionCount();
2252   }
2253 }
2254 
2255 void BinaryFunction::removeTagsFromProfile() {
2256   for (BinaryBasicBlock *BB : BasicBlocks) {
2257     if (BB->ExecutionCount == BinaryBasicBlock::COUNT_NO_PROFILE)
2258       BB->ExecutionCount = 0;
2259     for (BinaryBasicBlock::BinaryBranchInfo &BI : BB->branch_info()) {
2260       if (BI.Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
2261           BI.MispredictedCount != BinaryBasicBlock::COUNT_NO_PROFILE)
2262         continue;
2263       BI.Count = 0;
2264       BI.MispredictedCount = 0;
2265     }
2266   }
2267 }
2268 
void BinaryFunction::removeConditionalTailCalls() {
  // Blocks to be appended at the end.
  std::vector<std::unique_ptr<BinaryBasicBlock>> NewBlocks;

  for (auto BBI = begin(); BBI != end(); ++BBI) {
    BinaryBasicBlock &BB = *BBI;
    MCInst *CTCInstr = BB.getLastNonPseudoInstr();
    if (!CTCInstr)
      continue;

    // Only interested in blocks ending with a conditional tail call (CTC).
    std::optional<uint64_t> TargetAddressOrNone =
        BC.MIB->getConditionalTailCall(*CTCInstr);
    if (!TargetAddressOrNone)
      continue;

    // Gather all necessary information about CTC instruction before
    // annotations are destroyed.
    const int32_t CFIStateBeforeCTC = BB.getCFIStateAtInstr(CTCInstr);
    uint64_t CTCTakenCount = BinaryBasicBlock::COUNT_NO_PROFILE;
    uint64_t CTCMispredCount = BinaryBasicBlock::COUNT_NO_PROFILE;
    if (hasValidProfile()) {
      CTCTakenCount = BC.MIB->getAnnotationWithDefault<uint64_t>(
          *CTCInstr, "CTCTakenCount");
      CTCMispredCount = BC.MIB->getAnnotationWithDefault<uint64_t>(
          *CTCInstr, "CTCMispredCount");
    }

    // Assert that the tail call does not throw.
    assert(!BC.MIB->getEHInfo(*CTCInstr) &&
           "found tail call with associated landing pad");

    // Create a basic block with an unconditional tail call instruction using
    // the same destination.
    const MCSymbol *CTCTargetLabel = BC.MIB->getTargetSymbol(*CTCInstr);
    assert(CTCTargetLabel && "symbol expected for conditional tail call");
    MCInst TailCallInstr;
    BC.MIB->createTailCall(TailCallInstr, CTCTargetLabel, BC.Ctx.get());
    // Link new BBs to the original input offset of the BB where the CTC
    // is, so we can map samples recorded in new BBs back to the original BB
    // seen in the input binary (if using BAT)
    std::unique_ptr<BinaryBasicBlock> TailCallBB =
        createBasicBlock(BC.Ctx->createNamedTempSymbol("TC"));
    TailCallBB->setOffset(BB.getInputOffset());
    TailCallBB->addInstruction(TailCallInstr);
    TailCallBB->setCFIState(CFIStateBeforeCTC);

    // Add CFG edge with profile info from BB to TailCallBB.
    BB.addSuccessor(TailCallBB.get(), CTCTakenCount, CTCMispredCount);

    // Add execution count for the block.
    TailCallBB->setExecutionCount(CTCTakenCount);

    // Rewrite the CTC into a regular conditional branch pointing at the new
    // tail-call block.
    BC.MIB->convertTailCallToJmp(*CTCInstr);

    BC.MIB->replaceBranchTarget(*CTCInstr, TailCallBB->getLabel(),
                                BC.Ctx.get());

    // Add basic block to the list that will be added to the end.
    NewBlocks.emplace_back(std::move(TailCallBB));

    // Swap edges as the TailCallBB corresponds to the taken branch.
    BB.swapConditionalSuccessors();

    // This branch is no longer a conditional tail call.
    BC.MIB->unsetConditionalTailCall(*CTCInstr);

    // Move offset from CTCInstr to TailCallInstr.
    if (std::optional<uint32_t> Offset = BC.MIB->getOffset(*CTCInstr)) {
      BC.MIB->setOffset(TailCallInstr, *Offset);
      BC.MIB->clearOffset(*CTCInstr);
    }
  }

  // Insert all new tail-call blocks just before the function's last block.
  insertBasicBlocks(std::prev(end()), std::move(NewBlocks),
                    /* UpdateLayout */ true,
                    /* UpdateCFIState */ false);
}
2346 
2347 uint64_t BinaryFunction::getFunctionScore() const {
2348   if (FunctionScore != -1)
2349     return FunctionScore;
2350 
2351   if (!isSimple() || !hasValidProfile()) {
2352     FunctionScore = 0;
2353     return FunctionScore;
2354   }
2355 
2356   uint64_t TotalScore = 0ULL;
2357   for (const BinaryBasicBlock &BB : blocks()) {
2358     uint64_t BBExecCount = BB.getExecutionCount();
2359     if (BBExecCount == BinaryBasicBlock::COUNT_NO_PROFILE)
2360       continue;
2361     TotalScore += BBExecCount * BB.getNumNonPseudos();
2362   }
2363   FunctionScore = TotalScore;
2364   return FunctionScore;
2365 }
2366 
2367 void BinaryFunction::annotateCFIState() {
2368   assert(CurrentState == State::Disassembled && "unexpected function state");
2369   assert(!BasicBlocks.empty() && "basic block list should not be empty");
2370 
2371   // This is an index of the last processed CFI in FDE CFI program.
2372   uint32_t State = 0;
2373 
2374   // This is an index of RememberState CFI reflecting effective state right
2375   // after execution of RestoreState CFI.
2376   //
2377   // It differs from State iff the CFI at (State-1)
2378   // was RestoreState (modulo GNU_args_size CFIs, which are ignored).
2379   //
2380   // This allows us to generate shorter replay sequences when producing new
2381   // CFI programs.
2382   uint32_t EffectiveState = 0;
2383 
2384   // For tracking RememberState/RestoreState sequences.
2385   std::stack<uint32_t> StateStack;
2386 
2387   for (BinaryBasicBlock *BB : BasicBlocks) {
2388     BB->setCFIState(EffectiveState);
2389 
2390     for (const MCInst &Instr : *BB) {
2391       const MCCFIInstruction *CFI = getCFIFor(Instr);
2392       if (!CFI)
2393         continue;
2394 
2395       ++State;
2396 
2397       switch (CFI->getOperation()) {
2398       case MCCFIInstruction::OpRememberState:
2399         StateStack.push(EffectiveState);
2400         EffectiveState = State;
2401         break;
2402       case MCCFIInstruction::OpRestoreState:
2403         assert(!StateStack.empty() && "corrupt CFI stack");
2404         EffectiveState = StateStack.top();
2405         StateStack.pop();
2406         break;
2407       case MCCFIInstruction::OpGnuArgsSize:
2408         // OpGnuArgsSize CFIs do not affect the CFI state.
2409         break;
2410       default:
2411         // Any other CFI updates the state.
2412         EffectiveState = State;
2413         break;
2414       }
2415     }
2416   }
2417 
2418   assert(StateStack.empty() && "corrupt CFI stack");
2419 }
2420 
2421 namespace {
2422 
/// Our full interpretation of a DWARF CFI machine state at a given point.
/// Instead of materializing concrete register locations, the snapshot mostly
/// records *rule numbers*: indices of the CFI instruction that last defined a
/// given piece of state. Non-negative numbers index into the FDE instruction
/// list; negative numbers index into the CIE instruction list.
struct CFISnapshot {
  /// CFA register number and offset defining the canonical frame at this
  /// point, or the number of a rule (CFI state) that computes it with a
  /// DWARF expression. This number will be negative if it refers to a CFI
  /// located in the CIE instead of the FDE.
  uint32_t CFAReg;
  int32_t CFAOffset;
  int32_t CFARule;
  /// Mapping of rules (CFI states) that define the location of each
  /// register. If absent, no rule defining the location of such register
  /// was ever read. This number will be negative if it refers to a CFI
  /// located in the CIE instead of the FDE.
  DenseMap<int32_t, int32_t> RegRule;

  /// References to CIE, FDE and expanded instructions after a restore state
  const BinaryFunction::CFIInstrMapType &CIE;
  const BinaryFunction::CFIInstrMapType &FDE;
  const DenseMap<int32_t, SmallVector<int32_t, 4>> &FrameRestoreEquivalents;

  /// Current FDE CFI number representing the state where the snapshot is at
  int32_t CurState;

  /// Used when we don't have information about which state/rule to apply
  /// to recover the location of either the CFA or a specific register
  constexpr static int32_t UNKNOWN = std::numeric_limits<int32_t>::min();

private:
  /// Update our snapshot by executing a single CFI
  void update(const MCCFIInstruction &Instr, int32_t RuleNumber) {
    switch (Instr.getOperation()) {
    case MCCFIInstruction::OpSameValue:
    case MCCFIInstruction::OpRelOffset:
    case MCCFIInstruction::OpOffset:
    case MCCFIInstruction::OpRestore:
    case MCCFIInstruction::OpUndefined:
    case MCCFIInstruction::OpRegister:
      // Register-location CFIs: remember which rule now defines the register.
      RegRule[Instr.getRegister()] = RuleNumber;
      break;
    case MCCFIInstruction::OpDefCfaRegister:
      // An explicit register/offset definition supersedes any
      // expression-based CFA rule.
      CFAReg = Instr.getRegister();
      CFARule = UNKNOWN;
      break;
    case MCCFIInstruction::OpDefCfaOffset:
      CFAOffset = Instr.getOffset();
      CFARule = UNKNOWN;
      break;
    case MCCFIInstruction::OpDefCfa:
      CFAReg = Instr.getRegister();
      CFAOffset = Instr.getOffset();
      CFARule = UNKNOWN;
      break;
    case MCCFIInstruction::OpEscape: {
      std::optional<uint8_t> Reg =
          readDWARFExpressionTargetReg(Instr.getValues());
      // Handle DW_CFA_def_cfa_expression
      if (!Reg) {
        // No target register: the escaped expression defines the CFA itself.
        CFARule = RuleNumber;
        break;
      }
      RegRule[*Reg] = RuleNumber;
      break;
    }
    case MCCFIInstruction::OpAdjustCfaOffset:
    case MCCFIInstruction::OpWindowSave:
    case MCCFIInstruction::OpNegateRAState:
    case MCCFIInstruction::OpLLVMDefAspaceCfa:
      llvm_unreachable("unsupported CFI opcode");
      break;
    case MCCFIInstruction::OpRememberState:
    case MCCFIInstruction::OpRestoreState:
    case MCCFIInstruction::OpGnuArgsSize:
      // do not affect CFI state
      break;
    }
  }

public:
  /// Advance state reading FDE CFI instructions up to State number
  void advanceTo(int32_t State) {
    for (int32_t I = CurState, E = State; I != E; ++I) {
      const MCCFIInstruction &Instr = FDE[I];
      if (Instr.getOperation() != MCCFIInstruction::OpRestoreState) {
        update(Instr, I);
        continue;
      }
      // If restore state instruction, fetch the equivalent CFIs that have
      // the same effect of this restore. This is used to ensure remember-
      // restore pairs are completely removed.
      auto Iter = FrameRestoreEquivalents.find(I);
      if (Iter == FrameRestoreEquivalents.end())
        continue;
      for (int32_t RuleNumber : Iter->second)
        update(FDE[RuleNumber], RuleNumber);
    }

    // The CIE must have provided a default CFA definition (either an
    // explicit register/offset pair or an expression rule).
    assert(((CFAReg != (uint32_t)UNKNOWN && CFAOffset != UNKNOWN) ||
            CFARule != UNKNOWN) &&
           "CIE did not define default CFA?");

    CurState = State;
  }

  /// Interpret all CIE and FDE instructions up until CFI State number and
  /// populate this snapshot
  CFISnapshot(
      const BinaryFunction::CFIInstrMapType &CIE,
      const BinaryFunction::CFIInstrMapType &FDE,
      const DenseMap<int32_t, SmallVector<int32_t, 4>> &FrameRestoreEquivalents,
      int32_t State)
      : CIE(CIE), FDE(FDE), FrameRestoreEquivalents(FrameRestoreEquivalents) {
    CFAReg = UNKNOWN;
    CFAOffset = UNKNOWN;
    CFARule = UNKNOWN;
    CurState = 0;

    // CIE rules are recorded with negated indices to distinguish them from
    // FDE rules. NOTE(review): CIE instruction 0 encodes as rule 0, which is
    // indistinguishable from FDE rule 0 when looked up later — confirm this
    // case cannot cause a wrong rule to be replayed.
    for (int32_t I = 0, E = CIE.size(); I != E; ++I) {
      const MCCFIInstruction &Instr = CIE[I];
      update(Instr, -I);
    }

    advanceTo(State);
  }
};
2547 
/// A CFI snapshot with the capability of checking if incremental additions to
/// it are redundant. This is used to ensure we do not emit two CFI instructions
/// back-to-back that are doing the same state change, or to avoid emitting a
/// CFI at all when the state at that point would not be modified after that CFI
struct CFISnapshotDiff : public CFISnapshot {
  // Tracks which pieces of state a previous isRedundant() call has already
  // accounted for; a second instruction touching the same piece of state is
  // always reported redundant.
  bool RestoredCFAReg{false};
  bool RestoredCFAOffset{false};
  DenseMap<int32_t, bool> RestoredRegs;

  CFISnapshotDiff(const CFISnapshot &S) : CFISnapshot(S) {}

  CFISnapshotDiff(
      const BinaryFunction::CFIInstrMapType &CIE,
      const BinaryFunction::CFIInstrMapType &FDE,
      const DenseMap<int32_t, SmallVector<int32_t, 4>> &FrameRestoreEquivalents,
      int32_t State)
      : CFISnapshot(CIE, FDE, FrameRestoreEquivalents, State) {}

  /// Return true if applying Instr to this state is redundant and can be
  /// dismissed. Note that this check is stateful: the first instruction
  /// affecting a given register (or the CFA) is compared against the
  /// snapshot, while any later instruction touching the same state is
  /// reported redundant unconditionally.
  bool isRedundant(const MCCFIInstruction &Instr) {
    switch (Instr.getOperation()) {
    case MCCFIInstruction::OpSameValue:
    case MCCFIInstruction::OpRelOffset:
    case MCCFIInstruction::OpOffset:
    case MCCFIInstruction::OpRestore:
    case MCCFIInstruction::OpUndefined:
    case MCCFIInstruction::OpRegister:
    case MCCFIInstruction::OpEscape: {
      uint32_t Reg;
      if (Instr.getOperation() != MCCFIInstruction::OpEscape) {
        Reg = Instr.getRegister();
      } else {
        std::optional<uint8_t> R =
            readDWARFExpressionTargetReg(Instr.getValues());
        // Handle DW_CFA_def_cfa_expression
        if (!R) {
          // The expression defines the CFA: treat as a full CFA redefinition.
          if (RestoredCFAReg && RestoredCFAOffset)
            return true;
          RestoredCFAReg = true;
          RestoredCFAOffset = true;
          return false;
        }
        Reg = *R;
      }
      if (RestoredRegs[Reg])
        return true;
      RestoredRegs[Reg] = true;
      const int32_t CurRegRule = RegRule.contains(Reg) ? RegRule[Reg] : UNKNOWN;
      if (CurRegRule == UNKNOWN) {
        // No rule ever defined this register, so restoring it to its
        // default location is a no-op.
        if (Instr.getOperation() == MCCFIInstruction::OpRestore ||
            Instr.getOperation() == MCCFIInstruction::OpSameValue)
          return true;
        return false;
      }
      // Redundant only when the instruction repeats the last definition
      // of this register verbatim.
      const MCCFIInstruction &LastDef =
          CurRegRule < 0 ? CIE[-CurRegRule] : FDE[CurRegRule];
      return LastDef == Instr;
    }
    case MCCFIInstruction::OpDefCfaRegister:
      if (RestoredCFAReg)
        return true;
      RestoredCFAReg = true;
      return CFAReg == Instr.getRegister();
    case MCCFIInstruction::OpDefCfaOffset:
      if (RestoredCFAOffset)
        return true;
      RestoredCFAOffset = true;
      return CFAOffset == Instr.getOffset();
    case MCCFIInstruction::OpDefCfa:
      if (RestoredCFAReg && RestoredCFAOffset)
        return true;
      RestoredCFAReg = true;
      RestoredCFAOffset = true;
      return CFAReg == Instr.getRegister() && CFAOffset == Instr.getOffset();
    case MCCFIInstruction::OpAdjustCfaOffset:
    case MCCFIInstruction::OpWindowSave:
    case MCCFIInstruction::OpNegateRAState:
    case MCCFIInstruction::OpLLVMDefAspaceCfa:
      llvm_unreachable("unsupported CFI opcode");
      return false;
    case MCCFIInstruction::OpRememberState:
    case MCCFIInstruction::OpRestoreState:
    case MCCFIInstruction::OpGnuArgsSize:
      // do not affect CFI state
      return true;
    }
    return false;
  }
};
2638 
2639 } // end anonymous namespace
2640 
/// Insert the FDE CFIs in [FromState, ToState) at InsertIt in InBB so the
/// CFI state catches up from FromState to ToState, while skipping any CFI
/// that CFISnapshotDiff reports as redundant at this point. Currently this
/// never fails and always returns true.
bool BinaryFunction::replayCFIInstrs(int32_t FromState, int32_t ToState,
                                     BinaryBasicBlock *InBB,
                                     BinaryBasicBlock::iterator InsertIt) {
  if (FromState == ToState)
    return true;
  assert(FromState < ToState && "can only replay CFIs forward");

  CFISnapshotDiff CFIDiff(CIEFrameInstructions, FrameInstructions,
                          FrameRestoreEquivalents, FromState);

  // Collect the CFI numbers to replay, expanding each RestoreState into the
  // equivalent plain-CFI sequence recorded in FrameRestoreEquivalents.
  std::vector<uint32_t> NewCFIs;
  for (int32_t CurState = FromState; CurState < ToState; ++CurState) {
    MCCFIInstruction *Instr = &FrameInstructions[CurState];
    if (Instr->getOperation() == MCCFIInstruction::OpRestoreState) {
      auto Iter = FrameRestoreEquivalents.find(CurState);
      assert(Iter != FrameRestoreEquivalents.end());
      NewCFIs.insert(NewCFIs.end(), Iter->second.begin(), Iter->second.end());
      // RestoreState / Remember will be filtered out later by CFISnapshotDiff,
      // so we might as well fall-through here.
    }
    NewCFIs.push_back(CurState);
  }

  // Replay instructions while avoiding duplicates.
  // NOTE(review): the list is walked in reverse, apparently because each
  // addCFIPseudo call inserts at InsertIt ahead of previously inserted CFIs,
  // yielding forward program order in the block — confirm against the
  // addCFIPseudo implementation.
  for (int32_t State : llvm::reverse(NewCFIs)) {
    if (CFIDiff.isRedundant(FrameInstructions[State]))
      continue;
    InsertIt = addCFIPseudo(InBB, InsertIt, State);
  }

  return true;
}
2673 
/// Emit CFIs at InsertIt in InBB that rewind the effects of the FDE CFIs in
/// [ToState, FromState), bringing the effective CFI state back to ToState.
/// Returns the list of FrameInstructions indices that were emitted. InsertIt
/// is advanced past each inserted CFI pseudo.
SmallVector<int32_t, 4>
BinaryFunction::unwindCFIState(int32_t FromState, int32_t ToState,
                               BinaryBasicBlock *InBB,
                               BinaryBasicBlock::iterator &InsertIt) {
  SmallVector<int32_t, 4> NewStates;

  // ToCFITable holds the state we want to reach; FromCFITable tracks the
  // state we are rewinding from and filters out redundant emissions.
  CFISnapshot ToCFITable(CIEFrameInstructions, FrameInstructions,
                         FrameRestoreEquivalents, ToState);
  CFISnapshotDiff FromCFITable(ToCFITable);
  FromCFITable.advanceTo(FromState);

  // Re-establish the CFA definition recorded in ToCFITable.
  auto undoStateDefCfa = [&]() {
    if (ToCFITable.CFARule == CFISnapshot::UNKNOWN) {
      // CFA is a plain register/offset pair: synthesize a new def_cfa CFI.
      FrameInstructions.emplace_back(MCCFIInstruction::cfiDefCfa(
          nullptr, ToCFITable.CFAReg, ToCFITable.CFAOffset));
      if (FromCFITable.isRedundant(FrameInstructions.back())) {
        FrameInstructions.pop_back();
        return;
      }
      NewStates.push_back(FrameInstructions.size() - 1);
      InsertIt = addCFIPseudo(InBB, InsertIt, FrameInstructions.size() - 1);
      ++InsertIt;
    } else if (ToCFITable.CFARule < 0) {
      // CFA is defined by a CIE instruction (negative rule): copy it into
      // FrameInstructions so it can be referenced as an FDE CFI.
      if (FromCFITable.isRedundant(CIEFrameInstructions[-ToCFITable.CFARule]))
        return;
      NewStates.push_back(FrameInstructions.size());
      InsertIt = addCFIPseudo(InBB, InsertIt, FrameInstructions.size());
      ++InsertIt;
      FrameInstructions.emplace_back(CIEFrameInstructions[-ToCFITable.CFARule]);
    } else if (!FromCFITable.isRedundant(
                   FrameInstructions[ToCFITable.CFARule])) {
      // CFA is defined by an existing FDE instruction: re-emit it by number.
      NewStates.push_back(ToCFITable.CFARule);
      InsertIt = addCFIPseudo(InBB, InsertIt, ToCFITable.CFARule);
      ++InsertIt;
    }
  };

  // Emit the CFIs needed to undo the effect of a single instruction, using
  // the rules recorded in ToCFITable as the target state.
  auto undoState = [&](const MCCFIInstruction &Instr) {
    switch (Instr.getOperation()) {
    case MCCFIInstruction::OpRememberState:
    case MCCFIInstruction::OpRestoreState:
      break;
    case MCCFIInstruction::OpSameValue:
    case MCCFIInstruction::OpRelOffset:
    case MCCFIInstruction::OpOffset:
    case MCCFIInstruction::OpRestore:
    case MCCFIInstruction::OpUndefined:
    case MCCFIInstruction::OpEscape:
    case MCCFIInstruction::OpRegister: {
      uint32_t Reg;
      if (Instr.getOperation() != MCCFIInstruction::OpEscape) {
        Reg = Instr.getRegister();
      } else {
        std::optional<uint8_t> R =
            readDWARFExpressionTargetReg(Instr.getValues());
        // Handle DW_CFA_def_cfa_expression
        if (!R) {
          undoStateDefCfa();
          return;
        }
        Reg = *R;
      }

      if (!ToCFITable.RegRule.contains(Reg)) {
        // Target state has no rule for this register: restore it to its
        // default (CIE initial) location.
        FrameInstructions.emplace_back(
            MCCFIInstruction::createRestore(nullptr, Reg));
        if (FromCFITable.isRedundant(FrameInstructions.back())) {
          FrameInstructions.pop_back();
          break;
        }
        NewStates.push_back(FrameInstructions.size() - 1);
        InsertIt = addCFIPseudo(InBB, InsertIt, FrameInstructions.size() - 1);
        ++InsertIt;
        break;
      }
      const int32_t Rule = ToCFITable.RegRule[Reg];
      if (Rule < 0) {
        // Rule lives in the CIE: copy it into FrameInstructions first.
        if (FromCFITable.isRedundant(CIEFrameInstructions[-Rule]))
          break;
        NewStates.push_back(FrameInstructions.size());
        InsertIt = addCFIPseudo(InBB, InsertIt, FrameInstructions.size());
        ++InsertIt;
        FrameInstructions.emplace_back(CIEFrameInstructions[-Rule]);
        break;
      }
      // Rule is an existing FDE instruction: re-emit it by number.
      if (FromCFITable.isRedundant(FrameInstructions[Rule]))
        break;
      NewStates.push_back(Rule);
      InsertIt = addCFIPseudo(InBB, InsertIt, Rule);
      ++InsertIt;
      break;
    }
    case MCCFIInstruction::OpDefCfaRegister:
    case MCCFIInstruction::OpDefCfaOffset:
    case MCCFIInstruction::OpDefCfa:
      undoStateDefCfa();
      break;
    case MCCFIInstruction::OpAdjustCfaOffset:
    case MCCFIInstruction::OpWindowSave:
    case MCCFIInstruction::OpNegateRAState:
    case MCCFIInstruction::OpLLVMDefAspaceCfa:
      llvm_unreachable("unsupported CFI opcode");
      break;
    case MCCFIInstruction::OpGnuArgsSize:
      // do not affect CFI state
      break;
    }
  };

  // Undo all modifications from ToState to FromState
  for (int32_t I = ToState, E = FromState; I != E; ++I) {
    const MCCFIInstruction &Instr = FrameInstructions[I];
    if (Instr.getOperation() != MCCFIInstruction::OpRestoreState) {
      undoState(Instr);
      continue;
    }
    // Undo the expanded equivalents of a RestoreState instead of the
    // restore itself (restores are removed from the output).
    auto Iter = FrameRestoreEquivalents.find(I);
    if (Iter == FrameRestoreEquivalents.end())
      continue;
    for (int32_t State : Iter->second)
      undoState(FrameInstructions[State]);
  }

  return NewStates;
}
2799 
2800 void BinaryFunction::normalizeCFIState() {
2801   // Reordering blocks with remember-restore state instructions can be specially
2802   // tricky. When rewriting the CFI, we omit remember-restore state instructions
2803   // entirely. For restore state, we build a map expanding each restore to the
2804   // equivalent unwindCFIState sequence required at that point to achieve the
2805   // same effect of the restore. All remember state are then just ignored.
2806   std::stack<int32_t> Stack;
2807   for (BinaryBasicBlock *CurBB : Layout.blocks()) {
2808     for (auto II = CurBB->begin(); II != CurBB->end(); ++II) {
2809       if (const MCCFIInstruction *CFI = getCFIFor(*II)) {
2810         if (CFI->getOperation() == MCCFIInstruction::OpRememberState) {
2811           Stack.push(II->getOperand(0).getImm());
2812           continue;
2813         }
2814         if (CFI->getOperation() == MCCFIInstruction::OpRestoreState) {
2815           const int32_t RememberState = Stack.top();
2816           const int32_t CurState = II->getOperand(0).getImm();
2817           FrameRestoreEquivalents[CurState] =
2818               unwindCFIState(CurState, RememberState, CurBB, II);
2819           Stack.pop();
2820         }
2821       }
2822     }
2823   }
2824 }
2825 
/// Bring the CFI state at the start of every basic block in the final layout
/// in sync with the state the block expects, inserting unwind or replay
/// sequences where the states diverge, then strip all remember/restore state
/// pseudos. Returns false if a required replay could not be performed.
bool BinaryFunction::finalizeCFIState() {
  LLVM_DEBUG(
      dbgs() << "Trying to fix CFI states for each BB after reordering.\n");
  LLVM_DEBUG(dbgs() << "This is the list of CFI states for each BB of " << *this
                    << ": ");

  // Sep is only used inside LLVM_DEBUG; the (void) cast silences the
  // unused-variable warning in non-debug builds.
  const char *Sep = "";
  (void)Sep;
  for (FunctionFragment &FF : Layout.fragments()) {
    // Hot-cold border: at start of each region (with a different FDE) we need
    // to reset the CFI state.
    int32_t State = 0;

    for (BinaryBasicBlock *BB : FF) {
      const int32_t CFIStateAtExit = BB->getCFIStateAtExit();

      // We need to recover the correct state if it doesn't match expected
      // state at BB entry point.
      if (BB->getCFIState() < State) {
        // In this case, State is currently higher than what this BB expect it
        // to be. To solve this, we need to insert CFI instructions to undo
        // the effect of all CFI from BB's state to current State.
        auto InsertIt = BB->begin();
        unwindCFIState(State, BB->getCFIState(), BB, InsertIt);
      } else if (BB->getCFIState() > State) {
        // If BB's CFI state is greater than State, it means we are behind in
        // the state. Just emit all instructions to reach this state at the
        // beginning of this BB. If this sequence of instructions involve
        // remember state or restore state, bail out.
        if (!replayCFIInstrs(State, BB->getCFIState(), BB, BB->begin()))
          return false;
      }

      State = CFIStateAtExit;
      LLVM_DEBUG(dbgs() << Sep << State; Sep = ", ");
    }
  }
  LLVM_DEBUG(dbgs() << "\n");

  // Remember/restore state pseudos have been expanded into equivalent plain
  // CFIs (via FrameRestoreEquivalents), so remove them from every block.
  for (BinaryBasicBlock &BB : blocks()) {
    for (auto II = BB.begin(); II != BB.end();) {
      const MCCFIInstruction *CFI = getCFIFor(*II);
      if (CFI && (CFI->getOperation() == MCCFIInstruction::OpRememberState ||
                  CFI->getOperation() == MCCFIInstruction::OpRestoreState)) {
        II = BB.eraseInstruction(II);
      } else {
        ++II;
      }
    }
  }

  return true;
}
2879 
2880 bool BinaryFunction::requiresAddressTranslation() const {
2881   return opts::EnableBAT || hasSDTMarker() || hasPseudoProbe();
2882 }
2883 
2884 bool BinaryFunction::requiresAddressMap() const {
2885   if (isInjected())
2886     return false;
2887 
2888   return opts::UpdateDebugSections || isMultiEntry() ||
2889          requiresAddressTranslation();
2890 }
2891 
2892 uint64_t BinaryFunction::getInstructionCount() const {
2893   uint64_t Count = 0;
2894   for (const BinaryBasicBlock &BB : blocks())
2895     Count += BB.getNumNonPseudos();
2896   return Count;
2897 }
2898 
2899 void BinaryFunction::clearDisasmState() {
2900   clearList(Instructions);
2901   clearList(IgnoredBranches);
2902   clearList(TakenBranches);
2903 
2904   if (BC.HasRelocations) {
2905     for (std::pair<const uint32_t, MCSymbol *> &LI : Labels)
2906       BC.UndefinedSymbols.insert(LI.second);
2907     for (MCSymbol *const EndLabel : FunctionEndLabels)
2908       if (EndLabel)
2909         BC.UndefinedSymbols.insert(EndLabel);
2910   }
2911 }
2912 
2913 void BinaryFunction::setTrapOnEntry() {
2914   clearDisasmState();
2915 
2916   forEachEntryPoint([&](uint64_t Offset, const MCSymbol *Label) -> bool {
2917     MCInst TrapInstr;
2918     BC.MIB->createTrap(TrapInstr);
2919     addInstruction(Offset, std::move(TrapInstr));
2920     return true;
2921   });
2922 
2923   TrapsOnEntry = true;
2924 }
2925 
2926 void BinaryFunction::setIgnored() {
2927   if (opts::processAllFunctions()) {
2928     // We can accept ignored functions before they've been disassembled.
2929     // In that case, they would still get disassembled and emited, but not
2930     // optimized.
2931     assert(CurrentState == State::Empty &&
2932            "cannot ignore non-empty functions in current mode");
2933     IsIgnored = true;
2934     return;
2935   }
2936 
2937   clearDisasmState();
2938 
2939   // Clear CFG state too.
2940   if (hasCFG()) {
2941     releaseCFG();
2942 
2943     for (BinaryBasicBlock *BB : BasicBlocks)
2944       delete BB;
2945     clearList(BasicBlocks);
2946 
2947     for (BinaryBasicBlock *BB : DeletedBasicBlocks)
2948       delete BB;
2949     clearList(DeletedBasicBlocks);
2950 
2951     Layout.clear();
2952   }
2953 
2954   CurrentState = State::Empty;
2955 
2956   IsIgnored = true;
2957   IsSimple = false;
2958   LLVM_DEBUG(dbgs() << "Ignoring " << getPrintName() << '\n');
2959 }
2960 
2961 void BinaryFunction::duplicateConstantIslands() {
2962   assert(Islands && "function expected to have constant islands");
2963 
2964   for (BinaryBasicBlock *BB : getLayout().blocks()) {
2965     if (!BB->isCold())
2966       continue;
2967 
2968     for (MCInst &Inst : *BB) {
2969       int OpNum = 0;
2970       for (MCOperand &Operand : Inst) {
2971         if (!Operand.isExpr()) {
2972           ++OpNum;
2973           continue;
2974         }
2975         const MCSymbol *Symbol = BC.MIB->getTargetSymbol(Inst, OpNum);
2976         // Check if this is an island symbol
2977         if (!Islands->Symbols.count(Symbol) &&
2978             !Islands->ProxySymbols.count(Symbol))
2979           continue;
2980 
2981         // Create cold symbol, if missing
2982         auto ISym = Islands->ColdSymbols.find(Symbol);
2983         MCSymbol *ColdSymbol;
2984         if (ISym != Islands->ColdSymbols.end()) {
2985           ColdSymbol = ISym->second;
2986         } else {
2987           ColdSymbol = BC.Ctx->getOrCreateSymbol(Symbol->getName() + ".cold");
2988           Islands->ColdSymbols[Symbol] = ColdSymbol;
2989           // Check if this is a proxy island symbol and update owner proxy map
2990           if (Islands->ProxySymbols.count(Symbol)) {
2991             BinaryFunction *Owner = Islands->ProxySymbols[Symbol];
2992             auto IProxiedSym = Owner->Islands->Proxies[this].find(Symbol);
2993             Owner->Islands->ColdProxies[this][IProxiedSym->second] = ColdSymbol;
2994           }
2995         }
2996 
2997         // Update instruction reference
2998         Operand = MCOperand::createExpr(BC.MIB->getTargetExprFor(
2999             Inst,
3000             MCSymbolRefExpr::create(ColdSymbol, MCSymbolRefExpr::VK_None,
3001                                     *BC.Ctx),
3002             *BC.Ctx, 0));
3003         ++OpNum;
3004       }
3005     }
3006   }
3007 }
3008 
3009 #ifndef MAX_PATH
3010 #define MAX_PATH 255
3011 #endif
3012 
3013 static std::string constructFilename(std::string Filename,
3014                                      std::string Annotation,
3015                                      std::string Suffix) {
3016   std::replace(Filename.begin(), Filename.end(), '/', '-');
3017   if (!Annotation.empty())
3018     Annotation.insert(0, "-");
3019   if (Filename.size() + Annotation.size() + Suffix.size() > MAX_PATH) {
3020     assert(Suffix.size() + Annotation.size() <= MAX_PATH);
3021     if (opts::Verbosity >= 1) {
3022       errs() << "BOLT-WARNING: Filename \"" << Filename << Annotation << Suffix
3023              << "\" exceeds the " << MAX_PATH << " size limit, truncating.\n";
3024     }
3025     Filename.resize(MAX_PATH - (Suffix.size() + Annotation.size()));
3026   }
3027   Filename += Annotation;
3028   Filename += Suffix;
3029   return Filename;
3030 }
3031 
/// Sanitize a string for embedding inside a quoted graphviz label:
/// double quotes are dropped and newlines are rendered as the "&#13;"
/// character entity.
static std::string formatEscapes(const std::string &Str) {
  std::string Escaped;
  for (const char C : Str) {
    if (C == '"')
      continue;
    if (C == '\n') {
      Escaped += "&#13;";
      continue;
    }
    Escaped += C;
  }
  return Escaped;
}
3049 
/// Dump the function's CFG to OS in graphviz dot format. Each basic block
/// becomes a node labeled with its execution count (C), input offset (O),
/// block index (I), layout index (L), and CFI state; each successor edge is
/// labeled with the branch opcode (or TB/FB/FT/JT) and profile counts.
void BinaryFunction::dumpGraph(raw_ostream &OS) const {
  OS << "digraph \"" << getPrintName() << "\" {\n"
     << "node [fontname=courier, shape=box, style=filled, colorscheme=brbg9]\n";
  uint64_t Offset = Address;
  for (BinaryBasicBlock *BB : BasicBlocks) {
    auto LayoutPos = find(Layout.blocks(), BB);
    unsigned LayoutIndex = LayoutPos - Layout.block_begin();
    const char *ColdStr = BB->isCold() ? " (cold)" : "";
    std::vector<std::string> Attrs;
    // Bold box for entry points
    if (isEntryPoint(*BB))
      Attrs.push_back("penwidth=2");
    if (BLI && BLI->getLoopFor(BB)) {
      // Distinguish innermost loops
      const BinaryLoop *Loop = BLI->getLoopFor(BB);
      if (Loop->isInnermost())
        Attrs.push_back("fillcolor=6");
      else // some outer loop
        Attrs.push_back("fillcolor=4");
    } else { // non-loopy code
      Attrs.push_back("fillcolor=5");
    }
    ListSeparator LS;
    OS << "\"" << BB->getName() << "\" [";
    for (StringRef Attr : Attrs)
      OS << LS << Attr;
    OS << "]\n";
    OS << format("\"%s\" [label=\"%s%s\\n(C:%lu,O:%lu,I:%u,L:%u,CFI:%u)\\n",
                 BB->getName().data(), BB->getName().data(), ColdStr,
                 BB->getKnownExecutionCount(), BB->getOffset(), getIndex(BB),
                 LayoutIndex, BB->getCFIState());

    // Optionally embed the block's disassembly into the node tooltip.
    if (opts::DotToolTipCode) {
      std::string Str;
      raw_string_ostream CS(Str);
      Offset = BC.printInstructions(CS, BB->begin(), BB->end(), Offset, this,
                                    /* PrintMCInst = */ false,
                                    /* PrintMemData = */ false,
                                    /* PrintRelocations = */ false,
                                    /* Endl = */ R"(\\l)");
      OS << formatEscapes(CS.str()) << '\n';
    }
    OS << "\"]\n";

    // analyzeBranch is just used to get the names of the branch
    // opcodes.
    const MCSymbol *TBB = nullptr;
    const MCSymbol *FBB = nullptr;
    MCInst *CondBranch = nullptr;
    MCInst *UncondBranch = nullptr;
    const bool Success = BB->analyzeBranch(TBB, FBB, CondBranch, UncondBranch);

    const MCInst *LastInstr = BB->getLastNonPseudoInstr();
    const bool IsJumpTable = LastInstr && BC.MIB->getJumpTable(*LastInstr);

    auto BI = BB->branch_info_begin();
    for (BinaryBasicBlock *Succ : BB->successors()) {
      // Label each edge: opcode name for explicit branches, otherwise
      // TB (taken), FB (fall-back), FT (fall-through), or JT (jump table).
      std::string Branch;
      if (Success) {
        if (Succ == BB->getConditionalSuccessor(true)) {
          Branch = CondBranch ? std::string(BC.InstPrinter->getOpcodeName(
                                    CondBranch->getOpcode()))
                              : "TB";
        } else if (Succ == BB->getConditionalSuccessor(false)) {
          Branch = UncondBranch ? std::string(BC.InstPrinter->getOpcodeName(
                                      UncondBranch->getOpcode()))
                                : "FB";
        } else {
          Branch = "FT";
        }
      }
      if (IsJumpTable)
        Branch = "JT";
      OS << format("\"%s\" -> \"%s\" [label=\"%s", BB->getName().data(),
                   Succ->getName().data(), Branch.c_str());

      // Append profile counts: C = count, M = mispredictions, or
      // IC = inferred count.
      if (BB->getExecutionCount() != COUNT_NO_PROFILE &&
          BI->MispredictedCount != BinaryBasicBlock::COUNT_INFERRED) {
        OS << "\\n(C:" << BI->Count << ",M:" << BI->MispredictedCount << ")";
      } else if (ExecutionCount != COUNT_NO_PROFILE &&
                 BI->Count != BinaryBasicBlock::COUNT_NO_PROFILE) {
        OS << "\\n(IC:" << BI->Count << ")";
      }
      OS << "\"]\n";

      ++BI;
    }
    // Dashed, non-constraining edges for exception landing pads.
    for (BinaryBasicBlock *LP : BB->landing_pads()) {
      OS << format("\"%s\" -> \"%s\" [constraint=false style=dashed]\n",
                   BB->getName().data(), LP->getName().data());
    }
  }
  OS << "}\n";
}
3144 
3145 void BinaryFunction::viewGraph() const {
3146   SmallString<MAX_PATH> Filename;
3147   if (std::error_code EC =
3148           sys::fs::createTemporaryFile("bolt-cfg", "dot", Filename)) {
3149     errs() << "BOLT-ERROR: " << EC.message() << ", unable to create "
3150            << " bolt-cfg-XXXXX.dot temporary file.\n";
3151     return;
3152   }
3153   dumpGraphToFile(std::string(Filename));
3154   if (DisplayGraph(Filename))
3155     errs() << "BOLT-ERROR: Can't display " << Filename << " with graphviz.\n";
3156   if (std::error_code EC = sys::fs::remove(Filename)) {
3157     errs() << "BOLT-WARNING: " << EC.message() << ", failed to remove "
3158            << Filename << "\n";
3159   }
3160 }
3161 
3162 void BinaryFunction::dumpGraphForPass(std::string Annotation) const {
3163   if (!opts::shouldPrint(*this))
3164     return;
3165 
3166   std::string Filename = constructFilename(getPrintName(), Annotation, ".dot");
3167   if (opts::Verbosity >= 1)
3168     outs() << "BOLT-INFO: dumping CFG to " << Filename << "\n";
3169   dumpGraphToFile(Filename);
3170 }
3171 
3172 void BinaryFunction::dumpGraphToFile(std::string Filename) const {
3173   std::error_code EC;
3174   raw_fd_ostream of(Filename, EC, sys::fs::OF_None);
3175   if (EC) {
3176     if (opts::Verbosity >= 1) {
3177       errs() << "BOLT-WARNING: " << EC.message() << ", unable to open "
3178              << Filename << " for output.\n";
3179     }
3180     return;
3181   }
3182   dumpGraph(of);
3183 }
3184 
3185 bool BinaryFunction::validateCFG() const {
3186   // Skip the validation of CFG after it is finalized
3187   if (CurrentState == State::CFG_Finalized)
3188     return true;
3189 
3190   bool Valid = true;
3191   for (BinaryBasicBlock *BB : BasicBlocks)
3192     Valid &= BB->validateSuccessorInvariants();
3193 
3194   if (!Valid)
3195     return Valid;
3196 
3197   // Make sure all blocks in CFG are valid.
3198   auto validateBlock = [this](const BinaryBasicBlock *BB, StringRef Desc) {
3199     if (!BB->isValid()) {
3200       errs() << "BOLT-ERROR: deleted " << Desc << " " << BB->getName()
3201              << " detected in:\n";
3202       this->dump();
3203       return false;
3204     }
3205     return true;
3206   };
3207   for (const BinaryBasicBlock *BB : BasicBlocks) {
3208     if (!validateBlock(BB, "block"))
3209       return false;
3210     for (const BinaryBasicBlock *PredBB : BB->predecessors())
3211       if (!validateBlock(PredBB, "predecessor"))
3212         return false;
3213     for (const BinaryBasicBlock *SuccBB : BB->successors())
3214       if (!validateBlock(SuccBB, "successor"))
3215         return false;
3216     for (const BinaryBasicBlock *LP : BB->landing_pads())
3217       if (!validateBlock(LP, "landing pad"))
3218         return false;
3219     for (const BinaryBasicBlock *Thrower : BB->throwers())
3220       if (!validateBlock(Thrower, "thrower"))
3221         return false;
3222   }
3223 
3224   for (const BinaryBasicBlock *BB : BasicBlocks) {
3225     std::unordered_set<const BinaryBasicBlock *> BBLandingPads;
3226     for (const BinaryBasicBlock *LP : BB->landing_pads()) {
3227       if (BBLandingPads.count(LP)) {
3228         errs() << "BOLT-ERROR: duplicate landing pad detected in"
3229                << BB->getName() << " in function " << *this << '\n';
3230         return false;
3231       }
3232       BBLandingPads.insert(LP);
3233     }
3234 
3235     std::unordered_set<const BinaryBasicBlock *> BBThrowers;
3236     for (const BinaryBasicBlock *Thrower : BB->throwers()) {
3237       if (BBThrowers.count(Thrower)) {
3238         errs() << "BOLT-ERROR: duplicate thrower detected in" << BB->getName()
3239                << " in function " << *this << '\n';
3240         return false;
3241       }
3242       BBThrowers.insert(Thrower);
3243     }
3244 
3245     for (const BinaryBasicBlock *LPBlock : BB->landing_pads()) {
3246       if (!llvm::is_contained(LPBlock->throwers(), BB)) {
3247         errs() << "BOLT-ERROR: inconsistent landing pad detected in " << *this
3248                << ": " << BB->getName() << " is in LandingPads but not in "
3249                << LPBlock->getName() << " Throwers\n";
3250         return false;
3251       }
3252     }
3253     for (const BinaryBasicBlock *Thrower : BB->throwers()) {
3254       if (!llvm::is_contained(Thrower->landing_pads(), BB)) {
3255         errs() << "BOLT-ERROR: inconsistent thrower detected in " << *this
3256                << ": " << BB->getName() << " is in Throwers list but not in "
3257                << Thrower->getName() << " LandingPads\n";
3258         return false;
3259       }
3260     }
3261   }
3262 
3263   return Valid;
3264 }
3265 
/// Rewrite the branch instructions at the end of every basic block so that
/// emitted control flow matches the CFG successors under the current block
/// layout: unconditional branches are dropped and re-created only when the
/// fall-through does not already reach the intended successor, and
/// conditional branches are retargeted or reversed as needed.
void BinaryFunction::fixBranches() {
  auto &MIB = BC.MIB;
  MCContext *Ctx = BC.Ctx.get();

  for (BinaryBasicBlock *BB : BasicBlocks) {
    const MCSymbol *TBB = nullptr;
    const MCSymbol *FBB = nullptr;
    MCInst *CondBranch = nullptr;
    MCInst *UncondBranch = nullptr;
    // Skip blocks whose terminator sequence cannot be analyzed.
    if (!BB->analyzeBranch(TBB, FBB, CondBranch, UncondBranch))
      continue;

    // We will create unconditional branch with correct destination if needed.
    if (UncondBranch)
      BB->eraseInstruction(BB->findInstruction(UncondBranch));

    // Basic block that follows the current one in the final layout.
    const BinaryBasicBlock *NextBB =
        Layout.getBasicBlockAfter(BB, /*IgnoreSplits=*/false);

    if (BB->succ_size() == 1) {
      // __builtin_unreachable() could create a conditional branch that
      // falls-through into the next function - hence the block will have only
      // one valid successor. Since behaviour is undefined - we replace
      // the conditional branch with an unconditional if required.
      if (CondBranch)
        BB->eraseInstruction(BB->findInstruction(CondBranch));
      // Fall-through already reaches the sole successor: no branch needed.
      if (BB->getSuccessor() == NextBB)
        continue;
      BB->addBranchInstruction(BB->getSuccessor());
    } else if (BB->succ_size() == 2) {
      assert(CondBranch && "conditional branch expected");
      const BinaryBasicBlock *TSuccessor = BB->getConditionalSuccessor(true);
      const BinaryBasicBlock *FSuccessor = BB->getConditionalSuccessor(false);
      // Check whether we support reversing this branch direction
      const bool IsSupported = !MIB->isUnsupportedBranch(*CondBranch);
      // If the taken successor is the layout fall-through, invert the
      // condition so the fall-through path can be used for it.
      if (NextBB && NextBB == TSuccessor && IsSupported) {
        std::swap(TSuccessor, FSuccessor);
        {
          auto L = BC.scopeLock();
          MIB->reverseBranchCondition(*CondBranch, TSuccessor->getLabel(), Ctx);
        }
        BB->swapConditionalSuccessors();
      } else {
        auto L = BC.scopeLock();
        MIB->replaceBranchTarget(*CondBranch, TSuccessor->getLabel(), Ctx);
      }
      if (TSuccessor == FSuccessor)
        BB->removeDuplicateConditionalSuccessor(CondBranch);
      // Add an explicit branch on the false path when neither successor is
      // reachable by fall-through.
      if (!NextBB ||
          ((NextBB != TSuccessor || !IsSupported) && NextBB != FSuccessor)) {
        // If one of the branches is guaranteed to be "long" while the other
        // could be "short", then prioritize short for "taken". This will
        // generate a sequence 1 byte shorter on x86.
        if (IsSupported && BC.isX86() &&
            TSuccessor->getFragmentNum() != FSuccessor->getFragmentNum() &&
            BB->getFragmentNum() != TSuccessor->getFragmentNum()) {
          std::swap(TSuccessor, FSuccessor);
          {
            auto L = BC.scopeLock();
            MIB->reverseBranchCondition(*CondBranch, TSuccessor->getLabel(),
                                        Ctx);
          }
          BB->swapConditionalSuccessors();
        }
        BB->addBranchInstruction(FSuccessor);
      }
    }
    // Cases where the number of successors is 0 (block ends with a
    // terminator) or more than 2 (switch table) don't require branch
    // instruction adjustments.
  }
  assert((!isSimple() || validateCFG()) &&
         "Invalid CFG detected after fixing branches");
}
3341 
3342 void BinaryFunction::propagateGnuArgsSizeInfo(
3343     MCPlusBuilder::AllocatorIdTy AllocId) {
3344   assert(CurrentState == State::Disassembled && "unexpected function state");
3345 
3346   if (!hasEHRanges() || !usesGnuArgsSize())
3347     return;
3348 
3349   // The current value of DW_CFA_GNU_args_size affects all following
3350   // invoke instructions until the next CFI overrides it.
3351   // It is important to iterate basic blocks in the original order when
3352   // assigning the value.
3353   uint64_t CurrentGnuArgsSize = 0;
3354   for (BinaryBasicBlock *BB : BasicBlocks) {
3355     for (auto II = BB->begin(); II != BB->end();) {
3356       MCInst &Instr = *II;
3357       if (BC.MIB->isCFI(Instr)) {
3358         const MCCFIInstruction *CFI = getCFIFor(Instr);
3359         if (CFI->getOperation() == MCCFIInstruction::OpGnuArgsSize) {
3360           CurrentGnuArgsSize = CFI->getOffset();
3361           // Delete DW_CFA_GNU_args_size instructions and only regenerate
3362           // during the final code emission. The information is embedded
3363           // inside call instructions.
3364           II = BB->erasePseudoInstruction(II);
3365           continue;
3366         }
3367       } else if (BC.MIB->isInvoke(Instr)) {
3368         // Add the value of GNU_args_size as an extra operand to invokes.
3369         BC.MIB->addGnuArgsSize(Instr, CurrentGnuArgsSize, AllocId);
3370       }
3371       ++II;
3372     }
3373   }
3374 }
3375 
/// Post-CFG terminator cleanup: remove conditional branches from blocks with
/// a single successor, and append a return instruction to reachable blocks
/// that have no successors and no terminating instruction.
void BinaryFunction::postProcessBranches() {
  if (!isSimple())
    return;
  for (BinaryBasicBlock &BB : blocks()) {
    // Reverse iterator to the last non-pseudo instruction, if any.
    auto LastInstrRI = BB.getLastNonPseudo();
    if (BB.succ_size() == 1) {
      if (LastInstrRI != BB.rend() &&
          BC.MIB->isConditionalBranch(*LastInstrRI)) {
        // __builtin_unreachable() could create a conditional branch that
        // falls-through into the next function - hence the block will have only
        // one valid successor. Such behaviour is undefined and thus we remove
        // the conditional branch while leaving a valid successor.
        // std::prev(base()) is the forward iterator addressing the same
        // instruction the reverse iterator points at.
        BB.eraseInstruction(std::prev(LastInstrRI.base()));
        LLVM_DEBUG(dbgs() << "BOLT-DEBUG: erasing conditional branch in "
                          << BB.getName() << " in function " << *this << '\n');
      }
    } else if (BB.succ_size() == 0) {
      // Ignore unreachable basic blocks.
      if (BB.pred_size() == 0 || BB.isLandingPad())
        continue;

      // If it's the basic block that does not end up with a terminator - we
      // insert a return instruction unless it's a call instruction.
      if (LastInstrRI == BB.rend()) {
        LLVM_DEBUG(
            dbgs() << "BOLT-DEBUG: at least one instruction expected in BB "
                   << BB.getName() << " in function " << *this << '\n');
        continue;
      }
      if (!BC.MIB->isTerminator(*LastInstrRI) &&
          !BC.MIB->isCall(*LastInstrRI)) {
        LLVM_DEBUG(dbgs() << "BOLT-DEBUG: adding return to basic block "
                          << BB.getName() << " in function " << *this << '\n');
        MCInst ReturnInstr;
        BC.MIB->createReturn(ReturnInstr);
        BB.addInstruction(ReturnInstr);
      }
    }
  }
  assert(validateCFG() && "invalid CFG");
}
3417 
3418 MCSymbol *BinaryFunction::addEntryPointAtOffset(uint64_t Offset) {
3419   assert(Offset && "cannot add primary entry point");
3420   assert(CurrentState == State::Empty || CurrentState == State::Disassembled);
3421 
3422   const uint64_t EntryPointAddress = getAddress() + Offset;
3423   MCSymbol *LocalSymbol = getOrCreateLocalLabel(EntryPointAddress);
3424 
3425   MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(LocalSymbol);
3426   if (EntrySymbol)
3427     return EntrySymbol;
3428 
3429   if (BinaryData *EntryBD = BC.getBinaryDataAtAddress(EntryPointAddress)) {
3430     EntrySymbol = EntryBD->getSymbol();
3431   } else {
3432     EntrySymbol = BC.getOrCreateGlobalSymbol(
3433         EntryPointAddress, Twine("__ENTRY_") + getOneName() + "@");
3434   }
3435   SecondaryEntryPoints[LocalSymbol] = EntrySymbol;
3436 
3437   BC.setSymbolToFunctionMap(EntrySymbol, this);
3438 
3439   return EntrySymbol;
3440 }
3441 
3442 MCSymbol *BinaryFunction::addEntryPoint(const BinaryBasicBlock &BB) {
3443   assert(CurrentState == State::CFG &&
3444          "basic block can be added as an entry only in a function with CFG");
3445 
3446   if (&BB == BasicBlocks.front())
3447     return getSymbol();
3448 
3449   MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(BB);
3450   if (EntrySymbol)
3451     return EntrySymbol;
3452 
3453   EntrySymbol =
3454       BC.Ctx->getOrCreateSymbol("__ENTRY_" + BB.getLabel()->getName());
3455 
3456   SecondaryEntryPoints[BB.getLabel()] = EntrySymbol;
3457 
3458   BC.setSymbolToFunctionMap(EntrySymbol, this);
3459 
3460   return EntrySymbol;
3461 }
3462 
3463 MCSymbol *BinaryFunction::getSymbolForEntryID(uint64_t EntryID) {
3464   if (EntryID == 0)
3465     return getSymbol();
3466 
3467   if (!isMultiEntry())
3468     return nullptr;
3469 
3470   uint64_t NumEntries = 0;
3471   if (hasCFG()) {
3472     for (BinaryBasicBlock *BB : BasicBlocks) {
3473       MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(*BB);
3474       if (!EntrySymbol)
3475         continue;
3476       if (NumEntries == EntryID)
3477         return EntrySymbol;
3478       ++NumEntries;
3479     }
3480   } else {
3481     for (std::pair<const uint32_t, MCSymbol *> &KV : Labels) {
3482       MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(KV.second);
3483       if (!EntrySymbol)
3484         continue;
3485       if (NumEntries == EntryID)
3486         return EntrySymbol;
3487       ++NumEntries;
3488     }
3489   }
3490 
3491   return nullptr;
3492 }
3493 
3494 uint64_t BinaryFunction::getEntryIDForSymbol(const MCSymbol *Symbol) const {
3495   if (!isMultiEntry())
3496     return 0;
3497 
3498   for (const MCSymbol *FunctionSymbol : getSymbols())
3499     if (FunctionSymbol == Symbol)
3500       return 0;
3501 
3502   // Check all secondary entries available as either basic blocks or lables.
3503   uint64_t NumEntries = 0;
3504   for (const BinaryBasicBlock *BB : BasicBlocks) {
3505     MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(*BB);
3506     if (!EntrySymbol)
3507       continue;
3508     if (EntrySymbol == Symbol)
3509       return NumEntries;
3510     ++NumEntries;
3511   }
3512   NumEntries = 0;
3513   for (const std::pair<const uint32_t, MCSymbol *> &KV : Labels) {
3514     MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(KV.second);
3515     if (!EntrySymbol)
3516       continue;
3517     if (EntrySymbol == Symbol)
3518       return NumEntries;
3519     ++NumEntries;
3520   }
3521 
3522   llvm_unreachable("symbol not found");
3523 }
3524 
3525 bool BinaryFunction::forEachEntryPoint(EntryPointCallbackTy Callback) const {
3526   bool Status = Callback(0, getSymbol());
3527   if (!isMultiEntry())
3528     return Status;
3529 
3530   for (const std::pair<const uint32_t, MCSymbol *> &KV : Labels) {
3531     if (!Status)
3532       break;
3533 
3534     MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(KV.second);
3535     if (!EntrySymbol)
3536       continue;
3537 
3538     Status = Callback(KV.first, EntrySymbol);
3539   }
3540 
3541   return Status;
3542 }
3543 
/// Return the basic blocks in depth-first-search order, starting from all
/// entry points (sorted by input offset). Layout indices are reused as the
/// DFS visitation marker and end up holding each block's DFS number.
BinaryFunction::BasicBlockListType BinaryFunction::dfs() const {
  BasicBlockListType DFS;
  unsigned Index = 0;
  std::stack<BinaryBasicBlock *> Stack;

  // Push entry points to the stack in reverse order.
  //
  // NB: we rely on the original order of entries to match.
  SmallVector<BinaryBasicBlock *> EntryPoints;
  llvm::copy_if(BasicBlocks, std::back_inserter(EntryPoints),
          [&](const BinaryBasicBlock *const BB) { return isEntryPoint(*BB); });
  // Sort entry points by their offset to make sure we got them in the right
  // order.
  llvm::stable_sort(EntryPoints, [](const BinaryBasicBlock *const A,
                              const BinaryBasicBlock *const B) {
    return A->getOffset() < B->getOffset();
  });
  for (BinaryBasicBlock *const BB : reverse(EntryPoints))
    Stack.push(BB);

  // InvalidIndex doubles as the "not yet visited" marker.
  for (BinaryBasicBlock &BB : blocks())
    BB.setLayoutIndex(BinaryBasicBlock::InvalidIndex);

  while (!Stack.empty()) {
    BinaryBasicBlock *BB = Stack.top();
    Stack.pop();

    // Skip blocks already visited.
    if (BB->getLayoutIndex() != BinaryBasicBlock::InvalidIndex)
      continue;

    BB->setLayoutIndex(Index++);
    DFS.push_back(BB);

    for (BinaryBasicBlock *SuccBB : BB->landing_pads()) {
      Stack.push(SuccBB);
    }

    const MCSymbol *TBB = nullptr;
    const MCSymbol *FBB = nullptr;
    MCInst *CondBranch = nullptr;
    MCInst *UncondBranch = nullptr;
    // For a two-way conditional branch, choose the successor push order
    // based on whether the condition code is in canonical form. The block
    // pushed last is visited first (LIFO stack).
    if (BB->analyzeBranch(TBB, FBB, CondBranch, UncondBranch) && CondBranch &&
        BB->succ_size() == 2) {
      if (BC.MIB->getCanonicalBranchCondCode(BC.MIB->getCondCode(
              *CondBranch)) == BC.MIB->getCondCode(*CondBranch)) {
        Stack.push(BB->getConditionalSuccessor(true));
        Stack.push(BB->getConditionalSuccessor(false));
      } else {
        Stack.push(BB->getConditionalSuccessor(false));
        Stack.push(BB->getConditionalSuccessor(true));
      }
    } else {
      for (BinaryBasicBlock *SuccBB : BB->successors()) {
        Stack.push(SuccBB);
      }
    }
  }

  return DFS;
}
3604 
3605 size_t BinaryFunction::computeHash(bool UseDFS,
3606                                    OperandHashFuncTy OperandHashFunc) const {
3607   if (size() == 0)
3608     return 0;
3609 
3610   assert(hasCFG() && "function is expected to have CFG");
3611 
3612   SmallVector<const BinaryBasicBlock *, 0> Order;
3613   if (UseDFS)
3614     llvm::copy(dfs(), std::back_inserter(Order));
3615   else
3616     llvm::copy(Layout.blocks(), std::back_inserter(Order));
3617 
3618   // The hash is computed by creating a string of all instruction opcodes and
3619   // possibly their operands and then hashing that string with std::hash.
3620   std::string HashString;
3621   for (const BinaryBasicBlock *BB : Order)
3622     HashString.append(hashBlock(BC, *BB, OperandHashFunc));
3623 
3624   return Hash = std::hash<std::string>{}(HashString);
3625 }
3626 
3627 void BinaryFunction::insertBasicBlocks(
3628     BinaryBasicBlock *Start,
3629     std::vector<std::unique_ptr<BinaryBasicBlock>> &&NewBBs,
3630     const bool UpdateLayout, const bool UpdateCFIState,
3631     const bool RecomputeLandingPads) {
3632   const int64_t StartIndex = Start ? getIndex(Start) : -1LL;
3633   const size_t NumNewBlocks = NewBBs.size();
3634 
3635   BasicBlocks.insert(BasicBlocks.begin() + (StartIndex + 1), NumNewBlocks,
3636                      nullptr);
3637 
3638   int64_t I = StartIndex + 1;
3639   for (std::unique_ptr<BinaryBasicBlock> &BB : NewBBs) {
3640     assert(!BasicBlocks[I]);
3641     BasicBlocks[I++] = BB.release();
3642   }
3643 
3644   if (RecomputeLandingPads)
3645     recomputeLandingPads();
3646   else
3647     updateBBIndices(0);
3648 
3649   if (UpdateLayout)
3650     updateLayout(Start, NumNewBlocks);
3651 
3652   if (UpdateCFIState)
3653     updateCFIState(Start, NumNewBlocks);
3654 }
3655 
3656 BinaryFunction::iterator BinaryFunction::insertBasicBlocks(
3657     BinaryFunction::iterator StartBB,
3658     std::vector<std::unique_ptr<BinaryBasicBlock>> &&NewBBs,
3659     const bool UpdateLayout, const bool UpdateCFIState,
3660     const bool RecomputeLandingPads) {
3661   const unsigned StartIndex = getIndex(&*StartBB);
3662   const size_t NumNewBlocks = NewBBs.size();
3663 
3664   BasicBlocks.insert(BasicBlocks.begin() + StartIndex + 1, NumNewBlocks,
3665                      nullptr);
3666   auto RetIter = BasicBlocks.begin() + StartIndex + 1;
3667 
3668   unsigned I = StartIndex + 1;
3669   for (std::unique_ptr<BinaryBasicBlock> &BB : NewBBs) {
3670     assert(!BasicBlocks[I]);
3671     BasicBlocks[I++] = BB.release();
3672   }
3673 
3674   if (RecomputeLandingPads)
3675     recomputeLandingPads();
3676   else
3677     updateBBIndices(0);
3678 
3679   if (UpdateLayout)
3680     updateLayout(*std::prev(RetIter), NumNewBlocks);
3681 
3682   if (UpdateCFIState)
3683     updateCFIState(*std::prev(RetIter), NumNewBlocks);
3684 
3685   return RetIter;
3686 }
3687 
3688 void BinaryFunction::updateBBIndices(const unsigned StartIndex) {
3689   for (unsigned I = StartIndex; I < BasicBlocks.size(); ++I)
3690     BasicBlocks[I]->Index = I;
3691 }
3692 
3693 void BinaryFunction::updateCFIState(BinaryBasicBlock *Start,
3694                                     const unsigned NumNewBlocks) {
3695   const int32_t CFIState = Start->getCFIStateAtExit();
3696   const unsigned StartIndex = getIndex(Start) + 1;
3697   for (unsigned I = 0; I < NumNewBlocks; ++I)
3698     BasicBlocks[StartIndex + I]->setCFIState(CFIState);
3699 }
3700 
3701 void BinaryFunction::updateLayout(BinaryBasicBlock *Start,
3702                                   const unsigned NumNewBlocks) {
3703   BasicBlockListType::iterator Begin;
3704   BasicBlockListType::iterator End;
3705 
3706   // If start not provided copy new blocks from the beginning of BasicBlocks
3707   if (!Start) {
3708     Begin = BasicBlocks.begin();
3709     End = BasicBlocks.begin() + NumNewBlocks;
3710   } else {
3711     unsigned StartIndex = getIndex(Start);
3712     Begin = std::next(BasicBlocks.begin(), StartIndex + 1);
3713     End = std::next(BasicBlocks.begin(), StartIndex + NumNewBlocks + 1);
3714   }
3715 
3716   // Insert new blocks in the layout immediately after Start.
3717   Layout.insertBasicBlocks(Start, {Begin, End});
3718   Layout.updateLayoutIndices();
3719 }
3720 
3721 bool BinaryFunction::checkForAmbiguousJumpTables() {
3722   SmallSet<uint64_t, 4> JumpTables;
3723   for (BinaryBasicBlock *&BB : BasicBlocks) {
3724     for (MCInst &Inst : *BB) {
3725       if (!BC.MIB->isIndirectBranch(Inst))
3726         continue;
3727       uint64_t JTAddress = BC.MIB->getJumpTable(Inst);
3728       if (!JTAddress)
3729         continue;
3730       // This address can be inside another jump table, but we only consider
3731       // it ambiguous when the same start address is used, not the same JT
3732       // object.
3733       if (!JumpTables.count(JTAddress)) {
3734         JumpTables.insert(JTAddress);
3735         continue;
3736       }
3737       return true;
3738     }
3739   }
3740   return false;
3741 }
3742 
/// Give each indirect branch its own jump table: when two branches share one
/// JT object, pattern-match the instruction sequence computing the branch
/// target to find the instruction referencing the table, duplicate the
/// table, and retarget that instruction at the copy.
void BinaryFunction::disambiguateJumpTables(
    MCPlusBuilder::AllocatorIdTy AllocId) {
  assert((opts::JumpTables != JTS_BASIC && isSimple()) || !BC.HasRelocations);
  SmallPtrSet<JumpTable *, 4> JumpTables;
  for (BinaryBasicBlock *&BB : BasicBlocks) {
    for (MCInst &Inst : *BB) {
      if (!BC.MIB->isIndirectBranch(Inst))
        continue;
      JumpTable *JT = getJumpTable(Inst);
      if (!JT)
        continue;
      // First branch using this JT keeps it; later ones get duplicates.
      auto Iter = JumpTables.find(JT);
      if (Iter == JumpTables.end()) {
        JumpTables.insert(JT);
        continue;
      }
      // This instruction is an indirect jump using a jump table, but it is
      // using the same jump table of another jump. Try all our tricks to
      // extract the jump table symbol and make it point to a new, duplicated JT
      MCPhysReg BaseReg1;
      uint64_t Scale;
      const MCSymbol *Target;
      // In case we match if our first matcher, first instruction is the one to
      // patch
      MCInst *JTLoadInst = &Inst;
      // Try a standard indirect jump matcher, scale 8
      std::unique_ptr<MCPlusBuilder::MCInstMatcher> IndJmpMatcher =
          BC.MIB->matchIndJmp(BC.MIB->matchReg(BaseReg1),
                              BC.MIB->matchImm(Scale), BC.MIB->matchReg(),
                              /*Offset=*/BC.MIB->matchSymbol(Target));
      if (!IndJmpMatcher->match(
              *BC.MRI, *BC.MIB,
              MutableArrayRef<MCInst>(&*BB->begin(), &Inst + 1), -1) ||
          BaseReg1 != BC.MIB->getNoRegister() || Scale != 8) {
        MCPhysReg BaseReg2;
        uint64_t Offset;
        // Standard JT matching failed. Trying now:
        //     movq  "jt.2397/1"(,%rax,8), %rax
        //     jmpq  *%rax
        std::unique_ptr<MCPlusBuilder::MCInstMatcher> LoadMatcherOwner =
            BC.MIB->matchLoad(BC.MIB->matchReg(BaseReg1),
                              BC.MIB->matchImm(Scale), BC.MIB->matchReg(),
                              /*Offset=*/BC.MIB->matchSymbol(Target));
        MCPlusBuilder::MCInstMatcher *LoadMatcher = LoadMatcherOwner.get();
        std::unique_ptr<MCPlusBuilder::MCInstMatcher> IndJmpMatcher2 =
            BC.MIB->matchIndJmp(std::move(LoadMatcherOwner));
        if (!IndJmpMatcher2->match(
                *BC.MRI, *BC.MIB,
                MutableArrayRef<MCInst>(&*BB->begin(), &Inst + 1), -1) ||
            BaseReg1 != BC.MIB->getNoRegister() || Scale != 8) {
          // JT matching failed. Trying now:
          // PIC-style matcher, scale 4
          //    addq    %rdx, %rsi
          //    addq    %rdx, %rdi
          //    leaq    DATAat0x402450(%rip), %r11
          //    movslq  (%r11,%rdx,4), %rcx
          //    addq    %r11, %rcx
          //    jmpq    *%rcx # JUMPTABLE @0x402450
          std::unique_ptr<MCPlusBuilder::MCInstMatcher> PICIndJmpMatcher =
              BC.MIB->matchIndJmp(BC.MIB->matchAdd(
                  BC.MIB->matchReg(BaseReg1),
                  BC.MIB->matchLoad(BC.MIB->matchReg(BaseReg2),
                                    BC.MIB->matchImm(Scale), BC.MIB->matchReg(),
                                    BC.MIB->matchImm(Offset))));
          std::unique_ptr<MCPlusBuilder::MCInstMatcher> LEAMatcherOwner =
              BC.MIB->matchLoadAddr(BC.MIB->matchSymbol(Target));
          MCPlusBuilder::MCInstMatcher *LEAMatcher = LEAMatcherOwner.get();
          std::unique_ptr<MCPlusBuilder::MCInstMatcher> PICBaseAddrMatcher =
              BC.MIB->matchIndJmp(BC.MIB->matchAdd(std::move(LEAMatcherOwner),
                                                   BC.MIB->matchAnyOperand()));
          if (!PICIndJmpMatcher->match(
                  *BC.MRI, *BC.MIB,
                  MutableArrayRef<MCInst>(&*BB->begin(), &Inst + 1), -1) ||
              Scale != 4 || BaseReg1 != BaseReg2 || Offset != 0 ||
              !PICBaseAddrMatcher->match(
                  *BC.MRI, *BC.MIB,
                  MutableArrayRef<MCInst>(&*BB->begin(), &Inst + 1), -1)) {
            llvm_unreachable("Failed to extract jump table base");
            continue;
          }
          // Matched PIC, identify the instruction with the reference to the JT
          JTLoadInst = LEAMatcher->CurInst;
        } else {
          // Matched non-PIC
          JTLoadInst = LoadMatcher->CurInst;
        }
      }

      // Duplicate the table and point the matched instruction at the copy.
      uint64_t NewJumpTableID = 0;
      const MCSymbol *NewJTLabel;
      std::tie(NewJumpTableID, NewJTLabel) =
          BC.duplicateJumpTable(*this, JT, Target);
      {
        auto L = BC.scopeLock();
        BC.MIB->replaceMemOperandDisp(*JTLoadInst, NewJTLabel, BC.Ctx.get());
      }
      // We use a unique ID with the high bit set as address for this "injected"
      // jump table (not originally in the input binary).
      BC.MIB->setJumpTable(Inst, NewJumpTableID, 0, AllocId);
    }
  }
}
3845 
3846 bool BinaryFunction::replaceJumpTableEntryIn(BinaryBasicBlock *BB,
3847                                              BinaryBasicBlock *OldDest,
3848                                              BinaryBasicBlock *NewDest) {
3849   MCInst *Instr = BB->getLastNonPseudoInstr();
3850   if (!Instr || !BC.MIB->isIndirectBranch(*Instr))
3851     return false;
3852   uint64_t JTAddress = BC.MIB->getJumpTable(*Instr);
3853   assert(JTAddress && "Invalid jump table address");
3854   JumpTable *JT = getJumpTableContainingAddress(JTAddress);
3855   assert(JT && "No jump table structure for this indirect branch");
3856   bool Patched = JT->replaceDestination(JTAddress, OldDest->getLabel(),
3857                                         NewDest->getLabel());
3858   (void)Patched;
3859   assert(Patched && "Invalid entry to be replaced in jump table");
3860   return true;
3861 }
3862 
3863 BinaryBasicBlock *BinaryFunction::splitEdge(BinaryBasicBlock *From,
3864                                             BinaryBasicBlock *To) {
3865   // Create intermediate BB
3866   MCSymbol *Tmp;
3867   {
3868     auto L = BC.scopeLock();
3869     Tmp = BC.Ctx->createNamedTempSymbol("SplitEdge");
3870   }
3871   // Link new BBs to the original input offset of the From BB, so we can map
3872   // samples recorded in new BBs back to the original BB seem in the input
3873   // binary (if using BAT)
3874   std::unique_ptr<BinaryBasicBlock> NewBB = createBasicBlock(Tmp);
3875   NewBB->setOffset(From->getInputOffset());
3876   BinaryBasicBlock *NewBBPtr = NewBB.get();
3877 
3878   // Update "From" BB
3879   auto I = From->succ_begin();
3880   auto BI = From->branch_info_begin();
3881   for (; I != From->succ_end(); ++I) {
3882     if (*I == To)
3883       break;
3884     ++BI;
3885   }
3886   assert(I != From->succ_end() && "Invalid CFG edge in splitEdge!");
3887   uint64_t OrigCount = BI->Count;
3888   uint64_t OrigMispreds = BI->MispredictedCount;
3889   replaceJumpTableEntryIn(From, To, NewBBPtr);
3890   From->replaceSuccessor(To, NewBBPtr, OrigCount, OrigMispreds);
3891 
3892   NewBB->addSuccessor(To, OrigCount, OrigMispreds);
3893   NewBB->setExecutionCount(OrigCount);
3894   NewBB->setIsCold(From->isCold());
3895 
3896   // Update CFI and BB layout with new intermediate BB
3897   std::vector<std::unique_ptr<BinaryBasicBlock>> NewBBs;
3898   NewBBs.emplace_back(std::move(NewBB));
3899   insertBasicBlocks(From, std::move(NewBBs), true, true,
3900                     /*RecomputeLandingPads=*/false);
3901   return NewBBPtr;
3902 }
3903 
/// Remove CFG edges believed to be artifacts: fall-throughs into landing
/// pads without an explicit branch, and fall-through successors of calls to
/// known no-return PLT entries.
void BinaryFunction::deleteConservativeEdges() {
  // Our goal is to aggressively remove edges from the CFG that we believe are
  // wrong. This is used for instrumentation, where it is safe to remove
  // fallthrough edges because we won't reorder blocks.
  for (auto I = BasicBlocks.begin(), E = BasicBlocks.end(); I != E; ++I) {
    BinaryBasicBlock *BB = *I;
    // Only single-successor, non-empty blocks are candidates.
    if (BB->succ_size() != 1 || BB->size() == 0)
      continue;

    auto NextBB = std::next(I);
    MCInst *Last = BB->getLastNonPseudoInstr();
    // Fallthrough is a landing pad? Delete this edge (as long as we don't
    // have a direct jump to it)
    if ((*BB->succ_begin())->isLandingPad() && NextBB != E &&
        *BB->succ_begin() == *NextBB && Last && !BC.MIB->isBranch(*Last)) {
      BB->removeAllSuccessors();
      continue;
    }

    // Look for suspicious calls at the end of BB where gcc may optimize it and
    // remove the jump to the epilogue when it knows the call won't return.
    if (!Last || !BC.MIB->isCall(*Last))
      continue;

    const MCSymbol *CalleeSymbol = BC.MIB->getTargetSymbol(*Last);
    if (!CalleeSymbol)
      continue;

    // Keep the edge unless the callee is a known no-return PLT function.
    StringRef CalleeName = CalleeSymbol->getName();
    if (CalleeName != "__cxa_throw@PLT" && CalleeName != "_Unwind_Resume@PLT" &&
        CalleeName != "__cxa_rethrow@PLT" && CalleeName != "exit@PLT" &&
        CalleeName != "abort@PLT")
      continue;

    BB->removeAllSuccessors();
  }
}
3941 
3942 bool BinaryFunction::isSymbolValidInScope(const SymbolRef &Symbol,
3943                                           uint64_t SymbolSize) const {
3944   // If this symbol is in a different section from the one where the
3945   // function symbol is, don't consider it as valid.
3946   if (!getOriginSection()->containsAddress(
3947           cantFail(Symbol.getAddress(), "cannot get symbol address")))
3948     return false;
3949 
3950   // Some symbols are tolerated inside function bodies, others are not.
3951   // The real function boundaries may not be known at this point.
3952   if (BC.isMarker(Symbol))
3953     return true;
3954 
3955   // It's okay to have a zero-sized symbol in the middle of non-zero-sized
3956   // function.
3957   if (SymbolSize == 0 && containsAddress(cantFail(Symbol.getAddress())))
3958     return true;
3959 
3960   if (cantFail(Symbol.getType()) != SymbolRef::ST_Unknown)
3961     return false;
3962 
3963   if (cantFail(Symbol.getFlags()) & SymbolRef::SF_Global)
3964     return false;
3965 
3966   return true;
3967 }
3968 
3969 void BinaryFunction::adjustExecutionCount(uint64_t Count) {
3970   if (getKnownExecutionCount() == 0 || Count == 0)
3971     return;
3972 
3973   if (ExecutionCount < Count)
3974     Count = ExecutionCount;
3975 
3976   double AdjustmentRatio = ((double)ExecutionCount - Count) / ExecutionCount;
3977   if (AdjustmentRatio < 0.0)
3978     AdjustmentRatio = 0.0;
3979 
3980   for (BinaryBasicBlock &BB : blocks())
3981     BB.adjustExecutionCount(AdjustmentRatio);
3982 
3983   ExecutionCount -= Count;
3984 }
3985 
3986 BinaryFunction::~BinaryFunction() {
3987   for (BinaryBasicBlock *BB : BasicBlocks)
3988     delete BB;
3989   for (BinaryBasicBlock *BB : DeletedBasicBlocks)
3990     delete BB;
3991 }
3992 
void BinaryFunction::calculateLoopInfo() {
  // Build the dominator tree for this function and run loop discovery on it.
  BinaryDominatorTree DomTree;
  DomTree.recalculate(*this);
  BLI.reset(new BinaryLoopInfo());
  BLI->analyze(DomTree);

  // Traverse discovered loops and add depth and profile information.
  // Seed the work stack with the top-level loops; each of those counts as an
  // outer loop.
  std::stack<BinaryLoop *> St;
  for (auto I = BLI->begin(), E = BLI->end(); I != E; ++I) {
    St.push(*I);
    ++BLI->OuterLoops;
  }

  // Process loops depth-first, pushing nested loops as they are encountered.
  while (!St.empty()) {
    BinaryLoop *L = St.top();
    St.pop();
    ++BLI->TotalLoops;
    BLI->MaximumDepth = std::max(L->getLoopDepth(), BLI->MaximumDepth);

    // Add nested loops in the stack.
    for (BinaryLoop::iterator I = L->begin(), E = L->end(); I != E; ++I)
      St.push(*I);

    // Skip if no valid profile is found.
    if (!hasValidProfile()) {
      L->EntryCount = COUNT_NO_PROFILE;
      L->ExitCount = COUNT_NO_PROFILE;
      L->TotalBackEdgeCount = COUNT_NO_PROFILE;
      continue;
    }

    // Compute back edge count: the sum of branch counts over every
    // latch->header edge. The branch-info iterator is advanced in lock-step
    // with the successor iterator, so BI always describes the edge to Succ.
    SmallVector<BinaryBasicBlock *, 1> Latches;
    L->getLoopLatches(Latches);

    for (BinaryBasicBlock *Latch : Latches) {
      auto BI = Latch->branch_info_begin();
      for (BinaryBasicBlock *Succ : Latch->successors()) {
        if (Succ == L->getHeader()) {
          assert(BI->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
                 "profile data not found");
          L->TotalBackEdgeCount += BI->Count;
        }
        ++BI;
      }
    }

    // Compute entry count: header executions not attributable to back edges.
    L->EntryCount = L->getHeader()->getExecutionCount() - L->TotalBackEdgeCount;

    // Compute exit count: the sum of branch counts over every
    // exiting-block -> exit-target edge, again walking branch info in
    // lock-step with the successor list.
    SmallVector<BinaryLoop::Edge, 1> ExitEdges;
    L->getExitEdges(ExitEdges);
    for (BinaryLoop::Edge &Exit : ExitEdges) {
      const BinaryBasicBlock *Exiting = Exit.first;
      const BinaryBasicBlock *ExitTarget = Exit.second;
      auto BI = Exiting->branch_info_begin();
      for (BinaryBasicBlock *Succ : Exiting->successors()) {
        if (Succ == ExitTarget) {
          assert(BI->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
                 "profile data not found");
          L->ExitCount += BI->Count;
        }
        ++BI;
      }
    }
  }
}
4062 
void BinaryFunction::updateOutputValues(const BOLTLinker &Linker) {
  // A function that was not emitted keeps its input address and size.
  if (!isEmitted()) {
    assert(!isInjected() && "injected function should be emitted");
    setOutputAddress(getAddress());
    setOutputSize(getSize());
    return;
  }

  // Take the output address/size of the main fragment from the linker's view
  // of the function's entry symbol.
  const auto SymbolInfo = Linker.lookupSymbolInfo(getSymbol()->getName());
  assert(SymbolInfo && "Cannot find function entry symbol");
  setOutputAddress(SymbolInfo->Address);
  setOutputSize(SymbolInfo->Size);

  if (BC.HasRelocations || isInjected()) {
    // Record the output location of the constant island and of any BinaryData
    // objects that were registered at offsets inside it.
    if (hasConstantIsland()) {
      const auto DataAddress =
          Linker.lookupSymbol(getFunctionConstantIslandLabel()->getName());
      assert(DataAddress && "Cannot find function CI symbol");
      setOutputDataAddress(*DataAddress);
      for (auto It : Islands->Offsets) {
        const uint64_t OldOffset = It.first;
        BinaryData *BD = BC.getBinaryDataAtAddress(getAddress() + OldOffset);
        if (!BD)
          continue;

        MCSymbol *Symbol = It.second;
        const auto NewAddress = Linker.lookupSymbol(Symbol->getName());
        assert(NewAddress && "Cannot find CI symbol");
        auto &Section = *getCodeSection();
        const auto NewOffset = *NewAddress - Section.getOutputAddress();
        BD->setOutputLocation(Section, NewOffset);
      }
    }
    // Update output address and image size of every split (cold) fragment.
    if (isSplit()) {
      for (FunctionFragment &FF : getLayout().getSplitFragments()) {
        ErrorOr<BinarySection &> ColdSection =
            getCodeSection(FF.getFragmentNum());
        // If fragment is empty, cold section might not exist
        if (FF.empty() && ColdSection.getError())
          continue;

        const MCSymbol *ColdStartSymbol = getSymbol(FF.getFragmentNum());
        // If fragment is empty, symbol might have not been emitted
        if (FF.empty() && (!ColdStartSymbol || !ColdStartSymbol->isDefined()) &&
            !hasConstantIsland())
          continue;
        assert(ColdStartSymbol && ColdStartSymbol->isDefined() &&
               "split function should have defined cold symbol");
        const auto ColdStartSymbolInfo =
            Linker.lookupSymbolInfo(ColdStartSymbol->getName());
        assert(ColdStartSymbolInfo && "Cannot find cold start symbol");
        FF.setAddress(ColdStartSymbolInfo->Address);
        FF.setImageSize(ColdStartSymbolInfo->Size);
        if (hasConstantIsland()) {
          const auto DataAddress = Linker.lookupSymbol(
              getFunctionColdConstantIslandLabel()->getName());
          assert(DataAddress && "Cannot find cold CI symbol");
          setOutputColdDataAddress(*DataAddress);
        }
      }
    }
  }

  // Update basic block output ranges for the debug info, if we have
  // secondary entry points in the symbol table to update or if writing BAT.
  if (!requiresAddressMap())
    return;

  // Output ranges should match the input if the body hasn't changed.
  if (!isSimple() && !BC.HasRelocations)
    return;

  // AArch64 may have functions that only contains a constant island (no code).
  if (getLayout().block_empty())
    return;

  // Assign an output start address to every basic block, and derive each
  // block's end address from the start of the next block in layout order.
  for (FunctionFragment &FF : getLayout().fragments()) {
    if (FF.empty())
      continue;

    const uint64_t FragmentBaseAddress =
        getCodeSection(isSimple() ? FF.getFragmentNum() : FragmentNum::main())
            ->getOutputAddress();

    BinaryBasicBlock *PrevBB = nullptr;
    for (BinaryBasicBlock *const BB : FF) {
      assert(BB->getLabel()->isDefined() && "symbol should be defined");
      // Without relocations the fragment base must agree with the recorded
      // output addresses; FragmentBaseAddress is only used by the assertions.
      if (!BC.HasRelocations) {
        if (BB->isSplit())
          assert(FragmentBaseAddress == FF.getAddress());
        else
          assert(FragmentBaseAddress == getOutputAddress());
        (void)FragmentBaseAddress;
      }

      // Injected functions likely will fail lookup, as they have no
      // input range. Just assign the BB the output address of the
      // function.
      auto MaybeBBAddress =
          BC.getIOAddressMap().lookup(BB->getInputOffset() + getAddress());
      const uint64_t BBAddress = MaybeBBAddress  ? *MaybeBBAddress
                                 : BB->isSplit() ? FF.getAddress()
                                                 : getOutputAddress();
      BB->setOutputStartAddress(BBAddress);

      if (PrevBB)
        PrevBB->setOutputEndAddress(BBAddress);
      PrevBB = BB;
    }

    // The last block in the fragment ends where the fragment image (split
    // fragment) or the whole function (main fragment) ends.
    PrevBB->setOutputEndAddress(PrevBB->isSplit()
                                    ? FF.getAddress() + FF.getImageSize()
                                    : getOutputAddress() + getOutputSize());
  }
}
4178 
4179 DebugAddressRangesVector BinaryFunction::getOutputAddressRanges() const {
4180   DebugAddressRangesVector OutputRanges;
4181 
4182   if (isFolded())
4183     return OutputRanges;
4184 
4185   if (IsFragment)
4186     return OutputRanges;
4187 
4188   OutputRanges.emplace_back(getOutputAddress(),
4189                             getOutputAddress() + getOutputSize());
4190   if (isSplit()) {
4191     assert(isEmitted() && "split function should be emitted");
4192     for (const FunctionFragment &FF : getLayout().getSplitFragments())
4193       OutputRanges.emplace_back(FF.getAddress(),
4194                                 FF.getAddress() + FF.getImageSize());
4195   }
4196 
4197   if (isSimple())
4198     return OutputRanges;
4199 
4200   for (BinaryFunction *Frag : Fragments) {
4201     assert(!Frag->isSimple() &&
4202            "fragment of non-simple function should also be non-simple");
4203     OutputRanges.emplace_back(Frag->getOutputAddress(),
4204                               Frag->getOutputAddress() + Frag->getOutputSize());
4205   }
4206 
4207   return OutputRanges;
4208 }
4209 
4210 uint64_t BinaryFunction::translateInputToOutputAddress(uint64_t Address) const {
4211   if (isFolded())
4212     return 0;
4213 
4214   // If the function hasn't changed return the same address.
4215   if (!isEmitted())
4216     return Address;
4217 
4218   if (Address < getAddress())
4219     return 0;
4220 
4221   // Check if the address is associated with an instruction that is tracked
4222   // by address translation.
4223   if (auto OutputAddress = BC.getIOAddressMap().lookup(Address))
4224     return *OutputAddress;
4225 
4226   // FIXME: #18950828 - we rely on relative offsets inside basic blocks to stay
4227   //        intact. Instead we can use pseudo instructions and/or annotations.
4228   const uint64_t Offset = Address - getAddress();
4229   const BinaryBasicBlock *BB = getBasicBlockContainingOffset(Offset);
4230   if (!BB) {
4231     // Special case for address immediately past the end of the function.
4232     if (Offset == getSize())
4233       return getOutputAddress() + getOutputSize();
4234 
4235     return 0;
4236   }
4237 
4238   return std::min(BB->getOutputAddressRange().first + Offset - BB->getOffset(),
4239                   BB->getOutputAddressRange().second);
4240 }
4241 
DebugAddressRangesVector BinaryFunction::translateInputToOutputRanges(
    const DWARFAddressRangesVector &InputRanges) const {
  DebugAddressRangesVector OutputRanges;

  // Folded functions produce no output ranges.
  if (isFolded())
    return OutputRanges;

  // If the function hasn't changed return the same ranges.
  if (!isEmitted()) {
    OutputRanges.resize(InputRanges.size());
    llvm::transform(InputRanges, OutputRanges.begin(),
                    [](const DWARFAddressRange &Range) {
                      return DebugAddressRange(Range.LowPC, Range.HighPC);
                    });
    return OutputRanges;
  }

  // Even though we will merge ranges in a post-processing pass, we attempt to
  // merge them in a main processing loop as it improves the processing time.
  uint64_t PrevEndAddress = 0;
  for (const DWARFAddressRange &Range : InputRanges) {
    // A range starting outside the function cannot be translated.
    if (!containsAddress(Range.LowPC)) {
      LLVM_DEBUG(
          dbgs() << "BOLT-DEBUG: invalid debug address range detected for "
                 << *this << " : [0x" << Twine::utohexstr(Range.LowPC) << ", 0x"
                 << Twine::utohexstr(Range.HighPC) << "]\n");
      PrevEndAddress = 0;
      continue;
    }
    // Convert the range to function-relative offsets; clamp the end at the
    // function size.
    uint64_t InputOffset = Range.LowPC - getAddress();
    const uint64_t InputEndOffset =
        std::min(Range.HighPC - getAddress(), getSize());

    // Locate the basic block containing the start offset: upper_bound yields
    // the first block starting after InputOffset, so step back one entry.
    auto BBI = llvm::upper_bound(BasicBlockOffsets,
                                 BasicBlockOffset(InputOffset, nullptr),
                                 CompareBasicBlockOffsets());
    --BBI;
    // Walk consecutive blocks, emitting one output sub-range per block the
    // input range overlaps.
    do {
      const BinaryBasicBlock *BB = BBI->second;
      if (InputOffset < BB->getOffset() || InputOffset >= BB->getEndOffset()) {
        LLVM_DEBUG(
            dbgs() << "BOLT-DEBUG: invalid debug address range detected for "
                   << *this << " : [0x" << Twine::utohexstr(Range.LowPC)
                   << ", 0x" << Twine::utohexstr(Range.HighPC) << "]\n");
        PrevEndAddress = 0;
        break;
      }

      // Skip the range if the block was deleted.
      if (const uint64_t OutputStart = BB->getOutputAddressRange().first) {
        const uint64_t StartAddress =
            OutputStart + InputOffset - BB->getOffset();
        uint64_t EndAddress = BB->getOutputAddressRange().second;
        if (InputEndOffset < BB->getEndOffset())
          EndAddress = StartAddress + InputEndOffset - InputOffset;

        // Extend the previously emitted range when this piece is adjacent
        // to it; otherwise start a new one.
        if (StartAddress == PrevEndAddress) {
          OutputRanges.back().HighPC =
              std::max(OutputRanges.back().HighPC, EndAddress);
        } else {
          OutputRanges.emplace_back(StartAddress,
                                    std::max(StartAddress, EndAddress));
        }
        PrevEndAddress = OutputRanges.back().HighPC;
      }

      // Continue from the start of the next block.
      InputOffset = BB->getEndOffset();
      ++BBI;
    } while (InputOffset < InputEndOffset);
  }

  // Post-processing pass to sort and merge ranges.
  llvm::sort(OutputRanges);
  DebugAddressRangesVector MergedRanges;
  PrevEndAddress = 0;
  for (const DebugAddressRange &Range : OutputRanges) {
    if (Range.LowPC <= PrevEndAddress) {
      MergedRanges.back().HighPC =
          std::max(MergedRanges.back().HighPC, Range.HighPC);
    } else {
      MergedRanges.emplace_back(Range.LowPC, Range.HighPC);
    }
    PrevEndAddress = MergedRanges.back().HighPC;
  }

  return MergedRanges;
}
4329 
4330 MCInst *BinaryFunction::getInstructionAtOffset(uint64_t Offset) {
4331   if (CurrentState == State::Disassembled) {
4332     auto II = Instructions.find(Offset);
4333     return (II == Instructions.end()) ? nullptr : &II->second;
4334   } else if (CurrentState == State::CFG) {
4335     BinaryBasicBlock *BB = getBasicBlockContainingOffset(Offset);
4336     if (!BB)
4337       return nullptr;
4338 
4339     for (MCInst &Inst : *BB) {
4340       constexpr uint32_t InvalidOffset = std::numeric_limits<uint32_t>::max();
4341       if (Offset == BC.MIB->getOffsetWithDefault(Inst, InvalidOffset))
4342         return &Inst;
4343     }
4344 
4345     if (MCInst *LastInstr = BB->getLastNonPseudoInstr()) {
4346       const uint32_t Size =
4347           BC.MIB->getAnnotationWithDefault<uint32_t>(*LastInstr, "Size");
4348       if (BB->getEndOffset() - Offset == Size)
4349         return LastInstr;
4350     }
4351 
4352     return nullptr;
4353   } else {
4354     llvm_unreachable("invalid CFG state to use getInstructionAtOffset()");
4355   }
4356 }
4357 
DebugLocationsVector BinaryFunction::translateInputToOutputLocationList(
    const DebugLocationsVector &InputLL) const {
  DebugLocationsVector OutputLL;

  // Folded functions produce no location list.
  if (isFolded())
    return OutputLL;

  // If the function hasn't changed - there's nothing to update.
  if (!isEmitted())
    return InputLL;

  // Translate each input entry block-by-block, merging consecutive entries
  // that are adjacent and share the same expression.
  uint64_t PrevEndAddress = 0;
  SmallVectorImpl<uint8_t> *PrevExpr = nullptr;
  for (const DebugLocationEntry &Entry : InputLL) {
    const uint64_t Start = Entry.LowPC;
    const uint64_t End = Entry.HighPC;
    if (!containsAddress(Start)) {
      LLVM_DEBUG(dbgs() << "BOLT-DEBUG: invalid debug address range detected "
                           "for "
                        << *this << " : [0x" << Twine::utohexstr(Start)
                        << ", 0x" << Twine::utohexstr(End) << "]\n");
      continue;
    }
    // Convert to function-relative offsets; clamp the end at function size.
    uint64_t InputOffset = Start - getAddress();
    const uint64_t InputEndOffset = std::min(End - getAddress(), getSize());
    // Locate the basic block containing the start offset (upper_bound gives
    // the first block starting after it, so step back one entry).
    auto BBI = llvm::upper_bound(BasicBlockOffsets,
                                 BasicBlockOffset(InputOffset, nullptr),
                                 CompareBasicBlockOffsets());
    --BBI;
    do {
      const BinaryBasicBlock *BB = BBI->second;
      if (InputOffset < BB->getOffset() || InputOffset >= BB->getEndOffset()) {
        LLVM_DEBUG(dbgs() << "BOLT-DEBUG: invalid debug address range detected "
                             "for "
                          << *this << " : [0x" << Twine::utohexstr(Start)
                          << ", 0x" << Twine::utohexstr(End) << "]\n");
        PrevEndAddress = 0;
        break;
      }

      // Skip the range if the block was deleted.
      if (const uint64_t OutputStart = BB->getOutputAddressRange().first) {
        const uint64_t StartAddress =
            OutputStart + InputOffset - BB->getOffset();
        uint64_t EndAddress = BB->getOutputAddressRange().second;
        if (InputEndOffset < BB->getEndOffset())
          EndAddress = StartAddress + InputEndOffset - InputOffset;

        // NOTE: *PrevExpr is only reached when StartAddress == PrevEndAddress;
        // since OutputStart (and hence StartAddress) is non-zero here while an
        // unset PrevEndAddress is 0, the short-circuit keeps a null PrevExpr
        // from being dereferenced.
        if (StartAddress == PrevEndAddress && Entry.Expr == *PrevExpr) {
          OutputLL.back().HighPC = std::max(OutputLL.back().HighPC, EndAddress);
        } else {
          OutputLL.emplace_back(DebugLocationEntry{
              StartAddress, std::max(StartAddress, EndAddress), Entry.Expr});
        }
        PrevEndAddress = OutputLL.back().HighPC;
        PrevExpr = &OutputLL.back().Expr;
      }

      ++BBI;
      InputOffset = BB->getEndOffset();
    } while (InputOffset < InputEndOffset);
  }

  // Sort and merge adjacent entries with identical location.
  llvm::stable_sort(
      OutputLL, [](const DebugLocationEntry &A, const DebugLocationEntry &B) {
        return A.LowPC < B.LowPC;
      });
  DebugLocationsVector MergedLL;
  PrevEndAddress = 0;
  PrevExpr = nullptr;
  for (const DebugLocationEntry &Entry : OutputLL) {
    // NOTE(review): this relies on output LowPC values being non-zero so the
    // first iteration (PrevEndAddress == 0, PrevExpr == nullptr) takes the
    // else branch and never dereferences PrevExpr — confirm upstream.
    if (Entry.LowPC <= PrevEndAddress && *PrevExpr == Entry.Expr) {
      MergedLL.back().HighPC = std::max(Entry.HighPC, MergedLL.back().HighPC);
    } else {
      // Clip the start so merged entries never overlap.
      const uint64_t Begin = std::max(Entry.LowPC, PrevEndAddress);
      const uint64_t End = std::max(Begin, Entry.HighPC);
      MergedLL.emplace_back(DebugLocationEntry{Begin, End, Entry.Expr});
    }
    PrevEndAddress = MergedLL.back().HighPC;
    PrevExpr = &MergedLL.back().Expr;
  }

  return MergedLL;
}
4443 
4444 void BinaryFunction::printLoopInfo(raw_ostream &OS) const {
4445   if (!opts::shouldPrint(*this))
4446     return;
4447 
4448   OS << "Loop Info for Function \"" << *this << "\"";
4449   if (hasValidProfile())
4450     OS << " (count: " << getExecutionCount() << ")";
4451   OS << "\n";
4452 
4453   std::stack<BinaryLoop *> St;
4454   for (BinaryLoop *L : *BLI)
4455     St.push(L);
4456   while (!St.empty()) {
4457     BinaryLoop *L = St.top();
4458     St.pop();
4459 
4460     for (BinaryLoop *Inner : *L)
4461       St.push(Inner);
4462 
4463     if (!hasValidProfile())
4464       continue;
4465 
4466     OS << (L->getLoopDepth() > 1 ? "Nested" : "Outer")
4467        << " loop header: " << L->getHeader()->getName();
4468     OS << "\n";
4469     OS << "Loop basic blocks: ";
4470     ListSeparator LS;
4471     for (BinaryBasicBlock *BB : L->blocks())
4472       OS << LS << BB->getName();
4473     OS << "\n";
4474     if (hasValidProfile()) {
4475       OS << "Total back edge count: " << L->TotalBackEdgeCount << "\n";
4476       OS << "Loop entry count: " << L->EntryCount << "\n";
4477       OS << "Loop exit count: " << L->ExitCount << "\n";
4478       if (L->EntryCount > 0) {
4479         OS << "Average iters per entry: "
4480            << format("%.4lf", (double)L->TotalBackEdgeCount / L->EntryCount)
4481            << "\n";
4482       }
4483     }
4484     OS << "----\n";
4485   }
4486 
4487   OS << "Total number of loops: " << BLI->TotalLoops << "\n";
4488   OS << "Number of outer loops: " << BLI->OuterLoops << "\n";
4489   OS << "Maximum nested loop depth: " << BLI->MaximumDepth << "\n\n";
4490 }
4491 
4492 bool BinaryFunction::isAArch64Veneer() const {
4493   if (empty() || hasIslandsInfo())
4494     return false;
4495 
4496   BinaryBasicBlock &BB = **BasicBlocks.begin();
4497   for (MCInst &Inst : BB)
4498     if (!BC.MIB->hasAnnotation(Inst, "AArch64Veneer"))
4499       return false;
4500 
4501   for (auto I = BasicBlocks.begin() + 1, E = BasicBlocks.end(); I != E; ++I) {
4502     for (MCInst &Inst : **I)
4503       if (!BC.MIB->isNoop(Inst))
4504         return false;
4505   }
4506 
4507   return true;
4508 }
4509 
4510 void BinaryFunction::addRelocation(uint64_t Address, MCSymbol *Symbol,
4511                                    uint64_t RelType, uint64_t Addend,
4512                                    uint64_t Value) {
4513   assert(Address >= getAddress() && Address < getAddress() + getMaxSize() &&
4514          "address is outside of the function");
4515   uint64_t Offset = Address - getAddress();
4516   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: addRelocation in "
4517                     << formatv("{0}@{1:x} against {2}\n", *this, Offset,
4518                                (Symbol ? Symbol->getName() : "<undef>")));
4519   bool IsCI = BC.isAArch64() && isInConstantIsland(Address);
4520   std::map<uint64_t, Relocation> &Rels =
4521       IsCI ? Islands->Relocations : Relocations;
4522   if (BC.MIB->shouldRecordCodeRelocation(RelType))
4523     Rels[Offset] = Relocation{Offset, Symbol, RelType, Addend, Value};
4524 }
4525 
4526 } // namespace bolt
4527 } // namespace llvm
4528