1 //===- bolt/Core/BinaryFunction.cpp - Low-level function ------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the BinaryFunction class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "bolt/Core/BinaryFunction.h"
14 #include "bolt/Core/BinaryBasicBlock.h"
15 #include "bolt/Core/DynoStats.h"
16 #include "bolt/Core/HashUtilities.h"
17 #include "bolt/Core/MCPlusBuilder.h"
18 #include "bolt/Utils/NameResolver.h"
19 #include "bolt/Utils/NameShortener.h"
20 #include "bolt/Utils/Utils.h"
21 #include "llvm/ADT/STLExtras.h"
22 #include "llvm/ADT/SmallSet.h"
23 #include "llvm/ADT/StringExtras.h"
24 #include "llvm/ADT/StringRef.h"
25 #include "llvm/Demangle/Demangle.h"
26 #include "llvm/MC/MCAsmInfo.h"
27 #include "llvm/MC/MCContext.h"
28 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
29 #include "llvm/MC/MCExpr.h"
30 #include "llvm/MC/MCInst.h"
31 #include "llvm/MC/MCInstPrinter.h"
32 #include "llvm/MC/MCRegisterInfo.h"
33 #include "llvm/MC/MCSymbol.h"
34 #include "llvm/Object/ObjectFile.h"
35 #include "llvm/Support/CommandLine.h"
36 #include "llvm/Support/Debug.h"
37 #include "llvm/Support/GenericDomTreeConstruction.h"
38 #include "llvm/Support/GenericLoopInfoImpl.h"
39 #include "llvm/Support/GraphWriter.h"
40 #include "llvm/Support/LEB128.h"
41 #include "llvm/Support/Regex.h"
42 #include "llvm/Support/Timer.h"
43 #include "llvm/Support/raw_ostream.h"
44 #include "llvm/Support/xxhash.h"
45 #include <functional>
46 #include <limits>
47 #include <numeric>
48 #include <stack>
49 #include <string>
50 
51 #define DEBUG_TYPE "bolt"
52 
53 using namespace llvm;
54 using namespace bolt;
55 
56 namespace opts {
57 
58 extern cl::OptionCategory BoltCategory;
59 extern cl::OptionCategory BoltOptCategory;
60 
61 extern cl::opt<bool> EnableBAT;
62 extern cl::opt<bool> Instrument;
63 extern cl::opt<bool> StrictMode;
64 extern cl::opt<bool> UpdateDebugSections;
65 extern cl::opt<unsigned> Verbosity;
66 
67 extern bool processAllFunctions();
68 
69 cl::opt<bool> CheckEncoding(
70     "check-encoding",
71     cl::desc("perform verification of LLVM instruction encoding/decoding. "
72              "Every instruction in the input is decoded and re-encoded. "
73              "If the resulting bytes do not match the input, a warning message "
74              "is printed."),
75     cl::Hidden, cl::cat(BoltCategory));
76 
77 static cl::opt<bool> DotToolTipCode(
78     "dot-tooltip-code",
79     cl::desc("add basic block instructions as tool tips on nodes"), cl::Hidden,
80     cl::cat(BoltCategory));
81 
82 cl::opt<JumpTableSupportLevel>
83 JumpTables("jump-tables",
84   cl::desc("jump tables support (default=basic)"),
85   cl::init(JTS_BASIC),
86   cl::values(
87       clEnumValN(JTS_NONE, "none",
88                  "do not optimize functions with jump tables"),
89       clEnumValN(JTS_BASIC, "basic",
90                  "optimize functions with jump tables"),
91       clEnumValN(JTS_MOVE, "move",
92                  "move jump tables to a separate section"),
93       clEnumValN(JTS_SPLIT, "split",
94                  "split jump tables section into hot and cold based on "
95                  "function execution frequency"),
96       clEnumValN(JTS_AGGRESSIVE, "aggressive",
97                  "aggressively split jump tables section based on usage "
98                  "of the tables")),
99   cl::ZeroOrMore,
100   cl::cat(BoltOptCategory));
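// For example (a hypothetical invocation; the flag name and values come from
// the definition above, with "basic" as the default when the flag is omitted):
//   llvm-bolt app -o app.bolt --jump-tables=move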
101 
102 static cl::opt<bool> NoScan(
103     "no-scan",
104     cl::desc(
105         "do not scan cold functions for external references (may result in "
106         "a slower binary)"),
107     cl::Hidden, cl::cat(BoltOptCategory));
108 
109 cl::opt<bool>
110     PreserveBlocksAlignment("preserve-blocks-alignment",
111                             cl::desc("try to preserve basic block alignment"),
112                             cl::cat(BoltOptCategory));
113 
114 static cl::opt<bool> PrintOutputAddressRange(
115     "print-output-address-range",
116     cl::desc(
117         "print output address range for each basic block in the function when "
118         "BinaryFunction::print is called"),
119     cl::Hidden, cl::cat(BoltOptCategory));
120 
121 cl::opt<bool>
122 PrintDynoStats("dyno-stats",
123   cl::desc("print execution info based on profile"),
124   cl::cat(BoltCategory));
125 
126 static cl::opt<bool>
127 PrintDynoStatsOnly("print-dyno-stats-only",
128   cl::desc("while printing functions, output dyno-stats and skip instructions"),
129   cl::init(false),
130   cl::Hidden,
131   cl::cat(BoltCategory));
132 
133 static cl::list<std::string>
134 PrintOnly("print-only",
135   cl::CommaSeparated,
136   cl::desc("list of functions to print"),
137   cl::value_desc("func1,func2,func3,..."),
138   cl::Hidden,
139   cl::cat(BoltCategory));
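// Illustrative usage: each entry is a regex matched against all of the
// function's names, anchored at both ends by BinaryFunction::hasNameRegex:
//   llvm-bolt app -o app.bolt --print-only=main,_Z3foo.*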
140 
141 cl::opt<bool>
142     TimeBuild("time-build",
143               cl::desc("print time spent constructing binary functions"),
144               cl::Hidden, cl::cat(BoltCategory));
145 
146 cl::opt<bool>
147 TrapOnAVX512("trap-avx512",
148   cl::desc("in relocation mode trap upon entry to any function that uses "
149             "AVX-512 instructions"),
150   cl::init(false),
151   cl::ZeroOrMore,
152   cl::Hidden,
153   cl::cat(BoltCategory));
154 
155 bool shouldPrint(const BinaryFunction &Function) {
156   if (Function.isIgnored())
157     return false;
158 
159   if (PrintOnly.empty())
160     return true;
161 
162   for (std::string &Name : opts::PrintOnly) {
163     if (Function.hasNameRegex(Name)) {
164       return true;
165     }
166   }
167 
168   return false;
169 }
170 
171 } // namespace opts
172 
173 namespace llvm {
174 namespace bolt {
175 
176 template <typename R> static bool emptyRange(const R &Range) {
177   return Range.begin() == Range.end();
178 }
179 
180 /// Gets debug line information for the instruction located at the given
181 /// address in the original binary. The SMLoc's pointer is used
182 /// to point to this information, which is represented by a
183 /// DebugLineTableRowRef. The returned pointer is null if no debug line
184 /// information for this instruction was found.
185 static SMLoc findDebugLineInformationForInstructionAt(
186     uint64_t Address, DWARFUnit *Unit,
187     const DWARFDebugLine::LineTable *LineTable) {
188   // We use the pointer in SMLoc to store an instance of DebugLineTableRowRef,
189   // which occupies 64 bits. Thus, we can only proceed if the struct fits into
190   // the pointer itself.
191   static_assert(
192       sizeof(decltype(SMLoc().getPointer())) >= sizeof(DebugLineTableRowRef),
193       "Cannot fit instruction debug line information into SMLoc's pointer");
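  // Illustration of the packing below (values made up): for the row at index
  // 41 of the line table of the unit at offset 0x2a, the two 32-bit fields of
  // DebugLineTableRowRef are written into the 64-bit pointer value as
  // {DwCompileUnitIndex = 0x2a, RowIndex = 42}; RowIndex is stored off by one
  // so that zero can denote an invalid row (NULL_ROW).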
194 
195   SMLoc NullResult = DebugLineTableRowRef::NULL_ROW.toSMLoc();
196   uint32_t RowIndex = LineTable->lookupAddress(
197       {Address, object::SectionedAddress::UndefSection});
198   if (RowIndex == LineTable->UnknownRowIndex)
199     return NullResult;
200 
201   assert(RowIndex < LineTable->Rows.size() &&
202          "Line Table lookup returned invalid index.");
203 
204   decltype(SMLoc().getPointer()) Ptr;
205   DebugLineTableRowRef *InstructionLocation =
206       reinterpret_cast<DebugLineTableRowRef *>(&Ptr);
207 
208   InstructionLocation->DwCompileUnitIndex = Unit->getOffset();
209   InstructionLocation->RowIndex = RowIndex + 1;
210 
211   return SMLoc::getFromPointer(Ptr);
212 }
213 
214 static std::string buildSectionName(StringRef Prefix, StringRef Name,
215                                     const BinaryContext &BC) {
216   if (BC.isELF())
217     return (Prefix + Name).str();
218   static NameShortener NS;
219   return (Prefix + Twine(NS.getID(Name))).str();
220 }
221 
222 static raw_ostream &operator<<(raw_ostream &OS,
223                                const BinaryFunction::State State) {
224   switch (State) {
225   case BinaryFunction::State::Empty:         OS << "empty"; break;
226   case BinaryFunction::State::Disassembled:  OS << "disassembled"; break;
227   case BinaryFunction::State::CFG:           OS << "CFG constructed"; break;
228   case BinaryFunction::State::CFG_Finalized: OS << "CFG finalized"; break;
229   case BinaryFunction::State::EmittedCFG:    OS << "emitted with CFG"; break;
230   case BinaryFunction::State::Emitted:       OS << "emitted"; break;
231   }
232 
233   return OS;
234 }
235 
236 std::string BinaryFunction::buildCodeSectionName(StringRef Name,
237                                                  const BinaryContext &BC) {
238   return buildSectionName(BC.isELF() ? ".local.text." : ".l.text.", Name, BC);
239 }
240 
241 std::string BinaryFunction::buildColdCodeSectionName(StringRef Name,
242                                                      const BinaryContext &BC) {
243   return buildSectionName(BC.isELF() ? ".local.cold.text." : ".l.c.text.", Name,
244                           BC);
245 }
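// For illustration: on ELF a function named "foo" lands in ".local.text.foo"
// and ".local.cold.text.foo", while on non-ELF targets (e.g. Mach-O, where
// section names are length-limited) NameShortener substitutes a numeric id,
// producing names such as ".l.text.1" and ".l.c.text.1" (the id is made up).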
246 
247 uint64_t BinaryFunction::Count = 0;
248 
249 std::optional<StringRef>
250 BinaryFunction::hasNameRegex(const StringRef Name) const {
251   const std::string RegexName = (Twine("^") + StringRef(Name) + "$").str();
252   Regex MatchName(RegexName);
253   return forEachName(
254       [&MatchName](StringRef Name) { return MatchName.match(Name); });
255 }
256 
257 std::optional<StringRef>
258 BinaryFunction::hasRestoredNameRegex(const StringRef Name) const {
259   const std::string RegexName = (Twine("^") + StringRef(Name) + "$").str();
260   Regex MatchName(RegexName);
261   return forEachName([&MatchName](StringRef Name) {
262     return MatchName.match(NameResolver::restore(Name));
263   });
264 }
265 
266 std::string BinaryFunction::getDemangledName() const {
267   StringRef MangledName = NameResolver::restore(getOneName());
268   return demangle(MangledName.str());
269 }
270 
271 BinaryBasicBlock *
272 BinaryFunction::getBasicBlockContainingOffset(uint64_t Offset) {
273   if (Offset > Size)
274     return nullptr;
275 
276   if (BasicBlockOffsets.empty())
277     return nullptr;
278 
279   /*
280    * This is commented out because it makes BOLT too slow.
281    * assert(std::is_sorted(BasicBlockOffsets.begin(),
282    *                       BasicBlockOffsets.end(),
283    *                       CompareBasicBlockOffsets())));
284    */
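  // Worked example: with blocks at offsets {0x0, 0x10, 0x30}, a query for
  // offset 0x18 selects the block at 0x10 (upper_bound minus one) and returns
  // it only if 0x18 still falls within that block's original size; otherwise
  // the offset lies in a gap and nullptr is returned.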
285   auto I =
286       llvm::upper_bound(BasicBlockOffsets, BasicBlockOffset(Offset, nullptr),
287                         CompareBasicBlockOffsets());
288   assert(I != BasicBlockOffsets.begin() && "first basic block not at offset 0");
289   --I;
290   BinaryBasicBlock *BB = I->second;
291   return (Offset < BB->getOffset() + BB->getOriginalSize()) ? BB : nullptr;
292 }
293 
294 void BinaryFunction::markUnreachableBlocks() {
295   std::stack<BinaryBasicBlock *> Stack;
296 
297   for (BinaryBasicBlock &BB : blocks())
298     BB.markValid(false);
299 
300   // Add all entries and landing pads as roots.
301   for (BinaryBasicBlock *BB : BasicBlocks) {
302     if (isEntryPoint(*BB) || BB->isLandingPad()) {
303       Stack.push(BB);
304       BB->markValid(true);
305       continue;
306     }
307     // FIXME:
308     // Also mark BBs with indirect jumps as reachable, since we do not
309     // support removing unused jump tables yet (GH-issue20).
310     for (const MCInst &Inst : *BB) {
311       if (BC.MIB->getJumpTable(Inst)) {
312         Stack.push(BB);
313         BB->markValid(true);
314         break;
315       }
316     }
317   }
318 
319   // Determine the BBs reachable from the roots collected above.
320   while (!Stack.empty()) {
321     BinaryBasicBlock *BB = Stack.top();
322     Stack.pop();
323     for (BinaryBasicBlock *Succ : BB->successors()) {
324       if (Succ->isValid())
325         continue;
326       Succ->markValid(true);
327       Stack.push(Succ);
328     }
329   }
330 }
331 
332 // Any unnecessary fallthrough jumps revealed after calling eraseInvalidBBs
333 // will be cleaned up by fixBranches().
334 std::pair<unsigned, uint64_t>
335 BinaryFunction::eraseInvalidBBs(const MCCodeEmitter *Emitter) {
336   DenseSet<const BinaryBasicBlock *> InvalidBBs;
337   unsigned Count = 0;
338   uint64_t Bytes = 0;
339   for (BinaryBasicBlock *const BB : BasicBlocks) {
340     if (!BB->isValid()) {
341       assert(!isEntryPoint(*BB) && "all entry blocks must be valid");
342       InvalidBBs.insert(BB);
343       ++Count;
344       Bytes += BC.computeCodeSize(BB->begin(), BB->end(), Emitter);
345     }
346   }
347 
348   Layout.eraseBasicBlocks(InvalidBBs);
349 
350   BasicBlockListType NewBasicBlocks;
351   for (auto I = BasicBlocks.begin(), E = BasicBlocks.end(); I != E; ++I) {
352     BinaryBasicBlock *BB = *I;
353     if (InvalidBBs.contains(BB)) {
354       // Make sure the block is removed from the list of predecessors.
355       BB->removeAllSuccessors();
356       DeletedBasicBlocks.push_back(BB);
357     } else {
358       NewBasicBlocks.push_back(BB);
359     }
360   }
361   BasicBlocks = std::move(NewBasicBlocks);
362 
363   assert(BasicBlocks.size() == Layout.block_size());
364 
365   // Update CFG state if needed
366   if (Count > 0)
367     recomputeLandingPads();
368 
369   return std::make_pair(Count, Bytes);
370 }
371 
372 bool BinaryFunction::isForwardCall(const MCSymbol *CalleeSymbol) const {
373   // This function should work properly before and after function reordering.
374   // In order to accomplish this, we use the function index (if it is valid).
375   // If the function indices are not valid, we fall back to the original
376   // addresses.  This should be ok because the functions without valid indices
377   // should have been ordered with a stable sort.
378   const BinaryFunction *CalleeBF = BC.getFunctionForSymbol(CalleeSymbol);
379   if (CalleeBF) {
380     if (CalleeBF->isInjected())
381       return true;
382 
383     if (hasValidIndex() && CalleeBF->hasValidIndex()) {
384       return getIndex() < CalleeBF->getIndex();
385     } else if (hasValidIndex() && !CalleeBF->hasValidIndex()) {
386       return true;
387     } else if (!hasValidIndex() && CalleeBF->hasValidIndex()) {
388       return false;
389     } else {
390       return getAddress() < CalleeBF->getAddress();
391     }
392   } else {
393     // Absolute symbol.
394     ErrorOr<uint64_t> CalleeAddressOrError = BC.getSymbolValue(*CalleeSymbol);
395     assert(CalleeAddressOrError && "unregistered symbol found");
396     return *CalleeAddressOrError > getAddress();
397   }
398 }
399 
400 void BinaryFunction::dump() const {
401   // getDynoStats calls FunctionLayout::updateLayoutIndices and
402   // BasicBlock::analyzeBranch. The former cannot be const but should be
403   // removed, while the latter should be made const but seems to require
404   // refactoring. Forcing all callers to hold a non-const reference to
405   // BinaryFunction just to call dump() is not ideal either. Adding this
406   // const_cast is currently the best solution. It is safe because
407   // BinaryFunction itself is not modified; only BinaryBasicBlocks are
408   // actually modified (if at all), and we have mutable pointers to those
409   // regardless of whether this function is const-qualified or not.
410   const_cast<BinaryFunction &>(*this).print(dbgs(), "");
411 }
412 
413 void BinaryFunction::print(raw_ostream &OS, std::string Annotation) {
414   if (!opts::shouldPrint(*this))
415     return;
416 
417   StringRef SectionName =
418       OriginSection ? OriginSection->getName() : "<no origin section>";
419   OS << "Binary Function \"" << *this << "\" " << Annotation << " {";
420   std::vector<StringRef> AllNames = getNames();
421   if (AllNames.size() > 1) {
422     OS << "\n  All names   : ";
423     const char *Sep = "";
424     for (const StringRef &Name : AllNames) {
425       OS << Sep << Name;
426       Sep = "\n                ";
427     }
428   }
429   OS << "\n  Number      : " << FunctionNumber;
430   OS << "\n  State       : " << CurrentState;
431   OS << "\n  Address     : 0x" << Twine::utohexstr(Address);
432   OS << "\n  Size        : 0x" << Twine::utohexstr(Size);
433   OS << "\n  MaxSize     : 0x" << Twine::utohexstr(MaxSize);
434   OS << "\n  Offset      : 0x" << Twine::utohexstr(getFileOffset());
435   OS << "\n  Section     : " << SectionName;
436   OS << "\n  Orc Section : " << getCodeSectionName();
437   OS << "\n  LSDA        : 0x" << Twine::utohexstr(getLSDAAddress());
438   OS << "\n  IsSimple    : " << IsSimple;
439   OS << "\n  IsMultiEntry: " << isMultiEntry();
440   OS << "\n  IsSplit     : " << isSplit();
441   OS << "\n  BB Count    : " << size();
442 
443   if (HasUnknownControlFlow)
444     OS << "\n  Unknown CF  : true";
445   if (getPersonalityFunction())
446     OS << "\n  Personality : " << getPersonalityFunction()->getName();
447   if (IsFragment)
448     OS << "\n  IsFragment  : true";
449   if (isFolded())
450     OS << "\n  FoldedInto  : " << *getFoldedIntoFunction();
451   for (BinaryFunction *ParentFragment : ParentFragments)
452     OS << "\n  Parent      : " << *ParentFragment;
453   if (!Fragments.empty()) {
454     OS << "\n  Fragments   : ";
455     ListSeparator LS;
456     for (BinaryFunction *Frag : Fragments)
457       OS << LS << *Frag;
458   }
459   if (hasCFG())
460     OS << "\n  Hash        : " << Twine::utohexstr(computeHash());
461   if (isMultiEntry()) {
462     OS << "\n  Secondary Entry Points : ";
463     ListSeparator LS;
464     for (const auto &KV : SecondaryEntryPoints)
465       OS << LS << KV.second->getName();
466   }
467   if (FrameInstructions.size())
468     OS << "\n  CFI Instrs  : " << FrameInstructions.size();
469   if (!Layout.block_empty()) {
470     OS << "\n  BB Layout   : ";
471     ListSeparator LS;
472     for (const BinaryBasicBlock *BB : Layout.blocks())
473       OS << LS << BB->getName();
474   }
475   if (getImageAddress())
476     OS << "\n  Image       : 0x" << Twine::utohexstr(getImageAddress());
477   if (ExecutionCount != COUNT_NO_PROFILE) {
478     OS << "\n  Exec Count  : " << ExecutionCount;
479     OS << "\n  Branch Count: " << RawBranchCount;
480     OS << "\n  Profile Acc : " << format("%.1f%%", ProfileMatchRatio * 100.0f);
481   }
482 
483   if (opts::PrintDynoStats && !getLayout().block_empty()) {
484     OS << '\n';
485     DynoStats dynoStats = getDynoStats(*this);
486     OS << dynoStats;
487   }
488 
489   OS << "\n}\n";
490 
491   if (opts::PrintDynoStatsOnly || !BC.InstPrinter)
492     return;
493 
494   // Offset of the instruction in function.
495   uint64_t Offset = 0;
496 
497   if (BasicBlocks.empty() && !Instructions.empty()) {
498     // Print before CFG was built.
499     for (const std::pair<const uint32_t, MCInst> &II : Instructions) {
500       Offset = II.first;
501 
502       // Print label if exists at this offset.
503       auto LI = Labels.find(Offset);
504       if (LI != Labels.end()) {
505         if (const MCSymbol *EntrySymbol =
506                 getSecondaryEntryPointSymbol(LI->second))
507           OS << EntrySymbol->getName() << " (Entry Point):\n";
508         OS << LI->second->getName() << ":\n";
509       }
510 
511       BC.printInstruction(OS, II.second, Offset, this);
512     }
513   }
514 
515   StringRef SplitPointMsg = "";
516   for (const FunctionFragment &FF : Layout.fragments()) {
517     OS << SplitPointMsg;
518     SplitPointMsg = "-------   HOT-COLD SPLIT POINT   -------\n\n";
519     for (const BinaryBasicBlock *BB : FF) {
520       OS << BB->getName() << " (" << BB->size()
521          << " instructions, align : " << BB->getAlignment() << ")\n";
522 
523       if (opts::PrintOutputAddressRange)
524         OS << formatv("  Output Address Range: [{0:x}, {1:x}) ({2} bytes)\n",
525                       BB->getOutputAddressRange().first,
526                       BB->getOutputAddressRange().second, BB->getOutputSize());
527 
528       if (isEntryPoint(*BB)) {
529         if (MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(*BB))
530           OS << "  Secondary Entry Point: " << EntrySymbol->getName() << '\n';
531         else
532           OS << "  Entry Point\n";
533       }
534 
535       if (BB->isLandingPad())
536         OS << "  Landing Pad\n";
537 
538       uint64_t BBExecCount = BB->getExecutionCount();
539       if (hasValidProfile()) {
540         OS << "  Exec Count : ";
541         if (BB->getExecutionCount() != BinaryBasicBlock::COUNT_NO_PROFILE)
542           OS << BBExecCount << '\n';
543         else
544           OS << "<unknown>\n";
545       }
546       if (hasCFI())
547         OS << "  CFI State : " << BB->getCFIState() << '\n';
548       if (opts::EnableBAT) {
549         OS << "  Input offset: 0x" << Twine::utohexstr(BB->getInputOffset())
550            << "\n";
551       }
552       if (!BB->pred_empty()) {
553         OS << "  Predecessors: ";
554         ListSeparator LS;
555         for (BinaryBasicBlock *Pred : BB->predecessors())
556           OS << LS << Pred->getName();
557         OS << '\n';
558       }
559       if (!BB->throw_empty()) {
560         OS << "  Throwers: ";
561         ListSeparator LS;
562         for (BinaryBasicBlock *Throw : BB->throwers())
563           OS << LS << Throw->getName();
564         OS << '\n';
565       }
566 
567       Offset = alignTo(Offset, BB->getAlignment());
568 
569       // Note: offsets are imprecise since this is happening prior to
570       // relaxation.
571       Offset = BC.printInstructions(OS, BB->begin(), BB->end(), Offset, this);
572 
573       if (!BB->succ_empty()) {
574         OS << "  Successors: ";
575         // For more than 2 successors, sort them based on frequency.
576         std::vector<uint64_t> Indices(BB->succ_size());
577         std::iota(Indices.begin(), Indices.end(), 0);
578         if (BB->succ_size() > 2 && BB->getKnownExecutionCount()) {
579           llvm::stable_sort(Indices, [&](const uint64_t A, const uint64_t B) {
580             return BB->BranchInfo[B] < BB->BranchInfo[A];
581           });
582         }
583         ListSeparator LS;
584         for (unsigned I = 0; I < Indices.size(); ++I) {
585           BinaryBasicBlock *Succ = BB->Successors[Indices[I]];
586           const BinaryBasicBlock::BinaryBranchInfo &BI =
587               BB->BranchInfo[Indices[I]];
588           OS << LS << Succ->getName();
589           if (ExecutionCount != COUNT_NO_PROFILE &&
590               BI.MispredictedCount != BinaryBasicBlock::COUNT_INFERRED) {
591             OS << " (mispreds: " << BI.MispredictedCount
592                << ", count: " << BI.Count << ")";
593           } else if (ExecutionCount != COUNT_NO_PROFILE &&
594                      BI.Count != BinaryBasicBlock::COUNT_NO_PROFILE) {
595             OS << " (inferred count: " << BI.Count << ")";
596           }
597         }
598         OS << '\n';
599       }
600 
601       if (!BB->lp_empty()) {
602         OS << "  Landing Pads: ";
603         ListSeparator LS;
604         for (BinaryBasicBlock *LP : BB->landing_pads()) {
605           OS << LS << LP->getName();
606           if (ExecutionCount != COUNT_NO_PROFILE) {
607             OS << " (count: " << LP->getExecutionCount() << ")";
608           }
609         }
610         OS << '\n';
611       }
612 
613       // In CFG_Finalized state we can miscalculate CFI state at exit.
614       if (CurrentState == State::CFG && hasCFI()) {
615         const int32_t CFIStateAtExit = BB->getCFIStateAtExit();
616         if (CFIStateAtExit >= 0)
617           OS << "  CFI State: " << CFIStateAtExit << '\n';
618       }
619 
620       OS << '\n';
621     }
622   }
623 
624   // Dump new exception ranges for the function.
625   if (!CallSites.empty()) {
626     OS << "EH table:\n";
627     for (const FunctionFragment &FF : getLayout().fragments()) {
628       for (const auto &FCSI : getCallSites(FF.getFragmentNum())) {
629         const CallSite &CSI = FCSI.second;
630         OS << "  [" << *CSI.Start << ", " << *CSI.End << ") landing pad : ";
631         if (CSI.LP)
632           OS << *CSI.LP;
633         else
634           OS << "0";
635         OS << ", action : " << CSI.Action << '\n';
636       }
637     }
638     OS << '\n';
639   }
640 
641   // Print all jump tables.
642   for (const std::pair<const uint64_t, JumpTable *> &JTI : JumpTables)
643     JTI.second->print(OS);
644 
645   OS << "DWARF CFI Instructions:\n";
646   if (OffsetToCFI.size()) {
647     // Pre-buildCFG information
648     for (const std::pair<const uint32_t, uint32_t> &Elmt : OffsetToCFI) {
649       OS << format("    %08x:\t", Elmt.first);
650       assert(Elmt.second < FrameInstructions.size() && "Incorrect CFI offset");
651       BinaryContext::printCFI(OS, FrameInstructions[Elmt.second]);
652       OS << "\n";
653     }
654   } else {
655     // Post-buildCFG information
656     for (uint32_t I = 0, E = FrameInstructions.size(); I != E; ++I) {
657       const MCCFIInstruction &CFI = FrameInstructions[I];
658       OS << format("    %d:\t", I);
659       BinaryContext::printCFI(OS, CFI);
660       OS << "\n";
661     }
662   }
663   if (FrameInstructions.empty())
664     OS << "    <empty>\n";
665 
666   OS << "End of Function \"" << *this << "\"\n\n";
667 }
668 
669 void BinaryFunction::printRelocations(raw_ostream &OS, uint64_t Offset,
670                                       uint64_t Size) const {
671   const char *Sep = " # Relocs: ";
672 
673   auto RI = Relocations.lower_bound(Offset);
674   while (RI != Relocations.end() && RI->first < Offset + Size) {
675     OS << Sep << "(R: " << RI->second << ")";
676     Sep = ", ";
677     ++RI;
678   }
679 }
680 
681 static std::string mutateDWARFExpressionTargetReg(const MCCFIInstruction &Instr,
682                                                   MCPhysReg NewReg) {
683   StringRef ExprBytes = Instr.getValues();
684   assert(ExprBytes.size() > 1 && "DWARF expression CFI is too short");
685   uint8_t Opcode = ExprBytes[0];
686   assert((Opcode == dwarf::DW_CFA_expression ||
687           Opcode == dwarf::DW_CFA_val_expression) &&
688          "invalid DWARF expression CFI");
689   (void)Opcode;
690   const uint8_t *const Start =
691       reinterpret_cast<const uint8_t *>(ExprBytes.drop_front(1).data());
692   const uint8_t *const End =
693       reinterpret_cast<const uint8_t *>(Start + ExprBytes.size() - 1);
694   unsigned Size = 0;
695   decodeULEB128(Start, &Size, End);
696   assert(Size > 0 && "Invalid reg encoding for DWARF expression CFI");
697   SmallString<8> Tmp;
698   raw_svector_ostream OSE(Tmp);
699   encodeULEB128(NewReg, OSE);
700   return Twine(ExprBytes.slice(0, 1))
701       .concat(OSE.str())
702       .concat(ExprBytes.drop_front(1 + Size))
703       .str();
704 }
705 
706 void BinaryFunction::mutateCFIRegisterFor(const MCInst &Instr,
707                                           MCPhysReg NewReg) {
708   const MCCFIInstruction *OldCFI = getCFIFor(Instr);
709   assert(OldCFI && "invalid CFI instr");
710   switch (OldCFI->getOperation()) {
711   default:
712     llvm_unreachable("Unexpected instruction");
713   case MCCFIInstruction::OpDefCfa:
714     setCFIFor(Instr, MCCFIInstruction::cfiDefCfa(nullptr, NewReg,
715                                                  OldCFI->getOffset()));
716     break;
717   case MCCFIInstruction::OpDefCfaRegister:
718     setCFIFor(Instr, MCCFIInstruction::createDefCfaRegister(nullptr, NewReg));
719     break;
720   case MCCFIInstruction::OpOffset:
721     setCFIFor(Instr, MCCFIInstruction::createOffset(nullptr, NewReg,
722                                                     OldCFI->getOffset()));
723     break;
724   case MCCFIInstruction::OpRegister:
725     setCFIFor(Instr, MCCFIInstruction::createRegister(nullptr, NewReg,
726                                                       OldCFI->getRegister2()));
727     break;
728   case MCCFIInstruction::OpSameValue:
729     setCFIFor(Instr, MCCFIInstruction::createSameValue(nullptr, NewReg));
730     break;
731   case MCCFIInstruction::OpEscape:
732     setCFIFor(Instr,
733               MCCFIInstruction::createEscape(
734                   nullptr,
735                   StringRef(mutateDWARFExpressionTargetReg(*OldCFI, NewReg))));
736     break;
737   case MCCFIInstruction::OpRestore:
738     setCFIFor(Instr, MCCFIInstruction::createRestore(nullptr, NewReg));
739     break;
740   case MCCFIInstruction::OpUndefined:
741     setCFIFor(Instr, MCCFIInstruction::createUndefined(nullptr, NewReg));
742     break;
743   }
744 }
745 
746 const MCCFIInstruction *BinaryFunction::mutateCFIOffsetFor(const MCInst &Instr,
747                                                            int64_t NewOffset) {
748   const MCCFIInstruction *OldCFI = getCFIFor(Instr);
749   assert(OldCFI && "invalid CFI instr");
750   switch (OldCFI->getOperation()) {
751   default:
752     llvm_unreachable("Unexpected instruction");
753   case MCCFIInstruction::OpDefCfaOffset:
754     setCFIFor(Instr, MCCFIInstruction::cfiDefCfaOffset(nullptr, NewOffset));
755     break;
756   case MCCFIInstruction::OpAdjustCfaOffset:
757     setCFIFor(Instr,
758               MCCFIInstruction::createAdjustCfaOffset(nullptr, NewOffset));
759     break;
760   case MCCFIInstruction::OpDefCfa:
761     setCFIFor(Instr, MCCFIInstruction::cfiDefCfa(nullptr, OldCFI->getRegister(),
762                                                  NewOffset));
763     break;
764   case MCCFIInstruction::OpOffset:
765     setCFIFor(Instr, MCCFIInstruction::createOffset(
766                          nullptr, OldCFI->getRegister(), NewOffset));
767     break;
768   }
769   return getCFIFor(Instr);
770 }
771 
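// A note on the patterns classified below (a summary of analyzeIndirectBranch
// plus the checks in this function, not an exhaustive list): on x86 a plain
// jump table typically looks like
//   jmpq *JT(,%rax,8)
// while the PIC flavor loads a 32-bit offset from the table and adds the
// table base before jumping; anything that does not resolve to a readable
// table is conservatively treated as a possible tail call.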
772 IndirectBranchType
773 BinaryFunction::processIndirectBranch(MCInst &Instruction, unsigned Size,
774                                       uint64_t Offset,
775                                       uint64_t &TargetAddress) {
776   const unsigned PtrSize = BC.AsmInfo->getCodePointerSize();
777 
778   // The instruction referencing memory used by the branch instruction.
779   // It could be the branch instruction itself or one of the instructions
780   // setting the value of the register used by the branch.
781   MCInst *MemLocInstr;
782 
783   // Address of the table referenced by MemLocInstr. Could be either an
784   // array of function pointers, or a jump table.
785   uint64_t ArrayStart = 0;
786 
787   unsigned BaseRegNum, IndexRegNum;
788   int64_t DispValue;
789   const MCExpr *DispExpr;
790 
791   // On AArch64, identify the instruction adding the PC-relative offset to
792   // jump table entries to correctly decode it.
793   MCInst *PCRelBaseInstr;
794   uint64_t PCRelAddr = 0;
795 
796   auto Begin = Instructions.begin();
797   if (BC.isAArch64()) {
798     PreserveNops = BC.HasRelocations;
799     // Start at the last label as an approximation of the current basic block.
800     // This is a heuristic, since the full set of labels has yet to be
801     // determined.
802     for (const uint32_t Offset :
803          llvm::make_first_range(llvm::reverse(Labels))) {
804       auto II = Instructions.find(Offset);
805       if (II != Instructions.end()) {
806         Begin = II;
807         break;
808       }
809     }
810   }
811 
812   IndirectBranchType BranchType = BC.MIB->analyzeIndirectBranch(
813       Instruction, Begin, Instructions.end(), PtrSize, MemLocInstr, BaseRegNum,
814       IndexRegNum, DispValue, DispExpr, PCRelBaseInstr);
815 
816   if (BranchType == IndirectBranchType::UNKNOWN && !MemLocInstr)
817     return BranchType;
818 
819   if (MemLocInstr != &Instruction)
820     IndexRegNum = BC.MIB->getNoRegister();
821 
822   if (BC.isAArch64()) {
823     const MCSymbol *Sym = BC.MIB->getTargetSymbol(*PCRelBaseInstr, 1);
824     assert(Sym && "Symbol extraction failed");
825     ErrorOr<uint64_t> SymValueOrError = BC.getSymbolValue(*Sym);
826     if (SymValueOrError) {
827       PCRelAddr = *SymValueOrError;
828     } else {
829       for (std::pair<const uint32_t, MCSymbol *> &Elmt : Labels) {
830         if (Elmt.second == Sym) {
831           PCRelAddr = Elmt.first + getAddress();
832           break;
833         }
834       }
835     }
836     uint64_t InstrAddr = 0;
837     for (auto II = Instructions.rbegin(); II != Instructions.rend(); ++II) {
838       if (&II->second == PCRelBaseInstr) {
839         InstrAddr = II->first + getAddress();
840         break;
841       }
842     }
843     assert(InstrAddr != 0 && "instruction not found");
844     // We do this to avoid spurious references to code locations outside this
845     // function (for example, if the indirect jump lives in the last basic
846     // block of the function, it will create a reference to the next function).
847     // This replaces a symbol reference with an immediate.
848     BC.MIB->replaceMemOperandDisp(*PCRelBaseInstr,
849                                   MCOperand::createImm(PCRelAddr - InstrAddr));
850     // FIXME: Disable full jump table processing for AArch64 until we have a
851     // proper way of determining the jump table limits.
852     return IndirectBranchType::UNKNOWN;
853   }
854 
855   auto getExprValue = [&](const MCExpr *Expr) {
856     const MCSymbol *TargetSym;
857     uint64_t TargetOffset;
858     std::tie(TargetSym, TargetOffset) = BC.MIB->getTargetSymbolInfo(Expr);
859     ErrorOr<uint64_t> SymValueOrError = BC.getSymbolValue(*TargetSym);
860     assert(SymValueOrError && "Global symbol needs a value");
861     return *SymValueOrError + TargetOffset;
862   };
863 
864   // RIP-relative addressing should have been converted to symbolic form by
865   // now in processed instructions (but not in the branch instruction itself).
866   if (DispExpr) {
867     ArrayStart = getExprValue(DispExpr);
868     BaseRegNum = BC.MIB->getNoRegister();
869     if (BC.isAArch64()) {
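      // The symbol value originates from an ADRP, which only materializes the
      // 4KiB-aligned page address; the low 12 bits come from the immediate of
      // the follow-up instruction (DispValue). E.g. a page base of 0x41000
      // combined with DispValue 0x238 gives ArrayStart 0x41238.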
870       ArrayStart &= ~0xFFFULL;
871       ArrayStart += DispValue & 0xFFFULL;
872     }
873   } else {
874     ArrayStart = static_cast<uint64_t>(DispValue);
875   }
876 
877   if (BaseRegNum == BC.MRI->getProgramCounter())
878     ArrayStart += getAddress() + Offset + Size;
879 
880   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: addressed memory is 0x"
881                     << Twine::utohexstr(ArrayStart) << '\n');
882 
883   ErrorOr<BinarySection &> Section = BC.getSectionForAddress(ArrayStart);
884   if (!Section) {
885     // No section - possibly an absolute address. Since we don't allow
886     // internal function addresses to escape the function scope, we
887     // consider it a tail call.
888     if (opts::Verbosity >= 1) {
889       BC.errs() << "BOLT-WARNING: no section for address 0x"
890                 << Twine::utohexstr(ArrayStart) << " referenced from function "
891                 << *this << '\n';
892     }
893     return IndirectBranchType::POSSIBLE_TAIL_CALL;
894   }
895   if (Section->isVirtual()) {
896     // The contents are filled at runtime.
897     return IndirectBranchType::POSSIBLE_TAIL_CALL;
898   }
899 
900   if (BranchType == IndirectBranchType::POSSIBLE_FIXED_BRANCH) {
901     ErrorOr<uint64_t> Value = BC.getPointerAtAddress(ArrayStart);
902     if (!Value)
903       return IndirectBranchType::UNKNOWN;
904 
905     if (BC.getSectionForAddress(ArrayStart)->isWritable())
906       return IndirectBranchType::UNKNOWN;
907 
908     BC.outs() << "BOLT-INFO: fixed indirect branch detected in " << *this
909               << " at 0x" << Twine::utohexstr(getAddress() + Offset)
910               << " referencing data at 0x" << Twine::utohexstr(ArrayStart)
911               << " the destination value is 0x" << Twine::utohexstr(*Value)
912               << '\n';
913 
914     TargetAddress = *Value;
915     return BranchType;
916   }
917 
918   // Check if there's already a jump table registered at this address.
919   MemoryContentsType MemType;
920   if (JumpTable *JT = BC.getJumpTableContainingAddress(ArrayStart)) {
921     switch (JT->Type) {
922     case JumpTable::JTT_NORMAL:
923       MemType = MemoryContentsType::POSSIBLE_JUMP_TABLE;
924       break;
925     case JumpTable::JTT_PIC:
926       MemType = MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE;
927       break;
928     }
929   } else {
930     MemType = BC.analyzeMemoryAt(ArrayStart, *this);
931   }
932 
933   // Check that jump table type in instruction pattern matches memory contents.
934   JumpTable::JumpTableType JTType;
935   if (BranchType == IndirectBranchType::POSSIBLE_PIC_JUMP_TABLE) {
936     if (MemType != MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE)
937       return IndirectBranchType::UNKNOWN;
938     JTType = JumpTable::JTT_PIC;
939   } else {
940     if (MemType == MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE)
941       return IndirectBranchType::UNKNOWN;
942 
943     if (MemType == MemoryContentsType::UNKNOWN)
944       return IndirectBranchType::POSSIBLE_TAIL_CALL;
945 
946     BranchType = IndirectBranchType::POSSIBLE_JUMP_TABLE;
947     JTType = JumpTable::JTT_NORMAL;
948   }
949 
950   // Convert the instruction into jump table branch.
951   const MCSymbol *JTLabel = BC.getOrCreateJumpTable(*this, ArrayStart, JTType);
952   BC.MIB->replaceMemOperandDisp(*MemLocInstr, JTLabel, BC.Ctx.get());
953   BC.MIB->setJumpTable(Instruction, ArrayStart, IndexRegNum);
954 
955   JTSites.emplace_back(Offset, ArrayStart);
956 
957   return BranchType;
958 }
959 
960 MCSymbol *BinaryFunction::getOrCreateLocalLabel(uint64_t Address,
961                                                 bool CreatePastEnd) {
962   const uint64_t Offset = Address - getAddress();
963 
964   if ((Offset == getSize()) && CreatePastEnd)
965     return getFunctionEndLabel();
966 
967   auto LI = Labels.find(Offset);
968   if (LI != Labels.end())
969     return LI->second;
970 
971   // For AArch64, check if this address is part of a constant island.
972   if (BC.isAArch64()) {
973     if (MCSymbol *IslandSym = getOrCreateIslandAccess(Address))
974       return IslandSym;
975   }
976 
977   MCSymbol *Label = BC.Ctx->createNamedTempSymbol();
978   Labels[Offset] = Label;
979 
980   return Label;
981 }
982 
983 ErrorOr<ArrayRef<uint8_t>> BinaryFunction::getData() const {
984   BinarySection &Section = *getOriginSection();
985   assert(Section.containsRange(getAddress(), getMaxSize()) &&
986          "wrong section for function");
987 
988   if (!Section.isText() || Section.isVirtual() || !Section.getSize())
989     return std::make_error_code(std::errc::bad_address);
990 
991   StringRef SectionContents = Section.getContents();
992 
993   assert(SectionContents.size() == Section.getSize() &&
994          "section size mismatch");
995 
996   // Function offset from the section start.
997   uint64_t Offset = getAddress() - Section.getAddress();
998   auto *Bytes = reinterpret_cast<const uint8_t *>(SectionContents.data());
999   return ArrayRef<uint8_t>(Bytes + Offset, getMaxSize());
1000 }
1001 
1002 size_t BinaryFunction::getSizeOfDataInCodeAt(uint64_t Offset) const {
1003   if (!Islands)
1004     return 0;
1005 
1006   if (!llvm::is_contained(Islands->DataOffsets, Offset))
1007     return 0;
1008 
1009   auto Iter = Islands->CodeOffsets.upper_bound(Offset);
1010   if (Iter != Islands->CodeOffsets.end())
1011     return *Iter - Offset;
1012   return getSize() - Offset;
1013 }
1014 
1015 bool BinaryFunction::isZeroPaddingAt(uint64_t Offset) const {
1016   ArrayRef<uint8_t> FunctionData = *getData();
1017   uint64_t EndOfCode = getSize();
1018   if (Islands) {
1019     auto Iter = Islands->DataOffsets.upper_bound(Offset);
1020     if (Iter != Islands->DataOffsets.end())
1021       EndOfCode = *Iter;
1022   }
1023   for (uint64_t I = Offset; I < EndOfCode; ++I)
1024     if (FunctionData[I] != 0)
1025       return false;
1026 
1027   return true;
1028 }
1029 
1030 Error BinaryFunction::handlePCRelOperand(MCInst &Instruction, uint64_t Address,
1031                                          uint64_t Size) {
1032   auto &MIB = BC.MIB;
1033   uint64_t TargetAddress = 0;
1034   if (!MIB->evaluateMemOperandTarget(Instruction, TargetAddress, Address,
1035                                      Size)) {
1036     std::string Msg;
1037     raw_string_ostream SS(Msg);
1038     SS << "BOLT-ERROR: PC-relative operand can't be evaluated:\n";
1039     BC.InstPrinter->printInst(&Instruction, 0, "", *BC.STI, SS);
1040     SS << '\n';
1041     Instruction.dump_pretty(SS, BC.InstPrinter.get());
1042     SS << '\n';
1043     SS << "BOLT-ERROR: cannot handle PC-relative operand at 0x"
1044        << Twine::utohexstr(Address) << ". Skipping function " << *this << ".\n";
1045     if (BC.HasRelocations)
1046       return createFatalBOLTError(Msg);
1047     IsSimple = false;
1048     return createNonFatalBOLTError(Msg);
1049   }
1050   if (TargetAddress == 0 && opts::Verbosity >= 1) {
1051     BC.outs() << "BOLT-INFO: PC-relative operand is zero in function " << *this
1052               << '\n';
1053   }
1054 
1055   const MCSymbol *TargetSymbol;
1056   uint64_t TargetOffset;
1057   std::tie(TargetSymbol, TargetOffset) =
1058       BC.handleAddressRef(TargetAddress, *this, /*IsPCRel*/ true);
1059 
1060   bool ReplaceSuccess = MIB->replaceMemOperandDisp(
1061       Instruction, TargetSymbol, static_cast<int64_t>(TargetOffset), &*BC.Ctx);
1062   (void)ReplaceSuccess;
1063   assert(ReplaceSuccess && "Failed to replace mem operand with symbol+off.");
1064   return Error::success();
1065 }
1066 
1067 MCSymbol *BinaryFunction::handleExternalReference(MCInst &Instruction,
1068                                                   uint64_t Size,
1069                                                   uint64_t Offset,
1070                                                   uint64_t TargetAddress,
1071                                                   bool &IsCall) {
1072   auto &MIB = BC.MIB;
1073 
1074   const uint64_t AbsoluteInstrAddr = getAddress() + Offset;
1075   BC.addInterproceduralReference(this, TargetAddress);
1076   if (opts::Verbosity >= 2 && !IsCall && Size == 2 && !BC.HasRelocations) {
1077     BC.errs() << "BOLT-WARNING: relaxed tail call detected at 0x"
1078               << Twine::utohexstr(AbsoluteInstrAddr) << " in function " << *this
1079               << ". Code size will be increased.\n";
1080   }
1081 
1082   assert(!MIB->isTailCall(Instruction) &&
1083          "synthetic tail call instruction found");
1084 
1085   // This is a call regardless of the opcode.
1086   // Assign the proper opcode to tail calls so that they can be
1087   // treated as calls.
1088   if (!IsCall) {
1089     if (!MIB->convertJmpToTailCall(Instruction)) {
1090       assert(MIB->isConditionalBranch(Instruction) &&
1091              "unknown tail call instruction");
1092       if (opts::Verbosity >= 2) {
1093         BC.errs() << "BOLT-WARNING: conditional tail call detected in "
1094                   << "function " << *this << " at 0x"
1095                   << Twine::utohexstr(AbsoluteInstrAddr) << ".\n";
1096       }
1097     }
1098     IsCall = true;
1099   }
1100 
1101   if (opts::Verbosity >= 2 && TargetAddress == 0) {
1102     // We actually see calls to address 0 in the presence of weak
1103     // symbols originating from libraries. This code is never meant
1104     // to be executed.
1105     BC.outs() << "BOLT-INFO: Function " << *this
1106               << " has a call to address zero.\n";
1107   }
1108 
1109   return BC.getOrCreateGlobalSymbol(TargetAddress, "FUNCat");
1110 }
1111 
1112 void BinaryFunction::handleIndirectBranch(MCInst &Instruction, uint64_t Size,
1113                                           uint64_t Offset) {
1114   auto &MIB = BC.MIB;
1115   uint64_t IndirectTarget = 0;
1116   IndirectBranchType Result =
1117       processIndirectBranch(Instruction, Size, Offset, IndirectTarget);
1118   switch (Result) {
1119   default:
1120     llvm_unreachable("unexpected result");
1121   case IndirectBranchType::POSSIBLE_TAIL_CALL: {
1122     bool Result = MIB->convertJmpToTailCall(Instruction);
1123     (void)Result;
1124     assert(Result);
1125     break;
1126   }
1127   case IndirectBranchType::POSSIBLE_JUMP_TABLE:
1128   case IndirectBranchType::POSSIBLE_PIC_JUMP_TABLE:
1129     if (opts::JumpTables == JTS_NONE)
1130       IsSimple = false;
1131     break;
1132   case IndirectBranchType::POSSIBLE_FIXED_BRANCH: {
1133     if (containsAddress(IndirectTarget)) {
1134       const MCSymbol *TargetSymbol = getOrCreateLocalLabel(IndirectTarget);
1135       Instruction.clear();
1136       MIB->createUncondBranch(Instruction, TargetSymbol, BC.Ctx.get());
1137       TakenBranches.emplace_back(Offset, IndirectTarget - getAddress());
1138       addEntryPointAtOffset(IndirectTarget - getAddress());
1139     } else {
1140       MIB->convertJmpToTailCall(Instruction);
1141       BC.addInterproceduralReference(this, IndirectTarget);
1142     }
1143     break;
1144   }
1145   case IndirectBranchType::UNKNOWN:
1146     // Keep processing. We'll do more checks and fixes in
1147     // postProcessIndirectBranches().
1148     UnknownIndirectBranchOffsets.emplace(Offset);
1149     break;
1150   }
1151 }
1152 
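// Recognize an indirect call that goes through a linker-inserted veneer,
// typically an ADRP/ADD pair that materializes the real target address
// followed by the branch (the exact shape accepted is whatever
// MCPlusBuilder::matchLinkerVeneer matches), and record the target via
// relocations on the ADRP/ADD pair.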
1153 void BinaryFunction::handleAArch64IndirectCall(MCInst &Instruction,
1154                                                const uint64_t Offset) {
1155   auto &MIB = BC.MIB;
1156   const uint64_t AbsoluteInstrAddr = getAddress() + Offset;
1157   MCInst *TargetHiBits, *TargetLowBits;
1158   uint64_t TargetAddress, Count;
1159   Count = MIB->matchLinkerVeneer(Instructions.begin(), Instructions.end(),
1160                                  AbsoluteInstrAddr, Instruction, TargetHiBits,
1161                                  TargetLowBits, TargetAddress);
1162   if (Count) {
1163     MIB->addAnnotation(Instruction, "AArch64Veneer", true);
1164     --Count;
1165     for (auto It = std::prev(Instructions.end()); Count != 0;
1166          It = std::prev(It), --Count) {
1167       MIB->addAnnotation(It->second, "AArch64Veneer", true);
1168     }
1169 
1170     BC.addAdrpAddRelocAArch64(*this, *TargetLowBits, *TargetHiBits,
1171                               TargetAddress);
1172   }
1173 }
1174 
1175 std::optional<MCInst>
1176 BinaryFunction::disassembleInstructionAtOffset(uint64_t Offset) const {
1177   assert(CurrentState == State::Empty && "Function should not be disassembled");
1178   assert(Offset < MaxSize && "Invalid offset");
1179   ErrorOr<ArrayRef<unsigned char>> FunctionData = getData();
1180   assert(FunctionData && "Cannot get function as data");
1181   MCInst Instr;
1182   uint64_t InstrSize = 0;
1183   const uint64_t InstrAddress = getAddress() + Offset;
1184   if (BC.DisAsm->getInstruction(Instr, InstrSize, FunctionData->slice(Offset),
1185                                 InstrAddress, nulls()))
1186     return Instr;
1187   return std::nullopt;
1188 }
1189 
1190 Error BinaryFunction::disassemble() {
1191   NamedRegionTimer T("disassemble", "Disassemble function", "buildfuncs",
1192                      "Build Binary Functions", opts::TimeBuild);
1193   ErrorOr<ArrayRef<uint8_t>> ErrorOrFunctionData = getData();
1194   assert(ErrorOrFunctionData && "function data is not available");
1195   ArrayRef<uint8_t> FunctionData = *ErrorOrFunctionData;
1196   assert(FunctionData.size() == getMaxSize() &&
1197          "function size does not match raw data size");
1198 
1199   auto &Ctx = BC.Ctx;
1200   auto &MIB = BC.MIB;
1201 
1202   BC.SymbolicDisAsm->setSymbolizer(MIB->createTargetSymbolizer(*this));
1203 
1204   // Insert a label at the beginning of the function. This will be our first
1205   // basic block.
1206   Labels[0] = Ctx->createNamedTempSymbol("BB0");
1207 
1208   // Map offsets in the function to a label that should always point to the
1209   // corresponding instruction. This is used for labels that shouldn't point to
1210   // the start of a basic block but always to a specific instruction. This is
1211   // used, for example, on RISC-V where %pcrel_lo relocations point to the
1212   // corresponding %pcrel_hi.
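  // For instance, in a RISC-V pair such as
  //   1:  auipc a0, %pcrel_hi(sym)
  //       addi  a0, a0, %pcrel_lo(1b)
  // the %pcrel_lo relocation refers to the auipc instruction itself, so its
  // label has to stay attached to that exact instruction rather than to the
  // enclosing basic block.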
1213   LabelsMapType InstructionLabels;
1214 
1215   uint64_t Size = 0; // instruction size
1216   for (uint64_t Offset = 0; Offset < getSize(); Offset += Size) {
1217     MCInst Instruction;
1218     const uint64_t AbsoluteInstrAddr = getAddress() + Offset;
1219 
1220     // Check for data inside code and ignore it
1221     if (const size_t DataInCodeSize = getSizeOfDataInCodeAt(Offset)) {
1222       Size = DataInCodeSize;
1223       continue;
1224     }
1225 
1226     if (!BC.SymbolicDisAsm->getInstruction(Instruction, Size,
1227                                            FunctionData.slice(Offset),
1228                                            AbsoluteInstrAddr, nulls())) {
1229       // Functions with "soft" boundaries, e.g. coming from assembly source,
1230       // can have 0-byte padding at the end.
1231       if (isZeroPaddingAt(Offset))
1232         break;
1233 
1234       BC.errs()
1235           << "BOLT-WARNING: unable to disassemble instruction at offset 0x"
1236           << Twine::utohexstr(Offset) << " (address 0x"
1237           << Twine::utohexstr(AbsoluteInstrAddr) << ") in function " << *this
1238           << '\n';
1239       // Some AVX-512 instructions could not be disassembled at all.
1240       if (BC.HasRelocations && opts::TrapOnAVX512 && BC.isX86()) {
1241         setTrapOnEntry();
1242         BC.TrappedFunctions.push_back(this);
1243       } else {
1244         setIgnored();
1245       }
1246 
1247       break;
1248     }
1249 
1250     // Check integrity of LLVM assembler/disassembler.
1251     if (opts::CheckEncoding && !BC.MIB->isBranch(Instruction) &&
1252         !BC.MIB->isCall(Instruction) && !BC.MIB->isNoop(Instruction)) {
1253       if (!BC.validateInstructionEncoding(FunctionData.slice(Offset, Size))) {
1254         BC.errs() << "BOLT-WARNING: mismatching LLVM encoding detected in "
1255                   << "function " << *this << " for instruction:\n";
1256         BC.printInstruction(BC.errs(), Instruction, AbsoluteInstrAddr);
1257         BC.errs() << '\n';
1258       }
1259     }
1260 
1261     // Special handling for AVX-512 instructions.
1262     if (MIB->hasEVEXEncoding(Instruction)) {
1263       if (BC.HasRelocations && opts::TrapOnAVX512) {
1264         setTrapOnEntry();
1265         BC.TrappedFunctions.push_back(this);
1266         break;
1267       }
1268 
1269       if (!BC.validateInstructionEncoding(FunctionData.slice(Offset, Size))) {
1270         BC.errs() << "BOLT-WARNING: internal assembler/disassembler error "
1271                      "detected for AVX512 instruction:\n";
1272         BC.printInstruction(BC.errs(), Instruction, AbsoluteInstrAddr);
1273         BC.errs() << " in function " << *this << '\n';
1274         setIgnored();
1275         break;
1276       }
1277     }
1278 
1279     if (MIB->isBranch(Instruction) || MIB->isCall(Instruction)) {
1280       uint64_t TargetAddress = 0;
1281       if (MIB->evaluateBranch(Instruction, AbsoluteInstrAddr, Size,
1282                               TargetAddress)) {
1283         // Check if the target is within the same function. Otherwise it's
1284         // a call, possibly a tail call.
1285         //
1286         // If the target *is* the function address it could be either a branch
1287         // or a recursive call.
1288         bool IsCall = MIB->isCall(Instruction);
1289         const bool IsCondBranch = MIB->isConditionalBranch(Instruction);
1290         MCSymbol *TargetSymbol = nullptr;
1291 
1292         if (!BC.MIB->isReversibleBranch(Instruction)) {
1293           setIgnored();
1294           if (BinaryFunction *TargetFunc =
1295                   BC.getBinaryFunctionContainingAddress(TargetAddress))
1296             TargetFunc->setIgnored();
1297         }
1298 
1299         if (IsCall && containsAddress(TargetAddress)) {
1300           if (TargetAddress == getAddress()) {
1301             // Recursive call.
1302             TargetSymbol = getSymbol();
1303           } else {
1304             if (BC.isX86()) {
1305               // Dangerous old-style x86 PIC code. We may need to freeze this
1306               // function, so preserve the function as is for now.
1307               PreserveNops = true;
1308             } else {
1309               BC.errs() << "BOLT-WARNING: internal call detected at 0x"
1310                         << Twine::utohexstr(AbsoluteInstrAddr)
1311                         << " in function " << *this << ". Skipping.\n";
1312               IsSimple = false;
1313             }
1314           }
1315         }
1316 
1317         if (!TargetSymbol) {
1318           // Create either local label or external symbol.
1319           if (containsAddress(TargetAddress)) {
1320             TargetSymbol = getOrCreateLocalLabel(TargetAddress);
1321           } else {
1322             if (TargetAddress == getAddress() + getSize() &&
1323                 TargetAddress < getAddress() + getMaxSize() &&
1324                 !(BC.isAArch64() &&
1325                   BC.handleAArch64Veneer(TargetAddress, /*MatchOnly*/ true))) {
1326               // Result of __builtin_unreachable().
1327               LLVM_DEBUG(dbgs() << "BOLT-DEBUG: jump past end detected at 0x"
1328                                 << Twine::utohexstr(AbsoluteInstrAddr)
1329                                 << " in function " << *this
1330                                 << " : replacing with nop.\n");
1331               BC.MIB->createNoop(Instruction);
1332               if (IsCondBranch) {
1333                 // Register branch offset for profile validation.
1334                 IgnoredBranches.emplace_back(Offset, Offset + Size);
1335               }
1336               goto add_instruction;
1337             }
1338             // May update Instruction and IsCall
1339             TargetSymbol = handleExternalReference(Instruction, Size, Offset,
1340                                                    TargetAddress, IsCall);
1341           }
1342         }
1343 
1344         if (!IsCall) {
1345           // Add taken branch info.
1346           TakenBranches.emplace_back(Offset, TargetAddress - getAddress());
1347         }
1348         BC.MIB->replaceBranchTarget(Instruction, TargetSymbol, &*Ctx);
1349 
1350         // Mark CTC.
1351         if (IsCondBranch && IsCall)
1352           MIB->setConditionalTailCall(Instruction, TargetAddress);
1353       } else {
1354         // Could not evaluate branch. Should be an indirect call or an
1355         // indirect branch. Bail out on the latter case.
1356         if (MIB->isIndirectBranch(Instruction))
1357           handleIndirectBranch(Instruction, Size, Offset);
1358         // Indirect call. We only need to fix it if the operand is RIP-relative.
1359         if (IsSimple && MIB->hasPCRelOperand(Instruction)) {
1360           if (auto NewE = handleErrors(
1361                   handlePCRelOperand(Instruction, AbsoluteInstrAddr, Size),
1362                   [&](const BOLTError &E) -> Error {
1363                     if (E.isFatal())
1364                       return Error(std::make_unique<BOLTError>(std::move(E)));
1365                     if (!E.getMessage().empty())
1366                       E.log(BC.errs());
1367                     return Error::success();
1368                   })) {
1369             return Error(std::move(NewE));
1370           }
1371         }
1372 
1373         if (BC.isAArch64())
1374           handleAArch64IndirectCall(Instruction, Offset);
1375       }
1376     } else if (BC.isAArch64() || BC.isRISCV()) {
1377       // Check if there's a relocation associated with this instruction.
1378       bool UsedReloc = false;
1379       for (auto Itr = Relocations.lower_bound(Offset),
1380                 ItrE = Relocations.lower_bound(Offset + Size);
1381            Itr != ItrE; ++Itr) {
1382         const Relocation &Relocation = Itr->second;
1383         MCSymbol *Symbol = Relocation.Symbol;
1384 
1385         if (Relocation::isInstructionReference(Relocation.Type)) {
1386           uint64_t RefOffset = Relocation.Value - getAddress();
1387           LabelsMapType::iterator LI = InstructionLabels.find(RefOffset);
1388 
1389           if (LI == InstructionLabels.end()) {
1390             Symbol = BC.Ctx->createNamedTempSymbol();
1391             InstructionLabels.emplace(RefOffset, Symbol);
1392           } else {
1393             Symbol = LI->second;
1394           }
1395         }
1396 
1397         int64_t Value = Relocation.Value;
1398         const bool Result = BC.MIB->replaceImmWithSymbolRef(
1399             Instruction, Symbol, Relocation.Addend, Ctx.get(), Value,
1400             Relocation.Type);
1401         (void)Result;
1402         assert(Result && "cannot replace immediate with relocation");
1403 
1404         // For AArch64, if we replaced an immediate with a symbol from a
1405         // relocation, we mark it so we do not try to further process a
1406         // pc-relative operand. All we need is the symbol.
1407         UsedReloc = true;
1408       }
1409 
1410       if (!BC.isRISCV() && MIB->hasPCRelOperand(Instruction) && !UsedReloc) {
1411         if (auto NewE = handleErrors(
1412                 handlePCRelOperand(Instruction, AbsoluteInstrAddr, Size),
1413                 [&](const BOLTError &E) -> Error {
1414                   if (E.isFatal())
1415                     return Error(std::make_unique<BOLTError>(std::move(E)));
1416                   if (!E.getMessage().empty())
1417                     E.log(BC.errs());
1418                   return Error::success();
1419                 }))
1420           return Error(std::move(NewE));
1421       }
1422     }
1423 
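         // Common tail for every decoded instruction; also the goto target
         // used above when a problematic branch is replaced with a nop.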
1424 add_instruction:
1425     if (getDWARFLineTable()) {
1426       Instruction.setLoc(findDebugLineInformationForInstructionAt(
1427           AbsoluteInstrAddr, getDWARFUnit(), getDWARFLineTable()));
1428     }
1429 
1430     // Record offset of the instruction for profile matching.
1431     if (BC.keepOffsetForInstruction(Instruction))
1432       MIB->setOffset(Instruction, static_cast<uint32_t>(Offset));
1433 
1434     if (BC.isX86() && BC.MIB->isNoop(Instruction)) {
1435       // NOTE: disassembly loses the correct size information for noops on x86.
1436       //       E.g. nopw 0x0(%rax,%rax,1) is 9 bytes, but re-encoded it's only
1437       //       5 bytes. Preserve the size info using annotations.
1438       MIB->setSize(Instruction, Size);
1439     }
1440 
1441     addInstruction(Offset, std::move(Instruction));
1442   }
1443 
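       // Bind the labels created for instruction-reference relocations to the
       // instructions they point to.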
1444   for (auto [Offset, Label] : InstructionLabels) {
1445     InstrMapType::iterator II = Instructions.find(Offset);
1446     assert(II != Instructions.end() && "reference to non-existing instruction");
1447 
1448     BC.MIB->setInstLabel(II->second, Label);
1449   }
1450 
1451   // Reset symbolizer for the disassembler.
1452   BC.SymbolicDisAsm->setSymbolizer(nullptr);
1453 
1454   if (uint64_t Offset = getFirstInstructionOffset())
1455     Labels[Offset] = BC.Ctx->createNamedTempSymbol();
1456 
1457   clearList(Relocations);
1458 
1459   if (!IsSimple) {
1460     clearList(Instructions);
1461     return createNonFatalBOLTError("");
1462   }
1463 
1464   updateState(State::Disassembled);
1465 
1466   return Error::success();
1467 }
1468 
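     // Record a taken branch from Src to Dst within this function and return
     // the label at the destination, creating one if necessary.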
1469 MCSymbol *BinaryFunction::registerBranch(uint64_t Src, uint64_t Dst) {
1470   assert(CurrentState == State::Disassembled &&
1471          "Cannot register branch unless function is in disassembled state.");
1472   assert(containsAddress(Src) && containsAddress(Dst) &&
1473          "Cannot register external branch.");
1474   MCSymbol *Target = getOrCreateLocalLabel(Dst);
1475   TakenBranches.emplace_back(Src - getAddress(), Dst - getAddress());
1476   return Target;
1477 }
1478 
1479 bool BinaryFunction::scanExternalRefs() {
1480   bool Success = true;
1481   bool DisassemblyFailed = false;
1482 
1483   // Ignore pseudo functions.
1484   if (isPseudo())
1485     return Success;
1486 
1487   if (opts::NoScan) {
1488     clearList(Relocations);
1489     clearList(ExternallyReferencedOffsets);
1490 
1491     return false;
1492   }
1493 
1494   // List of external references for this function.
1495   std::vector<Relocation> FunctionRelocations;
1496 
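       // Use a dedicated code emitter: instructions are re-encoded below only
       // to collect fixups, which are then converted into relocations.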
1497   static BinaryContext::IndependentCodeEmitter Emitter =
1498       BC.createIndependentMCCodeEmitter();
1499 
1500   ErrorOr<ArrayRef<uint8_t>> ErrorOrFunctionData = getData();
1501   assert(ErrorOrFunctionData && "function data is not available");
1502   ArrayRef<uint8_t> FunctionData = *ErrorOrFunctionData;
1503   assert(FunctionData.size() == getMaxSize() &&
1504          "function size does not match raw data size");
1505 
1506   BC.SymbolicDisAsm->setSymbolizer(
1507       BC.MIB->createTargetSymbolizer(*this, /*CreateSymbols*/ false));
1508 
1509   // Disassemble contents of the function. Detect code entry points and create
1510   // relocations for references to code that will be moved.
1511   uint64_t Size = 0; // instruction size
1512   for (uint64_t Offset = 0; Offset < getSize(); Offset += Size) {
1513     // Check for data inside code and ignore it
1514     if (const size_t DataInCodeSize = getSizeOfDataInCodeAt(Offset)) {
1515       Size = DataInCodeSize;
1516       continue;
1517     }
1518 
1519     const uint64_t AbsoluteInstrAddr = getAddress() + Offset;
1520     MCInst Instruction;
1521     if (!BC.SymbolicDisAsm->getInstruction(Instruction, Size,
1522                                            FunctionData.slice(Offset),
1523                                            AbsoluteInstrAddr, nulls())) {
1524       if (opts::Verbosity >= 1 && !isZeroPaddingAt(Offset)) {
1525         BC.errs()
1526             << "BOLT-WARNING: unable to disassemble instruction at offset 0x"
1527             << Twine::utohexstr(Offset) << " (address 0x"
1528             << Twine::utohexstr(AbsoluteInstrAddr) << ") in function " << *this
1529             << '\n';
1530       }
1531       Success = false;
1532       DisassemblyFailed = true;
1533       break;
1534     }
1535 
1536     // Return true if we can skip handling the Target function reference.
1537     auto ignoreFunctionRef = [&](const BinaryFunction &Target) {
1538       if (&Target == this)
1539         return true;
1540 
1541       // Note that later we may decide not to emit Target function. In that
1542       // case, we conservatively create references that will be ignored or
1543       // resolved to the same function.
1544       if (!BC.shouldEmit(Target))
1545         return true;
1546 
1547       return false;
1548     };
1549 
1550     // Return true if we can ignore reference to the symbol.
1551     auto ignoreReference = [&](const MCSymbol *TargetSymbol) {
1552       if (!TargetSymbol)
1553         return true;
1554 
1555       if (BC.forceSymbolRelocations(TargetSymbol->getName()))
1556         return false;
1557 
1558       BinaryFunction *TargetFunction = BC.getFunctionForSymbol(TargetSymbol);
1559       if (!TargetFunction)
1560         return true;
1561 
1562       return ignoreFunctionRef(*TargetFunction);
1563     };
1564 
1565     // Handle calls and branches separately as symbolization doesn't work for
1566     // them yet.
1567     MCSymbol *BranchTargetSymbol = nullptr;
1568     if (BC.MIB->isCall(Instruction) || BC.MIB->isBranch(Instruction)) {
1569       uint64_t TargetAddress = 0;
1570       BC.MIB->evaluateBranch(Instruction, AbsoluteInstrAddr, Size,
1571                              TargetAddress);
1572 
1573       // Create an entry point at reference address if needed.
1574       BinaryFunction *TargetFunction =
1575           BC.getBinaryFunctionContainingAddress(TargetAddress);
1576 
1577       if (!TargetFunction || ignoreFunctionRef(*TargetFunction))
1578         continue;
1579 
1580       const uint64_t FunctionOffset =
1581           TargetAddress - TargetFunction->getAddress();
1582       BranchTargetSymbol =
1583           FunctionOffset ? TargetFunction->addEntryPointAtOffset(FunctionOffset)
1584                          : TargetFunction->getSymbol();
1585     }
1586 
1587     // Can't find more references. Not creating relocations since we are not
1588     // moving code.
1589     if (!BC.HasRelocations)
1590       continue;
1591 
1592     if (BranchTargetSymbol) {
1593       BC.MIB->replaceBranchTarget(Instruction, BranchTargetSymbol,
1594                                   Emitter.LocalCtx.get());
1595     } else if (!llvm::any_of(Instruction,
1596                              [](const MCOperand &Op) { return Op.isExpr(); })) {
1597       // Skip assembly if the instruction does not have any symbolic operands.
1598       continue;
1599     }
1600 
1601     // Emit the instruction using temp emitter and generate relocations.
1602     SmallString<256> Code;
1603     SmallVector<MCFixup, 4> Fixups;
1604     Emitter.MCE->encodeInstruction(Instruction, Code, Fixups, *BC.STI);
1605 
1606     // Create relocation for every fixup.
1607     for (const MCFixup &Fixup : Fixups) {
1608       std::optional<Relocation> Rel = BC.MIB->createRelocation(Fixup, *BC.MAB);
1609       if (!Rel) {
1610         Success = false;
1611         continue;
1612       }
1613 
1614       if (ignoreReference(Rel->Symbol))
1615         continue;
1616 
1617       if (Relocation::getSizeForType(Rel->Type) < 4) {
1618         // If the instruction uses a short form, then we might not be able
1619         // to handle the rewrite without relaxation, and hence cannot reliably
1620         // create an external reference relocation.
1621         Success = false;
1622         continue;
1623       }
1624       Rel->Offset += getAddress() - getOriginSection()->getAddress() + Offset;
1625       FunctionRelocations.push_back(*Rel);
1626     }
1627 
1628     if (!Success)
1629       break;
1630   }
1631 
1632   // Reset symbolizer for the disassembler.
1633   BC.SymbolicDisAsm->setSymbolizer(nullptr);
1634 
1635   // Add relocations unless disassembly failed for this function.
1636   if (!DisassemblyFailed)
1637     for (Relocation &Rel : FunctionRelocations)
1638       getOriginSection()->addPendingRelocation(Rel);
1639 
1640   // Inform BinaryContext that this function's symbols will not be defined and
1641   // relocations should not be created against them.
1642   if (BC.HasRelocations) {
1643     for (std::pair<const uint32_t, MCSymbol *> &LI : Labels)
1644       BC.UndefinedSymbols.insert(LI.second);
1645     for (MCSymbol *const EndLabel : FunctionEndLabels)
1646       if (EndLabel)
1647         BC.UndefinedSymbols.insert(EndLabel);
1648   }
1649 
1650   clearList(Relocations);
1651   clearList(ExternallyReferencedOffsets);
1652 
1653   if (Success && BC.HasRelocations)
1654     HasExternalRefRelocations = true;
1655 
1656   if (opts::Verbosity >= 1 && !Success)
1657     BC.outs() << "BOLT-INFO: failed to scan refs for " << *this << '\n';
1658 
1659   return Success;
1660 }
1661 
1662 void BinaryFunction::postProcessEntryPoints() {
1663   if (!isSimple())
1664     return;
1665 
1666   for (auto &KV : Labels) {
1667     MCSymbol *Label = KV.second;
1668     if (!getSecondaryEntryPointSymbol(Label))
1669       continue;
1670 
1671     // In non-relocation mode there's potentially an external undetectable
1672     // reference to the entry point and hence we cannot move this entry
1673     // point. Optimizing without moving could be difficult.
1674     // In BAT mode, register any known entry points for CFG construction.
1675     if (!BC.HasRelocations && !BC.HasBATSection)
1676       setSimple(false);
1677 
1678     const uint32_t Offset = KV.first;
1679 
1680     // If we are at Offset 0 and there is no instruction associated with it,
1681     // this means this is an empty function. Just ignore. If we find an
1682     // instruction at this offset, this entry point is valid.
1683     if (!Offset || getInstructionAtOffset(Offset))
1684       continue;
1685 
1686     // On AArch64 there are legitimate reasons to have references past the
1687     // end of the function, e.g. jump tables.
1688     if (BC.isAArch64() && Offset == getSize())
1689       continue;
1690 
1691     BC.errs() << "BOLT-WARNING: reference in the middle of instruction "
1692                  "detected in function "
1693               << *this << " at offset 0x" << Twine::utohexstr(Offset) << '\n';
1694     if (BC.HasRelocations)
1695       setIgnored();
1696     setSimple(false);
1697     return;
1698   }
1699 }
1700 
1701 void BinaryFunction::postProcessJumpTables() {
1702   // Create labels for all entries.
1703   for (auto &JTI : JumpTables) {
1704     JumpTable &JT = *JTI.second;
1705     if (JT.Type == JumpTable::JTT_PIC && opts::JumpTables == JTS_BASIC) {
1706       opts::JumpTables = JTS_MOVE;
1707       BC.outs() << "BOLT-INFO: forcing -jump-tables=move as PIC jump table was "
1708                    "detected in function "
1709                 << *this << '\n';
1710     }
1711     const uint64_t BDSize =
1712         BC.getBinaryDataAtAddress(JT.getAddress())->getSize();
1713     if (!BDSize) {
1714       BC.setBinaryDataSize(JT.getAddress(), JT.getSize());
1715     } else {
1716       assert(BDSize >= JT.getSize() &&
1717              "jump table cannot be larger than the containing object");
1718     }
1719     if (!JT.Entries.empty())
1720       continue;
1721 
1722     bool HasOneParent = (JT.Parents.size() == 1);
1723     for (uint64_t EntryAddress : JT.EntriesAsAddress) {
1724       // __builtin_unreachable() does not belong to any function and needs
1725       // to be handled separately.
1726       bool IsBuiltinUnreachable =
1727           llvm::any_of(JT.Parents, [&](const BinaryFunction *Parent) {
1728             return EntryAddress == Parent->getAddress() + Parent->getSize();
1729           });
1730       if (IsBuiltinUnreachable) {
1731         MCSymbol *Label = getOrCreateLocalLabel(EntryAddress, true);
1732         JT.Entries.push_back(Label);
1733         continue;
1734       }
1735       // Create a local label for targets that cannot be reached by other
1736       // fragments. Otherwise, create a secondary entry point in the target
1737       // function.
1738       BinaryFunction *TargetBF =
1739           BC.getBinaryFunctionContainingAddress(EntryAddress);
1740       MCSymbol *Label;
1741       if (HasOneParent && TargetBF == this) {
1742         Label = getOrCreateLocalLabel(EntryAddress, true);
1743       } else {
1744         const uint64_t Offset = EntryAddress - TargetBF->getAddress();
1745         Label = Offset ? TargetBF->addEntryPointAtOffset(Offset)
1746                        : TargetBF->getSymbol();
1747       }
1748       JT.Entries.push_back(Label);
1749     }
1750   }
1751 
1752   // Add TakenBranches from JumpTables.
1753   //
1754   // We want to do it after initial processing since we don't know jump tables'
1755   // boundaries until we process them all.
1756   for (auto &JTSite : JTSites) {
1757     const uint64_t JTSiteOffset = JTSite.first;
1758     const uint64_t JTAddress = JTSite.second;
1759     const JumpTable *JT = getJumpTableContainingAddress(JTAddress);
1760     assert(JT && "cannot find jump table for address");
1761 
1762     uint64_t EntryOffset = JTAddress - JT->getAddress();
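         // Walk the jump table entries starting from the referenced one and
         // record every target within this function as a taken branch.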
1763     while (EntryOffset < JT->getSize()) {
1764       uint64_t EntryAddress = JT->EntriesAsAddress[EntryOffset / JT->EntrySize];
1765       uint64_t TargetOffset = EntryAddress - getAddress();
1766       if (TargetOffset < getSize()) {
1767         TakenBranches.emplace_back(JTSiteOffset, TargetOffset);
1768 
1769         if (opts::StrictMode)
1770           registerReferencedOffset(TargetOffset);
1771       }
1772 
1773       EntryOffset += JT->EntrySize;
1774 
1775       // A label at the next entry means the end of this jump table.
1776       if (JT->Labels.count(EntryOffset))
1777         break;
1778     }
1779   }
1780   clearList(JTSites);
1781 
1782   // Conservatively populate all possible destinations for unknown indirect
1783   // branches.
1784   if (opts::StrictMode && hasInternalReference()) {
1785     for (uint64_t Offset : UnknownIndirectBranchOffsets) {
1786       for (uint64_t PossibleDestination : ExternallyReferencedOffsets) {
1787         // Ignore __builtin_unreachable().
1788         if (PossibleDestination == getSize())
1789           continue;
1790         TakenBranches.emplace_back(Offset, PossibleDestination);
1791       }
1792     }
1793   }
1794 }
1795 
1796 bool BinaryFunction::validateExternallyReferencedOffsets() {
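       // Collect all symbols that are recognized jump table targets so that
       // data references to them can be treated as claimed.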
1797   SmallPtrSet<MCSymbol *, 4> JTTargets;
1798   for (const JumpTable *JT : llvm::make_second_range(JumpTables))
1799     JTTargets.insert(JT->Entries.begin(), JT->Entries.end());
1800 
1801   bool HasUnclaimedReference = false;
1802   for (uint64_t Destination : ExternallyReferencedOffsets) {
1803     // Ignore __builtin_unreachable().
1804     if (Destination == getSize())
1805       continue;
1806     // Ignore constant islands
1807     if (isInConstantIsland(Destination + getAddress()))
1808       continue;
1809 
1810     if (BinaryBasicBlock *BB = getBasicBlockAtOffset(Destination)) {
1811       // Check if the externally referenced offset is a recognized jump table
1812       // target.
1813       if (JTTargets.contains(BB->getLabel()))
1814         continue;
1815 
1816       if (opts::Verbosity >= 1) {
1817         BC.errs() << "BOLT-WARNING: unclaimed data to code reference (possibly "
1818                   << "an unrecognized jump table entry) to " << BB->getName()
1819                   << " in " << *this << "\n";
1820       }
1821       auto L = BC.scopeLock();
1822       addEntryPoint(*BB);
1823     } else {
1824       BC.errs() << "BOLT-WARNING: unknown data to code reference to offset "
1825                 << Twine::utohexstr(Destination) << " in " << *this << "\n";
1826       setIgnored();
1827     }
1828     HasUnclaimedReference = true;
1829   }
1830   return !HasUnclaimedReference;
1831 }
1832 
1833 bool BinaryFunction::postProcessIndirectBranches(
1834     MCPlusBuilder::AllocatorIdTy AllocId) {
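       // Conservative fallback: mark the function as having unknown control
       // flow and make every externally referenced block a possible successor
       // of BB.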
1835   auto addUnknownControlFlow = [&](BinaryBasicBlock &BB) {
1836     LLVM_DEBUG(dbgs() << "BOLT-DEBUG: adding unknown control flow in " << *this
1837                       << " for " << BB.getName() << "\n");
1838     HasUnknownControlFlow = true;
1839     BB.removeAllSuccessors();
1840     for (uint64_t PossibleDestination : ExternallyReferencedOffsets)
1841       if (BinaryBasicBlock *SuccBB = getBasicBlockAtOffset(PossibleDestination))
1842         BB.addSuccessor(SuccBB);
1843   };
1844 
1845   uint64_t NumIndirectJumps = 0;
1846   MCInst *LastIndirectJump = nullptr;
1847   BinaryBasicBlock *LastIndirectJumpBB = nullptr;
1848   uint64_t LastJT = 0;
1849   uint16_t LastJTIndexReg = BC.MIB->getNoRegister();
1850   for (BinaryBasicBlock &BB : blocks()) {
1851     for (BinaryBasicBlock::iterator II = BB.begin(); II != BB.end(); ++II) {
1852       MCInst &Instr = *II;
1853       if (!BC.MIB->isIndirectBranch(Instr))
1854         continue;
1855 
1856       // If there's an indirect branch in a single-block function, it must
1857       // be a tail call.
1858       if (BasicBlocks.size() == 1) {
1859         BC.MIB->convertJmpToTailCall(Instr);
1860         return true;
1861       }
1862 
1863       ++NumIndirectJumps;
1864 
1865       if (opts::StrictMode && !hasInternalReference()) {
1866         BC.MIB->convertJmpToTailCall(Instr);
1867         break;
1868       }
1869 
1870       // Validate the tail call or jump table assumptions now that we know
1871       // basic block boundaries.
1872       if (BC.MIB->isTailCall(Instr) || BC.MIB->getJumpTable(Instr)) {
1873         const unsigned PtrSize = BC.AsmInfo->getCodePointerSize();
1874         MCInst *MemLocInstr;
1875         unsigned BaseRegNum, IndexRegNum;
1876         int64_t DispValue;
1877         const MCExpr *DispExpr;
1878         MCInst *PCRelBaseInstr;
1879         IndirectBranchType Type = BC.MIB->analyzeIndirectBranch(
1880             Instr, BB.begin(), II, PtrSize, MemLocInstr, BaseRegNum,
1881             IndexRegNum, DispValue, DispExpr, PCRelBaseInstr);
1882         if (Type != IndirectBranchType::UNKNOWN || MemLocInstr != nullptr)
1883           continue;
1884 
1885         if (!opts::StrictMode)
1886           return false;
1887 
1888         if (BC.MIB->isTailCall(Instr)) {
1889           BC.MIB->convertTailCallToJmp(Instr);
1890         } else {
1891           LastIndirectJump = &Instr;
1892           LastIndirectJumpBB = &BB;
1893           LastJT = BC.MIB->getJumpTable(Instr);
1894           LastJTIndexReg = BC.MIB->getJumpTableIndexReg(Instr);
1895           BC.MIB->unsetJumpTable(Instr);
1896 
1897           JumpTable *JT = BC.getJumpTableContainingAddress(LastJT);
1898           if (JT->Type == JumpTable::JTT_NORMAL) {
1899             // Invalidating the jump table may also invalidate other jump table
1900             // boundaries. Until we have or need support for this, mark the
1901             // function as non-simple.
1902             LLVM_DEBUG(dbgs() << "BOLT-DEBUG: rejected jump table reference "
1903                               << JT->getName() << " in " << *this << '\n');
1904             return false;
1905           }
1906         }
1907 
1908         addUnknownControlFlow(BB);
1909         continue;
1910       }
1911 
1912       // If this block contains epilogue code and has an indirect branch,
1913       // then most likely it's a tail call. Otherwise, we cannot tell for sure
1914       // what it is and conservatively reject the function's CFG.
1915       bool IsEpilogue = llvm::any_of(BB, [&](const MCInst &Instr) {
1916         return BC.MIB->isLeave(Instr) || BC.MIB->isPop(Instr);
1917       });
1918       if (IsEpilogue) {
1919         BC.MIB->convertJmpToTailCall(Instr);
1920         BB.removeAllSuccessors();
1921         continue;
1922       }
1923 
1924       if (opts::Verbosity >= 2) {
1925         BC.outs() << "BOLT-INFO: rejected potential indirect tail call in "
1926                   << "function " << *this << " in basic block " << BB.getName()
1927                   << ".\n";
1928         LLVM_DEBUG(BC.printInstructions(dbgs(), BB.begin(), BB.end(),
1929                                         BB.getOffset(), this, true));
1930       }
1931 
1932       if (!opts::StrictMode)
1933         return false;
1934 
1935       addUnknownControlFlow(BB);
1936     }
1937   }
1938 
1939   if (HasInternalLabelReference)
1940     return false;
1941 
1942   // If there's only one jump table, and one indirect jump, and no other
1943   // references, then we should be able to derive the jump table even if we
1944   // fail to match the pattern.
1945   if (HasUnknownControlFlow && NumIndirectJumps == 1 &&
1946       JumpTables.size() == 1 && LastIndirectJump &&
1947       !BC.getJumpTableContainingAddress(LastJT)->IsSplit) {
1948     LLVM_DEBUG(dbgs() << "BOLT-DEBUG: unsetting unknown control flow in "
1949                       << *this << '\n');
1950     BC.MIB->setJumpTable(*LastIndirectJump, LastJT, LastJTIndexReg, AllocId);
1951     HasUnknownControlFlow = false;
1952 
1953     LastIndirectJumpBB->updateJumpTableSuccessors();
1954   }
1955 
1956   // Validate that all data references to function offsets are claimed by
1957   // recognized jump tables. Register externally referenced blocks as entry
1958   // points.
1959   if (!opts::StrictMode && hasInternalReference()) {
1960     if (!validateExternallyReferencedOffsets())
1961       return false;
1962   }
1963 
1964   if (HasUnknownControlFlow && !BC.HasRelocations)
1965     return false;
1966 
1967   return true;
1968 }
1969 
1970 void BinaryFunction::recomputeLandingPads() {
1971   updateBBIndices(0);
1972 
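       // Drop the existing landing-pad and thrower links; they are rebuilt
       // below from the EH info attached to invoke instructions.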
1973   for (BinaryBasicBlock *BB : BasicBlocks) {
1974     BB->LandingPads.clear();
1975     BB->Throwers.clear();
1976   }
1977 
1978   for (BinaryBasicBlock *BB : BasicBlocks) {
1979     std::unordered_set<const BinaryBasicBlock *> BBLandingPads;
1980     for (MCInst &Instr : *BB) {
1981       if (!BC.MIB->isInvoke(Instr))
1982         continue;
1983 
1984       const std::optional<MCPlus::MCLandingPad> EHInfo =
1985           BC.MIB->getEHInfo(Instr);
1986       if (!EHInfo || !EHInfo->first)
1987         continue;
1988 
1989       BinaryBasicBlock *LPBlock = getBasicBlockForLabel(EHInfo->first);
1990       if (!BBLandingPads.count(LPBlock)) {
1991         BBLandingPads.insert(LPBlock);
1992         BB->LandingPads.emplace_back(LPBlock);
1993         LPBlock->Throwers.emplace_back(BB);
1994       }
1995     }
1996   }
1997 }
1998 
1999 Error BinaryFunction::buildCFG(MCPlusBuilder::AllocatorIdTy AllocatorId) {
2000   auto &MIB = BC.MIB;
2001 
2002   if (!isSimple()) {
2003     assert(!BC.HasRelocations &&
2004            "cannot process file with non-simple function in relocs mode");
2005     return createNonFatalBOLTError("");
2006   }
2007 
2008   if (CurrentState != State::Disassembled)
2009     return createNonFatalBOLTError("");
2010 
2011   assert(BasicBlocks.empty() && "basic block list should be empty");
2012   assert((Labels.find(getFirstInstructionOffset()) != Labels.end()) &&
2013          "first instruction should always have a label");
2014 
2015   // Create basic blocks in the original layout order:
2016   //
2017   //  * Every instruction with associated label marks
2018   //    the beginning of a basic block.
2019   //  * Conditional instruction marks the end of a basic block,
2020   //    except when the following instruction is an
2021   //    unconditional branch, and the unconditional branch is not
2022   //    a destination of another branch. In the latter case, the
2023   //    basic block will consist of a single unconditional branch
2024   //    (missed "double-jump" optimization).
2025   //
2026   // Created basic blocks are sorted in layout order since they are
2027   // created in the same order as instructions, and instructions are
2028   // sorted by offsets.
2029   BinaryBasicBlock *InsertBB = nullptr;
2030   BinaryBasicBlock *PrevBB = nullptr;
2031   bool IsLastInstrNop = false;
2032   // Offset of the last non-nop instruction.
2033   uint64_t LastInstrOffset = 0;
2034 
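       // Attach all CFI pseudo instructions registered for the given offset to
       // the end of the specified basic block.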
2035   auto addCFIPlaceholders = [this](uint64_t CFIOffset,
2036                                    BinaryBasicBlock *InsertBB) {
2037     for (auto FI = OffsetToCFI.lower_bound(CFIOffset),
2038               FE = OffsetToCFI.upper_bound(CFIOffset);
2039          FI != FE; ++FI) {
2040       addCFIPseudo(InsertBB, InsertBB->end(), FI->second);
2041     }
2042   };
2043 
2044   // For profiling purposes we need to save the offset of the last instruction
2045   // in the basic block.
2046   // NOTE: nops always have an Offset annotation. Annotate the last non-nop
2047   //       because older profiles ignored nops.
2048   auto updateOffset = [&](uint64_t Offset) {
2049     assert(PrevBB && PrevBB != InsertBB && "invalid previous block");
2050     MCInst *LastNonNop = nullptr;
2051     for (BinaryBasicBlock::reverse_iterator RII = PrevBB->getLastNonPseudo(),
2052                                             E = PrevBB->rend();
2053          RII != E; ++RII) {
2054       if (!BC.MIB->isPseudo(*RII) && !BC.MIB->isNoop(*RII)) {
2055         LastNonNop = &*RII;
2056         break;
2057       }
2058     }
2059     if (LastNonNop && !MIB->getOffset(*LastNonNop))
2060       MIB->setOffset(*LastNonNop, static_cast<uint32_t>(Offset));
2061   };
2062 
2063   for (auto I = Instructions.begin(), E = Instructions.end(); I != E; ++I) {
2064     const uint32_t Offset = I->first;
2065     MCInst &Instr = I->second;
2066 
2067     auto LI = Labels.find(Offset);
2068     if (LI != Labels.end()) {
2069       // Always create new BB at branch destination.
2070       PrevBB = InsertBB ? InsertBB : PrevBB;
2071       InsertBB = addBasicBlockAt(LI->first, LI->second);
2072       if (opts::PreserveBlocksAlignment && IsLastInstrNop)
2073         InsertBB->setDerivedAlignment();
2074 
2075       if (PrevBB)
2076         updateOffset(LastInstrOffset);
2077     }
2078 
2079     // Mark all nops with Offset for profile tracking purposes.
2080     if (MIB->isNoop(Instr) && !MIB->getOffset(Instr)) {
2081       // If "Offset" annotation is not present, set it and mark the nop for
2082       // deletion.
2083       MIB->setOffset(Instr, static_cast<uint32_t>(Offset));
2084       // Annotate ordinary nops, so we can safely delete them if required.
2085       MIB->addAnnotation(Instr, "NOP", static_cast<uint32_t>(1), AllocatorId);
2086     }
2087 
2088     if (!InsertBB) {
2089       // It must be a fallthrough or unreachable code. Create a new block unless
2090       // we see an unconditional branch following a conditional one, and that
2091       // conditional branch is not a conditional tail call.
2092       assert(PrevBB && "no previous basic block for a fall through");
2093       MCInst *PrevInstr = PrevBB->getLastNonPseudoInstr();
2094       assert(PrevInstr && "no previous instruction for a fall through");
2095       if (MIB->isUnconditionalBranch(Instr) &&
2096           !MIB->isIndirectBranch(*PrevInstr) &&
2097           !MIB->isUnconditionalBranch(*PrevInstr) &&
2098           !MIB->getConditionalTailCall(*PrevInstr) &&
2099           !MIB->isReturn(*PrevInstr)) {
2100         // Temporarily restore inserter basic block.
2101         InsertBB = PrevBB;
2102       } else {
2103         MCSymbol *Label;
2104         {
2105           auto L = BC.scopeLock();
2106           Label = BC.Ctx->createNamedTempSymbol("FT");
2107         }
2108         InsertBB = addBasicBlockAt(Offset, Label);
2109         if (opts::PreserveBlocksAlignment && IsLastInstrNop)
2110           InsertBB->setDerivedAlignment();
2111         updateOffset(LastInstrOffset);
2112       }
2113     }
2114     if (Offset == getFirstInstructionOffset()) {
2115       // Add associated CFI pseudos in the first offset
2116       addCFIPlaceholders(Offset, InsertBB);
2117     }
2118 
2119     const bool IsBlockEnd = MIB->isTerminator(Instr);
2120     IsLastInstrNop = MIB->isNoop(Instr);
2121     if (!IsLastInstrNop)
2122       LastInstrOffset = Offset;
2123     InsertBB->addInstruction(std::move(Instr));
2124 
2125     // Add associated CFI instrs. We always add the CFI instruction that is
2126     // located immediately after this instruction, since the next CFI
2127     // instruction reflects the change in state caused by this instruction.
2128     auto NextInstr = std::next(I);
2129     uint64_t CFIOffset;
2130     if (NextInstr != E)
2131       CFIOffset = NextInstr->first;
2132     else
2133       CFIOffset = getSize();
2134 
2135     // Note: this potentially invalidates instruction pointers/iterators.
2136     addCFIPlaceholders(CFIOffset, InsertBB);
2137 
2138     if (IsBlockEnd) {
2139       PrevBB = InsertBB;
2140       InsertBB = nullptr;
2141     }
2142   }
2143 
2144   if (BasicBlocks.empty()) {
2145     setSimple(false);
2146     return createNonFatalBOLTError("");
2147   }
2148 
2149   // Intermediate dump.
2150   LLVM_DEBUG(print(dbgs(), "after creating basic blocks"));
2151 
2152   // TODO: properly handle calls to no-return functions,
2153   // e.g. exit(3), etc. Otherwise we'll see false fall-through
2154   // blocks.
2155 
2156   // Remove duplicate branches. We can get a bunch of them from jump tables.
2157   // Without doing jump table value profiling we don't have a use for extra
2158   // (duplicate) branches.
2159   llvm::sort(TakenBranches);
2160   auto NewEnd = std::unique(TakenBranches.begin(), TakenBranches.end());
2161   TakenBranches.erase(NewEnd, TakenBranches.end());
2162 
2163   for (std::pair<uint32_t, uint32_t> &Branch : TakenBranches) {
2164     LLVM_DEBUG(dbgs() << "registering branch [0x"
2165                       << Twine::utohexstr(Branch.first) << "] -> [0x"
2166                       << Twine::utohexstr(Branch.second) << "]\n");
2167     BinaryBasicBlock *FromBB = getBasicBlockContainingOffset(Branch.first);
2168     BinaryBasicBlock *ToBB = getBasicBlockAtOffset(Branch.second);
2169     if (!FromBB || !ToBB) {
2170       if (!FromBB)
2171         BC.errs() << "BOLT-ERROR: cannot find BB containing the branch.\n";
2172       if (!ToBB)
2173         BC.errs()
2174             << "BOLT-ERROR: cannot find BB containing branch destination.\n";
2175       return createFatalBOLTError(BC.generateBugReportMessage(
2176           "disassembly failed - inconsistent branch found.", *this));
2177     }
2178 
2179     FromBB->addSuccessor(ToBB);
2180   }
2181 
2182   // Add fall-through branches.
2183   PrevBB = nullptr;
2184   bool IsPrevFT = false; // Is previous block a fall-through.
2185   for (BinaryBasicBlock *BB : BasicBlocks) {
2186     if (IsPrevFT)
2187       PrevBB->addSuccessor(BB);
2188 
2189     if (BB->empty()) {
2190       IsPrevFT = true;
2191       PrevBB = BB;
2192       continue;
2193     }
2194 
2195     MCInst *LastInstr = BB->getLastNonPseudoInstr();
2196     assert(LastInstr &&
2197            "should have non-pseudo instruction in non-empty block");
2198 
2199     if (BB->succ_size() == 0) {
2200       // Since there's no existing successors, we know the last instruction is
2201       // not a conditional branch. Thus if it's a terminator, it shouldn't be a
2202       // fall-through.
2203       //
2204       // Conditional tail call is a special case since we don't add a taken
2205       // branch successor for it.
2206       IsPrevFT = !MIB->isTerminator(*LastInstr) ||
2207                  MIB->getConditionalTailCall(*LastInstr);
2208     } else if (BB->succ_size() == 1) {
2209       IsPrevFT = MIB->isConditionalBranch(*LastInstr);
2210     } else {
2211       IsPrevFT = false;
2212     }
2213 
2214     PrevBB = BB;
2215   }
2216 
2217   // Assign landing pads and throwers info.
2218   recomputeLandingPads();
2219 
2220   // Assign CFI information to each BB entry.
2221   annotateCFIState();
2222 
2223   // Annotate invoke instructions with GNU_args_size data.
2224   propagateGnuArgsSizeInfo(AllocatorId);
2225 
2226   // Set the basic block layout to the original order and set end offsets.
2227   PrevBB = nullptr;
2228   for (BinaryBasicBlock *BB : BasicBlocks) {
2229     Layout.addBasicBlock(BB);
2230     if (PrevBB)
2231       PrevBB->setEndOffset(BB->getOffset());
2232     PrevBB = BB;
2233   }
2234   PrevBB->setEndOffset(getSize());
2235 
2236   Layout.updateLayoutIndices();
2237 
2238   normalizeCFIState();
2239 
2240   // Clean-up memory taken by intermediate structures.
2241   //
2242   // NB: don't clear Labels list as we may need them if we mark the function
2243   //     as non-simple later in the process of discovering extra entry points.
2244   clearList(Instructions);
2245   clearList(OffsetToCFI);
2246   clearList(TakenBranches);
2247 
2248   // Update the state.
2249   CurrentState = State::CFG;
2250 
2251   // Make any necessary adjustments for indirect branches.
2252   if (!postProcessIndirectBranches(AllocatorId)) {
2253     if (opts::Verbosity) {
2254       BC.errs() << "BOLT-WARNING: failed to post-process indirect branches for "
2255                 << *this << '\n';
2256     }
2257     // In relocation mode we want to keep processing the function but avoid
2258     // optimizing it.
2259     setSimple(false);
2260   }
2261 
2262   clearList(ExternallyReferencedOffsets);
2263   clearList(UnknownIndirectBranchOffsets);
2264 
2265   return Error::success();
2266 }
2267 
2268 void BinaryFunction::postProcessCFG() {
2269   if (isSimple() && !BasicBlocks.empty()) {
2270     // Convert conditional tail call branches to conditional branches that jump
2271     // to a tail call.
2272     removeConditionalTailCalls();
2273 
2274     postProcessProfile();
2275 
2276     // Eliminate inconsistencies between branch instructions and CFG.
2277     postProcessBranches();
2278   }
2279 
2280   calculateMacroOpFusionStats();
2281 
2282   // The final cleanup of intermediate structures.
2283   clearList(IgnoredBranches);
2284 
2285   // Remove "Offset" annotations, unless we need an address-translation table
2286   // later. This has no cost, since annotations are allocated by a bumpptr
2287   // allocator and won't be released anyway until late in the pipeline.
2288   if (!requiresAddressTranslation() && !opts::Instrument) {
2289     for (BinaryBasicBlock &BB : blocks())
2290       for (MCInst &Inst : BB)
2291         BC.MIB->clearOffset(Inst);
2292   }
2293 
2294   assert((!isSimple() || validateCFG()) &&
2295          "invalid CFG detected after post-processing");
2296 }
2297 
2298 void BinaryFunction::calculateMacroOpFusionStats() {
2299   if (!getBinaryContext().isX86())
2300     return;
2301   for (const BinaryBasicBlock &BB : blocks()) {
2302     auto II = BB.getMacroOpFusionPair();
2303     if (II == BB.end())
2304       continue;
2305 
2306     // Check offset of the second instruction.
2307     // FIXME: arch-specific.
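         // A fusible pair is counted as missed only when its second
         // instruction starts on a 64-byte boundary, i.e. the pair straddles a
         // cache line and cannot be macro-fused.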
2308     const uint32_t Offset = BC.MIB->getOffsetWithDefault(*std::next(II), 0);
2309     if (!Offset || (getAddress() + Offset) % 64)
2310       continue;
2311 
2312     LLVM_DEBUG(dbgs() << "\nmissed macro-op fusion at address 0x"
2313                       << Twine::utohexstr(getAddress() + Offset)
2314                       << " in function " << *this << "; executed "
2315                       << BB.getKnownExecutionCount() << " times.\n");
2316     ++BC.Stats.MissedMacroFusionPairs;
2317     BC.Stats.MissedMacroFusionExecCount += BB.getKnownExecutionCount();
2318   }
2319 }
2320 
2321 void BinaryFunction::removeTagsFromProfile() {
2322   for (BinaryBasicBlock *BB : BasicBlocks) {
2323     if (BB->ExecutionCount == BinaryBasicBlock::COUNT_NO_PROFILE)
2324       BB->ExecutionCount = 0;
2325     for (BinaryBasicBlock::BinaryBranchInfo &BI : BB->branch_info()) {
2326       if (BI.Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
2327           BI.MispredictedCount != BinaryBasicBlock::COUNT_NO_PROFILE)
2328         continue;
2329       BI.Count = 0;
2330       BI.MispredictedCount = 0;
2331     }
2332   }
2333 }
2334 
2335 void BinaryFunction::removeConditionalTailCalls() {
2336   // Blocks to be appended at the end.
2337   std::vector<std::unique_ptr<BinaryBasicBlock>> NewBlocks;
2338 
2339   for (auto BBI = begin(); BBI != end(); ++BBI) {
2340     BinaryBasicBlock &BB = *BBI;
2341     MCInst *CTCInstr = BB.getLastNonPseudoInstr();
2342     if (!CTCInstr)
2343       continue;
2344 
2345     std::optional<uint64_t> TargetAddressOrNone =
2346         BC.MIB->getConditionalTailCall(*CTCInstr);
2347     if (!TargetAddressOrNone)
2348       continue;
2349 
2350     // Gather all necessary information about CTC instruction before
2351     // annotations are destroyed.
2352     const int32_t CFIStateBeforeCTC = BB.getCFIStateAtInstr(CTCInstr);
2353     uint64_t CTCTakenCount = BinaryBasicBlock::COUNT_NO_PROFILE;
2354     uint64_t CTCMispredCount = BinaryBasicBlock::COUNT_NO_PROFILE;
2355     if (hasValidProfile()) {
2356       CTCTakenCount = BC.MIB->getAnnotationWithDefault<uint64_t>(
2357           *CTCInstr, "CTCTakenCount");
2358       CTCMispredCount = BC.MIB->getAnnotationWithDefault<uint64_t>(
2359           *CTCInstr, "CTCMispredCount");
2360     }
2361 
2362     // Assert that the tail call does not throw.
2363     assert(!BC.MIB->getEHInfo(*CTCInstr) &&
2364            "found tail call with associated landing pad");
2365 
2366     // Create a basic block with an unconditional tail call instruction using
2367     // the same destination.
2368     const MCSymbol *CTCTargetLabel = BC.MIB->getTargetSymbol(*CTCInstr);
2369     assert(CTCTargetLabel && "symbol expected for conditional tail call");
2370     MCInst TailCallInstr;
2371     BC.MIB->createTailCall(TailCallInstr, CTCTargetLabel, BC.Ctx.get());
2372 
2373     // Move offset from CTCInstr to TailCallInstr.
2374     if (const std::optional<uint32_t> Offset = BC.MIB->getOffset(*CTCInstr)) {
2375       BC.MIB->setOffset(TailCallInstr, *Offset);
2376       BC.MIB->clearOffset(*CTCInstr);
2377     }
2378 
2379     // Link new BBs to the original input offset of the BB where the CTC
2380     // is, so we can map samples recorded in new BBs back to the original BB
2381     // seen in the input binary (if using BAT).
2382     std::unique_ptr<BinaryBasicBlock> TailCallBB =
2383         createBasicBlock(BC.Ctx->createNamedTempSymbol("TC"));
2384     TailCallBB->setOffset(BB.getInputOffset());
2385     TailCallBB->addInstruction(TailCallInstr);
2386     TailCallBB->setCFIState(CFIStateBeforeCTC);
2387 
2388     // Add CFG edge with profile info from BB to TailCallBB.
2389     BB.addSuccessor(TailCallBB.get(), CTCTakenCount, CTCMispredCount);
2390 
2391     // Add execution count for the block.
2392     TailCallBB->setExecutionCount(CTCTakenCount);
2393 
2394     BC.MIB->convertTailCallToJmp(*CTCInstr);
2395 
2396     BC.MIB->replaceBranchTarget(*CTCInstr, TailCallBB->getLabel(),
2397                                 BC.Ctx.get());
2398 
2399     // Add basic block to the list that will be added to the end.
2400     NewBlocks.emplace_back(std::move(TailCallBB));
2401 
2402     // Swap edges as the TailCallBB corresponds to the taken branch.
2403     BB.swapConditionalSuccessors();
2404 
2405     // This branch is no longer a conditional tail call.
2406     BC.MIB->unsetConditionalTailCall(*CTCInstr);
2407   }
2408 
2409   insertBasicBlocks(std::prev(end()), std::move(NewBlocks),
2410                     /* UpdateLayout */ true,
2411                     /* UpdateCFIState */ false);
2412 }
2413 
2414 uint64_t BinaryFunction::getFunctionScore() const {
2415   if (FunctionScore != -1)
2416     return FunctionScore;
2417 
2418   if (!isSimple() || !hasValidProfile()) {
2419     FunctionScore = 0;
2420     return FunctionScore;
2421   }
2422 
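       // The score is an estimate of the dynamic instruction count: the sum
       // over all blocks of execution count times the number of non-pseudo
       // instructions.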
2423   uint64_t TotalScore = 0ULL;
2424   for (const BinaryBasicBlock &BB : blocks()) {
2425     uint64_t BBExecCount = BB.getExecutionCount();
2426     if (BBExecCount == BinaryBasicBlock::COUNT_NO_PROFILE)
2427       continue;
2428     TotalScore += BBExecCount * BB.getNumNonPseudos();
2429   }
2430   FunctionScore = TotalScore;
2431   return FunctionScore;
2432 }
2433 
2434 void BinaryFunction::annotateCFIState() {
2435   assert(CurrentState == State::Disassembled && "unexpected function state");
2436   assert(!BasicBlocks.empty() && "basic block list should not be empty");
2437 
2438   // This is an index of the last processed CFI in FDE CFI program.
2439   uint32_t State = 0;
2440 
2441   // This is an index of RememberState CFI reflecting effective state right
2442   // after execution of RestoreState CFI.
2443   //
2444   // It differs from State iff the CFI at (State-1)
2445   // was RestoreState (modulo GNU_args_size CFIs, which are ignored).
2446   //
2447   // This allows us to generate shorter replay sequences when producing new
2448   // CFI programs.
2449   uint32_t EffectiveState = 0;
2450 
2451   // For tracking RememberState/RestoreState sequences.
2452   std::stack<uint32_t> StateStack;
2453 
2454   for (BinaryBasicBlock *BB : BasicBlocks) {
2455     BB->setCFIState(EffectiveState);
2456 
2457     for (const MCInst &Instr : *BB) {
2458       const MCCFIInstruction *CFI = getCFIFor(Instr);
2459       if (!CFI)
2460         continue;
2461 
2462       ++State;
2463 
2464       switch (CFI->getOperation()) {
2465       case MCCFIInstruction::OpRememberState:
2466         StateStack.push(EffectiveState);
2467         EffectiveState = State;
2468         break;
2469       case MCCFIInstruction::OpRestoreState:
2470         assert(!StateStack.empty() && "corrupt CFI stack");
2471         EffectiveState = StateStack.top();
2472         StateStack.pop();
2473         break;
2474       case MCCFIInstruction::OpGnuArgsSize:
2475         // OpGnuArgsSize CFIs do not affect the CFI state.
2476         break;
2477       default:
2478         // Any other CFI updates the state.
2479         EffectiveState = State;
2480         break;
2481       }
2482     }
2483   }
2484 
2485   assert(StateStack.empty() && "corrupt CFI stack");
2486 }
2487 
2488 namespace {
2489 
2490 /// Our full interpretation of a DWARF CFI machine state at a given point
2491 struct CFISnapshot {
2492   /// CFA register number and offset defining the canonical frame at this
2493   /// point, or the number of a rule (CFI state) that computes it with a
2494   /// DWARF expression. This number will be negative if it refers to a CFI
2495   /// located in the CIE instead of the FDE.
2496   uint32_t CFAReg;
2497   int32_t CFAOffset;
2498   int32_t CFARule;
2499   /// Mapping of rules (CFI states) that define the location of each
2500   /// register. If absent, no rule defining the location of such register
2501   /// was ever read. This number will be negative if it refers to a CFI
2502   /// located in the CIE instead of the FDE.
2503   DenseMap<int32_t, int32_t> RegRule;
2504 
2505   /// References to CIE, FDE and expanded instructions after a restore state
2506   const BinaryFunction::CFIInstrMapType &CIE;
2507   const BinaryFunction::CFIInstrMapType &FDE;
2508   const DenseMap<int32_t, SmallVector<int32_t, 4>> &FrameRestoreEquivalents;
2509 
2510   /// Current FDE CFI number representing the state where the snapshot is at
2511   int32_t CurState;
2512 
2513   /// Used when we don't have information about which state/rule to apply
2514   /// to recover the location of either the CFA or a specific register
2515   constexpr static int32_t UNKNOWN = std::numeric_limits<int32_t>::min();
2516 
2517 private:
2518   /// Update our snapshot by executing a single CFI
2519   void update(const MCCFIInstruction &Instr, int32_t RuleNumber) {
2520     switch (Instr.getOperation()) {
2521     case MCCFIInstruction::OpSameValue:
2522     case MCCFIInstruction::OpRelOffset:
2523     case MCCFIInstruction::OpOffset:
2524     case MCCFIInstruction::OpRestore:
2525     case MCCFIInstruction::OpUndefined:
2526     case MCCFIInstruction::OpRegister:
2527       RegRule[Instr.getRegister()] = RuleNumber;
2528       break;
2529     case MCCFIInstruction::OpDefCfaRegister:
2530       CFAReg = Instr.getRegister();
2531       CFARule = UNKNOWN;
2532 
2533       // This shouldn't happen according to the spec but GNU binutils on RISC-V
2534       // emits a DW_CFA_def_cfa_register in CIEs, which leaves the offset
2535       // unspecified. Both readelf and llvm-dwarfdump interpret the offset as 0
2536       // in this case so let's do the same.
2537       if (CFAOffset == UNKNOWN)
2538         CFAOffset = 0;
2539       break;
2540     case MCCFIInstruction::OpDefCfaOffset:
2541       CFAOffset = Instr.getOffset();
2542       CFARule = UNKNOWN;
2543       break;
2544     case MCCFIInstruction::OpDefCfa:
2545       CFAReg = Instr.getRegister();
2546       CFAOffset = Instr.getOffset();
2547       CFARule = UNKNOWN;
2548       break;
2549     case MCCFIInstruction::OpEscape: {
2550       std::optional<uint8_t> Reg =
2551           readDWARFExpressionTargetReg(Instr.getValues());
2552       // Handle DW_CFA_def_cfa_expression
2553       if (!Reg) {
2554         CFARule = RuleNumber;
2555         break;
2556       }
2557       RegRule[*Reg] = RuleNumber;
2558       break;
2559     }
2560     case MCCFIInstruction::OpAdjustCfaOffset:
2561     case MCCFIInstruction::OpWindowSave:
2562     case MCCFIInstruction::OpNegateRAState:
2563     case MCCFIInstruction::OpLLVMDefAspaceCfa:
2564       llvm_unreachable("unsupported CFI opcode");
2565       break;
2566     case MCCFIInstruction::OpRememberState:
2567     case MCCFIInstruction::OpRestoreState:
2568     case MCCFIInstruction::OpGnuArgsSize:
2569       // do not affect CFI state
2570       break;
2571     }
2572   }
2573 
2574 public:
2575   /// Advance state reading FDE CFI instructions up to State number
2576   void advanceTo(int32_t State) {
2577     for (int32_t I = CurState, E = State; I != E; ++I) {
2578       const MCCFIInstruction &Instr = FDE[I];
2579       if (Instr.getOperation() != MCCFIInstruction::OpRestoreState) {
2580         update(Instr, I);
2581         continue;
2582       }
2583       // If restore state instruction, fetch the equivalent CFIs that have
2584       // the same effect as this restore. This is used to ensure remember-
2585       // restore pairs are completely removed.
2586       auto Iter = FrameRestoreEquivalents.find(I);
2587       if (Iter == FrameRestoreEquivalents.end())
2588         continue;
2589       for (int32_t RuleNumber : Iter->second)
2590         update(FDE[RuleNumber], RuleNumber);
2591     }
2592 
2593     assert(((CFAReg != (uint32_t)UNKNOWN && CFAOffset != UNKNOWN) ||
2594             CFARule != UNKNOWN) &&
2595            "CIE did not define default CFA?");
2596 
2597     CurState = State;
2598   }
2599 
2600   /// Interpret all CIE and FDE instructions up until CFI State number and
2601   /// populate this snapshot
2602   CFISnapshot(
2603       const BinaryFunction::CFIInstrMapType &CIE,
2604       const BinaryFunction::CFIInstrMapType &FDE,
2605       const DenseMap<int32_t, SmallVector<int32_t, 4>> &FrameRestoreEquivalents,
2606       int32_t State)
2607       : CIE(CIE), FDE(FDE), FrameRestoreEquivalents(FrameRestoreEquivalents) {
2608     CFAReg = UNKNOWN;
2609     CFAOffset = UNKNOWN;
2610     CFARule = UNKNOWN;
2611     CurState = 0;
2612 
2613     for (int32_t I = 0, E = CIE.size(); I != E; ++I) {
2614       const MCCFIInstruction &Instr = CIE[I];
2615       update(Instr, -I);
2616     }
2617 
2618     advanceTo(State);
2619   }
2620 };
2621 
2622 /// A CFI snapshot with the capability of checking if incremental additions to
2623 /// it are redundant. This is used to ensure we do not emit two CFI instructions
2624 /// back-to-back that are doing the same state change, or to avoid emitting a
2625 /// CFI at all when the state at that point would not be modified after that CFI
2626 struct CFISnapshotDiff : public CFISnapshot {
2627   bool RestoredCFAReg{false};
2628   bool RestoredCFAOffset{false};
2629   DenseMap<int32_t, bool> RestoredRegs;
2630 
2631   CFISnapshotDiff(const CFISnapshot &S) : CFISnapshot(S) {}
2632 
2633   CFISnapshotDiff(
2634       const BinaryFunction::CFIInstrMapType &CIE,
2635       const BinaryFunction::CFIInstrMapType &FDE,
2636       const DenseMap<int32_t, SmallVector<int32_t, 4>> &FrameRestoreEquivalents,
2637       int32_t State)
2638       : CFISnapshot(CIE, FDE, FrameRestoreEquivalents, State) {}
2639 
2640   /// Return true if applying Instr to this state is redundant and can be
2641   /// dismissed.
2642   bool isRedundant(const MCCFIInstruction &Instr) {
2643     switch (Instr.getOperation()) {
2644     case MCCFIInstruction::OpSameValue:
2645     case MCCFIInstruction::OpRelOffset:
2646     case MCCFIInstruction::OpOffset:
2647     case MCCFIInstruction::OpRestore:
2648     case MCCFIInstruction::OpUndefined:
2649     case MCCFIInstruction::OpRegister:
2650     case MCCFIInstruction::OpEscape: {
2651       uint32_t Reg;
2652       if (Instr.getOperation() != MCCFIInstruction::OpEscape) {
2653         Reg = Instr.getRegister();
2654       } else {
2655         std::optional<uint8_t> R =
2656             readDWARFExpressionTargetReg(Instr.getValues());
2657         // Handle DW_CFA_def_cfa_expression
2658         if (!R) {
2659           if (RestoredCFAReg && RestoredCFAOffset)
2660             return true;
2661           RestoredCFAReg = true;
2662           RestoredCFAOffset = true;
2663           return false;
2664         }
2665         Reg = *R;
2666       }
2667       if (RestoredRegs[Reg])
2668         return true;
2669       RestoredRegs[Reg] = true;
2670       const int32_t CurRegRule = RegRule.contains(Reg) ? RegRule[Reg] : UNKNOWN;
2671       if (CurRegRule == UNKNOWN) {
2672         if (Instr.getOperation() == MCCFIInstruction::OpRestore ||
2673             Instr.getOperation() == MCCFIInstruction::OpSameValue)
2674           return true;
2675         return false;
2676       }
2677       const MCCFIInstruction &LastDef =
2678           CurRegRule < 0 ? CIE[-CurRegRule] : FDE[CurRegRule];
2679       return LastDef == Instr;
2680     }
2681     case MCCFIInstruction::OpDefCfaRegister:
2682       if (RestoredCFAReg)
2683         return true;
2684       RestoredCFAReg = true;
2685       return CFAReg == Instr.getRegister();
2686     case MCCFIInstruction::OpDefCfaOffset:
2687       if (RestoredCFAOffset)
2688         return true;
2689       RestoredCFAOffset = true;
2690       return CFAOffset == Instr.getOffset();
2691     case MCCFIInstruction::OpDefCfa:
2692       if (RestoredCFAReg && RestoredCFAOffset)
2693         return true;
2694       RestoredCFAReg = true;
2695       RestoredCFAOffset = true;
2696       return CFAReg == Instr.getRegister() && CFAOffset == Instr.getOffset();
2697     case MCCFIInstruction::OpAdjustCfaOffset:
2698     case MCCFIInstruction::OpWindowSave:
2699     case MCCFIInstruction::OpNegateRAState:
2700     case MCCFIInstruction::OpLLVMDefAspaceCfa:
2701       llvm_unreachable("unsupported CFI opcode");
2702       return false;
2703     case MCCFIInstruction::OpRememberState:
2704     case MCCFIInstruction::OpRestoreState:
2705     case MCCFIInstruction::OpGnuArgsSize:
2706       // do not affect CFI state
2707       return true;
2708     }
2709     return false;
2710   }
2711 };
2712 
2713 } // end anonymous namespace
2714 
2715 bool BinaryFunction::replayCFIInstrs(int32_t FromState, int32_t ToState,
2716                                      BinaryBasicBlock *InBB,
2717                                      BinaryBasicBlock::iterator InsertIt) {
2718   if (FromState == ToState)
2719     return true;
2720   assert(FromState < ToState && "can only replay CFIs forward");
2721 
2722   CFISnapshotDiff CFIDiff(CIEFrameInstructions, FrameInstructions,
2723                           FrameRestoreEquivalents, FromState);
2724 
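       // Collect the CFI states to replay, expanding every RestoreState into
       // the precomputed sequence of CFIs with the equivalent effect.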
2725   std::vector<uint32_t> NewCFIs;
2726   for (int32_t CurState = FromState; CurState < ToState; ++CurState) {
2727     MCCFIInstruction *Instr = &FrameInstructions[CurState];
2728     if (Instr->getOperation() == MCCFIInstruction::OpRestoreState) {
2729       auto Iter = FrameRestoreEquivalents.find(CurState);
2730       assert(Iter != FrameRestoreEquivalents.end());
2731       NewCFIs.insert(NewCFIs.end(), Iter->second.begin(), Iter->second.end());
2732       // RestoreState / Remember will be filtered out later by CFISnapshotDiff,
2733       // so we might as well fall through here.
2734     }
2735     NewCFIs.push_back(CurState);
2736   }
2737 
2738   // Replay instructions while avoiding duplicates
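       // The list is traversed in reverse because each addCFIPseudo call
       // inserts at InsertIt without advancing it, so later insertions land in
       // front of earlier ones and the final order matches the state order.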
2739   for (int32_t State : llvm::reverse(NewCFIs)) {
2740     if (CFIDiff.isRedundant(FrameInstructions[State]))
2741       continue;
2742     InsertIt = addCFIPseudo(InBB, InsertIt, State);
2743   }
2744 
2745   return true;
2746 }
2747 
2748 SmallVector<int32_t, 4>
2749 BinaryFunction::unwindCFIState(int32_t FromState, int32_t ToState,
2750                                BinaryBasicBlock *InBB,
2751                                BinaryBasicBlock::iterator &InsertIt) {
2752   SmallVector<int32_t, 4> NewStates;
2753 
2754   CFISnapshot ToCFITable(CIEFrameInstructions, FrameInstructions,
2755                          FrameRestoreEquivalents, ToState);
2756   CFISnapshotDiff FromCFITable(ToCFITable);
2757   FromCFITable.advanceTo(FromState);
2758 
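       // Emit a CFI restoring the CFA definition recorded in ToCFITable,
       // skipping it when FromCFITable shows that the CFI would be redundant.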
2759   auto undoStateDefCfa = [&]() {
2760     if (ToCFITable.CFARule == CFISnapshot::UNKNOWN) {
2761       FrameInstructions.emplace_back(MCCFIInstruction::cfiDefCfa(
2762           nullptr, ToCFITable.CFAReg, ToCFITable.CFAOffset));
2763       if (FromCFITable.isRedundant(FrameInstructions.back())) {
2764         FrameInstructions.pop_back();
2765         return;
2766       }
2767       NewStates.push_back(FrameInstructions.size() - 1);
2768       InsertIt = addCFIPseudo(InBB, InsertIt, FrameInstructions.size() - 1);
2769       ++InsertIt;
2770     } else if (ToCFITable.CFARule < 0) {
2771       if (FromCFITable.isRedundant(CIEFrameInstructions[-ToCFITable.CFARule]))
2772         return;
2773       NewStates.push_back(FrameInstructions.size());
2774       InsertIt = addCFIPseudo(InBB, InsertIt, FrameInstructions.size());
2775       ++InsertIt;
2776       FrameInstructions.emplace_back(CIEFrameInstructions[-ToCFITable.CFARule]);
2777     } else if (!FromCFITable.isRedundant(
2778                    FrameInstructions[ToCFITable.CFARule])) {
2779       NewStates.push_back(ToCFITable.CFARule);
2780       InsertIt = addCFIPseudo(InBB, InsertIt, ToCFITable.CFARule);
2781       ++InsertIt;
2782     }
2783   };
2784 
2785   auto undoState = [&](const MCCFIInstruction &Instr) {
2786     switch (Instr.getOperation()) {
2787     case MCCFIInstruction::OpRememberState:
2788     case MCCFIInstruction::OpRestoreState:
2789       break;
2790     case MCCFIInstruction::OpSameValue:
2791     case MCCFIInstruction::OpRelOffset:
2792     case MCCFIInstruction::OpOffset:
2793     case MCCFIInstruction::OpRestore:
2794     case MCCFIInstruction::OpUndefined:
2795     case MCCFIInstruction::OpEscape:
2796     case MCCFIInstruction::OpRegister: {
2797       uint32_t Reg;
2798       if (Instr.getOperation() != MCCFIInstruction::OpEscape) {
2799         Reg = Instr.getRegister();
2800       } else {
2801         std::optional<uint8_t> R =
2802             readDWARFExpressionTargetReg(Instr.getValues());
2803         // Handle DW_CFA_def_cfa_expression
2804         if (!R) {
2805           undoStateDefCfa();
2806           return;
2807         }
2808         Reg = *R;
2809       }
2810 
2811       if (!ToCFITable.RegRule.contains(Reg)) {
2812         FrameInstructions.emplace_back(
2813             MCCFIInstruction::createRestore(nullptr, Reg));
2814         if (FromCFITable.isRedundant(FrameInstructions.back())) {
2815           FrameInstructions.pop_back();
2816           break;
2817         }
2818         NewStates.push_back(FrameInstructions.size() - 1);
2819         InsertIt = addCFIPseudo(InBB, InsertIt, FrameInstructions.size() - 1);
2820         ++InsertIt;
2821         break;
2822       }
2823       const int32_t Rule = ToCFITable.RegRule[Reg];
2824       if (Rule < 0) {
2825         if (FromCFITable.isRedundant(CIEFrameInstructions[-Rule]))
2826           break;
2827         NewStates.push_back(FrameInstructions.size());
2828         InsertIt = addCFIPseudo(InBB, InsertIt, FrameInstructions.size());
2829         ++InsertIt;
2830         FrameInstructions.emplace_back(CIEFrameInstructions[-Rule]);
2831         break;
2832       }
2833       if (FromCFITable.isRedundant(FrameInstructions[Rule]))
2834         break;
2835       NewStates.push_back(Rule);
2836       InsertIt = addCFIPseudo(InBB, InsertIt, Rule);
2837       ++InsertIt;
2838       break;
2839     }
2840     case MCCFIInstruction::OpDefCfaRegister:
2841     case MCCFIInstruction::OpDefCfaOffset:
2842     case MCCFIInstruction::OpDefCfa:
2843       undoStateDefCfa();
2844       break;
2845     case MCCFIInstruction::OpAdjustCfaOffset:
2846     case MCCFIInstruction::OpWindowSave:
2847     case MCCFIInstruction::OpNegateRAState:
2848     case MCCFIInstruction::OpLLVMDefAspaceCfa:
2849       llvm_unreachable("unsupported CFI opcode");
2850       break;
2851     case MCCFIInstruction::OpGnuArgsSize:
2852       // do not affect CFI state
2853       break;
2854     }
2855   };
2856 
2857   // Undo all modifications from ToState to FromState
2858   for (int32_t I = ToState, E = FromState; I != E; ++I) {
2859     const MCCFIInstruction &Instr = FrameInstructions[I];
2860     if (Instr.getOperation() != MCCFIInstruction::OpRestoreState) {
2861       undoState(Instr);
2862       continue;
2863     }
2864     auto Iter = FrameRestoreEquivalents.find(I);
2865     if (Iter == FrameRestoreEquivalents.end())
2866       continue;
2867     for (int32_t State : Iter->second)
2868       undoState(FrameInstructions[State]);
2869   }
2870 
2871   return NewStates;
2872 }
2873 
2874 void BinaryFunction::normalizeCFIState() {
2875   // Reordering blocks with remember-restore state instructions can be
2876   // especially tricky. When rewriting the CFI, we omit remember-restore
2877   // state instructions entirely. For each restore, we build a map expanding
2878   // it to the equivalent unwindCFIState sequence required at that point to
2879   // achieve the same effect as the restore; remember states are just ignored.
2880   std::stack<int32_t> Stack;
2881   for (BinaryBasicBlock *CurBB : Layout.blocks()) {
2882     for (auto II = CurBB->begin(); II != CurBB->end(); ++II) {
2883       if (const MCCFIInstruction *CFI = getCFIFor(*II)) {
2884         if (CFI->getOperation() == MCCFIInstruction::OpRememberState) {
2885           Stack.push(II->getOperand(0).getImm());
2886           continue;
2887         }
2888         if (CFI->getOperation() == MCCFIInstruction::OpRestoreState) {
2889           const int32_t RememberState = Stack.top();
2890           const int32_t CurState = II->getOperand(0).getImm();
2891           FrameRestoreEquivalents[CurState] =
2892               unwindCFIState(CurState, RememberState, CurBB, II);
2893           Stack.pop();
2894         }
2895       }
2896     }
2897   }
2898 }
2899 
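// Walk the final block layout fragment by fragment and patch up the CFI state:
// when a block expects a lower state than the one currently in effect, the
// extra CFIs are undone via unwindCFIState; when it expects a higher state, the
// missing CFIs are replayed via replayCFIInstrs. Remember/restore-state pseudo
// instructions are removed at the end, since normalizeCFIState has already
// expanded them. Returns false if replaying the missing CFIs would require a
// remember/restore-state sequence.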
2900 bool BinaryFunction::finalizeCFIState() {
2901   LLVM_DEBUG(
2902       dbgs() << "Trying to fix CFI states for each BB after reordering.\n");
2903   LLVM_DEBUG(dbgs() << "This is the list of CFI states for each BB of " << *this
2904                     << ": ");
2905 
2906   const char *Sep = "";
2907   (void)Sep;
2908   for (FunctionFragment &FF : Layout.fragments()) {
2909     // Hot-cold border: at start of each region (with a different FDE) we need
2910     // to reset the CFI state.
2911     int32_t State = 0;
2912 
2913     for (BinaryBasicBlock *BB : FF) {
2914       const int32_t CFIStateAtExit = BB->getCFIStateAtExit();
2915 
2916       // We need to recover the correct state if it doesn't match expected
2917       // state at BB entry point.
2918       if (BB->getCFIState() < State) {
2919         // In this case, State is currently higher than what this BB expects
2920         // it to be. To solve this, we need to insert CFI instructions to undo
2921         // the effect of all CFIs from BB's state to the current State.
2922         auto InsertIt = BB->begin();
2923         unwindCFIState(State, BB->getCFIState(), BB, InsertIt);
2924       } else if (BB->getCFIState() > State) {
2925         // If BB's CFI state is greater than State, it means we are behind in
2926         // the state. Just emit all instructions to reach this state at the
2927         // beginning of this BB. If this sequence of instructions involves
2928         // remember state or restore state, bail out.
2929         if (!replayCFIInstrs(State, BB->getCFIState(), BB, BB->begin()))
2930           return false;
2931       }
2932 
2933       State = CFIStateAtExit;
2934       LLVM_DEBUG(dbgs() << Sep << State; Sep = ", ");
2935     }
2936   }
2937   LLVM_DEBUG(dbgs() << "\n");
2938 
2939   for (BinaryBasicBlock &BB : blocks()) {
2940     for (auto II = BB.begin(); II != BB.end();) {
2941       const MCCFIInstruction *CFI = getCFIFor(*II);
2942       if (CFI && (CFI->getOperation() == MCCFIInstruction::OpRememberState ||
2943                   CFI->getOperation() == MCCFIInstruction::OpRestoreState)) {
2944         II = BB.eraseInstruction(II);
2945       } else {
2946         ++II;
2947       }
2948     }
2949   }
2950 
2951   return true;
2952 }
2953 
2954 bool BinaryFunction::requiresAddressTranslation() const {
2955   return opts::EnableBAT || hasSDTMarker() || hasPseudoProbe();
2956 }
2957 
2958 bool BinaryFunction::requiresAddressMap() const {
2959   if (isInjected())
2960     return false;
2961 
2962   return opts::UpdateDebugSections || isMultiEntry() ||
2963          requiresAddressTranslation();
2964 }
2965 
2966 uint64_t BinaryFunction::getInstructionCount() const {
2967   uint64_t Count = 0;
2968   for (const BinaryBasicBlock &BB : blocks())
2969     Count += BB.getNumNonPseudos();
2970   return Count;
2971 }
2972 
2973 void BinaryFunction::clearDisasmState() {
2974   clearList(Instructions);
2975   clearList(IgnoredBranches);
2976   clearList(TakenBranches);
2977 
2978   if (BC.HasRelocations) {
2979     for (std::pair<const uint32_t, MCSymbol *> &LI : Labels)
2980       BC.UndefinedSymbols.insert(LI.second);
2981     for (MCSymbol *const EndLabel : FunctionEndLabels)
2982       if (EndLabel)
2983         BC.UndefinedSymbols.insert(EndLabel);
2984   }
2985 }
2986 
2987 void BinaryFunction::setTrapOnEntry() {
2988   clearDisasmState();
2989 
2990   forEachEntryPoint([&](uint64_t Offset, const MCSymbol *Label) -> bool {
2991     MCInst TrapInstr;
2992     BC.MIB->createTrap(TrapInstr);
2993     addInstruction(Offset, std::move(TrapInstr));
2994     return true;
2995   });
2996 
2997   TrapsOnEntry = true;
2998 }
2999 
3000 void BinaryFunction::setIgnored() {
3001   if (opts::processAllFunctions()) {
3002     // We can accept ignored functions before they've been disassembled.
3003     // In that case, they would still get disassembled and emitted, but not
3004     // optimized.
3005     assert(CurrentState == State::Empty &&
3006            "cannot ignore non-empty functions in current mode");
3007     IsIgnored = true;
3008     return;
3009   }
3010 
3011   clearDisasmState();
3012 
3013   // Clear CFG state too.
3014   if (hasCFG()) {
3015     releaseCFG();
3016 
3017     for (BinaryBasicBlock *BB : BasicBlocks)
3018       delete BB;
3019     clearList(BasicBlocks);
3020 
3021     for (BinaryBasicBlock *BB : DeletedBasicBlocks)
3022       delete BB;
3023     clearList(DeletedBasicBlocks);
3024 
3025     Layout.clear();
3026   }
3027 
3028   CurrentState = State::Empty;
3029 
3030   IsIgnored = true;
3031   IsSimple = false;
3032   LLVM_DEBUG(dbgs() << "Ignoring " << getPrintName() << '\n');
3033 }
3034 
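// Rewrite references to constant-island symbols from cold blocks so that they
// point to ".cold" duplicates of those symbols, creating each duplicate (and
// updating the owning function's proxy maps for proxy symbols) on first use,
// presumably so the cold fragment can be emitted with its own copy of the
// island data.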
3035 void BinaryFunction::duplicateConstantIslands() {
3036   assert(Islands && "function expected to have constant islands");
3037 
3038   for (BinaryBasicBlock *BB : getLayout().blocks()) {
3039     if (!BB->isCold())
3040       continue;
3041 
3042     for (MCInst &Inst : *BB) {
3043       int OpNum = 0;
3044       for (MCOperand &Operand : Inst) {
3045         if (!Operand.isExpr()) {
3046           ++OpNum;
3047           continue;
3048         }
3049         const MCSymbol *Symbol = BC.MIB->getTargetSymbol(Inst, OpNum);
3050         // Check if this is an island symbol
3051         if (!Islands->Symbols.count(Symbol) &&
3052             !Islands->ProxySymbols.count(Symbol))
3053           continue;
3054 
3055         // Create cold symbol, if missing
3056         auto ISym = Islands->ColdSymbols.find(Symbol);
3057         MCSymbol *ColdSymbol;
3058         if (ISym != Islands->ColdSymbols.end()) {
3059           ColdSymbol = ISym->second;
3060         } else {
3061           ColdSymbol = BC.Ctx->getOrCreateSymbol(Symbol->getName() + ".cold");
3062           Islands->ColdSymbols[Symbol] = ColdSymbol;
3063           // Check if this is a proxy island symbol and update owner proxy map
3064           if (Islands->ProxySymbols.count(Symbol)) {
3065             BinaryFunction *Owner = Islands->ProxySymbols[Symbol];
3066             auto IProxiedSym = Owner->Islands->Proxies[this].find(Symbol);
3067             Owner->Islands->ColdProxies[this][IProxiedSym->second] = ColdSymbol;
3068           }
3069         }
3070 
3071         // Update instruction reference
3072         Operand = MCOperand::createExpr(BC.MIB->getTargetExprFor(
3073             Inst,
3074             MCSymbolRefExpr::create(ColdSymbol, MCSymbolRefExpr::VK_None,
3075                                     *BC.Ctx),
3076             *BC.Ctx, 0));
3077         ++OpNum;
3078       }
3079     }
3080   }
3081 }
3082 
3083 #ifndef MAX_PATH
3084 #define MAX_PATH 255
3085 #endif
3086 
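// Build a dot-file name of the form "<filename>[-<annotation>]<suffix>", with
// '/' replaced by '-' and the base truncated so the result stays within
// MAX_PATH. For example, constructFilename("foo/bar", "opt", ".dot") yields
// "foo-bar-opt.dot".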
3087 static std::string constructFilename(std::string Filename,
3088                                      std::string Annotation,
3089                                      std::string Suffix) {
3090   std::replace(Filename.begin(), Filename.end(), '/', '-');
3091   if (!Annotation.empty())
3092     Annotation.insert(0, "-");
3093   if (Filename.size() + Annotation.size() + Suffix.size() > MAX_PATH) {
3094     assert(Suffix.size() + Annotation.size() <= MAX_PATH);
3095     Filename.resize(MAX_PATH - (Suffix.size() + Annotation.size()));
3096   }
3097   Filename += Annotation;
3098   Filename += Suffix;
3099   return Filename;
3100 }
3101 
3102 static std::string formatEscapes(const std::string &Str) {
3103   std::string Result;
3104   for (unsigned I = 0; I < Str.size(); ++I) {
3105     char C = Str[I];
3106     switch (C) {
3107     case '\n':
3108       Result += "&#13;";
3109       break;
3110     case '"':
3111       break;
3112     default:
3113       Result += C;
3114       break;
3115     }
3116   }
3117   return Result;
3118 }
3119 
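// Dump the CFG in Graphviz DOT format. Each basic block becomes a node labeled
// with its name, execution count (C), input offset (O), index (I), layout
// index (L) and CFI state, plus the disassembled code when -dot-tooltip-code
// is on. Edges are annotated with the branch opcode (or the markers "TB",
// "FB", "FT", "JT") and, when profile is available, the edge count and
// mispredicted count; dashed edges point to landing pads.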
3120 void BinaryFunction::dumpGraph(raw_ostream &OS) const {
3121   OS << "digraph \"" << getPrintName() << "\" {\n"
3122      << "node [fontname=courier, shape=box, style=filled, colorscheme=brbg9]\n";
3123   uint64_t Offset = Address;
3124   for (BinaryBasicBlock *BB : BasicBlocks) {
3125     auto LayoutPos = find(Layout.blocks(), BB);
3126     unsigned LayoutIndex = LayoutPos - Layout.block_begin();
3127     const char *ColdStr = BB->isCold() ? " (cold)" : "";
3128     std::vector<std::string> Attrs;
3129     // Bold box for entry points
3130     if (isEntryPoint(*BB))
3131       Attrs.push_back("penwidth=2");
3132     if (BLI && BLI->getLoopFor(BB)) {
3133       // Distinguish innermost loops
3134       const BinaryLoop *Loop = BLI->getLoopFor(BB);
3135       if (Loop->isInnermost())
3136         Attrs.push_back("fillcolor=6");
3137       else // some outer loop
3138         Attrs.push_back("fillcolor=4");
3139     } else { // non-loopy code
3140       Attrs.push_back("fillcolor=5");
3141     }
3142     ListSeparator LS;
3143     OS << "\"" << BB->getName() << "\" [";
3144     for (StringRef Attr : Attrs)
3145       OS << LS << Attr;
3146     OS << "]\n";
3147     OS << format("\"%s\" [label=\"%s%s\\n(C:%lu,O:%lu,I:%u,L:%u,CFI:%u)\\n",
3148                  BB->getName().data(), BB->getName().data(), ColdStr,
3149                  BB->getKnownExecutionCount(), BB->getOffset(), getIndex(BB),
3150                  LayoutIndex, BB->getCFIState());
3151 
3152     if (opts::DotToolTipCode) {
3153       std::string Str;
3154       raw_string_ostream CS(Str);
3155       Offset = BC.printInstructions(CS, BB->begin(), BB->end(), Offset, this,
3156                                     /* PrintMCInst = */ false,
3157                                     /* PrintMemData = */ false,
3158                                     /* PrintRelocations = */ false,
3159                                     /* Endl = */ R"(\\l)");
3160       OS << formatEscapes(CS.str()) << '\n';
3161     }
3162     OS << "\"]\n";
3163 
3164     // analyzeBranch is just used to get the names of the branch
3165     // opcodes.
3166     const MCSymbol *TBB = nullptr;
3167     const MCSymbol *FBB = nullptr;
3168     MCInst *CondBranch = nullptr;
3169     MCInst *UncondBranch = nullptr;
3170     const bool Success = BB->analyzeBranch(TBB, FBB, CondBranch, UncondBranch);
3171 
3172     const MCInst *LastInstr = BB->getLastNonPseudoInstr();
3173     const bool IsJumpTable = LastInstr && BC.MIB->getJumpTable(*LastInstr);
3174 
3175     auto BI = BB->branch_info_begin();
3176     for (BinaryBasicBlock *Succ : BB->successors()) {
3177       std::string Branch;
3178       if (Success) {
3179         if (Succ == BB->getConditionalSuccessor(true)) {
3180           Branch = CondBranch ? std::string(BC.InstPrinter->getOpcodeName(
3181                                     CondBranch->getOpcode()))
3182                               : "TB";
3183         } else if (Succ == BB->getConditionalSuccessor(false)) {
3184           Branch = UncondBranch ? std::string(BC.InstPrinter->getOpcodeName(
3185                                       UncondBranch->getOpcode()))
3186                                 : "FB";
3187         } else {
3188           Branch = "FT";
3189         }
3190       }
3191       if (IsJumpTable)
3192         Branch = "JT";
3193       OS << format("\"%s\" -> \"%s\" [label=\"%s", BB->getName().data(),
3194                    Succ->getName().data(), Branch.c_str());
3195 
3196       if (BB->getExecutionCount() != COUNT_NO_PROFILE &&
3197           BI->MispredictedCount != BinaryBasicBlock::COUNT_INFERRED) {
3198         OS << "\\n(C:" << BI->Count << ",M:" << BI->MispredictedCount << ")";
3199       } else if (ExecutionCount != COUNT_NO_PROFILE &&
3200                  BI->Count != BinaryBasicBlock::COUNT_NO_PROFILE) {
3201         OS << "\\n(IC:" << BI->Count << ")";
3202       }
3203       OS << "\"]\n";
3204 
3205       ++BI;
3206     }
3207     for (BinaryBasicBlock *LP : BB->landing_pads()) {
3208       OS << format("\"%s\" -> \"%s\" [constraint=false style=dashed]\n",
3209                    BB->getName().data(), LP->getName().data());
3210     }
3211   }
3212   OS << "}\n";
3213 }
3214 
3215 void BinaryFunction::viewGraph() const {
3216   SmallString<MAX_PATH> Filename;
3217   if (std::error_code EC =
3218           sys::fs::createTemporaryFile("bolt-cfg", "dot", Filename)) {
3219     BC.errs() << "BOLT-ERROR: " << EC.message() << ", unable to create "
3220               << "bolt-cfg-XXXXX.dot temporary file.\n";
3221     return;
3222   }
3223   dumpGraphToFile(std::string(Filename));
3224   if (DisplayGraph(Filename))
3225     BC.errs() << "BOLT-ERROR: Can't display " << Filename
3226               << " with graphviz.\n";
3227   if (std::error_code EC = sys::fs::remove(Filename)) {
3228     BC.errs() << "BOLT-WARNING: " << EC.message() << ", failed to remove "
3229               << Filename << "\n";
3230   }
3231 }
3232 
3233 void BinaryFunction::dumpGraphForPass(std::string Annotation) const {
3234   if (!opts::shouldPrint(*this))
3235     return;
3236 
3237   std::string Filename = constructFilename(getPrintName(), Annotation, ".dot");
3238   if (opts::Verbosity >= 1)
3239     BC.outs() << "BOLT-INFO: dumping CFG to " << Filename << "\n";
3240   dumpGraphToFile(Filename);
3241 }
3242 
3243 void BinaryFunction::dumpGraphToFile(std::string Filename) const {
3244   std::error_code EC;
3245   raw_fd_ostream of(Filename, EC, sys::fs::OF_None);
3246   if (EC) {
3247     if (opts::Verbosity >= 1) {
3248       BC.errs() << "BOLT-WARNING: " << EC.message() << ", unable to open "
3249                 << Filename << " for output.\n";
3250     }
3251     return;
3252   }
3253   dumpGraph(of);
3254 }
3255 
3256 bool BinaryFunction::validateCFG() const {
3257   // Skip the validation of CFG after it is finalized
3258   if (CurrentState == State::CFG_Finalized)
3259     return true;
3260 
3261   for (BinaryBasicBlock *BB : BasicBlocks)
3262     if (!BB->validateSuccessorInvariants())
3263       return false;
3264 
3265   // Make sure all blocks in CFG are valid.
3266   auto validateBlock = [this](const BinaryBasicBlock *BB, StringRef Desc) {
3267     if (!BB->isValid()) {
3268       BC.errs() << "BOLT-ERROR: deleted " << Desc << " " << BB->getName()
3269                 << " detected in:\n";
3270       this->dump();
3271       return false;
3272     }
3273     return true;
3274   };
3275   for (const BinaryBasicBlock *BB : BasicBlocks) {
3276     if (!validateBlock(BB, "block"))
3277       return false;
3278     for (const BinaryBasicBlock *PredBB : BB->predecessors())
3279       if (!validateBlock(PredBB, "predecessor"))
3280         return false;
3281     for (const BinaryBasicBlock *SuccBB : BB->successors())
3282       if (!validateBlock(SuccBB, "successor"))
3283         return false;
3284     for (const BinaryBasicBlock *LP : BB->landing_pads())
3285       if (!validateBlock(LP, "landing pad"))
3286         return false;
3287     for (const BinaryBasicBlock *Thrower : BB->throwers())
3288       if (!validateBlock(Thrower, "thrower"))
3289         return false;
3290   }
3291 
3292   for (const BinaryBasicBlock *BB : BasicBlocks) {
3293     std::unordered_set<const BinaryBasicBlock *> BBLandingPads;
3294     for (const BinaryBasicBlock *LP : BB->landing_pads()) {
3295       if (BBLandingPads.count(LP)) {
3296         BC.errs() << "BOLT-ERROR: duplicate landing pad detected in "
3297                   << BB->getName() << " in function " << *this << '\n';
3298         return false;
3299       }
3300       BBLandingPads.insert(LP);
3301     }
3302 
3303     std::unordered_set<const BinaryBasicBlock *> BBThrowers;
3304     for (const BinaryBasicBlock *Thrower : BB->throwers()) {
3305       if (BBThrowers.count(Thrower)) {
3306         BC.errs() << "BOLT-ERROR: duplicate thrower detected in "
3307                   << BB->getName() << " in function " << *this << '\n';
3308         return false;
3309       }
3310       BBThrowers.insert(Thrower);
3311     }
3312 
3313     for (const BinaryBasicBlock *LPBlock : BB->landing_pads()) {
3314       if (!llvm::is_contained(LPBlock->throwers(), BB)) {
3315         BC.errs() << "BOLT-ERROR: inconsistent landing pad detected in "
3316                   << *this << ": " << BB->getName()
3317                   << " is in LandingPads but not in " << LPBlock->getName()
3318                   << " Throwers\n";
3319         return false;
3320       }
3321     }
3322     for (const BinaryBasicBlock *Thrower : BB->throwers()) {
3323       if (!llvm::is_contained(Thrower->landing_pads(), BB)) {
3324         BC.errs() << "BOLT-ERROR: inconsistent thrower detected in " << *this
3325                   << ": " << BB->getName() << " is in Throwers list but not in "
3326                   << Thrower->getName() << " LandingPads\n";
3327         return false;
3328       }
3329     }
3330   }
3331 
3332   return true;
3333 }
3334 
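// Rewrite terminator branches of every block to match the final layout. Stale
// unconditional branches are removed and re-added only when the single
// successor is not the next block in the layout. For two-way blocks, the
// condition may be reversed so that the taken target becomes the fall-through,
// the conditional target label is updated, and an unconditional branch to the
// other successor is appended unless it falls through. Redundant dynamic
// (static key) branches are kept, and conditions are reversed only for
// branches reported as reversible.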
3335 void BinaryFunction::fixBranches() {
3336   auto &MIB = BC.MIB;
3337   MCContext *Ctx = BC.Ctx.get();
3338 
3339   for (BinaryBasicBlock *BB : BasicBlocks) {
3340     const MCSymbol *TBB = nullptr;
3341     const MCSymbol *FBB = nullptr;
3342     MCInst *CondBranch = nullptr;
3343     MCInst *UncondBranch = nullptr;
3344     if (!BB->analyzeBranch(TBB, FBB, CondBranch, UncondBranch))
3345       continue;
3346 
3347     // We will create unconditional branch with correct destination if needed.
3348     if (UncondBranch)
3349       BB->eraseInstruction(BB->findInstruction(UncondBranch));
3350 
3351     // Basic block that follows the current one in the final layout.
3352     const BinaryBasicBlock *const NextBB =
3353         Layout.getBasicBlockAfter(BB, /*IgnoreSplits=*/false);
3354 
3355     if (BB->succ_size() == 1) {
3356       // __builtin_unreachable() could create a conditional branch that
3357       // falls through into the next function, so the block will have only
3358       // one valid successor. Since the behaviour is undefined, we replace
3359       // the conditional branch with an unconditional one if required.
3360       if (CondBranch)
3361         BB->eraseInstruction(BB->findInstruction(CondBranch));
3362       if (BB->getSuccessor() == NextBB)
3363         continue;
3364       BB->addBranchInstruction(BB->getSuccessor());
3365     } else if (BB->succ_size() == 2) {
3366       assert(CondBranch && "conditional branch expected");
3367       const BinaryBasicBlock *TSuccessor = BB->getConditionalSuccessor(true);
3368       const BinaryBasicBlock *FSuccessor = BB->getConditionalSuccessor(false);
3369 
3370       // Eliminate unnecessary conditional branch.
3371       if (TSuccessor == FSuccessor) {
3372         // FIXME: at the moment, we cannot safely remove static key branches.
3373         if (MIB->isDynamicBranch(*CondBranch)) {
3374           if (opts::Verbosity) {
3375             BC.outs()
3376                 << "BOLT-INFO: unable to remove redundant dynamic branch in "
3377                 << *this << '\n';
3378           }
3379           continue;
3380         }
3381 
3382         BB->removeDuplicateConditionalSuccessor(CondBranch);
3383         if (TSuccessor != NextBB)
3384           BB->addBranchInstruction(TSuccessor);
3385         continue;
3386       }
3387 
3388       // Reverse branch condition and swap successors.
3389       auto swapSuccessors = [&]() {
3390         if (!MIB->isReversibleBranch(*CondBranch)) {
3391           if (opts::Verbosity) {
3392             BC.outs() << "BOLT-INFO: unable to swap successors in " << *this
3393                       << '\n';
3394           }
3395           return false;
3396         }
3397         std::swap(TSuccessor, FSuccessor);
3398         BB->swapConditionalSuccessors();
3399         auto L = BC.scopeLock();
3400         MIB->reverseBranchCondition(*CondBranch, TSuccessor->getLabel(), Ctx);
3401         return true;
3402       };
3403 
3404       // Check whether the next block is a "taken" target and try to swap it
3405       // with a "fall-through" target.
3406       if (TSuccessor == NextBB && swapSuccessors())
3407         continue;
3408 
3409       // Update conditional branch destination if needed.
3410       if (MIB->getTargetSymbol(*CondBranch) != TSuccessor->getLabel()) {
3411         auto L = BC.scopeLock();
3412         MIB->replaceBranchTarget(*CondBranch, TSuccessor->getLabel(), Ctx);
3413       }
3414 
3415       // No need for the unconditional branch.
3416       if (FSuccessor == NextBB)
3417         continue;
3418 
3419       if (BC.isX86()) {
3420         // We are going to generate two branches. Check if their targets are in
3421         // the same fragment as this block. If only one target is in the same
3422         // fragment, make it the destination of the conditional branch. There
3423         // is a chance it will be a short branch which takes 4 bytes fewer than
3424         // a long conditional branch. For unconditional branch, the difference
3425         // is 3 bytes.
3426         if (BB->getFragmentNum() != TSuccessor->getFragmentNum() &&
3427             BB->getFragmentNum() == FSuccessor->getFragmentNum())
3428           swapSuccessors();
3429       }
3430 
3431       BB->addBranchInstruction(FSuccessor);
3432     }
3433     // Cases where the number of successors is 0 (block ends with a
3434     // terminator) or more than 2 (switch table) don't require branch
3435     // instruction adjustments.
3436   }
3437   assert((!isSimple() || validateCFG()) &&
3438          "Invalid CFG detected after fixing branches");
3439 }
3440 
3441 void BinaryFunction::propagateGnuArgsSizeInfo(
3442     MCPlusBuilder::AllocatorIdTy AllocId) {
3443   assert(CurrentState == State::Disassembled && "unexpected function state");
3444 
3445   if (!hasEHRanges() || !usesGnuArgsSize())
3446     return;
3447 
3448   // The current value of DW_CFA_GNU_args_size affects all following
3449   // invoke instructions until the next CFI overrides it.
3450   // It is important to iterate basic blocks in the original order when
3451   // assigning the value.
3452   uint64_t CurrentGnuArgsSize = 0;
3453   for (BinaryBasicBlock *BB : BasicBlocks) {
3454     for (auto II = BB->begin(); II != BB->end();) {
3455       MCInst &Instr = *II;
3456       if (BC.MIB->isCFI(Instr)) {
3457         const MCCFIInstruction *CFI = getCFIFor(Instr);
3458         if (CFI->getOperation() == MCCFIInstruction::OpGnuArgsSize) {
3459           CurrentGnuArgsSize = CFI->getOffset();
3460           // Delete DW_CFA_GNU_args_size instructions and only regenerate
3461           // during the final code emission. The information is embedded
3462           // inside call instructions.
3463           II = BB->erasePseudoInstruction(II);
3464           continue;
3465         }
3466       } else if (BC.MIB->isInvoke(Instr)) {
3467         // Add the value of GNU_args_size as an extra operand to invokes.
3468         BC.MIB->addGnuArgsSize(Instr, CurrentGnuArgsSize);
3469       }
3470       ++II;
3471     }
3472   }
3473 }
3474 
3475 void BinaryFunction::postProcessBranches() {
3476   if (!isSimple())
3477     return;
3478   for (BinaryBasicBlock &BB : blocks()) {
3479     auto LastInstrRI = BB.getLastNonPseudo();
3480     if (BB.succ_size() == 1) {
3481       if (LastInstrRI != BB.rend() &&
3482           BC.MIB->isConditionalBranch(*LastInstrRI)) {
3483         // __builtin_unreachable() could create a conditional branch that
3484         // falls through into the next function, so the block will have only
3485         // one valid successor. Such behaviour is undefined and thus we remove
3486         // the conditional branch while leaving a valid successor.
3487         BB.eraseInstruction(std::prev(LastInstrRI.base()));
3488         LLVM_DEBUG(dbgs() << "BOLT-DEBUG: erasing conditional branch in "
3489                           << BB.getName() << " in function " << *this << '\n');
3490       }
3491     } else if (BB.succ_size() == 0) {
3492       // Ignore unreachable basic blocks.
3493       if (BB.pred_size() == 0 || BB.isLandingPad())
3494         continue;
3495 
3496       // If the basic block does not end with a terminator, insert a return
3497       // instruction unless the last instruction is a call.
3498       if (LastInstrRI == BB.rend()) {
3499         LLVM_DEBUG(
3500             dbgs() << "BOLT-DEBUG: at least one instruction expected in BB "
3501                    << BB.getName() << " in function " << *this << '\n');
3502         continue;
3503       }
3504       if (!BC.MIB->isTerminator(*LastInstrRI) &&
3505           !BC.MIB->isCall(*LastInstrRI)) {
3506         LLVM_DEBUG(dbgs() << "BOLT-DEBUG: adding return to basic block "
3507                           << BB.getName() << " in function " << *this << '\n');
3508         MCInst ReturnInstr;
3509         BC.MIB->createReturn(ReturnInstr);
3510         BB.addInstruction(ReturnInstr);
3511       }
3512     }
3513   }
3514   assert(validateCFG() && "invalid CFG");
3515 }
3516 
3517 MCSymbol *BinaryFunction::addEntryPointAtOffset(uint64_t Offset) {
3518   assert(Offset && "cannot add primary entry point");
3519   assert(CurrentState == State::Empty || CurrentState == State::Disassembled);
3520 
3521   const uint64_t EntryPointAddress = getAddress() + Offset;
3522   MCSymbol *LocalSymbol = getOrCreateLocalLabel(EntryPointAddress);
3523 
3524   MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(LocalSymbol);
3525   if (EntrySymbol)
3526     return EntrySymbol;
3527 
3528   if (BinaryData *EntryBD = BC.getBinaryDataAtAddress(EntryPointAddress)) {
3529     EntrySymbol = EntryBD->getSymbol();
3530   } else {
3531     EntrySymbol = BC.getOrCreateGlobalSymbol(
3532         EntryPointAddress, Twine("__ENTRY_") + getOneName() + "@");
3533   }
3534   SecondaryEntryPoints[LocalSymbol] = EntrySymbol;
3535 
3536   BC.setSymbolToFunctionMap(EntrySymbol, this);
3537 
3538   return EntrySymbol;
3539 }
3540 
3541 MCSymbol *BinaryFunction::addEntryPoint(const BinaryBasicBlock &BB) {
3542   assert(CurrentState == State::CFG &&
3543          "basic block can be added as an entry only in a function with CFG");
3544 
3545   if (&BB == BasicBlocks.front())
3546     return getSymbol();
3547 
3548   MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(BB);
3549   if (EntrySymbol)
3550     return EntrySymbol;
3551 
3552   EntrySymbol =
3553       BC.Ctx->getOrCreateSymbol("__ENTRY_" + BB.getLabel()->getName());
3554 
3555   SecondaryEntryPoints[BB.getLabel()] = EntrySymbol;
3556 
3557   BC.setSymbolToFunctionMap(EntrySymbol, this);
3558 
3559   return EntrySymbol;
3560 }
3561 
3562 MCSymbol *BinaryFunction::getSymbolForEntryID(uint64_t EntryID) {
3563   if (EntryID == 0)
3564     return getSymbol();
3565 
3566   if (!isMultiEntry())
3567     return nullptr;
3568 
3569   uint64_t NumEntries = 1;
3570   if (hasCFG()) {
3571     for (BinaryBasicBlock *BB : BasicBlocks) {
3572       MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(*BB);
3573       if (!EntrySymbol)
3574         continue;
3575       if (NumEntries == EntryID)
3576         return EntrySymbol;
3577       ++NumEntries;
3578     }
3579   } else {
3580     for (std::pair<const uint32_t, MCSymbol *> &KV : Labels) {
3581       MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(KV.second);
3582       if (!EntrySymbol)
3583         continue;
3584       if (NumEntries == EntryID)
3585         return EntrySymbol;
3586       ++NumEntries;
3587     }
3588   }
3589 
3590   return nullptr;
3591 }
3592 
3593 uint64_t BinaryFunction::getEntryIDForSymbol(const MCSymbol *Symbol) const {
3594   if (!isMultiEntry())
3595     return 0;
3596 
3597   for (const MCSymbol *FunctionSymbol : getSymbols())
3598     if (FunctionSymbol == Symbol)
3599       return 0;
3600 
3601   // Check all secondary entries available as either basic blocks or labels.
3602   uint64_t NumEntries = 1;
3603   for (const BinaryBasicBlock *BB : BasicBlocks) {
3604     MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(*BB);
3605     if (!EntrySymbol)
3606       continue;
3607     if (EntrySymbol == Symbol)
3608       return NumEntries;
3609     ++NumEntries;
3610   }
3611   NumEntries = 1;
3612   for (const std::pair<const uint32_t, MCSymbol *> &KV : Labels) {
3613     MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(KV.second);
3614     if (!EntrySymbol)
3615       continue;
3616     if (EntrySymbol == Symbol)
3617       return NumEntries;
3618     ++NumEntries;
3619   }
3620 
3621   llvm_unreachable("symbol not found");
3622 }
3623 
3624 bool BinaryFunction::forEachEntryPoint(EntryPointCallbackTy Callback) const {
3625   bool Status = Callback(0, getSymbol());
3626   if (!isMultiEntry())
3627     return Status;
3628 
3629   for (const std::pair<const uint32_t, MCSymbol *> &KV : Labels) {
3630     if (!Status)
3631       break;
3632 
3633     MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(KV.second);
3634     if (!EntrySymbol)
3635       continue;
3636 
3637     Status = Callback(KV.first, EntrySymbol);
3638   }
3639 
3640   return Status;
3641 }
3642 
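// Return basic blocks in a depth-first order that is stable across runs: entry
// points are pushed in ascending input offset order, landing pads are pushed
// before regular successors, and for two-way branches the successor order
// depends on whether the branch condition is already in canonical form. This
// order is used, for example, for DFS-based function hashing below.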
3643 BinaryFunction::BasicBlockListType BinaryFunction::dfs() const {
3644   BasicBlockListType DFS;
3645   std::stack<BinaryBasicBlock *> Stack;
3646   std::set<BinaryBasicBlock *> Visited;
3647 
3648   // Push entry points to the stack in reverse order.
3649   //
3650   // NB: we rely on the original order of entries to match.
3651   SmallVector<BinaryBasicBlock *> EntryPoints;
3652   llvm::copy_if(BasicBlocks, std::back_inserter(EntryPoints),
3653           [&](const BinaryBasicBlock *const BB) { return isEntryPoint(*BB); });
3654   // Sort entry points by their offset to make sure we get them in the right
3655   // order.
3656   llvm::stable_sort(EntryPoints, [](const BinaryBasicBlock *const A,
3657                               const BinaryBasicBlock *const B) {
3658     return A->getOffset() < B->getOffset();
3659   });
3660   for (BinaryBasicBlock *const BB : reverse(EntryPoints))
3661     Stack.push(BB);
3662 
3663   while (!Stack.empty()) {
3664     BinaryBasicBlock *BB = Stack.top();
3665     Stack.pop();
3666 
3667     if (Visited.find(BB) != Visited.end())
3668       continue;
3669     Visited.insert(BB);
3670     DFS.push_back(BB);
3671 
3672     for (BinaryBasicBlock *SuccBB : BB->landing_pads()) {
3673       Stack.push(SuccBB);
3674     }
3675 
3676     const MCSymbol *TBB = nullptr;
3677     const MCSymbol *FBB = nullptr;
3678     MCInst *CondBranch = nullptr;
3679     MCInst *UncondBranch = nullptr;
3680     if (BB->analyzeBranch(TBB, FBB, CondBranch, UncondBranch) && CondBranch &&
3681         BB->succ_size() == 2) {
3682       if (BC.MIB->getCanonicalBranchCondCode(BC.MIB->getCondCode(
3683               *CondBranch)) == BC.MIB->getCondCode(*CondBranch)) {
3684         Stack.push(BB->getConditionalSuccessor(true));
3685         Stack.push(BB->getConditionalSuccessor(false));
3686       } else {
3687         Stack.push(BB->getConditionalSuccessor(false));
3688         Stack.push(BB->getConditionalSuccessor(true));
3689       }
3690     } else {
3691       for (BinaryBasicBlock *SuccBB : BB->successors()) {
3692         Stack.push(SuccBB);
3693       }
3694     }
3695   }
3696 
3697   return DFS;
3698 }
3699 
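// Compute a hash of the function body by concatenating per-block hash strings
// (see hashBlock) in either DFS or layout order and feeding the result to
// std::hash or XXH3, as requested. The value is cached in the Hash member and
// also returned.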
3700 size_t BinaryFunction::computeHash(bool UseDFS, HashFunction HashFunction,
3701                                    OperandHashFuncTy OperandHashFunc) const {
3702   LLVM_DEBUG({
3703     dbgs() << "BOLT-DEBUG: computeHash " << getPrintName() << ' '
3704            << (UseDFS ? "dfs" : "bin") << " order "
3705            << (HashFunction == HashFunction::StdHash ? "std::hash" : "xxh3")
3706            << '\n';
3707   });
3708 
3709   if (size() == 0)
3710     return 0;
3711 
3712   assert(hasCFG() && "function is expected to have CFG");
3713 
3714   SmallVector<const BinaryBasicBlock *, 0> Order;
3715   if (UseDFS)
3716     llvm::copy(dfs(), std::back_inserter(Order));
3717   else
3718     llvm::copy(Layout.blocks(), std::back_inserter(Order));
3719 
3720   // The hash is computed by creating a string of all instruction opcodes
3721   // and possibly their operands, then hashing it with the selected function.
3722   std::string HashString;
3723   for (const BinaryBasicBlock *BB : Order)
3724     HashString.append(hashBlock(BC, *BB, OperandHashFunc));
3725 
3726   switch (HashFunction) {
3727   case HashFunction::StdHash:
3728     return Hash = std::hash<std::string>{}(HashString);
3729   case HashFunction::XXH3:
3730     return Hash = llvm::xxh3_64bits(HashString);
3731   }
3732   llvm_unreachable("Unhandled HashFunction");
3733 }
3734 
3735 void BinaryFunction::insertBasicBlocks(
3736     BinaryBasicBlock *Start,
3737     std::vector<std::unique_ptr<BinaryBasicBlock>> &&NewBBs,
3738     const bool UpdateLayout, const bool UpdateCFIState,
3739     const bool RecomputeLandingPads) {
3740   const int64_t StartIndex = Start ? getIndex(Start) : -1LL;
3741   const size_t NumNewBlocks = NewBBs.size();
3742 
3743   BasicBlocks.insert(BasicBlocks.begin() + (StartIndex + 1), NumNewBlocks,
3744                      nullptr);
3745 
3746   int64_t I = StartIndex + 1;
3747   for (std::unique_ptr<BinaryBasicBlock> &BB : NewBBs) {
3748     assert(!BasicBlocks[I]);
3749     BasicBlocks[I++] = BB.release();
3750   }
3751 
3752   if (RecomputeLandingPads)
3753     recomputeLandingPads();
3754   else
3755     updateBBIndices(0);
3756 
3757   if (UpdateLayout)
3758     updateLayout(Start, NumNewBlocks);
3759 
3760   if (UpdateCFIState)
3761     updateCFIState(Start, NumNewBlocks);
3762 }
3763 
3764 BinaryFunction::iterator BinaryFunction::insertBasicBlocks(
3765     BinaryFunction::iterator StartBB,
3766     std::vector<std::unique_ptr<BinaryBasicBlock>> &&NewBBs,
3767     const bool UpdateLayout, const bool UpdateCFIState,
3768     const bool RecomputeLandingPads) {
3769   const unsigned StartIndex = getIndex(&*StartBB);
3770   const size_t NumNewBlocks = NewBBs.size();
3771 
3772   BasicBlocks.insert(BasicBlocks.begin() + StartIndex + 1, NumNewBlocks,
3773                      nullptr);
3774   auto RetIter = BasicBlocks.begin() + StartIndex + 1;
3775 
3776   unsigned I = StartIndex + 1;
3777   for (std::unique_ptr<BinaryBasicBlock> &BB : NewBBs) {
3778     assert(!BasicBlocks[I]);
3779     BasicBlocks[I++] = BB.release();
3780   }
3781 
3782   if (RecomputeLandingPads)
3783     recomputeLandingPads();
3784   else
3785     updateBBIndices(0);
3786 
3787   if (UpdateLayout)
3788     updateLayout(*std::prev(RetIter), NumNewBlocks);
3789 
3790   if (UpdateCFIState)
3791     updateCFIState(*std::prev(RetIter), NumNewBlocks);
3792 
3793   return RetIter;
3794 }
3795 
3796 void BinaryFunction::updateBBIndices(const unsigned StartIndex) {
3797   for (unsigned I = StartIndex; I < BasicBlocks.size(); ++I)
3798     BasicBlocks[I]->Index = I;
3799 }
3800 
3801 void BinaryFunction::updateCFIState(BinaryBasicBlock *Start,
3802                                     const unsigned NumNewBlocks) {
3803   const int32_t CFIState = Start->getCFIStateAtExit();
3804   const unsigned StartIndex = getIndex(Start) + 1;
3805   for (unsigned I = 0; I < NumNewBlocks; ++I)
3806     BasicBlocks[StartIndex + I]->setCFIState(CFIState);
3807 }
3808 
3809 void BinaryFunction::updateLayout(BinaryBasicBlock *Start,
3810                                   const unsigned NumNewBlocks) {
3811   BasicBlockListType::iterator Begin;
3812   BasicBlockListType::iterator End;
3813 
3814   // If Start is not provided, copy new blocks from the beginning of BasicBlocks.
3815   if (!Start) {
3816     Begin = BasicBlocks.begin();
3817     End = BasicBlocks.begin() + NumNewBlocks;
3818   } else {
3819     unsigned StartIndex = getIndex(Start);
3820     Begin = std::next(BasicBlocks.begin(), StartIndex + 1);
3821     End = std::next(BasicBlocks.begin(), StartIndex + NumNewBlocks + 1);
3822   }
3823 
3824   // Insert new blocks in the layout immediately after Start.
3825   Layout.insertBasicBlocks(Start, {Begin, End});
3826   Layout.updateLayoutIndices();
3827 }
3828 
3829 bool BinaryFunction::checkForAmbiguousJumpTables() {
3830   SmallSet<uint64_t, 4> JumpTables;
3831   for (BinaryBasicBlock *&BB : BasicBlocks) {
3832     for (MCInst &Inst : *BB) {
3833       if (!BC.MIB->isIndirectBranch(Inst))
3834         continue;
3835       uint64_t JTAddress = BC.MIB->getJumpTable(Inst);
3836       if (!JTAddress)
3837         continue;
3838       // This address can be inside another jump table, but we only consider
3839       // it ambiguous when the same start address is used, not the same JT
3840       // object.
3841       if (!JumpTables.count(JTAddress)) {
3842         JumpTables.insert(JTAddress);
3843         continue;
3844       }
3845       return true;
3846     }
3847   }
3848   return false;
3849 }
3850 
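// Give every indirect branch that shares a JumpTable object with another
// branch its own duplicated jump table, so each table can later be modified
// independently. The instruction that references the table address is patched
// to use the duplicate. Three instruction patterns are recognized (plain
// scale-8 indirect jump, load+jump, and the PIC scale-4 sequence shown in the
// comments below); anything else hits an llvm_unreachable.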
3851 void BinaryFunction::disambiguateJumpTables(
3852     MCPlusBuilder::AllocatorIdTy AllocId) {
3853   assert((opts::JumpTables != JTS_BASIC && isSimple()) || !BC.HasRelocations);
3854   SmallPtrSet<JumpTable *, 4> JumpTables;
3855   for (BinaryBasicBlock *&BB : BasicBlocks) {
3856     for (MCInst &Inst : *BB) {
3857       if (!BC.MIB->isIndirectBranch(Inst))
3858         continue;
3859       JumpTable *JT = getJumpTable(Inst);
3860       if (!JT)
3861         continue;
3862       auto Iter = JumpTables.find(JT);
3863       if (Iter == JumpTables.end()) {
3864         JumpTables.insert(JT);
3865         continue;
3866       }
3867       // This instruction is an indirect jump using a jump table, but it is
3868       // using the same jump table as another jump. Try all our tricks to
3869       // extract the jump table symbol and make it point to a new, duplicated JT
3870       MCPhysReg BaseReg1;
3871       uint64_t Scale;
3872       const MCSymbol *Target;
3873       // If our first matcher matches, the first instruction is the one to
3874       // patch.
3875       MCInst *JTLoadInst = &Inst;
3876       // Try a standard indirect jump matcher, scale 8
3877       std::unique_ptr<MCPlusBuilder::MCInstMatcher> IndJmpMatcher =
3878           BC.MIB->matchIndJmp(BC.MIB->matchReg(BaseReg1),
3879                               BC.MIB->matchImm(Scale), BC.MIB->matchReg(),
3880                               /*Offset=*/BC.MIB->matchSymbol(Target));
3881       if (!IndJmpMatcher->match(
3882               *BC.MRI, *BC.MIB,
3883               MutableArrayRef<MCInst>(&*BB->begin(), &Inst + 1), -1) ||
3884           BaseReg1 != BC.MIB->getNoRegister() || Scale != 8) {
3885         MCPhysReg BaseReg2;
3886         uint64_t Offset;
3887         // Standard JT matching failed. Trying now:
3888         //     movq  "jt.2397/1"(,%rax,8), %rax
3889         //     jmpq  *%rax
3890         std::unique_ptr<MCPlusBuilder::MCInstMatcher> LoadMatcherOwner =
3891             BC.MIB->matchLoad(BC.MIB->matchReg(BaseReg1),
3892                               BC.MIB->matchImm(Scale), BC.MIB->matchReg(),
3893                               /*Offset=*/BC.MIB->matchSymbol(Target));
3894         MCPlusBuilder::MCInstMatcher *LoadMatcher = LoadMatcherOwner.get();
3895         std::unique_ptr<MCPlusBuilder::MCInstMatcher> IndJmpMatcher2 =
3896             BC.MIB->matchIndJmp(std::move(LoadMatcherOwner));
3897         if (!IndJmpMatcher2->match(
3898                 *BC.MRI, *BC.MIB,
3899                 MutableArrayRef<MCInst>(&*BB->begin(), &Inst + 1), -1) ||
3900             BaseReg1 != BC.MIB->getNoRegister() || Scale != 8) {
3901           // JT matching failed. Trying now:
3902           // PIC-style matcher, scale 4
3903           //    addq    %rdx, %rsi
3904           //    addq    %rdx, %rdi
3905           //    leaq    DATAat0x402450(%rip), %r11
3906           //    movslq  (%r11,%rdx,4), %rcx
3907           //    addq    %r11, %rcx
3908           //    jmpq    *%rcx # JUMPTABLE @0x402450
3909           std::unique_ptr<MCPlusBuilder::MCInstMatcher> PICIndJmpMatcher =
3910               BC.MIB->matchIndJmp(BC.MIB->matchAdd(
3911                   BC.MIB->matchReg(BaseReg1),
3912                   BC.MIB->matchLoad(BC.MIB->matchReg(BaseReg2),
3913                                     BC.MIB->matchImm(Scale), BC.MIB->matchReg(),
3914                                     BC.MIB->matchImm(Offset))));
3915           std::unique_ptr<MCPlusBuilder::MCInstMatcher> LEAMatcherOwner =
3916               BC.MIB->matchLoadAddr(BC.MIB->matchSymbol(Target));
3917           MCPlusBuilder::MCInstMatcher *LEAMatcher = LEAMatcherOwner.get();
3918           std::unique_ptr<MCPlusBuilder::MCInstMatcher> PICBaseAddrMatcher =
3919               BC.MIB->matchIndJmp(BC.MIB->matchAdd(std::move(LEAMatcherOwner),
3920                                                    BC.MIB->matchAnyOperand()));
3921           if (!PICIndJmpMatcher->match(
3922                   *BC.MRI, *BC.MIB,
3923                   MutableArrayRef<MCInst>(&*BB->begin(), &Inst + 1), -1) ||
3924               Scale != 4 || BaseReg1 != BaseReg2 || Offset != 0 ||
3925               !PICBaseAddrMatcher->match(
3926                   *BC.MRI, *BC.MIB,
3927                   MutableArrayRef<MCInst>(&*BB->begin(), &Inst + 1), -1)) {
3928             llvm_unreachable("Failed to extract jump table base");
3929             continue;
3930           }
3931           // Matched PIC, identify the instruction with the reference to the JT
3932           JTLoadInst = LEAMatcher->CurInst;
3933         } else {
3934           // Matched non-PIC
3935           JTLoadInst = LoadMatcher->CurInst;
3936         }
3937       }
3938 
3939       uint64_t NewJumpTableID = 0;
3940       const MCSymbol *NewJTLabel;
3941       std::tie(NewJumpTableID, NewJTLabel) =
3942           BC.duplicateJumpTable(*this, JT, Target);
3943       {
3944         auto L = BC.scopeLock();
3945         BC.MIB->replaceMemOperandDisp(*JTLoadInst, NewJTLabel, BC.Ctx.get());
3946       }
3947       // We use a unique ID with the high bit set as the address for this
3948       // "injected" jump table (not originally in the input binary).
3949       BC.MIB->setJumpTable(Inst, NewJumpTableID, 0, AllocId);
3950     }
3951   }
3952 }
3953 
3954 bool BinaryFunction::replaceJumpTableEntryIn(BinaryBasicBlock *BB,
3955                                              BinaryBasicBlock *OldDest,
3956                                              BinaryBasicBlock *NewDest) {
3957   MCInst *Instr = BB->getLastNonPseudoInstr();
3958   if (!Instr || !BC.MIB->isIndirectBranch(*Instr))
3959     return false;
3960   uint64_t JTAddress = BC.MIB->getJumpTable(*Instr);
3961   assert(JTAddress && "Invalid jump table address");
3962   JumpTable *JT = getJumpTableContainingAddress(JTAddress);
3963   assert(JT && "No jump table structure for this indirect branch");
3964   bool Patched = JT->replaceDestination(JTAddress, OldDest->getLabel(),
3965                                         NewDest->getLabel());
3966   (void)Patched;
3967   assert(Patched && "Invalid entry to be replaced in jump table");
3968   return true;
3969 }
3970 
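// Split the CFG edge From->To by inserting a new block between the two. The
// new block inherits From's input offset (for BAT sample mapping) and
// coldness, takes over the edge's profile counts, and any jump table entry in
// From that pointed at To is redirected to the new block. Layout and CFI state
// are updated via insertBasicBlocks.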
3971 BinaryBasicBlock *BinaryFunction::splitEdge(BinaryBasicBlock *From,
3972                                             BinaryBasicBlock *To) {
3973   // Create intermediate BB
3974   MCSymbol *Tmp;
3975   {
3976     auto L = BC.scopeLock();
3977     Tmp = BC.Ctx->createNamedTempSymbol("SplitEdge");
3978   }
3979   // Link new BBs to the original input offset of the From BB, so we can map
3980   // samples recorded in new BBs back to the original BB seen in the input
3981   // binary (if using BAT).
3982   std::unique_ptr<BinaryBasicBlock> NewBB = createBasicBlock(Tmp);
3983   NewBB->setOffset(From->getInputOffset());
3984   BinaryBasicBlock *NewBBPtr = NewBB.get();
3985 
3986   // Update "From" BB
3987   auto I = From->succ_begin();
3988   auto BI = From->branch_info_begin();
3989   for (; I != From->succ_end(); ++I) {
3990     if (*I == To)
3991       break;
3992     ++BI;
3993   }
3994   assert(I != From->succ_end() && "Invalid CFG edge in splitEdge!");
3995   uint64_t OrigCount = BI->Count;
3996   uint64_t OrigMispreds = BI->MispredictedCount;
3997   replaceJumpTableEntryIn(From, To, NewBBPtr);
3998   From->replaceSuccessor(To, NewBBPtr, OrigCount, OrigMispreds);
3999 
4000   NewBB->addSuccessor(To, OrigCount, OrigMispreds);
4001   NewBB->setExecutionCount(OrigCount);
4002   NewBB->setIsCold(From->isCold());
4003 
4004   // Update CFI and BB layout with new intermediate BB
4005   std::vector<std::unique_ptr<BinaryBasicBlock>> NewBBs;
4006   NewBBs.emplace_back(std::move(NewBB));
4007   insertBasicBlocks(From, std::move(NewBBs), true, true,
4008                     /*RecomputeLandingPads=*/false);
4009   return NewBBPtr;
4010 }
4011 
4012 void BinaryFunction::deleteConservativeEdges() {
4013   // Our goal is to aggressively remove edges from the CFG that we believe are
4014   // wrong. This is used for instrumentation, where it is safe to remove
4015   // fallthrough edges because we won't reorder blocks.
4016   for (auto I = BasicBlocks.begin(), E = BasicBlocks.end(); I != E; ++I) {
4017     BinaryBasicBlock *BB = *I;
4018     if (BB->succ_size() != 1 || BB->size() == 0)
4019       continue;
4020 
4021     auto NextBB = std::next(I);
4022     MCInst *Last = BB->getLastNonPseudoInstr();
4023     // Fallthrough is a landing pad? Delete this edge (as long as we don't
4024     // have a direct jump to it)
4025     if ((*BB->succ_begin())->isLandingPad() && NextBB != E &&
4026         *BB->succ_begin() == *NextBB && Last && !BC.MIB->isBranch(*Last)) {
4027       BB->removeAllSuccessors();
4028       continue;
4029     }
4030 
4031     // Look for suspicious calls at the end of BB, where GCC may have optimized
4032     // away the jump to the epilogue because it knows the call won't return.
4033     if (!Last || !BC.MIB->isCall(*Last))
4034       continue;
4035 
4036     const MCSymbol *CalleeSymbol = BC.MIB->getTargetSymbol(*Last);
4037     if (!CalleeSymbol)
4038       continue;
4039 
4040     StringRef CalleeName = CalleeSymbol->getName();
4041     if (CalleeName != "__cxa_throw@PLT" && CalleeName != "_Unwind_Resume@PLT" &&
4042         CalleeName != "__cxa_rethrow@PLT" && CalleeName != "exit@PLT" &&
4043         CalleeName != "abort@PLT")
4044       continue;
4045 
4046     BB->removeAllSuccessors();
4047   }
4048 }
4049 
4050 bool BinaryFunction::isSymbolValidInScope(const SymbolRef &Symbol,
4051                                           uint64_t SymbolSize) const {
4052   // If this symbol is in a different section from the one where the
4053   // function symbol is, don't consider it as valid.
4054   if (!getOriginSection()->containsAddress(
4055           cantFail(Symbol.getAddress(), "cannot get symbol address")))
4056     return false;
4057 
4058   // Some symbols are tolerated inside function bodies, others are not.
4059   // The real function boundaries may not be known at this point.
4060   if (BC.isMarker(Symbol))
4061     return true;
4062 
4063   // It's okay to have a zero-sized symbol in the middle of a non-zero-sized
4064   // function.
4065   if (SymbolSize == 0 && containsAddress(cantFail(Symbol.getAddress())))
4066     return true;
4067 
4068   if (cantFail(Symbol.getType()) != SymbolRef::ST_Unknown)
4069     return false;
4070 
4071   if (cantFail(Symbol.getFlags()) & SymbolRef::SF_Global)
4072     return false;
4073 
4074   return true;
4075 }
4076 
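// Subtract Count from the function's execution count and scale every basic
// block's profile (via BinaryBasicBlock::adjustExecutionCount) by the
// remaining fraction. E.g. with ExecutionCount == 100 and Count == 25, blocks
// are scaled by (100 - 25) / 100 == 0.75 and the function count drops to 75.
// Count is clamped to ExecutionCount so the ratio never becomes negative.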
4077 void BinaryFunction::adjustExecutionCount(uint64_t Count) {
4078   if (getKnownExecutionCount() == 0 || Count == 0)
4079     return;
4080 
4081   if (ExecutionCount < Count)
4082     Count = ExecutionCount;
4083 
4084   double AdjustmentRatio = ((double)ExecutionCount - Count) / ExecutionCount;
4085   if (AdjustmentRatio < 0.0)
4086     AdjustmentRatio = 0.0;
4087 
4088   for (BinaryBasicBlock &BB : blocks())
4089     BB.adjustExecutionCount(AdjustmentRatio);
4090 
4091   ExecutionCount -= Count;
4092 }
4093 
4094 BinaryFunction::~BinaryFunction() {
4095   for (BinaryBasicBlock *BB : BasicBlocks)
4096     delete BB;
4097   for (BinaryBasicBlock *BB : DeletedBasicBlocks)
4098     delete BB;
4099 }
4100 
4101 void BinaryFunction::constructDomTree() {
4102   BDT.reset(new BinaryDominatorTree);
4103   BDT->recalculate(*this);
4104 }
4105 
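// Build BinaryLoopInfo on top of the dominator tree and annotate each
// discovered loop with profile-derived statistics: TotalBackEdgeCount sums the
// latch->header edge counts, EntryCount is the header's execution count minus
// the back edges, and ExitCount sums the counts of all exit edges. Loops get
// COUNT_NO_PROFILE when the function has no valid profile. Aggregate counters
// (OuterLoops, TotalLoops, MaximumDepth) are updated as well.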
4106 void BinaryFunction::calculateLoopInfo() {
4107   if (!hasDomTree())
4108     constructDomTree();
4109   // Discover loops.
4110   BLI.reset(new BinaryLoopInfo());
4111   BLI->analyze(getDomTree());
4112 
4113   // Traverse discovered loops and add depth and profile information.
4114   std::stack<BinaryLoop *> St;
4115   for (auto I = BLI->begin(), E = BLI->end(); I != E; ++I) {
4116     St.push(*I);
4117     ++BLI->OuterLoops;
4118   }
4119 
4120   while (!St.empty()) {
4121     BinaryLoop *L = St.top();
4122     St.pop();
4123     ++BLI->TotalLoops;
4124     BLI->MaximumDepth = std::max(L->getLoopDepth(), BLI->MaximumDepth);
4125 
4126     // Add nested loops in the stack.
4127     for (BinaryLoop::iterator I = L->begin(), E = L->end(); I != E; ++I)
4128       St.push(*I);
4129 
4130     // Skip if no valid profile is found.
4131     if (!hasValidProfile()) {
4132       L->EntryCount = COUNT_NO_PROFILE;
4133       L->ExitCount = COUNT_NO_PROFILE;
4134       L->TotalBackEdgeCount = COUNT_NO_PROFILE;
4135       continue;
4136     }
4137 
4138     // Compute back edge count.
4139     SmallVector<BinaryBasicBlock *, 1> Latches;
4140     L->getLoopLatches(Latches);
4141 
4142     for (BinaryBasicBlock *Latch : Latches) {
4143       auto BI = Latch->branch_info_begin();
4144       for (BinaryBasicBlock *Succ : Latch->successors()) {
4145         if (Succ == L->getHeader()) {
4146           assert(BI->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
4147                  "profile data not found");
4148           L->TotalBackEdgeCount += BI->Count;
4149         }
4150         ++BI;
4151       }
4152     }
4153 
4154     // Compute entry count.
4155     L->EntryCount = L->getHeader()->getExecutionCount() - L->TotalBackEdgeCount;
4156 
4157     // Compute exit count.
4158     SmallVector<BinaryLoop::Edge, 1> ExitEdges;
4159     L->getExitEdges(ExitEdges);
4160     for (BinaryLoop::Edge &Exit : ExitEdges) {
4161       const BinaryBasicBlock *Exiting = Exit.first;
4162       const BinaryBasicBlock *ExitTarget = Exit.second;
4163       auto BI = Exiting->branch_info_begin();
4164       for (BinaryBasicBlock *Succ : Exiting->successors()) {
4165         if (Succ == ExitTarget) {
4166           assert(BI->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
4167                  "profile data not found");
4168           L->ExitCount += BI->Count;
4169         }
4170         ++BI;
4171       }
4172     }
4173   }
4174 }
4175 
4176 void BinaryFunction::updateOutputValues(const BOLTLinker &Linker) {
4177   if (!isEmitted()) {
4178     assert(!isInjected() && "injected function should be emitted");
4179     setOutputAddress(getAddress());
4180     setOutputSize(getSize());
4181     return;
4182   }
4183 
4184   const auto SymbolInfo = Linker.lookupSymbolInfo(getSymbol()->getName());
4185   assert(SymbolInfo && "Cannot find function entry symbol");
4186   setOutputAddress(SymbolInfo->Address);
4187   setOutputSize(SymbolInfo->Size);
4188 
4189   if (BC.HasRelocations || isInjected()) {
4190     if (hasConstantIsland()) {
4191       const auto DataAddress =
4192           Linker.lookupSymbol(getFunctionConstantIslandLabel()->getName());
4193       assert(DataAddress && "Cannot find function CI symbol");
4194       setOutputDataAddress(*DataAddress);
4195       for (auto It : Islands->Offsets) {
4196         const uint64_t OldOffset = It.first;
4197         BinaryData *BD = BC.getBinaryDataAtAddress(getAddress() + OldOffset);
4198         if (!BD)
4199           continue;
4200 
4201         MCSymbol *Symbol = It.second;
4202         const auto NewAddress = Linker.lookupSymbol(Symbol->getName());
4203         assert(NewAddress && "Cannot find CI symbol");
4204         auto &Section = *getCodeSection();
4205         const auto NewOffset = *NewAddress - Section.getOutputAddress();
4206         BD->setOutputLocation(Section, NewOffset);
4207       }
4208     }
4209     if (isSplit()) {
4210       for (FunctionFragment &FF : getLayout().getSplitFragments()) {
4211         ErrorOr<BinarySection &> ColdSection =
4212             getCodeSection(FF.getFragmentNum());
4213         // If the fragment is empty, the cold section might not exist.
4214         if (FF.empty() && ColdSection.getError())
4215           continue;
4216 
4217         const MCSymbol *ColdStartSymbol = getSymbol(FF.getFragmentNum());
4218         // If the fragment is empty, the symbol might not have been emitted.
4219         if (FF.empty() && (!ColdStartSymbol || !ColdStartSymbol->isDefined()) &&
4220             !hasConstantIsland())
4221           continue;
4222         assert(ColdStartSymbol && ColdStartSymbol->isDefined() &&
4223                "split function should have defined cold symbol");
4224         const auto ColdStartSymbolInfo =
4225             Linker.lookupSymbolInfo(ColdStartSymbol->getName());
4226         assert(ColdStartSymbolInfo && "Cannot find cold start symbol");
4227         FF.setAddress(ColdStartSymbolInfo->Address);
4228         FF.setImageSize(ColdStartSymbolInfo->Size);
4229         if (hasConstantIsland()) {
4230           const auto DataAddress = Linker.lookupSymbol(
4231               getFunctionColdConstantIslandLabel()->getName());
4232           assert(DataAddress && "Cannot find cold CI symbol");
4233           setOutputColdDataAddress(*DataAddress);
4234         }
4235       }
4236     }
4237   }
4238 
4239   // Update basic block output ranges for the debug info, if we have
4240   // secondary entry points in the symbol table to update or if writing BAT.
4241   if (!requiresAddressMap())
4242     return;
4243   // AArch64 may have functions that only contain a constant island (no code).
4244   // AArch64 may have functions that only contains a constant island (no code).
4245   if (getLayout().block_empty())
4246     return;
4247 
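       // Set output start and end addresses for each emitted basic block using
       // the address map populated during emission.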
4248   for (FunctionFragment &FF : getLayout().fragments()) {
4249     if (FF.empty())
4250       continue;
4251 
4252     const uint64_t FragmentBaseAddress =
4253         getCodeSection(isSimple() ? FF.getFragmentNum() : FragmentNum::main())
4254             ->getOutputAddress();
4255 
4256     BinaryBasicBlock *PrevBB = nullptr;
4257     for (BinaryBasicBlock *const BB : FF) {
4258       assert(BB->getLabel()->isDefined() && "symbol should be defined");
4259       if (!BC.HasRelocations) {
4260         if (BB->isSplit())
4261           assert(FragmentBaseAddress == FF.getAddress());
4262         else
4263           assert(FragmentBaseAddress == getOutputAddress());
4264         (void)FragmentBaseAddress;
4265       }
4266 
4267       // Injected functions will likely fail the lookup, as they have no
4268       // input range. Just assign the BB the output address of the
4269       // function.
4270       auto MaybeBBAddress = BC.getIOAddressMap().lookup(BB->getLabel());
4271       const uint64_t BBAddress = MaybeBBAddress  ? *MaybeBBAddress
4272                                  : BB->isSplit() ? FF.getAddress()
4273                                                  : getOutputAddress();
4274       BB->setOutputStartAddress(BBAddress);
4275 
4276       if (PrevBB) {
4277         assert(PrevBB->getOutputAddressRange().first <= BBAddress &&
4278                "Bad output address for basic block.");
4279         assert((PrevBB->getOutputAddressRange().first != BBAddress ||
4280                 !hasInstructions() || !PrevBB->getNumNonPseudos()) &&
4281                "Bad output address for basic block.");
4282         PrevBB->setOutputEndAddress(BBAddress);
4283       }
4284       PrevBB = BB;
4285     }
4286 
4287     PrevBB->setOutputEndAddress(PrevBB->isSplit()
4288                                     ? FF.getAddress() + FF.getImageSize()
4289                                     : getOutputAddress() + getOutputSize());
4290   }
4291 
4292   // Reset output addresses for deleted blocks.
4293   for (BinaryBasicBlock *BB : DeletedBasicBlocks) {
4294     BB->setOutputStartAddress(0);
4295     BB->setOutputEndAddress(0);
4296   }
4297 }
4298 
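     // Collect the output address ranges of this function: the main fragment,
     // any split fragments, and, for non-simple functions, the ranges of its
     // associated fragment functions.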
4299 DebugAddressRangesVector BinaryFunction::getOutputAddressRanges() const {
4300   DebugAddressRangesVector OutputRanges;
4301 
4302   if (isFolded())
4303     return OutputRanges;
4304 
4305   if (IsFragment)
4306     return OutputRanges;
4307 
4308   OutputRanges.emplace_back(getOutputAddress(),
4309                             getOutputAddress() + getOutputSize());
4310   if (isSplit()) {
4311     assert(isEmitted() && "split function should be emitted");
4312     for (const FunctionFragment &FF : getLayout().getSplitFragments())
4313       OutputRanges.emplace_back(FF.getAddress(),
4314                                 FF.getAddress() + FF.getImageSize());
4315   }
4316 
4317   if (isSimple())
4318     return OutputRanges;
4319 
4320   for (BinaryFunction *Frag : Fragments) {
4321     assert(!Frag->isSimple() &&
4322            "fragment of non-simple function should also be non-simple");
4323     OutputRanges.emplace_back(Frag->getOutputAddress(),
4324                               Frag->getOutputAddress() + Frag->getOutputSize());
4325   }
4326 
4327   return OutputRanges;
4328 }
4329 
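     // Translate a single input address inside this function to its output
     // address. Returns the address unchanged if the function was not emitted,
     // and 0 if the function was folded or the address cannot be mapped.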
4330 uint64_t BinaryFunction::translateInputToOutputAddress(uint64_t Address) const {
4331   if (isFolded())
4332     return 0;
4333 
4334   // If the function hasn't changed, return the same address.
4335   if (!isEmitted())
4336     return Address;
4337 
4338   if (Address < getAddress())
4339     return 0;
4340 
4341   // Check if the address is associated with an instruction that is tracked
4342   // by address translation.
4343   if (auto OutputAddress = BC.getIOAddressMap().lookup(Address))
4344     return *OutputAddress;
4345 
4346   // FIXME: #18950828 - we rely on relative offsets inside basic blocks to stay
4347   //        intact. Instead we can use pseudo instructions and/or annotations.
4348   const uint64_t Offset = Address - getAddress();
4349   const BinaryBasicBlock *BB = getBasicBlockContainingOffset(Offset);
4350   if (!BB) {
4351     // Special case for an address immediately past the end of the function.
4352     if (Offset == getSize())
4353       return getOutputAddress() + getOutputSize();
4354 
4355     return 0;
4356   }
4357 
4358   return std::min(BB->getOutputAddressRange().first + Offset - BB->getOffset(),
4359                   BB->getOutputAddressRange().second);
4360 }
4361 
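     // Translate an input debug address range into one or more output ranges,
     // splitting the input range at emitted basic block boundaries. For
     // example, an input range covering two blocks that were emitted at
     // non-contiguous output addresses yields two separate output ranges;
     // adjacent output pieces are merged.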
4362 DebugAddressRangesVector
4363 BinaryFunction::translateInputToOutputRange(DebugAddressRange InRange) const {
4364   DebugAddressRangesVector OutRanges;
4365 
4366   // The function was removed from the output. Return an empty range.
4367   if (isFolded())
4368     return OutRanges;
4369 
4370   // If the function hasn't changed, return the same range.
4371   if (!isEmitted()) {
4372     OutRanges.emplace_back(InRange);
4373     return OutRanges;
4374   }
4375 
4376   if (!containsAddress(InRange.LowPC))
4377     return OutRanges;
4378 
4379   // Special case of an empty range [X, X). Some tools expect X to be updated.
4380   if (InRange.LowPC == InRange.HighPC) {
4381     if (uint64_t NewPC = translateInputToOutputAddress(InRange.LowPC))
4382       OutRanges.push_back(DebugAddressRange{NewPC, NewPC});
4383     return OutRanges;
4384   }
4385 
4386   uint64_t InputOffset = InRange.LowPC - getAddress();
4387   const uint64_t InputEndOffset =
4388       std::min(InRange.HighPC - getAddress(), getSize());
4389 
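       // Locate the basic block containing InputOffset: upper_bound returns the
       // first block starting after the offset, so the preceding entry is the
       // candidate block.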
4390   auto BBI = llvm::upper_bound(BasicBlockOffsets,
4391                                BasicBlockOffset(InputOffset, nullptr),
4392                                CompareBasicBlockOffsets());
4393   assert(BBI != BasicBlockOffsets.begin());
4394 
4395   // Iterate over blocks in the input order using BasicBlockOffsets.
4396   for (--BBI; InputOffset < InputEndOffset && BBI != BasicBlockOffsets.end();
4397        InputOffset = BBI->second->getEndOffset(), ++BBI) {
4398     const BinaryBasicBlock &BB = *BBI->second;
4399     if (InputOffset < BB.getOffset() || InputOffset >= BB.getEndOffset()) {
4400       LLVM_DEBUG(
4401           dbgs() << "BOLT-DEBUG: invalid debug address range detected for "
4402                  << *this << " : [0x" << Twine::utohexstr(InRange.LowPC)
4403                  << ", 0x" << Twine::utohexstr(InRange.HighPC) << "]\n");
4404       break;
4405     }
4406 
4407     // Skip the block if it wasn't emitted.
4408     if (!BB.getOutputAddressRange().first)
4409       continue;
4410 
4411     // Find the output address for an instruction with an offset greater than
4412     // or equal to \p Offset. The output address should fall within the same
4413     // basic block boundaries.
4414     auto translateBlockOffset = [&](const uint64_t Offset) {
4415       const uint64_t OutAddress = BB.getOutputAddressRange().first + Offset;
4416       return std::min(OutAddress, BB.getOutputAddressRange().second);
4417     };
4418 
4419     uint64_t OutLowPC = BB.getOutputAddressRange().first;
4420     if (InputOffset > BB.getOffset())
4421       OutLowPC = translateBlockOffset(InputOffset - BB.getOffset());
4422 
4423     uint64_t OutHighPC = BB.getOutputAddressRange().second;
4424     if (InputEndOffset < BB.getEndOffset()) {
4425       assert(InputEndOffset >= BB.getOffset());
4426       OutHighPC = translateBlockOffset(InputEndOffset - BB.getOffset());
4427     }
4428 
4429     // Check if we can expand the last translated range.
4430     if (!OutRanges.empty() && OutRanges.back().HighPC == OutLowPC)
4431       OutRanges.back().HighPC = std::max(OutRanges.back().HighPC, OutHighPC);
4432     else
4433       OutRanges.emplace_back(OutLowPC, std::max(OutLowPC, OutHighPC));
4434   }
4435 
4436   LLVM_DEBUG({
4437     dbgs() << "BOLT-DEBUG: translated address range " << InRange << " -> ";
4438     for (const DebugAddressRange &R : OutRanges)
4439       dbgs() << R << ' ';
4440     dbgs() << '\n';
4441   });
4442 
4443   return OutRanges;
4444 }
4445 
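     // Return the instruction starting at the given input offset, or nullptr
     // if no instruction starts there. Usable in the Disassembled and CFG
     // states.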
4446 MCInst *BinaryFunction::getInstructionAtOffset(uint64_t Offset) {
4447   if (CurrentState == State::Disassembled) {
4448     auto II = Instructions.find(Offset);
4449     return (II == Instructions.end()) ? nullptr : &II->second;
4450   } else if (CurrentState == State::CFG) {
4451     BinaryBasicBlock *BB = getBasicBlockContainingOffset(Offset);
4452     if (!BB)
4453       return nullptr;
4454 
4455     for (MCInst &Inst : *BB) {
4456       constexpr uint32_t InvalidOffset = std::numeric_limits<uint32_t>::max();
4457       if (Offset == BC.MIB->getOffsetWithDefault(Inst, InvalidOffset))
4458         return &Inst;
4459     }
4460 
4461     if (MCInst *LastInstr = BB->getLastNonPseudoInstr()) {
4462       if (std::optional<uint32_t> Size = BC.MIB->getSize(*LastInstr)) {
4463         if (BB->getEndOffset() - Offset == Size) {
4464           return LastInstr;
4465         }
4466       }
4467     }
4468 
4469     return nullptr;
4470   } else {
4471     llvm_unreachable("invalid CFG state to use getInstructionAtOffset()");
4472   }
4473 }
4474 
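     // Return the instruction whose input byte range covers the given offset,
     // or nullptr if the offset is past the end of the function. Only valid in
     // the Disassembled state.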
4475 MCInst *BinaryFunction::getInstructionContainingOffset(uint64_t Offset) {
4476   assert(CurrentState == State::Disassembled && "Wrong function state");
4477 
4478   if (Offset > Size)
4479     return nullptr;
4480 
4481   auto II = Instructions.upper_bound(Offset);
4482   assert(II != Instructions.begin() && "First instruction not at offset 0");
4483   --II;
4484   return &II->second;
4485 }
4486 
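     // Print the loop nest of this function together with per-loop profile
     // statistics: back edge, entry, and exit counts.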
4487 void BinaryFunction::printLoopInfo(raw_ostream &OS) const {
4488   if (!opts::shouldPrint(*this))
4489     return;
4490 
4491   OS << "Loop Info for Function \"" << *this << "\"";
4492   if (hasValidProfile())
4493     OS << " (count: " << getExecutionCount() << ")";
4494   OS << "\n";
4495 
4496   std::stack<BinaryLoop *> St;
4497   for (BinaryLoop *L : *BLI)
4498     St.push(L);
4499   while (!St.empty()) {
4500     BinaryLoop *L = St.top();
4501     St.pop();
4502 
4503     for (BinaryLoop *Inner : *L)
4504       St.push(Inner);
4505 
4506     if (!hasValidProfile())
4507       continue;
4508 
4509     OS << (L->getLoopDepth() > 1 ? "Nested" : "Outer")
4510        << " loop header: " << L->getHeader()->getName();
4511     OS << "\n";
4512     OS << "Loop basic blocks: ";
4513     ListSeparator LS;
4514     for (BinaryBasicBlock *BB : L->blocks())
4515       OS << LS << BB->getName();
4516     OS << "\n";
4517     if (hasValidProfile()) {
4518       OS << "Total back edge count: " << L->TotalBackEdgeCount << "\n";
4519       OS << "Loop entry count: " << L->EntryCount << "\n";
4520       OS << "Loop exit count: " << L->ExitCount << "\n";
4521       if (L->EntryCount > 0) {
4522         OS << "Average iters per entry: "
4523            << format("%.4lf", (double)L->TotalBackEdgeCount / L->EntryCount)
4524            << "\n";
4525       }
4526     }
4527     OS << "----\n";
4528   }
4529 
4530   OS << "Total number of loops: " << BLI->TotalLoops << "\n";
4531   OS << "Number of outer loops: " << BLI->OuterLoops << "\n";
4532   OS << "Maximum nested loop depth: " << BLI->MaximumDepth << "\n\n";
4533 }
4534 
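     // A function qualifies as an AArch64 veneer if it has no constant island
     // info, every instruction in its first basic block carries the
     // "AArch64Veneer" annotation, and all remaining blocks contain only no-ops.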
4535 bool BinaryFunction::isAArch64Veneer() const {
4536   if (empty() || hasIslandsInfo())
4537     return false;
4538 
4539   BinaryBasicBlock &BB = **BasicBlocks.begin();
4540   for (MCInst &Inst : BB)
4541     if (!BC.MIB->hasAnnotation(Inst, "AArch64Veneer"))
4542       return false;
4543 
4544   for (auto I = BasicBlocks.begin() + 1, E = BasicBlocks.end(); I != E; ++I) {
4545     for (MCInst &Inst : **I)
4546       if (!BC.MIB->isNoop(Inst))
4547         return false;
4548   }
4549 
4550   return true;
4551 }
4552 
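     // Record a relocation against this function. On AArch64, relocations that
     // fall inside a constant island are stored separately from code
     // relocations; only types accepted by shouldRecordCodeRelocation() are
     // recorded.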
4553 void BinaryFunction::addRelocation(uint64_t Address, MCSymbol *Symbol,
4554                                    uint64_t RelType, uint64_t Addend,
4555                                    uint64_t Value) {
4556   assert(Address >= getAddress() && Address < getAddress() + getMaxSize() &&
4557          "address is outside of the function");
4558   uint64_t Offset = Address - getAddress();
4559   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: addRelocation in "
4560                     << formatv("{0}@{1:x} against {2}\n", *this, Offset,
4561                                (Symbol ? Symbol->getName() : "<undef>")));
4562   bool IsCI = BC.isAArch64() && isInConstantIsland(Address);
4563   std::map<uint64_t, Relocation> &Rels =
4564       IsCI ? Islands->Relocations : Relocations;
4565   if (BC.MIB->shouldRecordCodeRelocation(RelType))
4566     Rels[Offset] = Relocation{Offset, Symbol, RelType, Addend, Value};
4567 }
4568 
4569 } // namespace bolt
4570 } // namespace llvm
4571