xref: /llvm-project/bolt/lib/Core/BinaryFunction.cpp (revision 5d8247d4c7c40d3f83fdffd1e337ee8cae7841d1)
1 //===- bolt/Core/BinaryFunction.cpp - Low-level function ------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the BinaryFunction class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "bolt/Core/BinaryFunction.h"
14 #include "bolt/Core/BinaryBasicBlock.h"
15 #include "bolt/Core/BinaryDomTree.h"
16 #include "bolt/Core/DynoStats.h"
17 #include "bolt/Core/MCPlusBuilder.h"
18 #include "bolt/Utils/NameResolver.h"
19 #include "bolt/Utils/NameShortener.h"
20 #include "bolt/Utils/Utils.h"
21 #include "llvm/ADT/STLExtras.h"
22 #include "llvm/ADT/SmallSet.h"
23 #include "llvm/ADT/StringRef.h"
24 #include "llvm/ADT/edit_distance.h"
25 #include "llvm/Demangle/Demangle.h"
26 #include "llvm/MC/MCAsmInfo.h"
27 #include "llvm/MC/MCAsmLayout.h"
28 #include "llvm/MC/MCContext.h"
29 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
30 #include "llvm/MC/MCExpr.h"
31 #include "llvm/MC/MCInst.h"
32 #include "llvm/MC/MCInstPrinter.h"
33 #include "llvm/MC/MCRegisterInfo.h"
34 #include "llvm/Object/ObjectFile.h"
35 #include "llvm/Support/CommandLine.h"
36 #include "llvm/Support/Debug.h"
37 #include "llvm/Support/GraphWriter.h"
38 #include "llvm/Support/LEB128.h"
39 #include "llvm/Support/Regex.h"
40 #include "llvm/Support/Timer.h"
41 #include "llvm/Support/raw_ostream.h"
42 #include <functional>
43 #include <limits>
44 #include <numeric>
45 #include <string>
46 
47 #define DEBUG_TYPE "bolt"
48 
49 using namespace llvm;
50 using namespace bolt;
51 
52 namespace opts {
53 
54 extern cl::OptionCategory BoltCategory;
55 extern cl::OptionCategory BoltOptCategory;
56 extern cl::OptionCategory BoltRelocCategory;
57 
58 extern cl::opt<bool> EnableBAT;
59 extern cl::opt<bool> Instrument;
60 extern cl::opt<bool> StrictMode;
61 extern cl::opt<bool> UpdateDebugSections;
62 extern cl::opt<unsigned> Verbosity;
63 
64 extern bool processAllFunctions();
65 
// Non-static options are also referenced from other BOLT translation units.
cl::opt<bool>
CheckEncoding("check-encoding",
  cl::desc("perform verification of LLVM instruction encoding/decoding. "
           "Every instruction in the input is decoded and re-encoded. "
           "If the resulting bytes do not match the input, a warning message "
           "is printed."),
  cl::init(false),
  cl::ZeroOrMore,
  cl::Hidden,
  cl::cat(BoltCategory));

static cl::opt<bool>
DotToolTipCode("dot-tooltip-code",
  cl::desc("add basic block instructions as tool tips on nodes"),
  cl::ZeroOrMore,
  cl::Hidden,
  cl::cat(BoltCategory));

cl::opt<JumpTableSupportLevel>
JumpTables("jump-tables",
  cl::desc("jump tables support (default=basic)"),
  cl::init(JTS_BASIC),
  cl::values(
      clEnumValN(JTS_NONE, "none",
                 "do not optimize functions with jump tables"),
      clEnumValN(JTS_BASIC, "basic",
                 "optimize functions with jump tables"),
      clEnumValN(JTS_MOVE, "move",
                 "move jump tables to a separate section"),
      clEnumValN(JTS_SPLIT, "split",
                 "split jump tables section into hot and cold based on "
                 "function execution frequency"),
      clEnumValN(JTS_AGGRESSIVE, "aggressive",
                 "aggressively split jump tables section based on usage "
                 "of the tables")),
  cl::ZeroOrMore,
  cl::cat(BoltOptCategory));

static cl::opt<bool>
NoScan("no-scan",
  cl::desc("do not scan cold functions for external references (may result in "
           "slower binary)"),
  cl::init(false),
  cl::ZeroOrMore,
  cl::Hidden,
  cl::cat(BoltOptCategory));

cl::opt<bool>
PreserveBlocksAlignment("preserve-blocks-alignment",
  cl::desc("try to preserve basic block alignment"),
  cl::init(false),
  cl::ZeroOrMore,
  cl::cat(BoltOptCategory));

cl::opt<bool>
PrintDynoStats("dyno-stats",
  cl::desc("print execution info based on profile"),
  cl::cat(BoltCategory));

static cl::opt<bool>
PrintDynoStatsOnly("print-dyno-stats-only",
  cl::desc("while printing functions output dyno-stats and skip instructions"),
  cl::init(false),
  cl::Hidden,
  cl::cat(BoltCategory));

// Filter consulted by shouldPrint() below: when non-empty, only functions
// whose name matches one of these (regex) entries are printed.
static cl::list<std::string>
PrintOnly("print-only",
  cl::CommaSeparated,
  cl::desc("list of functions to print"),
  cl::value_desc("func1,func2,func3,..."),
  cl::Hidden,
  cl::cat(BoltCategory));

cl::opt<bool>
TimeBuild("time-build",
  cl::desc("print time spent constructing binary functions"),
  cl::ZeroOrMore,
  cl::Hidden,
  cl::cat(BoltCategory));

cl::opt<bool>
TrapOnAVX512("trap-avx512",
  cl::desc("in relocation mode trap upon entry to any function that uses "
            "AVX-512 instructions"),
  cl::init(false),
  cl::ZeroOrMore,
  cl::Hidden,
  cl::cat(BoltCategory));
155 
156 bool shouldPrint(const BinaryFunction &Function) {
157   if (Function.isIgnored())
158     return false;
159 
160   if (PrintOnly.empty())
161     return true;
162 
163   for (std::string &Name : opts::PrintOnly) {
164     if (Function.hasNameRegex(Name)) {
165       return true;
166     }
167   }
168 
169   return false;
170 }
171 
172 } // namespace opts
173 
174 namespace llvm {
175 namespace bolt {
176 
177 constexpr unsigned BinaryFunction::MinAlign;
178 
179 namespace {
180 
/// Return true iff \p Range contains no elements.
template <typename R> bool emptyRange(const R &Range) {
  // A range is empty exactly when its begin and end iterators coincide.
  const auto B = Range.begin();
  const auto E = Range.end();
  return B == E;
}
184 
/// Gets debug line information for the instruction located at the given
/// address in the original binary. The SMLoc's pointer is used
/// to point to this information, which is represented by a
/// DebugLineTableRowRef. The returned pointer is null if no debug line
/// information for this instruction was found.
SMLoc findDebugLineInformationForInstructionAt(
    uint64_t Address, DWARFUnit *Unit,
    const DWARFDebugLine::LineTable *LineTable) {
  // We use the pointer in SMLoc to store an instance of DebugLineTableRowRef,
  // which occupies 64 bits. Thus, we can only proceed if the struct fits into
  // the pointer itself.
  assert(sizeof(decltype(SMLoc().getPointer())) >=
             sizeof(DebugLineTableRowRef) &&
         "Cannot fit instruction debug line information into SMLoc's pointer");

  SMLoc NullResult = DebugLineTableRowRef::NULL_ROW.toSMLoc();
  uint32_t RowIndex = LineTable->lookupAddress(
      {Address, object::SectionedAddress::UndefSection});
  // No line-table row maps to this address.
  if (RowIndex == LineTable->UnknownRowIndex)
    return NullResult;

  assert(RowIndex < LineTable->Rows.size() &&
         "Line Table lookup returned invalid index.");

  // Pack the reference into pointer-sized storage by aliasing it with a
  // DebugLineTableRowRef.
  decltype(SMLoc().getPointer()) Ptr;
  DebugLineTableRowRef *InstructionLocation =
      reinterpret_cast<DebugLineTableRowRef *>(&Ptr);

  // RowIndex is stored off-by-one, presumably so that zero can represent
  // NULL_ROW — TODO confirm against DebugLineTableRowRef's definition.
  InstructionLocation->DwCompileUnitIndex = Unit->getOffset();
  InstructionLocation->RowIndex = RowIndex + 1;

  return SMLoc::getFromPointer(Ptr);
}
218 
219 std::string buildSectionName(StringRef Prefix, StringRef Name,
220                              const BinaryContext &BC) {
221   if (BC.isELF())
222     return (Prefix + Name).str();
223   static NameShortener NS;
224   return (Prefix + Twine(NS.getID(Name))).str();
225 }
226 
227 raw_ostream &operator<<(raw_ostream &OS, const BinaryFunction::State State) {
228   switch (State) {
229   case BinaryFunction::State::Empty:         OS << "empty"; break;
230   case BinaryFunction::State::Disassembled:  OS << "disassembled"; break;
231   case BinaryFunction::State::CFG:           OS << "CFG constructed"; break;
232   case BinaryFunction::State::CFG_Finalized: OS << "CFG finalized"; break;
233   case BinaryFunction::State::EmittedCFG:    OS << "emitted with CFG"; break;
234   case BinaryFunction::State::Emitted:       OS << "emitted"; break;
235   }
236 
237   return OS;
238 }
239 
240 } // namespace
241 
242 std::string BinaryFunction::buildCodeSectionName(StringRef Name,
243                                                  const BinaryContext &BC) {
244   return buildSectionName(BC.isELF() ? ".local.text." : ".l.text.", Name, BC);
245 }
246 
247 std::string BinaryFunction::buildColdCodeSectionName(StringRef Name,
248                                                      const BinaryContext &BC) {
249   return buildSectionName(BC.isELF() ? ".local.cold.text." : ".l.c.text.", Name,
250                           BC);
251 }
252 
253 uint64_t BinaryFunction::Count = 0;
254 
255 Optional<StringRef> BinaryFunction::hasNameRegex(const StringRef Name) const {
256   const std::string RegexName = (Twine("^") + StringRef(Name) + "$").str();
257   Regex MatchName(RegexName);
258   Optional<StringRef> Match = forEachName(
259       [&MatchName](StringRef Name) { return MatchName.match(Name); });
260 
261   return Match;
262 }
263 
264 Optional<StringRef>
265 BinaryFunction::hasRestoredNameRegex(const StringRef Name) const {
266   const std::string RegexName = (Twine("^") + StringRef(Name) + "$").str();
267   Regex MatchName(RegexName);
268   Optional<StringRef> Match = forEachName([&MatchName](StringRef Name) {
269     return MatchName.match(NameResolver::restore(Name));
270   });
271 
272   return Match;
273 }
274 
275 std::string BinaryFunction::getDemangledName() const {
276   StringRef MangledName = NameResolver::restore(getOneName());
277   return demangle(MangledName.str());
278 }
279 
BinaryBasicBlock *
BinaryFunction::getBasicBlockContainingOffset(uint64_t Offset) {
  // An offset beyond the function size cannot belong to any block.
  if (Offset > Size)
    return nullptr;

  if (BasicBlockOffsets.empty())
    return nullptr;

  /*
   * This is commented out because it makes BOLT too slow.
   * assert(std::is_sorted(BasicBlockOffsets.begin(),
   *                       BasicBlockOffsets.end(),
   *                       CompareBasicBlockOffsets())));
   */
  // Find the first block that starts strictly after Offset; the candidate
  // containing block is then the one immediately preceding it.
  auto I = std::upper_bound(BasicBlockOffsets.begin(), BasicBlockOffsets.end(),
                            BasicBlockOffset(Offset, nullptr),
                            CompareBasicBlockOffsets());
  assert(I != BasicBlockOffsets.begin() && "first basic block not at offset 0");
  --I;
  BinaryBasicBlock *BB = I->second;
  // Offset may fall into a gap past the candidate's original size; in that
  // case no block contains it.
  return (Offset < BB->getOffset() + BB->getOriginalSize()) ? BB : nullptr;
}
302 
void BinaryFunction::markUnreachableBlocks() {
  std::stack<BinaryBasicBlock *> Stack;

  // Start by assuming every block is unreachable.
  for (BinaryBasicBlock *BB : layout())
    BB->markValid(false);

  // Add all entries and landing pads as roots.
  for (BinaryBasicBlock *BB : BasicBlocks) {
    if (isEntryPoint(*BB) || BB->isLandingPad()) {
      Stack.push(BB);
      BB->markValid(true);
      continue;
    }
    // FIXME:
    // Also mark BBs with indirect jumps as reachable, since we do not
    // support removing unused jump tables yet (GH-issue20).
    for (const MCInst &Inst : *BB) {
      if (BC.MIB->getJumpTable(Inst)) {
        Stack.push(BB);
        BB->markValid(true);
        break;
      }
    }
  }

  // Determine reachable BBs from the entry point
  while (!Stack.empty()) {
    BinaryBasicBlock *BB = Stack.top();
    Stack.pop();
    // Depth-first flood fill along successor edges.
    for (BinaryBasicBlock *Succ : BB->successors()) {
      if (Succ->isValid())
        continue;
      Succ->markValid(true);
      Stack.push(Succ);
    }
  }
}
340 
// Any unnecessary fallthrough jumps revealed after calling eraseInvalidBBs
// will be cleaned up by fixBranches().
// Returns the number of erased blocks and the total original code size they
// contained.
std::pair<unsigned, uint64_t> BinaryFunction::eraseInvalidBBs() {
  // First pass: rebuild the layout keeping only valid blocks, while tallying
  // the count and byte size of what is being dropped.
  BasicBlockOrderType NewLayout;
  unsigned Count = 0;
  uint64_t Bytes = 0;
  for (BinaryBasicBlock *BB : layout()) {
    if (BB->isValid()) {
      NewLayout.push_back(BB);
    } else {
      assert(!isEntryPoint(*BB) && "all entry blocks must be valid");
      ++Count;
      Bytes += BC.computeCodeSize(BB->begin(), BB->end());
    }
  }
  BasicBlocksLayout = std::move(NewLayout);

  // Second pass: rebuild the block list. Invalid blocks are unlinked from
  // the CFG and parked in DeletedBasicBlocks rather than destroyed.
  BasicBlockListType NewBasicBlocks;
  for (auto I = BasicBlocks.begin(), E = BasicBlocks.end(); I != E; ++I) {
    BinaryBasicBlock *BB = *I;
    if (BB->isValid()) {
      NewBasicBlocks.push_back(BB);
    } else {
      // Make sure the block is removed from the list of predecessors.
      BB->removeAllSuccessors();
      DeletedBasicBlocks.push_back(BB);
    }
  }
  BasicBlocks = std::move(NewBasicBlocks);

  assert(BasicBlocks.size() == BasicBlocksLayout.size());

  // Update CFG state if needed
  if (Count > 0)
    recomputeLandingPads();

  return std::make_pair(Count, Bytes);
}
379 
380 bool BinaryFunction::isForwardCall(const MCSymbol *CalleeSymbol) const {
381   // This function should work properly before and after function reordering.
382   // In order to accomplish this, we use the function index (if it is valid).
383   // If the function indices are not valid, we fall back to the original
384   // addresses.  This should be ok because the functions without valid indices
385   // should have been ordered with a stable sort.
386   const BinaryFunction *CalleeBF = BC.getFunctionForSymbol(CalleeSymbol);
387   if (CalleeBF) {
388     if (CalleeBF->isInjected())
389       return true;
390 
391     if (hasValidIndex() && CalleeBF->hasValidIndex()) {
392       return getIndex() < CalleeBF->getIndex();
393     } else if (hasValidIndex() && !CalleeBF->hasValidIndex()) {
394       return true;
395     } else if (!hasValidIndex() && CalleeBF->hasValidIndex()) {
396       return false;
397     } else {
398       return getAddress() < CalleeBF->getAddress();
399     }
400   } else {
401     // Absolute symbol.
402     ErrorOr<uint64_t> CalleeAddressOrError = BC.getSymbolValue(*CalleeSymbol);
403     assert(CalleeAddressOrError && "unregistered symbol found");
404     return *CalleeAddressOrError > getAddress();
405   }
406 }
407 
// Debugger convenience: print the function to the LLVM debug stream with no
// annotation.
void BinaryFunction::dump(bool PrintInstructions) const {
  print(dbgs(), "", PrintInstructions);
}
411 
// Print a full textual dump of the function: header attributes, optional
// dyno-stats, instructions (pre- or post-CFG), exception tables, jump
// tables, and CFI instructions.
void BinaryFunction::print(raw_ostream &OS, std::string Annotation,
                           bool PrintInstructions) const {
  // Respect the -print-only filter.
  if (!opts::shouldPrint(*this))
    return;

  StringRef SectionName =
      OriginSection ? OriginSection->getName() : "<no origin section>";
  OS << "Binary Function \"" << *this << "\" " << Annotation << " {";
  std::vector<StringRef> AllNames = getNames();
  if (AllNames.size() > 1) {
    OS << "\n  All names   : ";
    const char *Sep = "";
    for (const StringRef &Name : AllNames) {
      OS << Sep << Name;
      Sep = "\n                ";
    }
  }
  OS << "\n  Number      : "   << FunctionNumber
     << "\n  State       : "   << CurrentState
     << "\n  Address     : 0x" << Twine::utohexstr(Address)
     << "\n  Size        : 0x" << Twine::utohexstr(Size)
     << "\n  MaxSize     : 0x" << Twine::utohexstr(MaxSize)
     << "\n  Offset      : 0x" << Twine::utohexstr(FileOffset)
     << "\n  Section     : "   << SectionName
     << "\n  Orc Section : "   << getCodeSectionName()
     << "\n  LSDA        : 0x" << Twine::utohexstr(getLSDAAddress())
     << "\n  IsSimple    : "   << IsSimple
     << "\n  IsMultiEntry: "   << isMultiEntry()
     << "\n  IsSplit     : "   << isSplit()
     << "\n  BB Count    : "   << size();

  // Optional attributes are printed only when set.
  if (HasFixedIndirectBranch)
    OS << "\n  HasFixedIndirectBranch : true";
  if (HasUnknownControlFlow)
    OS << "\n  Unknown CF  : true";
  if (getPersonalityFunction())
    OS << "\n  Personality : " << getPersonalityFunction()->getName();
  if (IsFragment)
    OS << "\n  IsFragment  : true";
  if (isFolded())
    OS << "\n  FoldedInto  : " << *getFoldedIntoFunction();
  for (BinaryFunction *ParentFragment : ParentFragments)
    OS << "\n  Parent      : " << *ParentFragment;
  if (!Fragments.empty()) {
    OS << "\n  Fragments   : ";
    const char *Sep = "";
    for (BinaryFunction *Frag : Fragments) {
      OS << Sep << *Frag;
      Sep = ", ";
    }
  }
  if (hasCFG())
    OS << "\n  Hash        : " << Twine::utohexstr(computeHash());
  if (isMultiEntry()) {
    OS << "\n  Secondary Entry Points : ";
    const char *Sep = "";
    for (const auto &KV : SecondaryEntryPoints) {
      OS << Sep << KV.second->getName();
      Sep = ", ";
    }
  }
  if (FrameInstructions.size())
    OS << "\n  CFI Instrs  : " << FrameInstructions.size();
  if (BasicBlocksLayout.size()) {
    OS << "\n  BB Layout   : ";
    const char *Sep = "";
    for (BinaryBasicBlock *BB : BasicBlocksLayout) {
      OS << Sep << BB->getName();
      Sep = ", ";
    }
  }
  if (ImageAddress)
    OS << "\n  Image       : 0x" << Twine::utohexstr(ImageAddress);
  if (ExecutionCount != COUNT_NO_PROFILE) {
    OS << "\n  Exec Count  : " << ExecutionCount;
    OS << "\n  Profile Acc : " << format("%.1f%%", ProfileMatchRatio * 100.0f);
  }

  if (opts::PrintDynoStats && !BasicBlocksLayout.empty()) {
    OS << '\n';
    DynoStats dynoStats = getDynoStats(*this);
    OS << dynoStats;
  }

  OS << "\n}\n";

  // Stop after the header when instructions were not requested or cannot be
  // printed.
  if (opts::PrintDynoStatsOnly || !PrintInstructions || !BC.InstPrinter)
    return;

  // Offset of the instruction in function.
  uint64_t Offset = 0;

  if (BasicBlocks.empty() && !Instructions.empty()) {
    // Print before CFG was built.
    for (const std::pair<const uint32_t, MCInst> &II : Instructions) {
      Offset = II.first;

      // Print label if exists at this offset.
      auto LI = Labels.find(Offset);
      if (LI != Labels.end()) {
        if (const MCSymbol *EntrySymbol =
                getSecondaryEntryPointSymbol(LI->second))
          OS << EntrySymbol->getName() << " (Entry Point):\n";
        OS << LI->second->getName() << ":\n";
      }

      BC.printInstruction(OS, II.second, Offset, this);
    }
  }

  // Post-CFG printing: walk basic blocks in layout order with their CFG
  // annotations (entry points, profile counts, predecessors, successors...).
  for (uint32_t I = 0, E = BasicBlocksLayout.size(); I != E; ++I) {
    BinaryBasicBlock *BB = BasicBlocksLayout[I];
    if (I != 0 && BB->isCold() != BasicBlocksLayout[I - 1]->isCold())
      OS << "-------   HOT-COLD SPLIT POINT   -------\n\n";

    OS << BB->getName() << " (" << BB->size()
       << " instructions, align : " << BB->getAlignment() << ")\n";

    if (isEntryPoint(*BB)) {
      if (MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(*BB))
        OS << "  Secondary Entry Point: " << EntrySymbol->getName() << '\n';
      else
        OS << "  Entry Point\n";
    }

    if (BB->isLandingPad())
      OS << "  Landing Pad\n";

    uint64_t BBExecCount = BB->getExecutionCount();
    if (hasValidProfile()) {
      OS << "  Exec Count : ";
      if (BB->getExecutionCount() != BinaryBasicBlock::COUNT_NO_PROFILE)
        OS << BBExecCount << '\n';
      else
        OS << "<unknown>\n";
    }
    if (BB->getCFIState() >= 0)
      OS << "  CFI State : " << BB->getCFIState() << '\n';
    if (opts::EnableBAT) {
      OS << "  Input offset: " << Twine::utohexstr(BB->getInputOffset())
         << "\n";
    }
    if (!BB->pred_empty()) {
      OS << "  Predecessors: ";
      const char *Sep = "";
      for (BinaryBasicBlock *Pred : BB->predecessors()) {
        OS << Sep << Pred->getName();
        Sep = ", ";
      }
      OS << '\n';
    }
    if (!BB->throw_empty()) {
      OS << "  Throwers: ";
      const char *Sep = "";
      for (BinaryBasicBlock *Throw : BB->throwers()) {
        OS << Sep << Throw->getName();
        Sep = ", ";
      }
      OS << '\n';
    }

    Offset = alignTo(Offset, BB->getAlignment());

    // Note: offsets are imprecise since this is happening prior to relaxation.
    Offset = BC.printInstructions(OS, BB->begin(), BB->end(), Offset, this);

    if (!BB->succ_empty()) {
      OS << "  Successors: ";
      // For more than 2 successors, sort them based on frequency.
      std::vector<uint64_t> Indices(BB->succ_size());
      std::iota(Indices.begin(), Indices.end(), 0);
      if (BB->succ_size() > 2 && BB->getKnownExecutionCount()) {
        std::stable_sort(Indices.begin(), Indices.end(),
                         [&](const uint64_t A, const uint64_t B) {
                           return BB->BranchInfo[B] < BB->BranchInfo[A];
                         });
      }
      const char *Sep = "";
      for (unsigned I = 0; I < Indices.size(); ++I) {
        BinaryBasicBlock *Succ = BB->Successors[Indices[I]];
        BinaryBasicBlock::BinaryBranchInfo &BI = BB->BranchInfo[Indices[I]];
        OS << Sep << Succ->getName();
        if (ExecutionCount != COUNT_NO_PROFILE &&
            BI.MispredictedCount != BinaryBasicBlock::COUNT_INFERRED) {
          OS << " (mispreds: " << BI.MispredictedCount
             << ", count: " << BI.Count << ")";
        } else if (ExecutionCount != COUNT_NO_PROFILE &&
                   BI.Count != BinaryBasicBlock::COUNT_NO_PROFILE) {
          OS << " (inferred count: " << BI.Count << ")";
        }
        Sep = ", ";
      }
      OS << '\n';
    }

    if (!BB->lp_empty()) {
      OS << "  Landing Pads: ";
      const char *Sep = "";
      for (BinaryBasicBlock *LP : BB->landing_pads()) {
        OS << Sep << LP->getName();
        if (ExecutionCount != COUNT_NO_PROFILE) {
          OS << " (count: " << LP->getExecutionCount() << ")";
        }
        Sep = ", ";
      }
      OS << '\n';
    }

    // In CFG_Finalized state we can miscalculate CFI state at exit.
    if (CurrentState == State::CFG) {
      const int32_t CFIStateAtExit = BB->getCFIStateAtExit();
      if (CFIStateAtExit >= 0)
        OS << "  CFI State: " << CFIStateAtExit << '\n';
    }

    OS << '\n';
  }

  // Dump new exception ranges for the function.
  if (!CallSites.empty()) {
    OS << "EH table:\n";
    for (const CallSite &CSI : CallSites) {
      OS << "  [" << *CSI.Start << ", " << *CSI.End << ") landing pad : ";
      if (CSI.LP)
        OS << *CSI.LP;
      else
        OS << "0";
      OS << ", action : " << CSI.Action << '\n';
    }
    OS << '\n';
  }

  // Print all jump tables.
  for (const std::pair<const uint64_t, JumpTable *> &JTI : JumpTables)
    JTI.second->print(OS);

  OS << "DWARF CFI Instructions:\n";
  if (OffsetToCFI.size()) {
    // Pre-buildCFG information
    for (const std::pair<const uint32_t, uint32_t> &Elmt : OffsetToCFI) {
      OS << format("    %08x:\t", Elmt.first);
      assert(Elmt.second < FrameInstructions.size() && "Incorrect CFI offset");
      BinaryContext::printCFI(OS, FrameInstructions[Elmt.second]);
      OS << "\n";
    }
  } else {
    // Post-buildCFG information
    for (uint32_t I = 0, E = FrameInstructions.size(); I != E; ++I) {
      const MCCFIInstruction &CFI = FrameInstructions[I];
      OS << format("    %d:\t", I);
      BinaryContext::printCFI(OS, CFI);
      OS << "\n";
    }
  }
  if (FrameInstructions.empty())
    OS << "    <empty>\n";

  OS << "End of Function \"" << *this << "\"\n\n";
}
671 
672 void BinaryFunction::printRelocations(raw_ostream &OS, uint64_t Offset,
673                                       uint64_t Size) const {
674   const char *Sep = " # Relocs: ";
675 
676   auto RI = Relocations.lower_bound(Offset);
677   while (RI != Relocations.end() && RI->first < Offset + Size) {
678     OS << Sep << "(R: " << RI->second << ")";
679     Sep = ", ";
680     ++RI;
681   }
682 }
683 
684 namespace {
// Rewrite the target register of a DW_CFA_expression / DW_CFA_val_expression
// CFI by replacing the ULEB128-encoded register number that immediately
// follows the opcode byte. Returns the rewritten expression bytes.
std::string mutateDWARFExpressionTargetReg(const MCCFIInstruction &Instr,
                                           MCPhysReg NewReg) {
  StringRef ExprBytes = Instr.getValues();
  assert(ExprBytes.size() > 1 && "DWARF expression CFI is too short");
  uint8_t Opcode = ExprBytes[0];
  assert((Opcode == dwarf::DW_CFA_expression ||
          Opcode == dwarf::DW_CFA_val_expression) &&
         "invalid DWARF expression CFI");
  (void)Opcode;
  // Skip the opcode byte; the register number is encoded right after it.
  const uint8_t *const Start =
      reinterpret_cast<const uint8_t *>(ExprBytes.drop_front(1).data());
  const uint8_t *const End =
      reinterpret_cast<const uint8_t *>(Start + ExprBytes.size() - 1);
  // Size receives the byte length of the existing register encoding.
  unsigned Size = 0;
  decodeULEB128(Start, &Size, End);
  assert(Size > 0 && "Invalid reg encoding for DWARF expression CFI");
  SmallString<8> Tmp;
  raw_svector_ostream OSE(Tmp);
  encodeULEB128(NewReg, OSE);
  // Splice: opcode byte + new register ULEB128 + rest of the expression.
  return Twine(ExprBytes.slice(0, 1))
      .concat(OSE.str())
      .concat(ExprBytes.drop_front(1 + Size))
      .str();
}
709 } // namespace
710 
// Replace the CFI annotation attached to Instr with an equivalent directive
// targeting NewReg, preserving the directive's other operands.
void BinaryFunction::mutateCFIRegisterFor(const MCInst &Instr,
                                          MCPhysReg NewReg) {
  const MCCFIInstruction *OldCFI = getCFIFor(Instr);
  assert(OldCFI && "invalid CFI instr");
  switch (OldCFI->getOperation()) {
  default:
    llvm_unreachable("Unexpected instruction");
  case MCCFIInstruction::OpDefCfa:
    setCFIFor(Instr, MCCFIInstruction::cfiDefCfa(nullptr, NewReg,
                                                 OldCFI->getOffset()));
    break;
  case MCCFIInstruction::OpDefCfaRegister:
    setCFIFor(Instr, MCCFIInstruction::createDefCfaRegister(nullptr, NewReg));
    break;
  case MCCFIInstruction::OpOffset:
    setCFIFor(Instr, MCCFIInstruction::createOffset(nullptr, NewReg,
                                                    OldCFI->getOffset()));
    break;
  case MCCFIInstruction::OpRegister:
    setCFIFor(Instr, MCCFIInstruction::createRegister(nullptr, NewReg,
                                                      OldCFI->getRegister2()));
    break;
  case MCCFIInstruction::OpSameValue:
    setCFIFor(Instr, MCCFIInstruction::createSameValue(nullptr, NewReg));
    break;
  case MCCFIInstruction::OpEscape:
    // DWARF expression CFIs encode the register inside the raw expression
    // bytes, so it has to be patched rather than rebuilt.
    setCFIFor(Instr,
              MCCFIInstruction::createEscape(
                  nullptr,
                  StringRef(mutateDWARFExpressionTargetReg(*OldCFI, NewReg))));
    break;
  case MCCFIInstruction::OpRestore:
    setCFIFor(Instr, MCCFIInstruction::createRestore(nullptr, NewReg));
    break;
  case MCCFIInstruction::OpUndefined:
    setCFIFor(Instr, MCCFIInstruction::createUndefined(nullptr, NewReg));
    break;
  }
}
750 
// Replace the CFI annotation attached to Instr with an equivalent directive
// using NewOffset, keeping the register operand (when present) intact.
// Returns the newly-installed CFI instruction.
const MCCFIInstruction *BinaryFunction::mutateCFIOffsetFor(const MCInst &Instr,
                                                           int64_t NewOffset) {
  const MCCFIInstruction *OldCFI = getCFIFor(Instr);
  assert(OldCFI && "invalid CFI instr");
  switch (OldCFI->getOperation()) {
  default:
    llvm_unreachable("Unexpected instruction");
  case MCCFIInstruction::OpDefCfaOffset:
    setCFIFor(Instr, MCCFIInstruction::cfiDefCfaOffset(nullptr, NewOffset));
    break;
  case MCCFIInstruction::OpAdjustCfaOffset:
    setCFIFor(Instr,
              MCCFIInstruction::createAdjustCfaOffset(nullptr, NewOffset));
    break;
  case MCCFIInstruction::OpDefCfa:
    setCFIFor(Instr, MCCFIInstruction::cfiDefCfa(nullptr, OldCFI->getRegister(),
                                                 NewOffset));
    break;
  case MCCFIInstruction::OpOffset:
    setCFIFor(Instr, MCCFIInstruction::createOffset(
                         nullptr, OldCFI->getRegister(), NewOffset));
    break;
  }
  return getCFIFor(Instr);
}
776 
777 IndirectBranchType
778 BinaryFunction::processIndirectBranch(MCInst &Instruction, unsigned Size,
779                                       uint64_t Offset,
780                                       uint64_t &TargetAddress) {
781   const unsigned PtrSize = BC.AsmInfo->getCodePointerSize();
782 
783   // The instruction referencing memory used by the branch instruction.
784   // It could be the branch instruction itself or one of the instructions
785   // setting the value of the register used by the branch.
786   MCInst *MemLocInstr;
787 
788   // Address of the table referenced by MemLocInstr. Could be either an
789   // array of function pointers, or a jump table.
790   uint64_t ArrayStart = 0;
791 
792   unsigned BaseRegNum, IndexRegNum;
793   int64_t DispValue;
794   const MCExpr *DispExpr;
795 
796   // In AArch, identify the instruction adding the PC-relative offset to
797   // jump table entries to correctly decode it.
798   MCInst *PCRelBaseInstr;
799   uint64_t PCRelAddr = 0;
800 
801   auto Begin = Instructions.begin();
802   if (BC.isAArch64()) {
803     PreserveNops = BC.HasRelocations;
804     // Start at the last label as an approximation of the current basic block.
805     // This is a heuristic, since the full set of labels have yet to be
806     // determined
807     for (auto LI = Labels.rbegin(); LI != Labels.rend(); ++LI) {
808       auto II = Instructions.find(LI->first);
809       if (II != Instructions.end()) {
810         Begin = II;
811         break;
812       }
813     }
814   }
815 
816   IndirectBranchType BranchType = BC.MIB->analyzeIndirectBranch(
817       Instruction, Begin, Instructions.end(), PtrSize, MemLocInstr, BaseRegNum,
818       IndexRegNum, DispValue, DispExpr, PCRelBaseInstr);
819 
820   if (BranchType == IndirectBranchType::UNKNOWN && !MemLocInstr)
821     return BranchType;
822 
823   if (MemLocInstr != &Instruction)
824     IndexRegNum = BC.MIB->getNoRegister();
825 
826   if (BC.isAArch64()) {
827     const MCSymbol *Sym = BC.MIB->getTargetSymbol(*PCRelBaseInstr, 1);
828     assert(Sym && "Symbol extraction failed");
829     ErrorOr<uint64_t> SymValueOrError = BC.getSymbolValue(*Sym);
830     if (SymValueOrError) {
831       PCRelAddr = *SymValueOrError;
832     } else {
833       for (std::pair<const uint32_t, MCSymbol *> &Elmt : Labels) {
834         if (Elmt.second == Sym) {
835           PCRelAddr = Elmt.first + getAddress();
836           break;
837         }
838       }
839     }
840     uint64_t InstrAddr = 0;
841     for (auto II = Instructions.rbegin(); II != Instructions.rend(); ++II) {
842       if (&II->second == PCRelBaseInstr) {
843         InstrAddr = II->first + getAddress();
844         break;
845       }
846     }
847     assert(InstrAddr != 0 && "instruction not found");
848     // We do this to avoid spurious references to code locations outside this
849     // function (for example, if the indirect jump lives in the last basic
850     // block of the function, it will create a reference to the next function).
851     // This replaces a symbol reference with an immediate.
852     BC.MIB->replaceMemOperandDisp(*PCRelBaseInstr,
853                                   MCOperand::createImm(PCRelAddr - InstrAddr));
854     // FIXME: Disable full jump table processing for AArch64 until we have a
855     // proper way of determining the jump table limits.
856     return IndirectBranchType::UNKNOWN;
857   }
858 
859   // RIP-relative addressing should be converted to symbol form by now
860   // in processed instructions (but not in jump).
861   if (DispExpr) {
862     const MCSymbol *TargetSym;
863     uint64_t TargetOffset;
864     std::tie(TargetSym, TargetOffset) = BC.MIB->getTargetSymbolInfo(DispExpr);
865     ErrorOr<uint64_t> SymValueOrError = BC.getSymbolValue(*TargetSym);
866     assert(SymValueOrError && "global symbol needs a value");
867     ArrayStart = *SymValueOrError + TargetOffset;
868     BaseRegNum = BC.MIB->getNoRegister();
869     if (BC.isAArch64()) {
870       ArrayStart &= ~0xFFFULL;
871       ArrayStart += DispValue & 0xFFFULL;
872     }
873   } else {
874     ArrayStart = static_cast<uint64_t>(DispValue);
875   }
876 
877   if (BaseRegNum == BC.MRI->getProgramCounter())
878     ArrayStart += getAddress() + Offset + Size;
879 
880   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: addressed memory is 0x"
881                     << Twine::utohexstr(ArrayStart) << '\n');
882 
883   ErrorOr<BinarySection &> Section = BC.getSectionForAddress(ArrayStart);
884   if (!Section) {
885     // No section - possibly an absolute address. Since we don't allow
886     // internal function addresses to escape the function scope - we
887     // consider it a tail call.
888     if (opts::Verbosity >= 1) {
889       errs() << "BOLT-WARNING: no section for address 0x"
890              << Twine::utohexstr(ArrayStart) << " referenced from function "
891              << *this << '\n';
892     }
893     return IndirectBranchType::POSSIBLE_TAIL_CALL;
894   }
895   if (Section->isVirtual()) {
896     // The contents are filled at runtime.
897     return IndirectBranchType::POSSIBLE_TAIL_CALL;
898   }
899 
900   if (BranchType == IndirectBranchType::POSSIBLE_FIXED_BRANCH) {
901     ErrorOr<uint64_t> Value = BC.getPointerAtAddress(ArrayStart);
902     if (!Value)
903       return IndirectBranchType::UNKNOWN;
904 
905     if (!BC.getSectionForAddress(ArrayStart)->isReadOnly())
906       return IndirectBranchType::UNKNOWN;
907 
908     outs() << "BOLT-INFO: fixed indirect branch detected in " << *this
909            << " at 0x" << Twine::utohexstr(getAddress() + Offset)
910            << " referencing data at 0x" << Twine::utohexstr(ArrayStart)
911            << " the destination value is 0x" << Twine::utohexstr(*Value)
912            << '\n';
913 
914     TargetAddress = *Value;
915     return BranchType;
916   }
917 
918   // Check if there's already a jump table registered at this address.
919   MemoryContentsType MemType;
920   if (JumpTable *JT = BC.getJumpTableContainingAddress(ArrayStart)) {
921     switch (JT->Type) {
922     case JumpTable::JTT_NORMAL:
923       MemType = MemoryContentsType::POSSIBLE_JUMP_TABLE;
924       break;
925     case JumpTable::JTT_PIC:
926       MemType = MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE;
927       break;
928     }
929   } else {
930     MemType = BC.analyzeMemoryAt(ArrayStart, *this);
931   }
932 
933   // Check that jump table type in instruction pattern matches memory contents.
934   JumpTable::JumpTableType JTType;
935   if (BranchType == IndirectBranchType::POSSIBLE_PIC_JUMP_TABLE) {
936     if (MemType != MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE)
937       return IndirectBranchType::UNKNOWN;
938     JTType = JumpTable::JTT_PIC;
939   } else {
940     if (MemType == MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE)
941       return IndirectBranchType::UNKNOWN;
942 
943     if (MemType == MemoryContentsType::UNKNOWN)
944       return IndirectBranchType::POSSIBLE_TAIL_CALL;
945 
946     BranchType = IndirectBranchType::POSSIBLE_JUMP_TABLE;
947     JTType = JumpTable::JTT_NORMAL;
948   }
949 
950   // Convert the instruction into jump table branch.
951   const MCSymbol *JTLabel = BC.getOrCreateJumpTable(*this, ArrayStart, JTType);
952   BC.MIB->replaceMemOperandDisp(*MemLocInstr, JTLabel, BC.Ctx.get());
953   BC.MIB->setJumpTable(Instruction, ArrayStart, IndexRegNum);
954 
955   JTSites.emplace_back(Offset, ArrayStart);
956 
957   return BranchType;
958 }
959 
960 MCSymbol *BinaryFunction::getOrCreateLocalLabel(uint64_t Address,
961                                                 bool CreatePastEnd) {
962   const uint64_t Offset = Address - getAddress();
963 
964   if ((Offset == getSize()) && CreatePastEnd)
965     return getFunctionEndLabel();
966 
967   auto LI = Labels.find(Offset);
968   if (LI != Labels.end())
969     return LI->second;
970 
971   // For AArch64, check if this address is part of a constant island.
972   if (BC.isAArch64()) {
973     if (MCSymbol *IslandSym = getOrCreateIslandAccess(Address))
974       return IslandSym;
975   }
976 
977   MCSymbol *Label = BC.Ctx->createNamedTempSymbol();
978   Labels[Offset] = Label;
979 
980   return Label;
981 }
982 
983 ErrorOr<ArrayRef<uint8_t>> BinaryFunction::getData() const {
984   BinarySection &Section = *getOriginSection();
985   assert(Section.containsRange(getAddress(), getMaxSize()) &&
986          "wrong section for function");
987 
988   if (!Section.isText() || Section.isVirtual() || !Section.getSize())
989     return std::make_error_code(std::errc::bad_address);
990 
991   StringRef SectionContents = Section.getContents();
992 
993   assert(SectionContents.size() == Section.getSize() &&
994          "section size mismatch");
995 
996   // Function offset from the section start.
997   uint64_t Offset = getAddress() - Section.getAddress();
998   auto *Bytes = reinterpret_cast<const uint8_t *>(SectionContents.data());
999   return ArrayRef<uint8_t>(Bytes + Offset, getMaxSize());
1000 }
1001 
1002 size_t BinaryFunction::getSizeOfDataInCodeAt(uint64_t Offset) const {
1003   if (!Islands)
1004     return 0;
1005 
1006   if (Islands->DataOffsets.find(Offset) == Islands->DataOffsets.end())
1007     return 0;
1008 
1009   auto Iter = Islands->CodeOffsets.upper_bound(Offset);
1010   if (Iter != Islands->CodeOffsets.end())
1011     return *Iter - Offset;
1012   return getSize() - Offset;
1013 }
1014 
1015 bool BinaryFunction::isZeroPaddingAt(uint64_t Offset) const {
1016   ArrayRef<uint8_t> FunctionData = *getData();
1017   uint64_t EndOfCode = getSize();
1018   if (Islands) {
1019     auto Iter = Islands->DataOffsets.upper_bound(Offset);
1020     if (Iter != Islands->DataOffsets.end())
1021       EndOfCode = *Iter;
1022   }
1023   for (uint64_t I = Offset; I < EndOfCode; ++I)
1024     if (FunctionData[I] != 0)
1025       return false;
1026 
1027   return true;
1028 }
1029 
/// Disassemble the function's raw bytes into MCInst instructions, resolving
/// branch/call targets to symbols, detecting jump tables and indirect
/// branches, and applying input relocations to operands. Returns false (and
/// clears Instructions) if the function turned out not to be simple enough
/// to process; true once the state advances to State::Disassembled.
bool BinaryFunction::disassemble() {
  NamedRegionTimer T("disassemble", "Disassemble function", "buildfuncs",
                     "Build Binary Functions", opts::TimeBuild);
  ErrorOr<ArrayRef<uint8_t>> ErrorOrFunctionData = getData();
  assert(ErrorOrFunctionData && "function data is not available");
  ArrayRef<uint8_t> FunctionData = *ErrorOrFunctionData;
  assert(FunctionData.size() == getMaxSize() &&
         "function size does not match raw data size");

  auto &Ctx = BC.Ctx;
  auto &MIB = BC.MIB;

  // Insert a label at the beginning of the function. This will be our first
  // basic block.
  Labels[0] = Ctx->createNamedTempSymbol("BB0");

  // Rewrite a PC-relative memory operand into a symbolic expression
  // (symbol + optional constant offset). On evaluation failure the function
  // is marked non-simple (or we exit in relocation mode, where bailing out
  // silently is not an option).
  auto handlePCRelOperand = [&](MCInst &Instruction, uint64_t Address,
                                uint64_t Size) {
    uint64_t TargetAddress = 0;
    if (!MIB->evaluateMemOperandTarget(Instruction, TargetAddress, Address,
                                       Size)) {
      errs() << "BOLT-ERROR: PC-relative operand can't be evaluated:\n";
      BC.InstPrinter->printInst(&Instruction, 0, "", *BC.STI, errs());
      errs() << '\n';
      Instruction.dump_pretty(errs(), BC.InstPrinter.get());
      errs() << '\n';
      errs() << "BOLT-ERROR: cannot handle PC-relative operand at 0x"
             << Twine::utohexstr(Address) << ". Skipping function " << *this
             << ".\n";
      if (BC.HasRelocations)
        exit(1);
      IsSimple = false;
      return;
    }
    if (TargetAddress == 0 && opts::Verbosity >= 1) {
      outs() << "BOLT-INFO: PC-relative operand is zero in function " << *this
             << '\n';
    }

    const MCSymbol *TargetSymbol;
    uint64_t TargetOffset;
    std::tie(TargetSymbol, TargetOffset) =
        BC.handleAddressRef(TargetAddress, *this, /*IsPCRel*/ true);
    const MCExpr *Expr = MCSymbolRefExpr::create(
        TargetSymbol, MCSymbolRefExpr::VK_None, *BC.Ctx);
    if (TargetOffset) {
      const MCConstantExpr *Offset =
          MCConstantExpr::create(TargetOffset, *BC.Ctx);
      Expr = MCBinaryExpr::createAdd(Expr, Offset, *BC.Ctx);
    }
    MIB->replaceMemOperandDisp(Instruction,
                               MCOperand::createExpr(BC.MIB->getTargetExprFor(
                                   Instruction, Expr, *BC.Ctx, 0)));
  };

  // Used to fix the target of linker-generated AArch64 stubs with no relocation
  // info
  auto fixStubTarget = [&](MCInst &LoadLowBits, MCInst &LoadHiBits,
                           uint64_t Target) {
    const MCSymbol *TargetSymbol;
    uint64_t Addend = 0;
    std::tie(TargetSymbol, Addend) = BC.handleAddressRef(Target, *this, true);

    int64_t Val;
    MIB->replaceImmWithSymbolRef(LoadHiBits, TargetSymbol, Addend, Ctx.get(),
                                 Val, ELF::R_AARCH64_ADR_PREL_PG_HI21);
    MIB->replaceImmWithSymbolRef(LoadLowBits, TargetSymbol, Addend, Ctx.get(),
                                 Val, ELF::R_AARCH64_ADD_ABS_LO12_NC);
  };

  // Handle a branch/call whose target lies outside this function: record the
  // interprocedural reference, convert jumps into (conditional) tail calls,
  // and return a global symbol for the target address. May flip IsCall.
  auto handleExternalReference = [&](MCInst &Instruction, uint64_t Size,
                                     uint64_t Offset, uint64_t TargetAddress,
                                     bool &IsCall) -> MCSymbol * {
    const uint64_t AbsoluteInstrAddr = getAddress() + Offset;
    MCSymbol *TargetSymbol = nullptr;
    InterproceduralReferences.insert(TargetAddress);
    if (opts::Verbosity >= 2 && !IsCall && Size == 2 && !BC.HasRelocations) {
      errs() << "BOLT-WARNING: relaxed tail call detected at 0x"
             << Twine::utohexstr(AbsoluteInstrAddr) << " in function " << *this
             << ". Code size will be increased.\n";
    }

    assert(!MIB->isTailCall(Instruction) &&
           "synthetic tail call instruction found");

    // This is a call regardless of the opcode.
    // Assign proper opcode for tail calls, so that they could be
    // treated as calls.
    if (!IsCall) {
      if (!MIB->convertJmpToTailCall(Instruction)) {
        assert(MIB->isConditionalBranch(Instruction) &&
               "unknown tail call instruction");
        if (opts::Verbosity >= 2) {
          errs() << "BOLT-WARNING: conditional tail call detected in "
                 << "function " << *this << " at 0x"
                 << Twine::utohexstr(AbsoluteInstrAddr) << ".\n";
        }
      }
      IsCall = true;
    }

    TargetSymbol = BC.getOrCreateGlobalSymbol(TargetAddress, "FUNCat");
    if (opts::Verbosity >= 2 && TargetAddress == 0) {
      // We actually see calls to address 0 in presence of weak
      // symbols originating from libraries. This code is never meant
      // to be executed.
      outs() << "BOLT-INFO: Function " << *this
             << " has a call to address zero.\n";
    }

    return TargetSymbol;
  };

  // Classify an indirect branch (tail call, jump table, fixed branch, or
  // unknown) via processIndirectBranch() and adjust the instruction / side
  // tables accordingly.
  auto handleIndirectBranch = [&](MCInst &Instruction, uint64_t Size,
                                  uint64_t Offset) {
    uint64_t IndirectTarget = 0;
    IndirectBranchType Result =
        processIndirectBranch(Instruction, Size, Offset, IndirectTarget);
    switch (Result) {
    default:
      llvm_unreachable("unexpected result");
    case IndirectBranchType::POSSIBLE_TAIL_CALL: {
      bool Result = MIB->convertJmpToTailCall(Instruction);
      (void)Result;
      assert(Result);
      break;
    }
    case IndirectBranchType::POSSIBLE_JUMP_TABLE:
    case IndirectBranchType::POSSIBLE_PIC_JUMP_TABLE:
      if (opts::JumpTables == JTS_NONE)
        IsSimple = false;
      break;
    case IndirectBranchType::POSSIBLE_FIXED_BRANCH: {
      if (containsAddress(IndirectTarget)) {
        // Fixed internal target: rewrite as a direct unconditional branch.
        const MCSymbol *TargetSymbol = getOrCreateLocalLabel(IndirectTarget);
        Instruction.clear();
        MIB->createUncondBranch(Instruction, TargetSymbol, BC.Ctx.get());
        TakenBranches.emplace_back(Offset, IndirectTarget - getAddress());
        HasFixedIndirectBranch = true;
      } else {
        MIB->convertJmpToTailCall(Instruction);
        InterproceduralReferences.insert(IndirectTarget);
      }
      break;
    }
    case IndirectBranchType::UNKNOWN:
      // Keep processing. We'll do more checks and fixes in
      // postProcessIndirectBranches().
      UnknownIndirectBranchOffsets.emplace(Offset);
      break;
    }
  };

  // Check for linker veneers, which lack relocations and need manual
  // adjustments.
  auto handleAArch64IndirectCall = [&](MCInst &Instruction, uint64_t Offset) {
    const uint64_t AbsoluteInstrAddr = getAddress() + Offset;
    MCInst *TargetHiBits, *TargetLowBits;
    uint64_t TargetAddress;
    if (MIB->matchLinkerVeneer(Instructions.begin(), Instructions.end(),
                               AbsoluteInstrAddr, Instruction, TargetHiBits,
                               TargetLowBits, TargetAddress)) {
      MIB->addAnnotation(Instruction, "AArch64Veneer", true);

      // Annotate the last two previously-added instructions (the veneer's
      // address-materializing pair) as part of the veneer as well.
      uint8_t Counter = 0;
      for (auto It = std::prev(Instructions.end()); Counter != 2;
           --It, ++Counter) {
        MIB->addAnnotation(It->second, "AArch64Veneer", true);
      }

      fixStubTarget(*TargetLowBits, *TargetHiBits, TargetAddress);
    }
  };

  // Main disassembly loop: one MCInst (or one data-in-code region) per
  // iteration, advancing by the decoded size.
  uint64_t Size = 0; // instruction size
  for (uint64_t Offset = 0; Offset < getSize(); Offset += Size) {
    MCInst Instruction;
    const uint64_t AbsoluteInstrAddr = getAddress() + Offset;

    // Check for data inside code and ignore it
    if (const size_t DataInCodeSize = getSizeOfDataInCodeAt(Offset)) {
      Size = DataInCodeSize;
      continue;
    }

    if (!BC.DisAsm->getInstruction(Instruction, Size,
                                   FunctionData.slice(Offset),
                                   AbsoluteInstrAddr, nulls())) {
      // Functions with "soft" boundaries, e.g. coming from assembly source,
      // can have 0-byte padding at the end.
      if (isZeroPaddingAt(Offset))
        break;

      errs() << "BOLT-WARNING: unable to disassemble instruction at offset 0x"
             << Twine::utohexstr(Offset) << " (address 0x"
             << Twine::utohexstr(AbsoluteInstrAddr) << ") in function " << *this
             << '\n';
      // Some AVX-512 instructions could not be disassembled at all.
      if (BC.HasRelocations && opts::TrapOnAVX512 && BC.isX86()) {
        setTrapOnEntry();
        BC.TrappedFunctions.push_back(this);
      } else {
        setIgnored();
      }

      break;
    }

    // Check integrity of LLVM assembler/disassembler.
    if (opts::CheckEncoding && !BC.MIB->isBranch(Instruction) &&
        !BC.MIB->isCall(Instruction) && !BC.MIB->isNoop(Instruction)) {
      if (!BC.validateEncoding(Instruction, FunctionData.slice(Offset, Size))) {
        errs() << "BOLT-WARNING: mismatching LLVM encoding detected in "
               << "function " << *this << " for instruction :\n";
        BC.printInstruction(errs(), Instruction, AbsoluteInstrAddr);
        errs() << '\n';
      }
    }

    // Special handling for AVX-512 instructions.
    if (MIB->hasEVEXEncoding(Instruction)) {
      if (BC.HasRelocations && opts::TrapOnAVX512) {
        setTrapOnEntry();
        BC.TrappedFunctions.push_back(this);
        break;
      }

      // Check if our disassembly is correct and matches the assembler output.
      if (!BC.validateEncoding(Instruction, FunctionData.slice(Offset, Size))) {
        if (opts::Verbosity >= 1) {
          errs() << "BOLT-WARNING: internal assembler/disassembler error "
                    "detected for AVX512 instruction:\n";
          BC.printInstruction(errs(), Instruction, AbsoluteInstrAddr);
          errs() << " in function " << *this << '\n';
        }

        setIgnored();
        break;
      }
    }

    if (MIB->isBranch(Instruction) || MIB->isCall(Instruction)) {
      uint64_t TargetAddress = 0;
      if (MIB->evaluateBranch(Instruction, AbsoluteInstrAddr, Size,
                              TargetAddress)) {
        // Check if the target is within the same function. Otherwise it's
        // a call, possibly a tail call.
        //
        // If the target *is* the function address it could be either a branch
        // or a recursive call.
        bool IsCall = MIB->isCall(Instruction);
        const bool IsCondBranch = MIB->isConditionalBranch(Instruction);
        MCSymbol *TargetSymbol = nullptr;

        if (BC.MIB->isUnsupportedBranch(Instruction.getOpcode())) {
          // Both sides of an unsupported branch are excluded from processing.
          setIgnored();
          if (BinaryFunction *TargetFunc =
                  BC.getBinaryFunctionContainingAddress(TargetAddress))
            TargetFunc->setIgnored();
        }

        if (IsCall && containsAddress(TargetAddress)) {
          if (TargetAddress == getAddress()) {
            // Recursive call.
            TargetSymbol = getSymbol();
          } else {
            if (BC.isX86()) {
              // Dangerous old-style x86 PIC code. We may need to freeze this
              // function, so preserve the function as is for now.
              PreserveNops = true;
            } else {
              errs() << "BOLT-WARNING: internal call detected at 0x"
                     << Twine::utohexstr(AbsoluteInstrAddr) << " in function "
                     << *this << ". Skipping.\n";
              IsSimple = false;
            }
          }
        }

        if (!TargetSymbol) {
          // Create either local label or external symbol.
          if (containsAddress(TargetAddress)) {
            TargetSymbol = getOrCreateLocalLabel(TargetAddress);
          } else {
            if (TargetAddress == getAddress() + getSize() &&
                TargetAddress < getAddress() + getMaxSize()) {
              // Result of __builtin_unreachable().
              LLVM_DEBUG(dbgs() << "BOLT-DEBUG: jump past end detected at 0x"
                                << Twine::utohexstr(AbsoluteInstrAddr)
                                << " in function " << *this
                                << " : replacing with nop.\n");
              BC.MIB->createNoop(Instruction);
              if (IsCondBranch) {
                // Register branch offset for profile validation.
                IgnoredBranches.emplace_back(Offset, Offset + Size);
              }
              goto add_instruction;
            }
            // May update Instruction and IsCall
            TargetSymbol = handleExternalReference(Instruction, Size, Offset,
                                                   TargetAddress, IsCall);
          }
        }

        if (!IsCall) {
          // Add taken branch info.
          TakenBranches.emplace_back(Offset, TargetAddress - getAddress());
        }
        BC.MIB->replaceBranchTarget(Instruction, TargetSymbol, &*Ctx);

        // Mark CTC.
        if (IsCondBranch && IsCall)
          MIB->setConditionalTailCall(Instruction, TargetAddress);
      } else {
        // Could not evaluate branch. Should be an indirect call or an
        // indirect branch. Bail out on the latter case.
        if (MIB->isIndirectBranch(Instruction))
          handleIndirectBranch(Instruction, Size, Offset);
        // Indirect call. We only need to fix it if the operand is RIP-relative.
        if (IsSimple && MIB->hasPCRelOperand(Instruction))
          handlePCRelOperand(Instruction, AbsoluteInstrAddr, Size);

        if (BC.isAArch64())
          handleAArch64IndirectCall(Instruction, Offset);
      }
    } else {
      // Check if there's a relocation associated with this instruction.
      bool UsedReloc = false;
      for (auto Itr = Relocations.lower_bound(Offset),
                ItrE = Relocations.lower_bound(Offset + Size);
           Itr != ItrE; ++Itr) {
        const Relocation &Relocation = Itr->second;
        // Recover the symbol's value; for PC-relative relocations the stored
        // value is relative to the relocation's own address.
        uint64_t SymbolValue = Relocation.Value - Relocation.Addend;
        if (Relocation.isPCRelative())
          SymbolValue += getAddress() + Relocation.Offset;

        // Process reference to the symbol.
        if (BC.isX86())
          BC.handleAddressRef(SymbolValue, *this, Relocation.isPCRelative());

        if (BC.isAArch64() || !Relocation.isPCRelative()) {
          int64_t Value = Relocation.Value;
          const bool Result = BC.MIB->replaceImmWithSymbolRef(
              Instruction, Relocation.Symbol, Relocation.Addend, Ctx.get(),
              Value, Relocation.Type);
          (void)Result;
          assert(Result && "cannot replace immediate with relocation");

          if (BC.isX86()) {
            // Make sure we replaced the correct immediate (instruction
            // can have multiple immediate operands).
            assert(
                truncateToSize(static_cast<uint64_t>(Value),
                               Relocation::getSizeForType(Relocation.Type)) ==
                    truncateToSize(Relocation.Value, Relocation::getSizeForType(
                                                         Relocation.Type)) &&
                "immediate value mismatch in function");
          } else if (BC.isAArch64()) {
            // For aarch, if we replaced an immediate with a symbol from a
            // relocation, we mark it so we do not try to further process a
            // pc-relative operand. All we need is the symbol.
            UsedReloc = true;
          }
        } else {
          // Check if the relocation matches memop's Disp.
          uint64_t TargetAddress;
          if (!BC.MIB->evaluateMemOperandTarget(Instruction, TargetAddress,
                                                AbsoluteInstrAddr, Size)) {
            errs() << "BOLT-ERROR: PC-relative operand can't be evaluated\n";
            exit(1);
          }
          assert(TargetAddress == Relocation.Value + AbsoluteInstrAddr + Size &&
                 "Immediate value mismatch detected.");

          const MCExpr *Expr = MCSymbolRefExpr::create(
              Relocation.Symbol, MCSymbolRefExpr::VK_None, *BC.Ctx);
          // Real addend for pc-relative targets is adjusted with a delta
          // from relocation placement to the next instruction.
          const uint64_t TargetAddend =
              Relocation.Addend + Offset + Size - Relocation.Offset;
          if (TargetAddend) {
            const MCConstantExpr *Offset =
                MCConstantExpr::create(TargetAddend, *BC.Ctx);
            Expr = MCBinaryExpr::createAdd(Expr, Offset, *BC.Ctx);
          }
          BC.MIB->replaceMemOperandDisp(
              Instruction, MCOperand::createExpr(BC.MIB->getTargetExprFor(
                               Instruction, Expr, *BC.Ctx, 0)));
          UsedReloc = true;
        }
      }

      if (MIB->hasPCRelOperand(Instruction) && !UsedReloc)
        handlePCRelOperand(Instruction, AbsoluteInstrAddr, Size);
    }

add_instruction:
    if (getDWARFLineTable()) {
      Instruction.setLoc(findDebugLineInformationForInstructionAt(
          AbsoluteInstrAddr, getDWARFUnit(), getDWARFLineTable()));
    }

    // Record offset of the instruction for profile matching.
    if (BC.keepOffsetForInstruction(Instruction))
      MIB->setOffset(Instruction, static_cast<uint32_t>(Offset));

    if (BC.MIB->isNoop(Instruction)) {
      // NOTE: disassembly loses the correct size information for noops.
      //       E.g. nopw 0x0(%rax,%rax,1) is 9 bytes, but re-encoded it's only
      //       5 bytes. Preserve the size info using annotations.
      MIB->addAnnotation(Instruction, "Size", static_cast<uint32_t>(Size));
    }

    addInstruction(Offset, std::move(Instruction));
  }

  // Input relocations have been consumed into instruction operands.
  clearList(Relocations);

  if (!IsSimple) {
    clearList(Instructions);
    return false;
  }

  updateState(State::Disassembled);

  return true;
}
1457 
1458 bool BinaryFunction::scanExternalRefs() {
1459   bool Success = true;
1460   bool DisassemblyFailed = false;
1461 
1462   // Ignore pseudo functions.
1463   if (isPseudo())
1464     return Success;
1465 
1466   if (opts::NoScan) {
1467     clearList(Relocations);
1468     clearList(ExternallyReferencedOffsets);
1469 
1470     return false;
1471   }
1472 
1473   // List of external references for this function.
1474   std::vector<Relocation> FunctionRelocations;
1475 
1476   static BinaryContext::IndependentCodeEmitter Emitter =
1477       BC.createIndependentMCCodeEmitter();
1478 
1479   ErrorOr<ArrayRef<uint8_t>> ErrorOrFunctionData = getData();
1480   assert(ErrorOrFunctionData && "function data is not available");
1481   ArrayRef<uint8_t> FunctionData = *ErrorOrFunctionData;
1482   assert(FunctionData.size() == getMaxSize() &&
1483          "function size does not match raw data size");
1484 
1485   uint64_t Size = 0; // instruction size
1486   for (uint64_t Offset = 0; Offset < getSize(); Offset += Size) {
1487     // Check for data inside code and ignore it
1488     if (const size_t DataInCodeSize = getSizeOfDataInCodeAt(Offset)) {
1489       Size = DataInCodeSize;
1490       continue;
1491     }
1492 
1493     const uint64_t AbsoluteInstrAddr = getAddress() + Offset;
1494     MCInst Instruction;
1495     if (!BC.DisAsm->getInstruction(Instruction, Size,
1496                                    FunctionData.slice(Offset),
1497                                    AbsoluteInstrAddr, nulls())) {
1498       if (opts::Verbosity >= 1 && !isZeroPaddingAt(Offset)) {
1499         errs() << "BOLT-WARNING: unable to disassemble instruction at offset 0x"
1500                << Twine::utohexstr(Offset) << " (address 0x"
1501                << Twine::utohexstr(AbsoluteInstrAddr) << ") in function "
1502                << *this << '\n';
1503       }
1504       Success = false;
1505       DisassemblyFailed = true;
1506       break;
1507     }
1508 
1509     // Return true if we can skip handling the Target function reference.
1510     auto ignoreFunctionRef = [&](const BinaryFunction &Target) {
1511       if (&Target == this)
1512         return true;
1513 
1514       // Note that later we may decide not to emit Target function. In that
1515       // case, we conservatively create references that will be ignored or
1516       // resolved to the same function.
1517       if (!BC.shouldEmit(Target))
1518         return true;
1519 
1520       return false;
1521     };
1522 
1523     // Return true if we can ignore reference to the symbol.
1524     auto ignoreReference = [&](const MCSymbol *TargetSymbol) {
1525       if (!TargetSymbol)
1526         return true;
1527 
1528       if (BC.forceSymbolRelocations(TargetSymbol->getName()))
1529         return false;
1530 
1531       BinaryFunction *TargetFunction = BC.getFunctionForSymbol(TargetSymbol);
1532       if (!TargetFunction)
1533         return true;
1534 
1535       return ignoreFunctionRef(*TargetFunction);
1536     };
1537 
1538     // Detect if the instruction references an address.
1539     // Without relocations, we can only trust PC-relative address modes.
1540     uint64_t TargetAddress = 0;
1541     bool IsPCRel = false;
1542     bool IsBranch = false;
1543     if (BC.MIB->hasPCRelOperand(Instruction)) {
1544       if (BC.MIB->evaluateMemOperandTarget(Instruction, TargetAddress,
1545                                            AbsoluteInstrAddr, Size)) {
1546         IsPCRel = true;
1547       }
1548     } else if (BC.MIB->isCall(Instruction) || BC.MIB->isBranch(Instruction)) {
1549       if (BC.MIB->evaluateBranch(Instruction, AbsoluteInstrAddr, Size,
1550                                  TargetAddress)) {
1551         IsBranch = true;
1552       }
1553     }
1554 
1555     MCSymbol *TargetSymbol = nullptr;
1556 
1557     // Create an entry point at reference address if needed.
1558     BinaryFunction *TargetFunction =
1559         BC.getBinaryFunctionContainingAddress(TargetAddress);
1560     if (TargetFunction && !ignoreFunctionRef(*TargetFunction)) {
1561       const uint64_t FunctionOffset =
1562           TargetAddress - TargetFunction->getAddress();
1563       TargetSymbol = FunctionOffset
1564                          ? TargetFunction->addEntryPointAtOffset(FunctionOffset)
1565                          : TargetFunction->getSymbol();
1566     }
1567 
1568     // Can't find more references and not creating relocations.
1569     if (!BC.HasRelocations)
1570       continue;
1571 
1572     // Create a relocation against the TargetSymbol as the symbol might get
1573     // moved.
1574     if (TargetSymbol) {
1575       if (IsBranch) {
1576         BC.MIB->replaceBranchTarget(Instruction, TargetSymbol,
1577                                     Emitter.LocalCtx.get());
1578       } else if (IsPCRel) {
1579         const MCExpr *Expr = MCSymbolRefExpr::create(
1580             TargetSymbol, MCSymbolRefExpr::VK_None, *Emitter.LocalCtx.get());
1581         BC.MIB->replaceMemOperandDisp(
1582             Instruction, MCOperand::createExpr(BC.MIB->getTargetExprFor(
1583                              Instruction, Expr, *Emitter.LocalCtx.get(), 0)));
1584       }
1585     }
1586 
1587     // Create more relocations based on input file relocations.
1588     bool HasRel = false;
1589     for (auto Itr = Relocations.lower_bound(Offset),
1590               ItrE = Relocations.lower_bound(Offset + Size);
1591          Itr != ItrE; ++Itr) {
1592       Relocation &Relocation = Itr->second;
1593       if (Relocation.isPCRelative() && BC.isX86())
1594         continue;
1595       if (ignoreReference(Relocation.Symbol))
1596         continue;
1597 
1598       int64_t Value = Relocation.Value;
1599       const bool Result = BC.MIB->replaceImmWithSymbolRef(
1600           Instruction, Relocation.Symbol, Relocation.Addend,
1601           Emitter.LocalCtx.get(), Value, Relocation.Type);
1602       (void)Result;
1603       assert(Result && "cannot replace immediate with relocation");
1604 
1605       HasRel = true;
1606     }
1607 
1608     if (!TargetSymbol && !HasRel)
1609       continue;
1610 
1611     // Emit the instruction using temp emitter and generate relocations.
1612     SmallString<256> Code;
1613     SmallVector<MCFixup, 4> Fixups;
1614     raw_svector_ostream VecOS(Code);
1615     Emitter.MCE->encodeInstruction(Instruction, VecOS, Fixups, *BC.STI);
1616 
1617     // Create relocation for every fixup.
1618     for (const MCFixup &Fixup : Fixups) {
1619       Optional<Relocation> Rel = BC.MIB->createRelocation(Fixup, *BC.MAB);
1620       if (!Rel) {
1621         Success = false;
1622         continue;
1623       }
1624 
1625       if (Relocation::getSizeForType(Rel->Type) < 4) {
1626         // If the instruction uses a short form, then we might not be able
1627         // to handle the rewrite without relaxation, and hence cannot reliably
1628         // create an external reference relocation.
1629         Success = false;
1630         continue;
1631       }
1632       Rel->Offset += getAddress() - getOriginSection()->getAddress() + Offset;
1633       FunctionRelocations.push_back(*Rel);
1634     }
1635 
1636     if (!Success)
1637       break;
1638   }
1639 
1640   // Add relocations unless disassembly failed for this function.
1641   if (!DisassemblyFailed)
1642     for (Relocation &Rel : FunctionRelocations)
1643       getOriginSection()->addPendingRelocation(Rel);
1644 
1645   // Inform BinaryContext that this function symbols will not be defined and
1646   // relocations should not be created against them.
1647   if (BC.HasRelocations) {
1648     for (std::pair<const uint32_t, MCSymbol *> &LI : Labels)
1649       BC.UndefinedSymbols.insert(LI.second);
1650     if (FunctionEndLabel)
1651       BC.UndefinedSymbols.insert(FunctionEndLabel);
1652   }
1653 
1654   clearList(Relocations);
1655   clearList(ExternallyReferencedOffsets);
1656 
1657   if (Success && BC.HasRelocations)
1658     HasExternalRefRelocations = true;
1659 
1660   if (opts::Verbosity >= 1 && !Success)
1661     outs() << "BOLT-INFO: failed to scan refs for  " << *this << '\n';
1662 
1663   return Success;
1664 }
1665 
1666 void BinaryFunction::postProcessEntryPoints() {
1667   if (!isSimple())
1668     return;
1669 
1670   for (auto &KV : Labels) {
1671     MCSymbol *Label = KV.second;
1672     if (!getSecondaryEntryPointSymbol(Label))
1673       continue;
1674 
1675     // In non-relocation mode there's potentially an external undetectable
1676     // reference to the entry point and hence we cannot move this entry
1677     // point. Optimizing without moving could be difficult.
1678     if (!BC.HasRelocations)
1679       setSimple(false);
1680 
1681     const uint32_t Offset = KV.first;
1682 
1683     // If we are at Offset 0 and there is no instruction associated with it,
1684     // this means this is an empty function. Just ignore. If we find an
1685     // instruction at this offset, this entry point is valid.
1686     if (!Offset || getInstructionAtOffset(Offset))
1687       continue;
1688 
1689     // On AArch64 there are legitimate reasons to have references past the
1690     // end of the function, e.g. jump tables.
1691     if (BC.isAArch64() && Offset == getSize())
1692       continue;
1693 
1694     errs() << "BOLT-WARNING: reference in the middle of instruction "
1695               "detected in function "
1696            << *this << " at offset 0x" << Twine::utohexstr(Offset) << '\n';
1697     if (BC.HasRelocations)
1698       setIgnored();
1699     setSimple(false);
1700     return;
1701   }
1702 }
1703 
1704 void BinaryFunction::postProcessJumpTables() {
1705   // Create labels for all entries.
1706   for (auto &JTI : JumpTables) {
1707     JumpTable &JT = *JTI.second;
1708     if (JT.Type == JumpTable::JTT_PIC && opts::JumpTables == JTS_BASIC) {
1709       opts::JumpTables = JTS_MOVE;
1710       outs() << "BOLT-INFO: forcing -jump-tables=move as PIC jump table was "
1711                 "detected in function "
1712              << *this << '\n';
1713     }
1714     for (unsigned I = 0; I < JT.OffsetEntries.size(); ++I) {
1715       MCSymbol *Label =
1716           getOrCreateLocalLabel(getAddress() + JT.OffsetEntries[I],
1717                                 /*CreatePastEnd*/ true);
1718       JT.Entries.push_back(Label);
1719     }
1720 
1721     const uint64_t BDSize =
1722         BC.getBinaryDataAtAddress(JT.getAddress())->getSize();
1723     if (!BDSize) {
1724       BC.setBinaryDataSize(JT.getAddress(), JT.getSize());
1725     } else {
1726       assert(BDSize >= JT.getSize() &&
1727              "jump table cannot be larger than the containing object");
1728     }
1729   }
1730 
1731   // Add TakenBranches from JumpTables.
1732   //
1733   // We want to do it after initial processing since we don't know jump tables'
1734   // boundaries until we process them all.
1735   for (auto &JTSite : JTSites) {
1736     const uint64_t JTSiteOffset = JTSite.first;
1737     const uint64_t JTAddress = JTSite.second;
1738     const JumpTable *JT = getJumpTableContainingAddress(JTAddress);
1739     assert(JT && "cannot find jump table for address");
1740 
1741     uint64_t EntryOffset = JTAddress - JT->getAddress();
1742     while (EntryOffset < JT->getSize()) {
1743       uint64_t TargetOffset = JT->OffsetEntries[EntryOffset / JT->EntrySize];
1744       if (TargetOffset < getSize()) {
1745         TakenBranches.emplace_back(JTSiteOffset, TargetOffset);
1746 
1747         if (opts::StrictMode)
1748           registerReferencedOffset(TargetOffset);
1749       }
1750 
1751       EntryOffset += JT->EntrySize;
1752 
1753       // A label at the next entry means the end of this jump table.
1754       if (JT->Labels.count(EntryOffset))
1755         break;
1756     }
1757   }
1758   clearList(JTSites);
1759 
1760   // Free memory used by jump table offsets.
1761   for (auto &JTI : JumpTables) {
1762     JumpTable &JT = *JTI.second;
1763     clearList(JT.OffsetEntries);
1764   }
1765 
1766   // Conservatively populate all possible destinations for unknown indirect
1767   // branches.
1768   if (opts::StrictMode && hasInternalReference()) {
1769     for (uint64_t Offset : UnknownIndirectBranchOffsets) {
1770       for (uint64_t PossibleDestination : ExternallyReferencedOffsets) {
1771         // Ignore __builtin_unreachable().
1772         if (PossibleDestination == getSize())
1773           continue;
1774         TakenBranches.emplace_back(Offset, PossibleDestination);
1775       }
1776     }
1777   }
1778 
1779   // Remove duplicates branches. We can get a bunch of them from jump tables.
1780   // Without doing jump table value profiling we don't have use for extra
1781   // (duplicate) branches.
1782   std::sort(TakenBranches.begin(), TakenBranches.end());
1783   auto NewEnd = std::unique(TakenBranches.begin(), TakenBranches.end());
1784   TakenBranches.erase(NewEnd, TakenBranches.end());
1785 }
1786 
1787 bool BinaryFunction::postProcessIndirectBranches(
1788     MCPlusBuilder::AllocatorIdTy AllocId) {
1789   auto addUnknownControlFlow = [&](BinaryBasicBlock &BB) {
1790     HasUnknownControlFlow = true;
1791     BB.removeAllSuccessors();
1792     for (uint64_t PossibleDestination : ExternallyReferencedOffsets)
1793       if (BinaryBasicBlock *SuccBB = getBasicBlockAtOffset(PossibleDestination))
1794         BB.addSuccessor(SuccBB);
1795   };
1796 
1797   uint64_t NumIndirectJumps = 0;
1798   MCInst *LastIndirectJump = nullptr;
1799   BinaryBasicBlock *LastIndirectJumpBB = nullptr;
1800   uint64_t LastJT = 0;
1801   uint16_t LastJTIndexReg = BC.MIB->getNoRegister();
1802   for (BinaryBasicBlock *BB : layout()) {
1803     for (MCInst &Instr : *BB) {
1804       if (!BC.MIB->isIndirectBranch(Instr))
1805         continue;
1806 
1807       // If there's an indirect branch in a single-block function -
1808       // it must be a tail call.
1809       if (layout_size() == 1) {
1810         BC.MIB->convertJmpToTailCall(Instr);
1811         return true;
1812       }
1813 
1814       ++NumIndirectJumps;
1815 
1816       if (opts::StrictMode && !hasInternalReference()) {
1817         BC.MIB->convertJmpToTailCall(Instr);
1818         break;
1819       }
1820 
1821       // Validate the tail call or jump table assumptions now that we know
1822       // basic block boundaries.
1823       if (BC.MIB->isTailCall(Instr) || BC.MIB->getJumpTable(Instr)) {
1824         const unsigned PtrSize = BC.AsmInfo->getCodePointerSize();
1825         MCInst *MemLocInstr;
1826         unsigned BaseRegNum, IndexRegNum;
1827         int64_t DispValue;
1828         const MCExpr *DispExpr;
1829         MCInst *PCRelBaseInstr;
1830         IndirectBranchType Type = BC.MIB->analyzeIndirectBranch(
1831             Instr, BB->begin(), BB->end(), PtrSize, MemLocInstr, BaseRegNum,
1832             IndexRegNum, DispValue, DispExpr, PCRelBaseInstr);
1833         if (Type != IndirectBranchType::UNKNOWN || MemLocInstr != nullptr)
1834           continue;
1835 
1836         if (!opts::StrictMode)
1837           return false;
1838 
1839         if (BC.MIB->isTailCall(Instr)) {
1840           BC.MIB->convertTailCallToJmp(Instr);
1841         } else {
1842           LastIndirectJump = &Instr;
1843           LastIndirectJumpBB = BB;
1844           LastJT = BC.MIB->getJumpTable(Instr);
1845           LastJTIndexReg = BC.MIB->getJumpTableIndexReg(Instr);
1846           BC.MIB->unsetJumpTable(Instr);
1847 
1848           JumpTable *JT = BC.getJumpTableContainingAddress(LastJT);
1849           if (JT->Type == JumpTable::JTT_NORMAL) {
1850             // Invalidating the jump table may also invalidate other jump table
1851             // boundaries. Until we have/need a support for this, mark the
1852             // function as non-simple.
1853             LLVM_DEBUG(dbgs() << "BOLT-DEBUG: rejected jump table reference"
1854                               << JT->getName() << " in " << *this << '\n');
1855             return false;
1856           }
1857         }
1858 
1859         addUnknownControlFlow(*BB);
1860         continue;
1861       }
1862 
1863       // If this block contains an epilogue code and has an indirect branch,
1864       // then most likely it's a tail call. Otherwise, we cannot tell for sure
1865       // what it is and conservatively reject the function's CFG.
1866       bool IsEpilogue = false;
1867       for (const MCInst &Instr : *BB) {
1868         if (BC.MIB->isLeave(Instr) || BC.MIB->isPop(Instr)) {
1869           IsEpilogue = true;
1870           break;
1871         }
1872       }
1873       if (IsEpilogue) {
1874         BC.MIB->convertJmpToTailCall(Instr);
1875         BB->removeAllSuccessors();
1876         continue;
1877       }
1878 
1879       if (opts::Verbosity >= 2) {
1880         outs() << "BOLT-INFO: rejected potential indirect tail call in "
1881                << "function " << *this << " in basic block " << BB->getName()
1882                << ".\n";
1883         LLVM_DEBUG(BC.printInstructions(dbgs(), BB->begin(), BB->end(),
1884                                         BB->getOffset(), this, true));
1885       }
1886 
1887       if (!opts::StrictMode)
1888         return false;
1889 
1890       addUnknownControlFlow(*BB);
1891     }
1892   }
1893 
1894   if (HasInternalLabelReference)
1895     return false;
1896 
1897   // If there's only one jump table, and one indirect jump, and no other
1898   // references, then we should be able to derive the jump table even if we
1899   // fail to match the pattern.
1900   if (HasUnknownControlFlow && NumIndirectJumps == 1 &&
1901       JumpTables.size() == 1 && LastIndirectJump) {
1902     BC.MIB->setJumpTable(*LastIndirectJump, LastJT, LastJTIndexReg, AllocId);
1903     HasUnknownControlFlow = false;
1904 
1905     LastIndirectJumpBB->updateJumpTableSuccessors();
1906   }
1907 
1908   if (HasFixedIndirectBranch)
1909     return false;
1910 
1911   if (HasUnknownControlFlow && !BC.HasRelocations)
1912     return false;
1913 
1914   return true;
1915 }
1916 
1917 void BinaryFunction::recomputeLandingPads() {
1918   updateBBIndices(0);
1919 
1920   for (BinaryBasicBlock *BB : BasicBlocks) {
1921     BB->LandingPads.clear();
1922     BB->Throwers.clear();
1923   }
1924 
1925   for (BinaryBasicBlock *BB : BasicBlocks) {
1926     std::unordered_set<const BinaryBasicBlock *> BBLandingPads;
1927     for (MCInst &Instr : *BB) {
1928       if (!BC.MIB->isInvoke(Instr))
1929         continue;
1930 
1931       const Optional<MCPlus::MCLandingPad> EHInfo = BC.MIB->getEHInfo(Instr);
1932       if (!EHInfo || !EHInfo->first)
1933         continue;
1934 
1935       BinaryBasicBlock *LPBlock = getBasicBlockForLabel(EHInfo->first);
1936       if (!BBLandingPads.count(LPBlock)) {
1937         BBLandingPads.insert(LPBlock);
1938         BB->LandingPads.emplace_back(LPBlock);
1939         LPBlock->Throwers.emplace_back(BB);
1940       }
1941     }
1942   }
1943 }
1944 
/// Build the control-flow graph for a disassembled function: partition the
/// instruction stream into basic blocks, connect taken and fall-through
/// edges, attach exception-handling and CFI information, and release the
/// intermediate disassembly structures. Returns true on success; on failure
/// (or for non-simple functions) returns false without building a CFG.
bool BinaryFunction::buildCFG(MCPlusBuilder::AllocatorIdTy AllocatorId) {
  auto &MIB = BC.MIB;

  if (!isSimple()) {
    assert(!BC.HasRelocations &&
           "cannot process file with non-simple function in relocs mode");
    return false;
  }

  if (CurrentState != State::Disassembled)
    return false;

  assert(BasicBlocks.empty() && "basic block list should be empty");
  assert((Labels.find(0) != Labels.end()) &&
         "first instruction should always have a label");

  // Create basic blocks in the original layout order:
  //
  //  * Every instruction with associated label marks
  //    the beginning of a basic block.
  //  * Conditional instruction marks the end of a basic block,
  //    except when the following instruction is an
  //    unconditional branch, and the unconditional branch is not
  //    a destination of another branch. In the latter case, the
  //    basic block will consist of a single unconditional branch
  //    (missed "double-jump" optimization).
  //
  // Created basic blocks are sorted in layout order since they are
  // created in the same order as instructions, and instructions are
  // sorted by offsets.
  BinaryBasicBlock *InsertBB = nullptr;
  BinaryBasicBlock *PrevBB = nullptr;
  bool IsLastInstrNop = false;
  // Offset of the last non-nop instruction.
  uint64_t LastInstrOffset = 0;

  // Insert all CFI pseudo instructions registered at CFIOffset into InsertBB.
  auto addCFIPlaceholders = [this](uint64_t CFIOffset,
                                   BinaryBasicBlock *InsertBB) {
    for (auto FI = OffsetToCFI.lower_bound(CFIOffset),
              FE = OffsetToCFI.upper_bound(CFIOffset);
         FI != FE; ++FI) {
      addCFIPseudo(InsertBB, InsertBB->end(), FI->second);
    }
  };

  // For profiling purposes we need to save the offset of the last instruction
  // in the basic block.
  // NOTE: nops always have an Offset annotation. Annotate the last non-nop as
  //       older profiles ignored nops.
  auto updateOffset = [&](uint64_t Offset) {
    assert(PrevBB && PrevBB != InsertBB && "invalid previous block");
    MCInst *LastNonNop = nullptr;
    for (BinaryBasicBlock::reverse_iterator RII = PrevBB->getLastNonPseudo(),
                                            E = PrevBB->rend();
         RII != E; ++RII) {
      if (!BC.MIB->isPseudo(*RII) && !BC.MIB->isNoop(*RII)) {
        LastNonNop = &*RII;
        break;
      }
    }
    if (LastNonNop && !MIB->getOffset(*LastNonNop))
      MIB->setOffset(*LastNonNop, static_cast<uint32_t>(Offset), AllocatorId);
  };

  // Main pass: walk instructions in offset order, opening a new block at
  // every label and closing a block after every terminator.
  for (auto I = Instructions.begin(), E = Instructions.end(); I != E; ++I) {
    const uint32_t Offset = I->first;
    MCInst &Instr = I->second;

    auto LI = Labels.find(Offset);
    if (LI != Labels.end()) {
      // Always create new BB at branch destination.
      PrevBB = InsertBB ? InsertBB : PrevBB;
      InsertBB = addBasicBlock(LI->first, LI->second,
                               opts::PreserveBlocksAlignment && IsLastInstrNop);
      if (PrevBB)
        updateOffset(LastInstrOffset);
    }

    // SDTMarkers/LKMarkers are keyed by the instruction's input address;
    // presumably SystemTap SDT and Linux-kernel markers - marker nops must
    // be preserved, so they are flagged via HasSDTMarker below.
    const uint64_t InstrInputAddr = I->first + Address;
    bool IsSDTMarker =
        MIB->isNoop(Instr) && BC.SDTMarkers.count(InstrInputAddr);
    bool IsLKMarker = BC.LKMarkers.count(InstrInputAddr);
    // Mark all nops with Offset for profile tracking purposes.
    if (MIB->isNoop(Instr) || IsLKMarker) {
      if (!MIB->getOffset(Instr))
        MIB->setOffset(Instr, static_cast<uint32_t>(Offset), AllocatorId);
      if (IsSDTMarker || IsLKMarker)
        HasSDTMarker = true;
      else
        // Annotate ordinary nops, so we can safely delete them if required.
        MIB->addAnnotation(Instr, "NOP", static_cast<uint32_t>(1), AllocatorId);
    }

    if (!InsertBB) {
      // It must be a fallthrough or unreachable code. Create a new block unless
      // we see an unconditional branch following a conditional one. The latter
      // should not be a conditional tail call.
      assert(PrevBB && "no previous basic block for a fall through");
      MCInst *PrevInstr = PrevBB->getLastNonPseudoInstr();
      assert(PrevInstr && "no previous instruction for a fall through");
      if (MIB->isUnconditionalBranch(Instr) &&
          !MIB->isUnconditionalBranch(*PrevInstr) &&
          !MIB->getConditionalTailCall(*PrevInstr) &&
          !MIB->isReturn(*PrevInstr)) {
        // Temporarily restore inserter basic block.
        InsertBB = PrevBB;
      } else {
        MCSymbol *Label;
        {
          auto L = BC.scopeLock();
          Label = BC.Ctx->createNamedTempSymbol("FT");
        }
        InsertBB = addBasicBlock(
            Offset, Label, opts::PreserveBlocksAlignment && IsLastInstrNop);
        updateOffset(LastInstrOffset);
      }
    }
    if (Offset == 0) {
      // Add associated CFI pseudos in the first offset (0)
      addCFIPlaceholders(0, InsertBB);
    }

    const bool IsBlockEnd = MIB->isTerminator(Instr);
    IsLastInstrNop = MIB->isNoop(Instr);
    if (!IsLastInstrNop)
      LastInstrOffset = Offset;
    InsertBB->addInstruction(std::move(Instr));

    // Add associated CFI instrs. We always add the CFI instruction that is
    // located immediately after this instruction, since the next CFI
    // instruction reflects the change in state caused by this instruction.
    auto NextInstr = std::next(I);
    uint64_t CFIOffset;
    if (NextInstr != E)
      CFIOffset = NextInstr->first;
    else
      CFIOffset = getSize();

    // Note: this potentially invalidates instruction pointers/iterators.
    addCFIPlaceholders(CFIOffset, InsertBB);

    if (IsBlockEnd) {
      PrevBB = InsertBB;
      InsertBB = nullptr;
    }
  }

  if (BasicBlocks.empty()) {
    setSimple(false);
    return false;
  }

  // Intermediate dump.
  LLVM_DEBUG(print(dbgs(), "after creating basic blocks"));

  // TODO: handle properly calls to no-return functions,
  // e.g. exit(3), etc. Otherwise we'll see a false fall-through
  // blocks.

  // Connect explicit (taken) branch edges recorded during disassembly.
  for (std::pair<uint32_t, uint32_t> &Branch : TakenBranches) {
    LLVM_DEBUG(dbgs() << "registering branch [0x"
                      << Twine::utohexstr(Branch.first) << "] -> [0x"
                      << Twine::utohexstr(Branch.second) << "]\n");
    BinaryBasicBlock *FromBB = getBasicBlockContainingOffset(Branch.first);
    BinaryBasicBlock *ToBB = getBasicBlockAtOffset(Branch.second);
    if (!FromBB || !ToBB) {
      if (!FromBB)
        errs() << "BOLT-ERROR: cannot find BB containing the branch.\n";
      if (!ToBB)
        errs() << "BOLT-ERROR: cannot find BB containing branch destination.\n";
      BC.exitWithBugReport("disassembly failed - inconsistent branch found.",
                           *this);
    }

    FromBB->addSuccessor(ToBB);
  }

  // Add fall-through branches.
  PrevBB = nullptr;
  bool IsPrevFT = false; // Is previous block a fall-through.
  for (BinaryBasicBlock *BB : BasicBlocks) {
    if (IsPrevFT)
      PrevBB->addSuccessor(BB);

    if (BB->empty()) {
      IsPrevFT = true;
      PrevBB = BB;
      continue;
    }

    MCInst *LastInstr = BB->getLastNonPseudoInstr();
    assert(LastInstr &&
           "should have non-pseudo instruction in non-empty block");

    if (BB->succ_size() == 0) {
      // Since there's no existing successors, we know the last instruction is
      // not a conditional branch. Thus if it's a terminator, it shouldn't be a
      // fall-through.
      //
      // Conditional tail call is a special case since we don't add a taken
      // branch successor for it.
      IsPrevFT = !MIB->isTerminator(*LastInstr) ||
                 MIB->getConditionalTailCall(*LastInstr);
    } else if (BB->succ_size() == 1) {
      // A single successor with a conditional branch implies the not-taken
      // path falls through to the next block.
      IsPrevFT = MIB->isConditionalBranch(*LastInstr);
    } else {
      IsPrevFT = false;
    }

    PrevBB = BB;
  }

  // Assign landing pads and throwers info.
  recomputeLandingPads();

  // Assign CFI information to each BB entry.
  annotateCFIState();

  // Annotate invoke instructions with GNU_args_size data.
  propagateGnuArgsSizeInfo(AllocatorId);

  // Set the basic block layout to the original order and set end offsets.
  PrevBB = nullptr;
  for (BinaryBasicBlock *BB : BasicBlocks) {
    BasicBlocksLayout.emplace_back(BB);
    if (PrevBB)
      PrevBB->setEndOffset(BB->getOffset());
    PrevBB = BB;
  }
  PrevBB->setEndOffset(getSize());

  updateLayoutIndices();

  normalizeCFIState();

  // Clean-up memory taken by intermediate structures.
  //
  // NB: don't clear Labels list as we may need them if we mark the function
  //     as non-simple later in the process of discovering extra entry points.
  clearList(Instructions);
  clearList(OffsetToCFI);
  clearList(TakenBranches);

  // Update the state.
  CurrentState = State::CFG;

  // Make any necessary adjustments for indirect branches.
  if (!postProcessIndirectBranches(AllocatorId)) {
    if (opts::Verbosity) {
      errs() << "BOLT-WARNING: failed to post-process indirect branches for "
             << *this << '\n';
    }
    // In relocation mode we want to keep processing the function but avoid
    // optimizing it.
    setSimple(false);
  }

  clearList(ExternallyReferencedOffsets);
  clearList(UnknownIndirectBranchOffsets);

  return true;
}
2207 
2208 void BinaryFunction::postProcessCFG() {
2209   if (isSimple() && !BasicBlocks.empty()) {
2210     // Convert conditional tail call branches to conditional branches that jump
2211     // to a tail call.
2212     removeConditionalTailCalls();
2213 
2214     postProcessProfile();
2215 
2216     // Eliminate inconsistencies between branch instructions and CFG.
2217     postProcessBranches();
2218   }
2219 
2220   calculateMacroOpFusionStats();
2221 
2222   // The final cleanup of intermediate structures.
2223   clearList(IgnoredBranches);
2224 
2225   // Remove "Offset" annotations, unless we need an address-translation table
2226   // later. This has no cost, since annotations are allocated by a bumpptr
2227   // allocator and won't be released anyway until late in the pipeline.
2228   if (!requiresAddressTranslation() && !opts::Instrument) {
2229     for (BinaryBasicBlock *BB : layout())
2230       for (MCInst &Inst : *BB)
2231         BC.MIB->clearOffset(Inst);
2232   }
2233 
2234   assert((!isSimple() || validateCFG()) &&
2235          "invalid CFG detected after post-processing");
2236 }
2237 
2238 void BinaryFunction::calculateMacroOpFusionStats() {
2239   if (!getBinaryContext().isX86())
2240     return;
2241   for (BinaryBasicBlock *BB : layout()) {
2242     auto II = BB->getMacroOpFusionPair();
2243     if (II == BB->end())
2244       continue;
2245 
2246     // Check offset of the second instruction.
2247     // FIXME: arch-specific.
2248     const uint32_t Offset = BC.MIB->getOffsetWithDefault(*std::next(II), 0);
2249     if (!Offset || (getAddress() + Offset) % 64)
2250       continue;
2251 
2252     LLVM_DEBUG(dbgs() << "\nmissed macro-op fusion at address 0x"
2253                       << Twine::utohexstr(getAddress() + Offset)
2254                       << " in function " << *this << "; executed "
2255                       << BB->getKnownExecutionCount() << " times.\n");
2256     ++BC.MissedMacroFusionPairs;
2257     BC.MissedMacroFusionExecCount += BB->getKnownExecutionCount();
2258   }
2259 }
2260 
2261 void BinaryFunction::removeTagsFromProfile() {
2262   for (BinaryBasicBlock *BB : BasicBlocks) {
2263     if (BB->ExecutionCount == BinaryBasicBlock::COUNT_NO_PROFILE)
2264       BB->ExecutionCount = 0;
2265     for (BinaryBasicBlock::BinaryBranchInfo &BI : BB->branch_info()) {
2266       if (BI.Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
2267           BI.MispredictedCount != BinaryBasicBlock::COUNT_NO_PROFILE)
2268         continue;
2269       BI.Count = 0;
2270       BI.MispredictedCount = 0;
2271     }
2272   }
2273 }
2274 
/// Rewrite every conditional tail call (CTC) as a conditional branch to a
/// new basic block containing an unconditional tail call to the original
/// target. The new blocks are appended at the end of the function and the
/// profile counts recorded on the CTC are transferred to the new edge/block.
void BinaryFunction::removeConditionalTailCalls() {
  // Blocks to be appended at the end.
  std::vector<std::unique_ptr<BinaryBasicBlock>> NewBlocks;

  for (auto BBI = begin(); BBI != end(); ++BBI) {
    BinaryBasicBlock &BB = *BBI;
    MCInst *CTCInstr = BB.getLastNonPseudoInstr();
    if (!CTCInstr)
      continue;

    // Only process blocks whose last instruction is a conditional tail call.
    Optional<uint64_t> TargetAddressOrNone =
        BC.MIB->getConditionalTailCall(*CTCInstr);
    if (!TargetAddressOrNone)
      continue;

    // Gather all necessary information about CTC instruction before
    // annotations are destroyed.
    const int32_t CFIStateBeforeCTC = BB.getCFIStateAtInstr(CTCInstr);
    uint64_t CTCTakenCount = BinaryBasicBlock::COUNT_NO_PROFILE;
    uint64_t CTCMispredCount = BinaryBasicBlock::COUNT_NO_PROFILE;
    if (hasValidProfile()) {
      CTCTakenCount = BC.MIB->getAnnotationWithDefault<uint64_t>(
          *CTCInstr, "CTCTakenCount");
      CTCMispredCount = BC.MIB->getAnnotationWithDefault<uint64_t>(
          *CTCInstr, "CTCMispredCount");
    }

    // Assert that the tail call does not throw.
    assert(!BC.MIB->getEHInfo(*CTCInstr) &&
           "found tail call with associated landing pad");

    // Create a basic block with an unconditional tail call instruction using
    // the same destination.
    const MCSymbol *CTCTargetLabel = BC.MIB->getTargetSymbol(*CTCInstr);
    assert(CTCTargetLabel && "symbol expected for conditional tail call");
    MCInst TailCallInstr;
    BC.MIB->createTailCall(TailCallInstr, CTCTargetLabel, BC.Ctx.get());
    // Link new BBs to the original input offset of the BB where the CTC
    // is, so we can map samples recorded in new BBs back to the original BB
    // seem in the input binary (if using BAT)
    std::unique_ptr<BinaryBasicBlock> TailCallBB = createBasicBlock(
        BB.getInputOffset(), BC.Ctx->createNamedTempSymbol("TC"));
    TailCallBB->addInstruction(TailCallInstr);
    // The tail-call block inherits the CFI state in effect at the CTC site.
    TailCallBB->setCFIState(CFIStateBeforeCTC);

    // Add CFG edge with profile info from BB to TailCallBB.
    BB.addSuccessor(TailCallBB.get(), CTCTakenCount, CTCMispredCount);

    // Add execution count for the block.
    TailCallBB->setExecutionCount(CTCTakenCount);

    // Downgrade the CTC to a plain conditional branch targeting the new
    // tail-call block.
    BC.MIB->convertTailCallToJmp(*CTCInstr);

    BC.MIB->replaceBranchTarget(*CTCInstr, TailCallBB->getLabel(),
                                BC.Ctx.get());

    // Add basic block to the list that will be added to the end.
    NewBlocks.emplace_back(std::move(TailCallBB));

    // Swap edges as the TailCallBB corresponds to the taken branch.
    BB.swapConditionalSuccessors();

    // This branch is no longer a conditional tail call.
    BC.MIB->unsetConditionalTailCall(*CTCInstr);
  }

  // Append the new tail-call blocks before the last (past-the-end) position.
  insertBasicBlocks(std::prev(end()), std::move(NewBlocks),
                    /* UpdateLayout */ true,
                    /* UpdateCFIState */ false);
}
2345 
2346 uint64_t BinaryFunction::getFunctionScore() const {
2347   if (FunctionScore != -1)
2348     return FunctionScore;
2349 
2350   if (!isSimple() || !hasValidProfile()) {
2351     FunctionScore = 0;
2352     return FunctionScore;
2353   }
2354 
2355   uint64_t TotalScore = 0ULL;
2356   for (BinaryBasicBlock *BB : layout()) {
2357     uint64_t BBExecCount = BB->getExecutionCount();
2358     if (BBExecCount == BinaryBasicBlock::COUNT_NO_PROFILE)
2359       continue;
2360     TotalScore += BBExecCount;
2361   }
2362   FunctionScore = TotalScore;
2363   return FunctionScore;
2364 }
2365 
2366 void BinaryFunction::annotateCFIState() {
2367   assert(CurrentState == State::Disassembled && "unexpected function state");
2368   assert(!BasicBlocks.empty() && "basic block list should not be empty");
2369 
2370   // This is an index of the last processed CFI in FDE CFI program.
2371   uint32_t State = 0;
2372 
2373   // This is an index of RememberState CFI reflecting effective state right
2374   // after execution of RestoreState CFI.
2375   //
2376   // It differs from State iff the CFI at (State-1)
2377   // was RestoreState (modulo GNU_args_size CFIs, which are ignored).
2378   //
2379   // This allows us to generate shorter replay sequences when producing new
2380   // CFI programs.
2381   uint32_t EffectiveState = 0;
2382 
2383   // For tracking RememberState/RestoreState sequences.
2384   std::stack<uint32_t> StateStack;
2385 
2386   for (BinaryBasicBlock *BB : BasicBlocks) {
2387     BB->setCFIState(EffectiveState);
2388 
2389     for (const MCInst &Instr : *BB) {
2390       const MCCFIInstruction *CFI = getCFIFor(Instr);
2391       if (!CFI)
2392         continue;
2393 
2394       ++State;
2395 
2396       switch (CFI->getOperation()) {
2397       case MCCFIInstruction::OpRememberState:
2398         StateStack.push(EffectiveState);
2399         EffectiveState = State;
2400         break;
2401       case MCCFIInstruction::OpRestoreState:
2402         assert(!StateStack.empty() && "corrupt CFI stack");
2403         EffectiveState = StateStack.top();
2404         StateStack.pop();
2405         break;
2406       case MCCFIInstruction::OpGnuArgsSize:
2407         // OpGnuArgsSize CFIs do not affect the CFI state.
2408         break;
2409       default:
2410         // Any other CFI updates the state.
2411         EffectiveState = State;
2412         break;
2413       }
2414     }
2415   }
2416 
2417   assert(StateStack.empty() && "corrupt CFI stack");
2418 }
2419 
2420 namespace {
2421 
/// Our full interpretation of a DWARF CFI machine state at a given point
struct CFISnapshot {
  /// CFA register number and offset defining the canonical frame at this
  /// point, or the number of a rule (CFI state) that computes it with a
  /// DWARF expression. This number will be negative if it refers to a CFI
  /// located in the CIE instead of the FDE.
  uint32_t CFAReg;
  int32_t CFAOffset;
  int32_t CFARule;
  /// Mapping of rules (CFI states) that define the location of each
  /// register. If absent, no rule defining the location of such register
  /// was ever read. This number will be negative if it refers to a CFI
  /// located in the CIE instead of the FDE.
  DenseMap<int32_t, int32_t> RegRule;

  /// References to CIE, FDE and expanded instructions after a restore state
  const BinaryFunction::CFIInstrMapType &CIE;
  const BinaryFunction::CFIInstrMapType &FDE;
  const DenseMap<int32_t, SmallVector<int32_t, 4>> &FrameRestoreEquivalents;

  /// Current FDE CFI number representing the state where the snapshot is at
  int32_t CurState;

  /// Used when we don't have information about which state/rule to apply
  /// to recover the location of either the CFA or a specific register
  constexpr static int32_t UNKNOWN = std::numeric_limits<int32_t>::min();

private:
  /// Update our snapshot by executing a single CFI.
  ///
  /// \p RuleNumber identifies \p Instr: its index in the FDE program, or a
  /// non-positive number (negated index) when it comes from the CIE; the
  /// value is recorded as the rule that last defined the affected register
  /// or the CFA.
  void update(const MCCFIInstruction &Instr, int32_t RuleNumber) {
    switch (Instr.getOperation()) {
    // Register-location CFIs: remember which rule last defined the register.
    case MCCFIInstruction::OpSameValue:
    case MCCFIInstruction::OpRelOffset:
    case MCCFIInstruction::OpOffset:
    case MCCFIInstruction::OpRestore:
    case MCCFIInstruction::OpUndefined:
    case MCCFIInstruction::OpRegister:
      RegRule[Instr.getRegister()] = RuleNumber;
      break;
    // CFA-definition CFIs: track the explicit register/offset pair directly
    // and drop any previously recorded expression-based CFA rule.
    case MCCFIInstruction::OpDefCfaRegister:
      CFAReg = Instr.getRegister();
      CFARule = UNKNOWN;
      break;
    case MCCFIInstruction::OpDefCfaOffset:
      CFAOffset = Instr.getOffset();
      CFARule = UNKNOWN;
      break;
    case MCCFIInstruction::OpDefCfa:
      CFAReg = Instr.getRegister();
      CFAOffset = Instr.getOffset();
      CFARule = UNKNOWN;
      break;
    case MCCFIInstruction::OpEscape: {
      Optional<uint8_t> Reg = readDWARFExpressionTargetReg(Instr.getValues());
      // Handle DW_CFA_def_cfa_expression
      if (!Reg) {
        // Expression defines the CFA itself: remember the rule, not a value.
        CFARule = RuleNumber;
        break;
      }
      RegRule[*Reg] = RuleNumber;
      break;
    }
    case MCCFIInstruction::OpAdjustCfaOffset:
    case MCCFIInstruction::OpWindowSave:
    case MCCFIInstruction::OpNegateRAState:
    case MCCFIInstruction::OpLLVMDefAspaceCfa:
      llvm_unreachable("unsupported CFI opcode");
      break;
    case MCCFIInstruction::OpRememberState:
    case MCCFIInstruction::OpRestoreState:
    case MCCFIInstruction::OpGnuArgsSize:
      // do not affect CFI state
      break;
    }
  }

public:
  /// Advance state reading FDE CFI instructions up to State number
  void advanceTo(int32_t State) {
    for (int32_t I = CurState, E = State; I != E; ++I) {
      const MCCFIInstruction &Instr = FDE[I];
      if (Instr.getOperation() != MCCFIInstruction::OpRestoreState) {
        update(Instr, I);
        continue;
      }
      // If restore state instruction, fetch the equivalent CFIs that have
      // the same effect of this restore. This is used to ensure remember-
      // restore pairs are completely removed.
      auto Iter = FrameRestoreEquivalents.find(I);
      if (Iter == FrameRestoreEquivalents.end())
        continue;
      for (int32_t RuleNumber : Iter->second)
        update(FDE[RuleNumber], RuleNumber);
    }

    // Every CIE must establish an initial CFA (DWARF initial instructions);
    // if neither an explicit pair nor an expression rule is set, the input
    // frame info is malformed.
    assert(((CFAReg != (uint32_t)UNKNOWN && CFAOffset != UNKNOWN) ||
            CFARule != UNKNOWN) &&
           "CIE did not define default CFA?");

    CurState = State;
  }

  /// Interpret all CIE and FDE instructions up until CFI State number and
  /// populate this snapshot
  CFISnapshot(
      const BinaryFunction::CFIInstrMapType &CIE,
      const BinaryFunction::CFIInstrMapType &FDE,
      const DenseMap<int32_t, SmallVector<int32_t, 4>> &FrameRestoreEquivalents,
      int32_t State)
      : CIE(CIE), FDE(FDE), FrameRestoreEquivalents(FrameRestoreEquivalents) {
    CFAReg = UNKNOWN;
    CFAOffset = UNKNOWN;
    CFARule = UNKNOWN;
    CurState = 0;

    // Replay the CIE program first. CIE rules are recorded negated (-I).
    // NOTE(review): -I equals 0 for the first CIE instruction, which is not
    // distinguishable from FDE rule 0 in RegRule/CFARule — presumably benign
    // in practice, but worth confirming.
    for (int32_t I = 0, E = CIE.size(); I != E; ++I) {
      const MCCFIInstruction &Instr = CIE[I];
      update(Instr, -I);
    }

    advanceTo(State);
  }
};
2545 
2546 /// A CFI snapshot with the capability of checking if incremental additions to
2547 /// it are redundant. This is used to ensure we do not emit two CFI instructions
2548 /// back-to-back that are doing the same state change, or to avoid emitting a
2549 /// CFI at all when the state at that point would not be modified after that CFI
2550 struct CFISnapshotDiff : public CFISnapshot {
2551   bool RestoredCFAReg{false};
2552   bool RestoredCFAOffset{false};
2553   DenseMap<int32_t, bool> RestoredRegs;
2554 
2555   CFISnapshotDiff(const CFISnapshot &S) : CFISnapshot(S) {}
2556 
2557   CFISnapshotDiff(
2558       const BinaryFunction::CFIInstrMapType &CIE,
2559       const BinaryFunction::CFIInstrMapType &FDE,
2560       const DenseMap<int32_t, SmallVector<int32_t, 4>> &FrameRestoreEquivalents,
2561       int32_t State)
2562       : CFISnapshot(CIE, FDE, FrameRestoreEquivalents, State) {}
2563 
2564   /// Return true if applying Instr to this state is redundant and can be
2565   /// dismissed.
2566   bool isRedundant(const MCCFIInstruction &Instr) {
2567     switch (Instr.getOperation()) {
2568     case MCCFIInstruction::OpSameValue:
2569     case MCCFIInstruction::OpRelOffset:
2570     case MCCFIInstruction::OpOffset:
2571     case MCCFIInstruction::OpRestore:
2572     case MCCFIInstruction::OpUndefined:
2573     case MCCFIInstruction::OpRegister:
2574     case MCCFIInstruction::OpEscape: {
2575       uint32_t Reg;
2576       if (Instr.getOperation() != MCCFIInstruction::OpEscape) {
2577         Reg = Instr.getRegister();
2578       } else {
2579         Optional<uint8_t> R = readDWARFExpressionTargetReg(Instr.getValues());
2580         // Handle DW_CFA_def_cfa_expression
2581         if (!R) {
2582           if (RestoredCFAReg && RestoredCFAOffset)
2583             return true;
2584           RestoredCFAReg = true;
2585           RestoredCFAOffset = true;
2586           return false;
2587         }
2588         Reg = *R;
2589       }
2590       if (RestoredRegs[Reg])
2591         return true;
2592       RestoredRegs[Reg] = true;
2593       const int32_t CurRegRule =
2594           RegRule.find(Reg) != RegRule.end() ? RegRule[Reg] : UNKNOWN;
2595       if (CurRegRule == UNKNOWN) {
2596         if (Instr.getOperation() == MCCFIInstruction::OpRestore ||
2597             Instr.getOperation() == MCCFIInstruction::OpSameValue)
2598           return true;
2599         return false;
2600       }
2601       const MCCFIInstruction &LastDef =
2602           CurRegRule < 0 ? CIE[-CurRegRule] : FDE[CurRegRule];
2603       return LastDef == Instr;
2604     }
2605     case MCCFIInstruction::OpDefCfaRegister:
2606       if (RestoredCFAReg)
2607         return true;
2608       RestoredCFAReg = true;
2609       return CFAReg == Instr.getRegister();
2610     case MCCFIInstruction::OpDefCfaOffset:
2611       if (RestoredCFAOffset)
2612         return true;
2613       RestoredCFAOffset = true;
2614       return CFAOffset == Instr.getOffset();
2615     case MCCFIInstruction::OpDefCfa:
2616       if (RestoredCFAReg && RestoredCFAOffset)
2617         return true;
2618       RestoredCFAReg = true;
2619       RestoredCFAOffset = true;
2620       return CFAReg == Instr.getRegister() && CFAOffset == Instr.getOffset();
2621     case MCCFIInstruction::OpAdjustCfaOffset:
2622     case MCCFIInstruction::OpWindowSave:
2623     case MCCFIInstruction::OpNegateRAState:
2624     case MCCFIInstruction::OpLLVMDefAspaceCfa:
2625       llvm_unreachable("unsupported CFI opcode");
2626       return false;
2627     case MCCFIInstruction::OpRememberState:
2628     case MCCFIInstruction::OpRestoreState:
2629     case MCCFIInstruction::OpGnuArgsSize:
2630       // do not affect CFI state
2631       return true;
2632     }
2633     return false;
2634   }
2635 };
2636 
2637 } // end anonymous namespace
2638 
2639 bool BinaryFunction::replayCFIInstrs(int32_t FromState, int32_t ToState,
2640                                      BinaryBasicBlock *InBB,
2641                                      BinaryBasicBlock::iterator InsertIt) {
2642   if (FromState == ToState)
2643     return true;
2644   assert(FromState < ToState && "can only replay CFIs forward");
2645 
2646   CFISnapshotDiff CFIDiff(CIEFrameInstructions, FrameInstructions,
2647                           FrameRestoreEquivalents, FromState);
2648 
2649   std::vector<uint32_t> NewCFIs;
2650   for (int32_t CurState = FromState; CurState < ToState; ++CurState) {
2651     MCCFIInstruction *Instr = &FrameInstructions[CurState];
2652     if (Instr->getOperation() == MCCFIInstruction::OpRestoreState) {
2653       auto Iter = FrameRestoreEquivalents.find(CurState);
2654       assert(Iter != FrameRestoreEquivalents.end());
2655       NewCFIs.insert(NewCFIs.end(), Iter->second.begin(), Iter->second.end());
2656       // RestoreState / Remember will be filtered out later by CFISnapshotDiff,
2657       // so we might as well fall-through here.
2658     }
2659     NewCFIs.push_back(CurState);
2660     continue;
2661   }
2662 
2663   // Replay instructions while avoiding duplicates
2664   for (auto I = NewCFIs.rbegin(), E = NewCFIs.rend(); I != E; ++I) {
2665     if (CFIDiff.isRedundant(FrameInstructions[*I]))
2666       continue;
2667     InsertIt = addCFIPseudo(InBB, InsertIt, *I);
2668   }
2669 
2670   return true;
2671 }
2672 
/// Emit CFI pseudo instructions at \p InsertIt that transform the CFI state
/// \p FromState back into \p ToState (i.e. undo all FDE CFIs in the range
/// [ToState, FromState)). New MCCFIInstructions may be appended to
/// FrameInstructions; the indices of all emitted CFIs are returned so the
/// caller can record them (e.g. in FrameRestoreEquivalents). \p InsertIt is
/// advanced past every inserted pseudo.
SmallVector<int32_t, 4>
BinaryFunction::unwindCFIState(int32_t FromState, int32_t ToState,
                               BinaryBasicBlock *InBB,
                               BinaryBasicBlock::iterator &InsertIt) {
  SmallVector<int32_t, 4> NewStates;

  // Snapshot of the state we want to get back to...
  CFISnapshot ToCFITable(CIEFrameInstructions, FrameInstructions,
                         FrameRestoreEquivalents, ToState);
  // ...and a diff-tracking snapshot of where we currently are, used to skip
  // CFIs that would not change anything.
  CFISnapshotDiff FromCFITable(ToCFITable);
  FromCFITable.advanceTo(FromState);

  // Re-establish the CFA definition recorded in the target snapshot,
  // either as an explicit def_cfa (synthesized), a CIE instruction copied
  // into the FDE program, or a reference to an existing FDE rule.
  auto undoStateDefCfa = [&]() {
    if (ToCFITable.CFARule == CFISnapshot::UNKNOWN) {
      FrameInstructions.emplace_back(MCCFIInstruction::cfiDefCfa(
          nullptr, ToCFITable.CFAReg, ToCFITable.CFAOffset));
      if (FromCFITable.isRedundant(FrameInstructions.back())) {
        FrameInstructions.pop_back();
        return;
      }
      NewStates.push_back(FrameInstructions.size() - 1);
      InsertIt = addCFIPseudo(InBB, InsertIt, FrameInstructions.size() - 1);
      ++InsertIt;
    } else if (ToCFITable.CFARule < 0) {
      // Negative rule: the defining CFI lives in the CIE; copy it into the
      // FDE program so the pseudo can reference it by index.
      if (FromCFITable.isRedundant(CIEFrameInstructions[-ToCFITable.CFARule]))
        return;
      NewStates.push_back(FrameInstructions.size());
      InsertIt = addCFIPseudo(InBB, InsertIt, FrameInstructions.size());
      ++InsertIt;
      FrameInstructions.emplace_back(CIEFrameInstructions[-ToCFITable.CFARule]);
    } else if (!FromCFITable.isRedundant(
                   FrameInstructions[ToCFITable.CFARule])) {
      // Non-negative rule: reuse the existing FDE instruction as-is.
      NewStates.push_back(ToCFITable.CFARule);
      InsertIt = addCFIPseudo(InBB, InsertIt, ToCFITable.CFARule);
      ++InsertIt;
    }
  };

  // Undo the effect of a single CFI by re-emitting whatever rule the target
  // snapshot holds for the entity (register or CFA) that the CFI modified.
  auto undoState = [&](const MCCFIInstruction &Instr) {
    switch (Instr.getOperation()) {
    case MCCFIInstruction::OpRememberState:
    case MCCFIInstruction::OpRestoreState:
      break;
    case MCCFIInstruction::OpSameValue:
    case MCCFIInstruction::OpRelOffset:
    case MCCFIInstruction::OpOffset:
    case MCCFIInstruction::OpRestore:
    case MCCFIInstruction::OpUndefined:
    case MCCFIInstruction::OpEscape:
    case MCCFIInstruction::OpRegister: {
      uint32_t Reg;
      if (Instr.getOperation() != MCCFIInstruction::OpEscape) {
        Reg = Instr.getRegister();
      } else {
        Optional<uint8_t> R = readDWARFExpressionTargetReg(Instr.getValues());
        // Handle DW_CFA_def_cfa_expression
        if (!R) {
          undoStateDefCfa();
          return;
        }
        Reg = *R;
      }

      if (ToCFITable.RegRule.find(Reg) == ToCFITable.RegRule.end()) {
        // Target state has no rule for this register: restore its default.
        FrameInstructions.emplace_back(
            MCCFIInstruction::createRestore(nullptr, Reg));
        if (FromCFITable.isRedundant(FrameInstructions.back())) {
          FrameInstructions.pop_back();
          break;
        }
        NewStates.push_back(FrameInstructions.size() - 1);
        InsertIt = addCFIPseudo(InBB, InsertIt, FrameInstructions.size() - 1);
        ++InsertIt;
        break;
      }
      const int32_t Rule = ToCFITable.RegRule[Reg];
      if (Rule < 0) {
        // Rule lives in the CIE; copy it into the FDE program (see above).
        if (FromCFITable.isRedundant(CIEFrameInstructions[-Rule]))
          break;
        NewStates.push_back(FrameInstructions.size());
        InsertIt = addCFIPseudo(InBB, InsertIt, FrameInstructions.size());
        ++InsertIt;
        FrameInstructions.emplace_back(CIEFrameInstructions[-Rule]);
        break;
      }
      if (FromCFITable.isRedundant(FrameInstructions[Rule]))
        break;
      NewStates.push_back(Rule);
      InsertIt = addCFIPseudo(InBB, InsertIt, Rule);
      ++InsertIt;
      break;
    }
    case MCCFIInstruction::OpDefCfaRegister:
    case MCCFIInstruction::OpDefCfaOffset:
    case MCCFIInstruction::OpDefCfa:
      undoStateDefCfa();
      break;
    case MCCFIInstruction::OpAdjustCfaOffset:
    case MCCFIInstruction::OpWindowSave:
    case MCCFIInstruction::OpNegateRAState:
    case MCCFIInstruction::OpLLVMDefAspaceCfa:
      llvm_unreachable("unsupported CFI opcode");
      break;
    case MCCFIInstruction::OpGnuArgsSize:
      // do not affect CFI state
      break;
    }
  };

  // Undo all modifications from ToState to FromState
  for (int32_t I = ToState, E = FromState; I != E; ++I) {
    const MCCFIInstruction &Instr = FrameInstructions[I];
    if (Instr.getOperation() != MCCFIInstruction::OpRestoreState) {
      undoState(Instr);
      continue;
    }
    // RestoreState: undo its expanded equivalent sequence instead, if one
    // was recorded; otherwise there is nothing to undo for it.
    auto Iter = FrameRestoreEquivalents.find(I);
    if (Iter == FrameRestoreEquivalents.end())
      continue;
    for (int32_t State : Iter->second)
      undoState(FrameInstructions[State]);
  }

  return NewStates;
}
2797 
void BinaryFunction::normalizeCFIState() {
  // Reordering blocks with remember-restore state instructions can be specially
  // tricky. When rewriting the CFI, we omit remember-restore state instructions
  // entirely. For restore state, we build a map expanding each restore to the
  // equivalent unwindCFIState sequence required at that point to achieve the
  // same effect of the restore. All remember state are then just ignored.
  std::stack<int32_t> Stack;
  for (BinaryBasicBlock *CurBB : BasicBlocksLayout) {
    for (auto II = CurBB->begin(); II != CurBB->end(); ++II) {
      if (const MCCFIInstruction *CFI = getCFIFor(*II)) {
        // CFI pseudos carry the FrameInstructions index as their first
        // (immediate) operand.
        if (CFI->getOperation() == MCCFIInstruction::OpRememberState) {
          Stack.push(II->getOperand(0).getImm());
          continue;
        }
        if (CFI->getOperation() == MCCFIInstruction::OpRestoreState) {
          // NOTE(review): assumes remember/restore CFIs are balanced in
          // layout order — Stack.top() on an empty stack is UB; verify the
          // input FDE guarantees this.
          const int32_t RememberState = Stack.top();
          const int32_t CurState = II->getOperand(0).getImm();
          // unwindCFIState takes II by reference and advances it past any
          // CFI pseudos it inserts.
          FrameRestoreEquivalents[CurState] =
              unwindCFIState(CurState, RememberState, CurBB, II);
          Stack.pop();
        }
      }
    }
  }
}
2823 
/// Walk the final block layout and fix up the CFI state at each block
/// boundary: unwind when the incoming state is ahead of the block's expected
/// state, replay when it is behind, and reset at the hot/cold split (which
/// gets its own FDE). Finally, strip all remember/restore-state pseudos,
/// which were normalized away earlier.
/// \returns false if a replay fails.
bool BinaryFunction::finalizeCFIState() {
  LLVM_DEBUG(
      dbgs() << "Trying to fix CFI states for each BB after reordering.\n");
  LLVM_DEBUG(dbgs() << "This is the list of CFI states for each BB of " << *this
                    << ": ");

  int32_t State = 0;
  bool SeenCold = false;
  const char *Sep = "";
  (void)Sep;
  for (BinaryBasicBlock *BB : BasicBlocksLayout) {
    const int32_t CFIStateAtExit = BB->getCFIStateAtExit();

    // Hot-cold border: check if this is the first BB to be allocated in a cold
    // region (with a different FDE). If yes, we need to reset the CFI state.
    if (!SeenCold && BB->isCold()) {
      State = 0;
      SeenCold = true;
    }

    // We need to recover the correct state if it doesn't match expected
    // state at BB entry point.
    if (BB->getCFIState() < State) {
      // In this case, State is currently higher than what this BB expects it
      // to be. To solve this, we need to insert CFI instructions to undo
      // the effect of all CFI from BB's state to current State.
      auto InsertIt = BB->begin();
      unwindCFIState(State, BB->getCFIState(), BB, InsertIt);
    } else if (BB->getCFIState() > State) {
      // If BB's CFI state is greater than State, it means we are behind in the
      // state. Just emit all instructions to reach this state at the
      // beginning of this BB. If this sequence of instructions involve
      // remember state or restore state, bail out.
      if (!replayCFIInstrs(State, BB->getCFIState(), BB, BB->begin()))
        return false;
    }

    State = CFIStateAtExit;
    LLVM_DEBUG(dbgs() << Sep << State; Sep = ", ");
  }
  LLVM_DEBUG(dbgs() << "\n");

  // All remember/restore-state pseudos were expanded by normalizeCFIState()
  // and the fix-ups above; remove them from the instruction streams.
  for (BinaryBasicBlock *BB : BasicBlocksLayout) {
    for (auto II = BB->begin(); II != BB->end();) {
      const MCCFIInstruction *CFI = getCFIFor(*II);
      if (CFI && (CFI->getOperation() == MCCFIInstruction::OpRememberState ||
                  CFI->getOperation() == MCCFIInstruction::OpRestoreState)) {
        II = BB->eraseInstruction(II);
      } else {
        ++II;
      }
    }
  }

  return true;
}
2880 
/// Whether output-to-input address translation must be maintained for this
/// function: requested globally via opts::EnableBAT, or required because the
/// function carries SDT or pseudo-probe markers.
bool BinaryFunction::requiresAddressTranslation() const {
  return opts::EnableBAT || hasSDTMarker() || hasPseudoProbe();
}
2884 
2885 uint64_t BinaryFunction::getInstructionCount() const {
2886   uint64_t Count = 0;
2887   for (BinaryBasicBlock *const &Block : BasicBlocksLayout)
2888     Count += Block->getNumNonPseudos();
2889   return Count;
2890 }
2891 
/// Accessor for the ModifiedLayout flag (set elsewhere when the basic block
/// layout of this function is updated).
bool BinaryFunction::hasLayoutChanged() const { return ModifiedLayout; }
2893 
/// Edit distance between the previous and the current basic block layouts,
/// treating each layout as a sequence of block pointers.
uint64_t BinaryFunction::getEditDistance() const {
  return ComputeEditDistance<BinaryBasicBlock *>(BasicBlocksPreviousLayout,
                                                 BasicBlocksLayout);
}
2898 
2899 void BinaryFunction::clearDisasmState() {
2900   clearList(Instructions);
2901   clearList(IgnoredBranches);
2902   clearList(TakenBranches);
2903   clearList(InterproceduralReferences);
2904 
2905   if (BC.HasRelocations) {
2906     for (std::pair<const uint32_t, MCSymbol *> &LI : Labels)
2907       BC.UndefinedSymbols.insert(LI.second);
2908     if (FunctionEndLabel)
2909       BC.UndefinedSymbols.insert(FunctionEndLabel);
2910   }
2911 }
2912 
2913 void BinaryFunction::setTrapOnEntry() {
2914   clearDisasmState();
2915 
2916   auto addTrapAtOffset = [&](uint64_t Offset) {
2917     MCInst TrapInstr;
2918     BC.MIB->createTrap(TrapInstr);
2919     addInstruction(Offset, std::move(TrapInstr));
2920   };
2921 
2922   addTrapAtOffset(0);
2923   for (const std::pair<const uint32_t, MCSymbol *> &KV : getLabels())
2924     if (getSecondaryEntryPointSymbol(KV.second))
2925       addTrapAtOffset(KV.first);
2926 
2927   TrapsOnEntry = true;
2928 }
2929 
2930 void BinaryFunction::setIgnored() {
2931   if (opts::processAllFunctions()) {
2932     // We can accept ignored functions before they've been disassembled.
2933     // In that case, they would still get disassembled and emited, but not
2934     // optimized.
2935     assert(CurrentState == State::Empty &&
2936            "cannot ignore non-empty functions in current mode");
2937     IsIgnored = true;
2938     return;
2939   }
2940 
2941   clearDisasmState();
2942 
2943   // Clear CFG state too.
2944   if (hasCFG()) {
2945     releaseCFG();
2946 
2947     for (BinaryBasicBlock *BB : BasicBlocks)
2948       delete BB;
2949     clearList(BasicBlocks);
2950 
2951     for (BinaryBasicBlock *BB : DeletedBasicBlocks)
2952       delete BB;
2953     clearList(DeletedBasicBlocks);
2954 
2955     clearList(BasicBlocksLayout);
2956     clearList(BasicBlocksPreviousLayout);
2957   }
2958 
2959   CurrentState = State::Empty;
2960 
2961   IsIgnored = true;
2962   IsSimple = false;
2963   LLVM_DEBUG(dbgs() << "Ignoring " << getPrintName() << '\n');
2964 }
2965 
/// For every constant-island symbol referenced from a cold basic block,
/// create (or reuse) a ".cold" clone of the symbol and rewrite the operand to
/// reference the clone, so the cold fragment gets its own island copies.
void BinaryFunction::duplicateConstantIslands() {
  assert(Islands && "function expected to have constant islands");

  for (BinaryBasicBlock *BB : layout()) {
    // Only references from the cold part need redirecting.
    if (!BB->isCold())
      continue;

    for (MCInst &Inst : *BB) {
      int OpNum = 0;
      for (MCOperand &Operand : Inst) {
        if (!Operand.isExpr()) {
          ++OpNum;
          continue;
        }
        const MCSymbol *Symbol = BC.MIB->getTargetSymbol(Inst, OpNum);
        // Check if this is an island symbol
        // NOTE(review): this `continue` skips ++OpNum, so a later expr
        // operand in the same instruction would be queried with a stale
        // OpNum — presumably only one expr operand matters here; verify.
        if (!Islands->Symbols.count(Symbol) &&
            !Islands->ProxySymbols.count(Symbol))
          continue;

        // Create cold symbol, if missing
        auto ISym = Islands->ColdSymbols.find(Symbol);
        MCSymbol *ColdSymbol;
        if (ISym != Islands->ColdSymbols.end()) {
          ColdSymbol = ISym->second;
        } else {
          ColdSymbol = BC.Ctx->getOrCreateSymbol(Symbol->getName() + ".cold");
          Islands->ColdSymbols[Symbol] = ColdSymbol;
          // Check if this is a proxy island symbol and update owner proxy map
          if (Islands->ProxySymbols.count(Symbol)) {
            BinaryFunction *Owner = Islands->ProxySymbols[Symbol];
            auto IProxiedSym = Owner->Islands->Proxies[this].find(Symbol);
            Owner->Islands->ColdProxies[this][IProxiedSym->second] = ColdSymbol;
          }
        }

        // Update instruction reference
        Operand = MCOperand::createExpr(BC.MIB->getTargetExprFor(
            Inst,
            MCSymbolRefExpr::create(ColdSymbol, MCSymbolRefExpr::VK_None,
                                    *BC.Ctx),
            *BC.Ctx, 0));
        ++OpNum;
      }
    }
  }
}
3013 
3014 namespace {
3015 
3016 #ifndef MAX_PATH
3017 #define MAX_PATH 255
3018 #endif
3019 
3020 std::string constructFilename(std::string Filename, std::string Annotation,
3021                               std::string Suffix) {
3022   std::replace(Filename.begin(), Filename.end(), '/', '-');
3023   if (!Annotation.empty())
3024     Annotation.insert(0, "-");
3025   if (Filename.size() + Annotation.size() + Suffix.size() > MAX_PATH) {
3026     assert(Suffix.size() + Annotation.size() <= MAX_PATH);
3027     if (opts::Verbosity >= 1) {
3028       errs() << "BOLT-WARNING: Filename \"" << Filename << Annotation << Suffix
3029              << "\" exceeds the " << MAX_PATH << " size limit, truncating.\n";
3030     }
3031     Filename.resize(MAX_PATH - (Suffix.size() + Annotation.size()));
3032   }
3033   Filename += Annotation;
3034   Filename += Suffix;
3035   return Filename;
3036 }
3037 
/// Escape a string for use inside a double-quoted GraphViz attribute value:
/// newlines become "&#13;" entities (carriage return — rendered by GraphViz
/// as a line break inside labels/tooltips) and double quotes are dropped,
/// since they would terminate the surrounding attribute.
std::string formatEscapes(const std::string &Str) {
  std::string Result;
  Result.reserve(Str.size());
  for (const char C : Str) {
    switch (C) {
    case '\n':
      Result += "&#13;";
      break;
    case '"':
      // Drop quotes entirely; they cannot appear inside the attribute.
      break;
    default:
      Result += C;
      break;
    }
  }
  return Result;
}
3055 
3056 } // namespace
3057 
/// Emit the CFG of this function to \p OS in GraphViz "dot" format: one node
/// per basic block (annotated with execution count, offset, index, layout
/// position and CFI state), one edge per successor (labeled with the branch
/// opcode and profile counts), and dashed edges to landing pads.
void BinaryFunction::dumpGraph(raw_ostream &OS) const {
  OS << "strict digraph \"" << getPrintName() << "\" {\n";
  uint64_t Offset = Address;
  for (BinaryBasicBlock *BB : BasicBlocks) {
    // Position of this block in the final layout, for the node label.
    auto LayoutPos =
        std::find(BasicBlocksLayout.begin(), BasicBlocksLayout.end(), BB);
    unsigned Layout = LayoutPos - BasicBlocksLayout.begin();
    const char *ColdStr = BB->isCold() ? " (cold)" : "";
    OS << format("\"%s\" [label=\"%s%s\\n(C:%lu,O:%lu,I:%u,L:%u:CFI:%u)\"]\n",
                 BB->getName().data(), BB->getName().data(), ColdStr,
                 (BB->ExecutionCount != BinaryBasicBlock::COUNT_NO_PROFILE
                      ? BB->ExecutionCount
                      : 0),
                 BB->getOffset(), getIndex(BB), Layout, BB->getCFIState());
    OS << format("\"%s\" [shape=box]\n", BB->getName().data());
    if (opts::DotToolTipCode) {
      // Attach the disassembly of the block as a tooltip, escaped for dot.
      std::string Str;
      raw_string_ostream CS(Str);
      Offset = BC.printInstructions(CS, BB->begin(), BB->end(), Offset, this);
      const std::string Code = formatEscapes(CS.str());
      OS << format("\"%s\" [tooltip=\"%s\"]\n", BB->getName().data(),
                   Code.c_str());
    }

    // analyzeBranch is just used to get the names of the branch
    // opcodes.
    const MCSymbol *TBB = nullptr;
    const MCSymbol *FBB = nullptr;
    MCInst *CondBranch = nullptr;
    MCInst *UncondBranch = nullptr;
    const bool Success = BB->analyzeBranch(TBB, FBB, CondBranch, UncondBranch);

    const MCInst *LastInstr = BB->getLastNonPseudoInstr();
    const bool IsJumpTable = LastInstr && BC.MIB->getJumpTable(*LastInstr);

    auto BI = BB->branch_info_begin();
    for (BinaryBasicBlock *Succ : BB->successors()) {
      // Edge label: branch opcode name, or "TB"/"FB" (taken/fall-through
      // branch with no explicit instruction), "FT" (fall-through), "JT"
      // (jump table).
      std::string Branch;
      if (Success) {
        if (Succ == BB->getConditionalSuccessor(true)) {
          Branch = CondBranch ? std::string(BC.InstPrinter->getOpcodeName(
                                    CondBranch->getOpcode()))
                              : "TB";
        } else if (Succ == BB->getConditionalSuccessor(false)) {
          Branch = UncondBranch ? std::string(BC.InstPrinter->getOpcodeName(
                                      UncondBranch->getOpcode()))
                                : "FB";
        } else {
          Branch = "FT";
        }
      }
      if (IsJumpTable)
        Branch = "JT";
      OS << format("\"%s\" -> \"%s\" [label=\"%s", BB->getName().data(),
                   Succ->getName().data(), Branch.c_str());

      // Append profile data: measured count/mispredictions, or inferred count.
      if (BB->getExecutionCount() != COUNT_NO_PROFILE &&
          BI->MispredictedCount != BinaryBasicBlock::COUNT_INFERRED) {
        OS << "\\n(C:" << BI->Count << ",M:" << BI->MispredictedCount << ")";
      } else if (ExecutionCount != COUNT_NO_PROFILE &&
                 BI->Count != BinaryBasicBlock::COUNT_NO_PROFILE) {
        OS << "\\n(IC:" << BI->Count << ")";
      }
      OS << "\"]\n";

      ++BI;
    }
    // Landing-pad edges are drawn dashed and excluded from ranking.
    for (BinaryBasicBlock *LP : BB->landing_pads()) {
      OS << format("\"%s\" -> \"%s\" [constraint=false style=dashed]\n",
                   BB->getName().data(), LP->getName().data());
    }
  }
  OS << "}\n";
}
3132 
3133 void BinaryFunction::viewGraph() const {
3134   SmallString<MAX_PATH> Filename;
3135   if (std::error_code EC =
3136           sys::fs::createTemporaryFile("bolt-cfg", "dot", Filename)) {
3137     errs() << "BOLT-ERROR: " << EC.message() << ", unable to create "
3138            << " bolt-cfg-XXXXX.dot temporary file.\n";
3139     return;
3140   }
3141   dumpGraphToFile(std::string(Filename));
3142   if (DisplayGraph(Filename))
3143     errs() << "BOLT-ERROR: Can't display " << Filename << " with graphviz.\n";
3144   if (std::error_code EC = sys::fs::remove(Filename)) {
3145     errs() << "BOLT-WARNING: " << EC.message() << ", failed to remove "
3146            << Filename << "\n";
3147   }
3148 }
3149 
3150 void BinaryFunction::dumpGraphForPass(std::string Annotation) const {
3151   std::string Filename = constructFilename(getPrintName(), Annotation, ".dot");
3152   outs() << "BOLT-DEBUG: Dumping CFG to " << Filename << "\n";
3153   dumpGraphToFile(Filename);
3154 }
3155 
3156 void BinaryFunction::dumpGraphToFile(std::string Filename) const {
3157   std::error_code EC;
3158   raw_fd_ostream of(Filename, EC, sys::fs::OF_None);
3159   if (EC) {
3160     if (opts::Verbosity >= 1) {
3161       errs() << "BOLT-WARNING: " << EC.message() << ", unable to open "
3162              << Filename << " for output.\n";
3163     }
3164     return;
3165   }
3166   dumpGraph(of);
3167 }
3168 
3169 bool BinaryFunction::validateCFG() const {
3170   bool Valid = true;
3171   for (BinaryBasicBlock *BB : BasicBlocks)
3172     Valid &= BB->validateSuccessorInvariants();
3173 
3174   if (!Valid)
3175     return Valid;
3176 
3177   // Make sure all blocks in CFG are valid.
3178   auto validateBlock = [this](const BinaryBasicBlock *BB, StringRef Desc) {
3179     if (!BB->isValid()) {
3180       errs() << "BOLT-ERROR: deleted " << Desc << " " << BB->getName()
3181              << " detected in:\n";
3182       this->dump();
3183       return false;
3184     }
3185     return true;
3186   };
3187   for (const BinaryBasicBlock *BB : BasicBlocks) {
3188     if (!validateBlock(BB, "block"))
3189       return false;
3190     for (const BinaryBasicBlock *PredBB : BB->predecessors())
3191       if (!validateBlock(PredBB, "predecessor"))
3192         return false;
3193     for (const BinaryBasicBlock *SuccBB : BB->successors())
3194       if (!validateBlock(SuccBB, "successor"))
3195         return false;
3196     for (const BinaryBasicBlock *LP : BB->landing_pads())
3197       if (!validateBlock(LP, "landing pad"))
3198         return false;
3199     for (const BinaryBasicBlock *Thrower : BB->throwers())
3200       if (!validateBlock(Thrower, "thrower"))
3201         return false;
3202   }
3203 
3204   for (const BinaryBasicBlock *BB : BasicBlocks) {
3205     std::unordered_set<const BinaryBasicBlock *> BBLandingPads;
3206     for (const BinaryBasicBlock *LP : BB->landing_pads()) {
3207       if (BBLandingPads.count(LP)) {
3208         errs() << "BOLT-ERROR: duplicate landing pad detected in"
3209                << BB->getName() << " in function " << *this << '\n';
3210         return false;
3211       }
3212       BBLandingPads.insert(LP);
3213     }
3214 
3215     std::unordered_set<const BinaryBasicBlock *> BBThrowers;
3216     for (const BinaryBasicBlock *Thrower : BB->throwers()) {
3217       if (BBThrowers.count(Thrower)) {
3218         errs() << "BOLT-ERROR: duplicate thrower detected in" << BB->getName()
3219                << " in function " << *this << '\n';
3220         return false;
3221       }
3222       BBThrowers.insert(Thrower);
3223     }
3224 
3225     for (const BinaryBasicBlock *LPBlock : BB->landing_pads()) {
3226       if (std::find(LPBlock->throw_begin(), LPBlock->throw_end(), BB) ==
3227           LPBlock->throw_end()) {
3228         errs() << "BOLT-ERROR: inconsistent landing pad detected in " << *this
3229                << ": " << BB->getName() << " is in LandingPads but not in "
3230                << LPBlock->getName() << " Throwers\n";
3231         return false;
3232       }
3233     }
3234     for (const BinaryBasicBlock *Thrower : BB->throwers()) {
3235       if (std::find(Thrower->lp_begin(), Thrower->lp_end(), BB) ==
3236           Thrower->lp_end()) {
3237         errs() << "BOLT-ERROR: inconsistent thrower detected in " << *this
3238                << ": " << BB->getName() << " is in Throwers list but not in "
3239                << Thrower->getName() << " LandingPads\n";
3240         return false;
3241       }
3242     }
3243   }
3244 
3245   return Valid;
3246 }
3247 
// Make the terminator branches of every basic block consistent with the
// final order in BasicBlocksLayout: remove unconditional branches that became
// fall-throughs, (re)create branches where a successor no longer follows in
// the layout, and reverse branch conditions where that enables a
// fall-through or a shorter encoding.
void BinaryFunction::fixBranches() {
  auto &MIB = BC.MIB;
  MCContext *Ctx = BC.Ctx.get();

  for (unsigned I = 0, E = BasicBlocksLayout.size(); I != E; ++I) {
    BinaryBasicBlock *BB = BasicBlocksLayout[I];
    const MCSymbol *TBB = nullptr;
    const MCSymbol *FBB = nullptr;
    MCInst *CondBranch = nullptr;
    MCInst *UncondBranch = nullptr;
    // Skip blocks whose terminator sequence cannot be analyzed.
    if (!BB->analyzeBranch(TBB, FBB, CondBranch, UncondBranch))
      continue;

    // We will create unconditional branch with correct destination if needed.
    if (UncondBranch)
      BB->eraseInstruction(BB->findInstruction(UncondBranch));

    // Basic block that follows the current one in the final layout.
    // Note: only a block in the same (hot/cold) fragment can act as a
    // fall-through target.
    const BinaryBasicBlock *NextBB = nullptr;
    if (I + 1 != E && BB->isCold() == BasicBlocksLayout[I + 1]->isCold())
      NextBB = BasicBlocksLayout[I + 1];

    if (BB->succ_size() == 1) {
      // __builtin_unreachable() could create a conditional branch that
      // falls-through into the next function - hence the block will have only
      // one valid successor. Since behaviour is undefined - we replace
      // the conditional branch with an unconditional if required.
      if (CondBranch)
        BB->eraseInstruction(BB->findInstruction(CondBranch));
      if (BB->getSuccessor() == NextBB)
        continue;
      BB->addBranchInstruction(BB->getSuccessor());
    } else if (BB->succ_size() == 2) {
      assert(CondBranch && "conditional branch expected");
      const BinaryBasicBlock *TSuccessor = BB->getConditionalSuccessor(true);
      const BinaryBasicBlock *FSuccessor = BB->getConditionalSuccessor(false);
      // Check whether we support reversing this branch direction
      const bool IsSupported =
          !MIB->isUnsupportedBranch(CondBranch->getOpcode());
      // If the taken successor is the layout fall-through, invert the
      // condition so the old false-side target becomes the taken branch and
      // the fall-through needs no branch at all.
      if (NextBB && NextBB == TSuccessor && IsSupported) {
        std::swap(TSuccessor, FSuccessor);
        {
          // Branch rewriting is serialized under the context lock
          // (presumably because it touches shared MCContext state).
          auto L = BC.scopeLock();
          MIB->reverseBranchCondition(*CondBranch, TSuccessor->getLabel(), Ctx);
        }
        BB->swapConditionalSuccessors();
      } else {
        auto L = BC.scopeLock();
        MIB->replaceBranchTarget(*CondBranch, TSuccessor->getLabel(), Ctx);
      }
      if (TSuccessor == FSuccessor)
        BB->removeDuplicateConditionalSuccessor(CondBranch);
      // If neither successor is the fall-through, an explicit unconditional
      // branch to the false successor is required.
      if (!NextBB ||
          ((NextBB != TSuccessor || !IsSupported) && NextBB != FSuccessor)) {
        // If one of the branches is guaranteed to be "long" while the other
        // could be "short", then prioritize short for "taken". This will
        // generate a sequence 1 byte shorter on x86.
        if (IsSupported && BC.isX86() &&
            TSuccessor->isCold() != FSuccessor->isCold() &&
            BB->isCold() != TSuccessor->isCold()) {
          std::swap(TSuccessor, FSuccessor);
          {
            auto L = BC.scopeLock();
            MIB->reverseBranchCondition(*CondBranch, TSuccessor->getLabel(),
                                        Ctx);
          }
          BB->swapConditionalSuccessors();
        }
        BB->addBranchInstruction(FSuccessor);
      }
    }
    // Cases where the number of successors is 0 (block ends with a
    // terminator) or more than 2 (switch table) don't require branch
    // instruction adjustments.
  }
  assert((!isSimple() || validateCFG()) &&
         "Invalid CFG detected after fixing branches");
}
3326 
// Fold DW_CFA_GNU_args_size CFI pseudo-instructions into annotations on the
// invoke instructions they cover; the CFI pseudos themselves are deleted and
// regenerated only at final code emission.
void BinaryFunction::propagateGnuArgsSizeInfo(
    MCPlusBuilder::AllocatorIdTy AllocId) {
  assert(CurrentState == State::Disassembled && "unexpected function state");

  // Nothing to do unless the function has EH ranges and actually uses
  // DW_CFA_GNU_args_size.
  if (!hasEHRanges() || !usesGnuArgsSize())
    return;

  // The current value of DW_CFA_GNU_args_size affects all following
  // invoke instructions until the next CFI overrides it.
  // It is important to iterate basic blocks in the original order when
  // assigning the value.
  uint64_t CurrentGnuArgsSize = 0;
  for (BinaryBasicBlock *BB : BasicBlocks) {
    for (auto II = BB->begin(); II != BB->end();) {
      MCInst &Instr = *II;
      if (BC.MIB->isCFI(Instr)) {
        const MCCFIInstruction *CFI = getCFIFor(Instr);
        if (CFI->getOperation() == MCCFIInstruction::OpGnuArgsSize) {
          CurrentGnuArgsSize = CFI->getOffset();
          // Delete DW_CFA_GNU_args_size instructions and only regenerate
          // during the final code emission. The information is embedded
          // inside call instructions.
          // The loop relies on erasePseudoInstruction() returning an
          // iterator valid for continued iteration; ++II is skipped via
          // `continue`.
          II = BB->erasePseudoInstruction(II);
          continue;
        }
      } else if (BC.MIB->isInvoke(Instr)) {
        // Add the value of GNU_args_size as an extra operand to invokes.
        BC.MIB->addGnuArgsSize(Instr, CurrentGnuArgsSize, AllocId);
      }
      ++II;
    }
  }
}
3360 
// Post-CFG terminator cleanup for simple functions: drop conditional
// branches in single-successor blocks and append a return instruction to
// reachable, non-terminated blocks that have no successors.
void BinaryFunction::postProcessBranches() {
  if (!isSimple())
    return;
  for (BinaryBasicBlock *BB : BasicBlocksLayout) {
    auto LastInstrRI = BB->getLastNonPseudo();
    if (BB->succ_size() == 1) {
      if (LastInstrRI != BB->rend() &&
          BC.MIB->isConditionalBranch(*LastInstrRI)) {
        // __builtin_unreachable() could create a conditional branch that
        // falls-through into the next function - hence the block will have only
        // one valid successor. Such behaviour is undefined and thus we remove
        // the conditional branch while leaving a valid successor.
        // std::prev(rit.base()) converts the reverse iterator into the
        // forward iterator addressing the same instruction.
        BB->eraseInstruction(std::prev(LastInstrRI.base()));
        LLVM_DEBUG(dbgs() << "BOLT-DEBUG: erasing conditional branch in "
                          << BB->getName() << " in function " << *this << '\n');
      }
    } else if (BB->succ_size() == 0) {
      // Ignore unreachable basic blocks.
      if (BB->pred_size() == 0 || BB->isLandingPad())
        continue;

      // If it's the basic block that does not end up with a terminator - we
      // insert a return instruction unless it's a call instruction.
      if (LastInstrRI == BB->rend()) {
        LLVM_DEBUG(
            dbgs() << "BOLT-DEBUG: at least one instruction expected in BB "
                   << BB->getName() << " in function " << *this << '\n');
        continue;
      }
      if (!BC.MIB->isTerminator(*LastInstrRI) &&
          !BC.MIB->isCall(*LastInstrRI)) {
        LLVM_DEBUG(dbgs() << "BOLT-DEBUG: adding return to basic block "
                          << BB->getName() << " in function " << *this << '\n');
        MCInst ReturnInstr;
        BC.MIB->createReturn(ReturnInstr);
        BB->addInstruction(ReturnInstr);
      }
    }
  }
  assert(validateCFG() && "invalid CFG");
}
3402 
3403 MCSymbol *BinaryFunction::addEntryPointAtOffset(uint64_t Offset) {
3404   assert(Offset && "cannot add primary entry point");
3405   assert(CurrentState == State::Empty || CurrentState == State::Disassembled);
3406 
3407   const uint64_t EntryPointAddress = getAddress() + Offset;
3408   MCSymbol *LocalSymbol = getOrCreateLocalLabel(EntryPointAddress);
3409 
3410   MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(LocalSymbol);
3411   if (EntrySymbol)
3412     return EntrySymbol;
3413 
3414   if (BinaryData *EntryBD = BC.getBinaryDataAtAddress(EntryPointAddress)) {
3415     EntrySymbol = EntryBD->getSymbol();
3416   } else {
3417     EntrySymbol = BC.getOrCreateGlobalSymbol(
3418         EntryPointAddress, Twine("__ENTRY_") + getOneName() + "@");
3419   }
3420   SecondaryEntryPoints[LocalSymbol] = EntrySymbol;
3421 
3422   BC.setSymbolToFunctionMap(EntrySymbol, this);
3423 
3424   return EntrySymbol;
3425 }
3426 
3427 MCSymbol *BinaryFunction::addEntryPoint(const BinaryBasicBlock &BB) {
3428   assert(CurrentState == State::CFG &&
3429          "basic block can be added as an entry only in a function with CFG");
3430 
3431   if (&BB == BasicBlocks.front())
3432     return getSymbol();
3433 
3434   MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(BB);
3435   if (EntrySymbol)
3436     return EntrySymbol;
3437 
3438   EntrySymbol =
3439       BC.Ctx->getOrCreateSymbol("__ENTRY_" + BB.getLabel()->getName());
3440 
3441   SecondaryEntryPoints[BB.getLabel()] = EntrySymbol;
3442 
3443   BC.setSymbolToFunctionMap(EntrySymbol, this);
3444 
3445   return EntrySymbol;
3446 }
3447 
3448 MCSymbol *BinaryFunction::getSymbolForEntryID(uint64_t EntryID) {
3449   if (EntryID == 0)
3450     return getSymbol();
3451 
3452   if (!isMultiEntry())
3453     return nullptr;
3454 
3455   uint64_t NumEntries = 0;
3456   if (hasCFG()) {
3457     for (BinaryBasicBlock *BB : BasicBlocks) {
3458       MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(*BB);
3459       if (!EntrySymbol)
3460         continue;
3461       if (NumEntries == EntryID)
3462         return EntrySymbol;
3463       ++NumEntries;
3464     }
3465   } else {
3466     for (std::pair<const uint32_t, MCSymbol *> &KV : Labels) {
3467       MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(KV.second);
3468       if (!EntrySymbol)
3469         continue;
3470       if (NumEntries == EntryID)
3471         return EntrySymbol;
3472       ++NumEntries;
3473     }
3474   }
3475 
3476   return nullptr;
3477 }
3478 
// Map an entry-point symbol back to its entry ID (0 for the primary entry).
// Aborts via llvm_unreachable if Symbol is not an entry point of this
// function.
uint64_t BinaryFunction::getEntryIDForSymbol(const MCSymbol *Symbol) const {
  if (!isMultiEntry())
    return 0;

  // Any of the function's own symbols denotes the primary entry point.
  for (const MCSymbol *FunctionSymbol : getSymbols())
    if (FunctionSymbol == Symbol)
      return 0;

  // Check all secondary entries available as either basic blocks or labels.
  uint64_t NumEntries = 0;
  for (const BinaryBasicBlock *BB : BasicBlocks) {
    MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(*BB);
    if (!EntrySymbol)
      continue;
    if (EntrySymbol == Symbol)
      return NumEntries;
    ++NumEntries;
  }
  // Not found among basic blocks - rescan via the label map, restarting the
  // entry numbering.
  NumEntries = 0;
  for (const std::pair<const uint32_t, MCSymbol *> &KV : Labels) {
    MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(KV.second);
    if (!EntrySymbol)
      continue;
    if (EntrySymbol == Symbol)
      return NumEntries;
    ++NumEntries;
  }

  llvm_unreachable("symbol not found");
}
3509 
3510 bool BinaryFunction::forEachEntryPoint(EntryPointCallbackTy Callback) const {
3511   bool Status = Callback(0, getSymbol());
3512   if (!isMultiEntry())
3513     return Status;
3514 
3515   for (const std::pair<const uint32_t, MCSymbol *> &KV : Labels) {
3516     if (!Status)
3517       break;
3518 
3519     MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(KV.second);
3520     if (!EntrySymbol)
3521       continue;
3522 
3523     Status = Callback(KV.first, EntrySymbol);
3524   }
3525 
3526   return Status;
3527 }
3528 
// Return the basic blocks of this function in depth-first order starting
// from the entry points. As a side effect, each visited block's layout index
// is set to its DFS position; blocks never reached keep InvalidIndex.
BinaryFunction::BasicBlockOrderType BinaryFunction::dfs() const {
  BasicBlockOrderType DFS;
  unsigned Index = 0;
  std::stack<BinaryBasicBlock *> Stack;

  // Push entry points to the stack in reverse order.
  //
  // NB: we rely on the original order of entries to match.
  for (auto BBI = layout_rbegin(); BBI != layout_rend(); ++BBI) {
    BinaryBasicBlock *BB = *BBI;
    if (isEntryPoint(*BB))
      Stack.push(BB);
    // InvalidIndex doubles as the "not yet visited" marker below.
    BB->setLayoutIndex(BinaryBasicBlock::InvalidIndex);
  }

  while (!Stack.empty()) {
    BinaryBasicBlock *BB = Stack.top();
    Stack.pop();

    // Skip blocks already assigned a DFS position.
    if (BB->getLayoutIndex() != BinaryBasicBlock::InvalidIndex)
      continue;

    BB->setLayoutIndex(Index++);
    DFS.push_back(BB);

    for (BinaryBasicBlock *SuccBB : BB->landing_pads()) {
      Stack.push(SuccBB);
    }

    const MCSymbol *TBB = nullptr;
    const MCSymbol *FBB = nullptr;
    MCInst *CondBranch = nullptr;
    MCInst *UncondBranch = nullptr;
    // For two-way conditional blocks, the order of the pushes (and hence the
    // visit order of the successors) depends on whether the branch condition
    // code is in its canonical form, keeping the DFS deterministic with
    // respect to branch direction.
    if (BB->analyzeBranch(TBB, FBB, CondBranch, UncondBranch) && CondBranch &&
        BB->succ_size() == 2) {
      if (BC.MIB->getCanonicalBranchCondCode(BC.MIB->getCondCode(
              *CondBranch)) == BC.MIB->getCondCode(*CondBranch)) {
        Stack.push(BB->getConditionalSuccessor(true));
        Stack.push(BB->getConditionalSuccessor(false));
      } else {
        Stack.push(BB->getConditionalSuccessor(false));
        Stack.push(BB->getConditionalSuccessor(true));
      }
    } else {
      for (BinaryBasicBlock *SuccBB : BB->successors()) {
        Stack.push(SuccBB);
      }
    }
  }

  return DFS;
}
3581 
3582 size_t BinaryFunction::computeHash(bool UseDFS,
3583                                    OperandHashFuncTy OperandHashFunc) const {
3584   if (size() == 0)
3585     return 0;
3586 
3587   assert(hasCFG() && "function is expected to have CFG");
3588 
3589   const BasicBlockOrderType &Order = UseDFS ? dfs() : BasicBlocksLayout;
3590 
3591   // The hash is computed by creating a string of all instruction opcodes and
3592   // possibly their operands and then hashing that string with std::hash.
3593   std::string HashString;
3594   for (const BinaryBasicBlock *BB : Order) {
3595     for (const MCInst &Inst : *BB) {
3596       unsigned Opcode = Inst.getOpcode();
3597 
3598       if (BC.MIB->isPseudo(Inst))
3599         continue;
3600 
3601       // Ignore unconditional jumps since we check CFG consistency by processing
3602       // basic blocks in order and do not rely on branches to be in-sync with
3603       // CFG. Note that we still use condition code of conditional jumps.
3604       if (BC.MIB->isUnconditionalBranch(Inst))
3605         continue;
3606 
3607       if (Opcode == 0)
3608         HashString.push_back(0);
3609 
3610       while (Opcode) {
3611         uint8_t LSB = Opcode & 0xff;
3612         HashString.push_back(LSB);
3613         Opcode = Opcode >> 8;
3614       }
3615 
3616       for (const MCOperand &Op : MCPlus::primeOperands(Inst))
3617         HashString.append(OperandHashFunc(Op));
3618     }
3619   }
3620 
3621   return Hash = std::hash<std::string>{}(HashString);
3622 }
3623 
3624 void BinaryFunction::insertBasicBlocks(
3625     BinaryBasicBlock *Start,
3626     std::vector<std::unique_ptr<BinaryBasicBlock>> &&NewBBs,
3627     const bool UpdateLayout, const bool UpdateCFIState,
3628     const bool RecomputeLandingPads) {
3629   const int64_t StartIndex = Start ? getIndex(Start) : -1LL;
3630   const size_t NumNewBlocks = NewBBs.size();
3631 
3632   BasicBlocks.insert(BasicBlocks.begin() + (StartIndex + 1), NumNewBlocks,
3633                      nullptr);
3634 
3635   int64_t I = StartIndex + 1;
3636   for (std::unique_ptr<BinaryBasicBlock> &BB : NewBBs) {
3637     assert(!BasicBlocks[I]);
3638     BasicBlocks[I++] = BB.release();
3639   }
3640 
3641   if (RecomputeLandingPads)
3642     recomputeLandingPads();
3643   else
3644     updateBBIndices(0);
3645 
3646   if (UpdateLayout)
3647     updateLayout(Start, NumNewBlocks);
3648 
3649   if (UpdateCFIState)
3650     updateCFIState(Start, NumNewBlocks);
3651 }
3652 
3653 BinaryFunction::iterator BinaryFunction::insertBasicBlocks(
3654     BinaryFunction::iterator StartBB,
3655     std::vector<std::unique_ptr<BinaryBasicBlock>> &&NewBBs,
3656     const bool UpdateLayout, const bool UpdateCFIState,
3657     const bool RecomputeLandingPads) {
3658   const unsigned StartIndex = getIndex(&*StartBB);
3659   const size_t NumNewBlocks = NewBBs.size();
3660 
3661   BasicBlocks.insert(BasicBlocks.begin() + StartIndex + 1, NumNewBlocks,
3662                      nullptr);
3663   auto RetIter = BasicBlocks.begin() + StartIndex + 1;
3664 
3665   unsigned I = StartIndex + 1;
3666   for (std::unique_ptr<BinaryBasicBlock> &BB : NewBBs) {
3667     assert(!BasicBlocks[I]);
3668     BasicBlocks[I++] = BB.release();
3669   }
3670 
3671   if (RecomputeLandingPads)
3672     recomputeLandingPads();
3673   else
3674     updateBBIndices(0);
3675 
3676   if (UpdateLayout)
3677     updateLayout(*std::prev(RetIter), NumNewBlocks);
3678 
3679   if (UpdateCFIState)
3680     updateCFIState(*std::prev(RetIter), NumNewBlocks);
3681 
3682   return RetIter;
3683 }
3684 
3685 void BinaryFunction::updateBBIndices(const unsigned StartIndex) {
3686   for (unsigned I = StartIndex; I < BasicBlocks.size(); ++I)
3687     BasicBlocks[I]->Index = I;
3688 }
3689 
3690 void BinaryFunction::updateCFIState(BinaryBasicBlock *Start,
3691                                     const unsigned NumNewBlocks) {
3692   const int32_t CFIState = Start->getCFIStateAtExit();
3693   const unsigned StartIndex = getIndex(Start) + 1;
3694   for (unsigned I = 0; I < NumNewBlocks; ++I)
3695     BasicBlocks[StartIndex + I]->setCFIState(CFIState);
3696 }
3697 
3698 void BinaryFunction::updateLayout(BinaryBasicBlock *Start,
3699                                   const unsigned NumNewBlocks) {
3700   // If start not provided insert new blocks at the beginning
3701   if (!Start) {
3702     BasicBlocksLayout.insert(layout_begin(), BasicBlocks.begin(),
3703                              BasicBlocks.begin() + NumNewBlocks);
3704     updateLayoutIndices();
3705     return;
3706   }
3707 
3708   // Insert new blocks in the layout immediately after Start.
3709   auto Pos = std::find(layout_begin(), layout_end(), Start);
3710   assert(Pos != layout_end());
3711   BasicBlockListType::iterator Begin =
3712       std::next(BasicBlocks.begin(), getIndex(Start) + 1);
3713   BasicBlockListType::iterator End =
3714       std::next(BasicBlocks.begin(), getIndex(Start) + NumNewBlocks + 1);
3715   BasicBlocksLayout.insert(Pos + 1, Begin, End);
3716   updateLayoutIndices();
3717 }
3718 
3719 bool BinaryFunction::checkForAmbiguousJumpTables() {
3720   SmallSet<uint64_t, 4> JumpTables;
3721   for (BinaryBasicBlock *&BB : BasicBlocks) {
3722     for (MCInst &Inst : *BB) {
3723       if (!BC.MIB->isIndirectBranch(Inst))
3724         continue;
3725       uint64_t JTAddress = BC.MIB->getJumpTable(Inst);
3726       if (!JTAddress)
3727         continue;
3728       // This address can be inside another jump table, but we only consider
3729       // it ambiguous when the same start address is used, not the same JT
3730       // object.
3731       if (!JumpTables.count(JTAddress)) {
3732         JumpTables.insert(JTAddress);
3733         continue;
3734       }
3735       return true;
3736     }
3737   }
3738   return false;
3739 }
3740 
// Ensure every indirect branch in this function references its own jump
// table object: when a second branch is found using an already-seen table,
// duplicate the table and patch the instruction that materializes its
// address. The matchers below target x86-shaped jump-table sequences (see
// the inline assembly examples).
void BinaryFunction::disambiguateJumpTables(
    MCPlusBuilder::AllocatorIdTy AllocId) {
  assert((opts::JumpTables != JTS_BASIC && isSimple()) || !BC.HasRelocations);
  SmallPtrSet<JumpTable *, 4> JumpTables;
  for (BinaryBasicBlock *&BB : BasicBlocks) {
    for (MCInst &Inst : *BB) {
      if (!BC.MIB->isIndirectBranch(Inst))
        continue;
      JumpTable *JT = getJumpTable(Inst);
      if (!JT)
        continue;
      // First branch using this table: remember it and move on.
      auto Iter = JumpTables.find(JT);
      if (Iter == JumpTables.end()) {
        JumpTables.insert(JT);
        continue;
      }
      // This instruction is an indirect jump using a jump table, but it is
      // using the same jump table of another jump. Try all our tricks to
      // extract the jump table symbol and make it point to a new, duplicated JT
      MCPhysReg BaseReg1;
      uint64_t Scale;
      const MCSymbol *Target;
      // If the first matcher matches, the instruction to patch is the
      // indirect branch itself.
      MCInst *JTLoadInst = &Inst;
      // Try a standard indirect jump matcher, scale 8
      std::unique_ptr<MCPlusBuilder::MCInstMatcher> IndJmpMatcher =
          BC.MIB->matchIndJmp(BC.MIB->matchReg(BaseReg1),
                              BC.MIB->matchImm(Scale), BC.MIB->matchReg(),
                              /*Offset=*/BC.MIB->matchSymbol(Target));
      if (!IndJmpMatcher->match(
              *BC.MRI, *BC.MIB,
              MutableArrayRef<MCInst>(&*BB->begin(), &Inst + 1), -1) ||
          BaseReg1 != BC.MIB->getNoRegister() || Scale != 8) {
        MCPhysReg BaseReg2;
        uint64_t Offset;
        // Standard JT matching failed. Trying now:
        //     movq  "jt.2397/1"(,%rax,8), %rax
        //     jmpq  *%rax
        std::unique_ptr<MCPlusBuilder::MCInstMatcher> LoadMatcherOwner =
            BC.MIB->matchLoad(BC.MIB->matchReg(BaseReg1),
                              BC.MIB->matchImm(Scale), BC.MIB->matchReg(),
                              /*Offset=*/BC.MIB->matchSymbol(Target));
        MCPlusBuilder::MCInstMatcher *LoadMatcher = LoadMatcherOwner.get();
        std::unique_ptr<MCPlusBuilder::MCInstMatcher> IndJmpMatcher2 =
            BC.MIB->matchIndJmp(std::move(LoadMatcherOwner));
        if (!IndJmpMatcher2->match(
                *BC.MRI, *BC.MIB,
                MutableArrayRef<MCInst>(&*BB->begin(), &Inst + 1), -1) ||
            BaseReg1 != BC.MIB->getNoRegister() || Scale != 8) {
          // JT matching failed. Trying now:
          // PIC-style matcher, scale 4
          //    addq    %rdx, %rsi
          //    addq    %rdx, %rdi
          //    leaq    DATAat0x402450(%rip), %r11
          //    movslq  (%r11,%rdx,4), %rcx
          //    addq    %r11, %rcx
          //    jmpq    *%rcx # JUMPTABLE @0x402450
          std::unique_ptr<MCPlusBuilder::MCInstMatcher> PICIndJmpMatcher =
              BC.MIB->matchIndJmp(BC.MIB->matchAdd(
                  BC.MIB->matchReg(BaseReg1),
                  BC.MIB->matchLoad(BC.MIB->matchReg(BaseReg2),
                                    BC.MIB->matchImm(Scale), BC.MIB->matchReg(),
                                    BC.MIB->matchImm(Offset))));
          std::unique_ptr<MCPlusBuilder::MCInstMatcher> LEAMatcherOwner =
              BC.MIB->matchLoadAddr(BC.MIB->matchSymbol(Target));
          MCPlusBuilder::MCInstMatcher *LEAMatcher = LEAMatcherOwner.get();
          std::unique_ptr<MCPlusBuilder::MCInstMatcher> PICBaseAddrMatcher =
              BC.MIB->matchIndJmp(BC.MIB->matchAdd(std::move(LEAMatcherOwner),
                                                   BC.MIB->matchAnyOperand()));
          if (!PICIndJmpMatcher->match(
                  *BC.MRI, *BC.MIB,
                  MutableArrayRef<MCInst>(&*BB->begin(), &Inst + 1), -1) ||
              Scale != 4 || BaseReg1 != BaseReg2 || Offset != 0 ||
              !PICBaseAddrMatcher->match(
                  *BC.MRI, *BC.MIB,
                  MutableArrayRef<MCInst>(&*BB->begin(), &Inst + 1), -1)) {
            // NOTE(review): llvm_unreachable aborts in asserts builds, so the
            // `continue` below is effectively dead; in no-asserts builds
            // reaching this point is undefined behavior.
            llvm_unreachable("Failed to extract jump table base");
            continue;
          }
          // Matched PIC, identify the instruction with the reference to the JT
          JTLoadInst = LEAMatcher->CurInst;
        } else {
          // Matched non-PIC
          JTLoadInst = LoadMatcher->CurInst;
        }
      }

      uint64_t NewJumpTableID = 0;
      const MCSymbol *NewJTLabel;
      std::tie(NewJumpTableID, NewJTLabel) =
          BC.duplicateJumpTable(*this, JT, Target);
      {
        auto L = BC.scopeLock();
        BC.MIB->replaceMemOperandDisp(*JTLoadInst, NewJTLabel, BC.Ctx.get());
      }
      // We use a unique ID with the high bit set as address for this "injected"
      // jump table (not originally in the input binary).
      BC.MIB->setJumpTable(Inst, NewJumpTableID, 0, AllocId);
    }
  }
}
3843 
3844 bool BinaryFunction::replaceJumpTableEntryIn(BinaryBasicBlock *BB,
3845                                              BinaryBasicBlock *OldDest,
3846                                              BinaryBasicBlock *NewDest) {
3847   MCInst *Instr = BB->getLastNonPseudoInstr();
3848   if (!Instr || !BC.MIB->isIndirectBranch(*Instr))
3849     return false;
3850   uint64_t JTAddress = BC.MIB->getJumpTable(*Instr);
3851   assert(JTAddress && "Invalid jump table address");
3852   JumpTable *JT = getJumpTableContainingAddress(JTAddress);
3853   assert(JT && "No jump table structure for this indirect branch");
3854   bool Patched = JT->replaceDestination(JTAddress, OldDest->getLabel(),
3855                                         NewDest->getLabel());
3856   (void)Patched;
3857   assert(Patched && "Invalid entry to be replaced in jump table");
3858   return true;
3859 }
3860 
// Split the CFG edge From->To by inserting a new basic block between the two
// blocks. The new block inherits From's hot/cold placement and the edge's
// profile counts. Returns the newly created block.
BinaryBasicBlock *BinaryFunction::splitEdge(BinaryBasicBlock *From,
                                            BinaryBasicBlock *To) {
  // Create intermediate BB
  MCSymbol *Tmp;
  {
    auto L = BC.scopeLock();
    Tmp = BC.Ctx->createNamedTempSymbol("SplitEdge");
  }
  // Link new BBs to the original input offset of the From BB, so we can map
  // samples recorded in new BBs back to the original BB seen in the input
  // binary (if using BAT)
  std::unique_ptr<BinaryBasicBlock> NewBB =
      createBasicBlock(From->getInputOffset(), Tmp);
  BinaryBasicBlock *NewBBPtr = NewBB.get();

  // Update "From" BB
  // Locate the To successor and its branch info (the iterators advance in
  // lock-step).
  auto I = From->succ_begin();
  auto BI = From->branch_info_begin();
  for (; I != From->succ_end(); ++I) {
    if (*I == To)
      break;
    ++BI;
  }
  assert(I != From->succ_end() && "Invalid CFG edge in splitEdge!");
  // Save the profile data of the edge being split before it is replaced.
  uint64_t OrigCount = BI->Count;
  uint64_t OrigMispreds = BI->MispredictedCount;
  // Retarget any jump-table entry before rewiring the successor so the
  // indirect branch and the CFG stay in sync.
  replaceJumpTableEntryIn(From, To, NewBBPtr);
  From->replaceSuccessor(To, NewBBPtr, OrigCount, OrigMispreds);

  // The new block forwards the full edge profile on to To.
  NewBB->addSuccessor(To, OrigCount, OrigMispreds);
  NewBB->setExecutionCount(OrigCount);
  NewBB->setIsCold(From->isCold());

  // Update CFI and BB layout with new intermediate BB
  std::vector<std::unique_ptr<BinaryBasicBlock>> NewBBs;
  NewBBs.emplace_back(std::move(NewBB));
  insertBasicBlocks(From, std::move(NewBBs), true, true,
                    /*RecomputeLandingPads=*/false);
  return NewBBPtr;
}
3901 
void BinaryFunction::deleteConservativeEdges() {
  // Our goal is to aggressively remove edges from the CFG that we believe are
  // wrong. This is used for instrumentation, where it is safe to remove
  // fallthrough edges because we won't reorder blocks.
  for (auto I = BasicBlocks.begin(), E = BasicBlocks.end(); I != E; ++I) {
    BinaryBasicBlock *BB = *I;
    // Only non-empty blocks with exactly one successor are candidates.
    if (BB->succ_size() != 1 || BB->size() == 0)
      continue;

    auto NextBB = std::next(I);
    MCInst *Last = BB->getLastNonPseudoInstr();
    // Fallthrough is a landing pad? Delete this edge (as long as we don't
    // have a direct jump to it)
    if ((*BB->succ_begin())->isLandingPad() && NextBB != E &&
        *BB->succ_begin() == *NextBB && Last && !BC.MIB->isBranch(*Last)) {
      BB->removeAllSuccessors();
      continue;
    }

    // Look for suspicious calls at the end of BB where gcc may optimize it and
    // remove the jump to the epilogue when it knows the call won't return.
    if (!Last || !BC.MIB->isCall(*Last))
      continue;

    const MCSymbol *CalleeSymbol = BC.MIB->getTargetSymbol(*Last);
    if (!CalleeSymbol)
      continue;

    // Calls into these well-known no-return PLT entries never fall through,
    // so the single successor edge must be bogus.
    StringRef CalleeName = CalleeSymbol->getName();
    if (CalleeName != "__cxa_throw@PLT" && CalleeName != "_Unwind_Resume@PLT" &&
        CalleeName != "__cxa_rethrow@PLT" && CalleeName != "exit@PLT" &&
        CalleeName != "abort@PLT")
      continue;

    BB->removeAllSuccessors();
  }
}
3939 
3940 bool BinaryFunction::isDataMarker(const SymbolRef &Symbol,
3941                                   uint64_t SymbolSize) const {
3942   // For aarch64, the ABI defines mapping symbols so we identify data in the
3943   // code section (see IHI0056B). $d identifies a symbol starting data contents.
3944   if (BC.isAArch64() && Symbol.getType() &&
3945       cantFail(Symbol.getType()) == SymbolRef::ST_Unknown && SymbolSize == 0 &&
3946       Symbol.getName() &&
3947       (cantFail(Symbol.getName()) == "$d" ||
3948        cantFail(Symbol.getName()).startswith("$d.")))
3949     return true;
3950   return false;
3951 }
3952 
3953 bool BinaryFunction::isCodeMarker(const SymbolRef &Symbol,
3954                                   uint64_t SymbolSize) const {
3955   // For aarch64, the ABI defines mapping symbols so we identify data in the
3956   // code section (see IHI0056B). $x identifies a symbol starting code or the
3957   // end of a data chunk inside code.
3958   if (BC.isAArch64() && Symbol.getType() &&
3959       cantFail(Symbol.getType()) == SymbolRef::ST_Unknown && SymbolSize == 0 &&
3960       Symbol.getName() &&
3961       (cantFail(Symbol.getName()) == "$x" ||
3962        cantFail(Symbol.getName()).startswith("$x.")))
3963     return true;
3964   return false;
3965 }
3966 
3967 bool BinaryFunction::isSymbolValidInScope(const SymbolRef &Symbol,
3968                                           uint64_t SymbolSize) const {
3969   // If this symbol is in a different section from the one where the
3970   // function symbol is, don't consider it as valid.
3971   if (!getOriginSection()->containsAddress(
3972           cantFail(Symbol.getAddress(), "cannot get symbol address")))
3973     return false;
3974 
3975   // Some symbols are tolerated inside function bodies, others are not.
3976   // The real function boundaries may not be known at this point.
3977   if (isDataMarker(Symbol, SymbolSize) || isCodeMarker(Symbol, SymbolSize))
3978     return true;
3979 
3980   // It's okay to have a zero-sized symbol in the middle of non-zero-sized
3981   // function.
3982   if (SymbolSize == 0 && containsAddress(cantFail(Symbol.getAddress())))
3983     return true;
3984 
3985   if (cantFail(Symbol.getType()) != SymbolRef::ST_Unknown)
3986     return false;
3987 
3988   if (cantFail(Symbol.getFlags()) & SymbolRef::SF_Global)
3989     return false;
3990 
3991   return true;
3992 }
3993 
3994 void BinaryFunction::adjustExecutionCount(uint64_t Count) {
3995   if (getKnownExecutionCount() == 0 || Count == 0)
3996     return;
3997 
3998   if (ExecutionCount < Count)
3999     Count = ExecutionCount;
4000 
4001   double AdjustmentRatio = ((double)ExecutionCount - Count) / ExecutionCount;
4002   if (AdjustmentRatio < 0.0)
4003     AdjustmentRatio = 0.0;
4004 
4005   for (BinaryBasicBlock *&BB : layout())
4006     BB->adjustExecutionCount(AdjustmentRatio);
4007 
4008   ExecutionCount -= Count;
4009 }
4010 
4011 BinaryFunction::~BinaryFunction() {
4012   for (BinaryBasicBlock *BB : BasicBlocks)
4013     delete BB;
4014   for (BinaryBasicBlock *BB : DeletedBasicBlocks)
4015     delete BB;
4016 }
4017 
/// Compute loop nesting information for this function and annotate each
/// discovered loop with depth and profile-derived counts (entry, exit, and
/// total back-edge counts). Populates the BLI member and its aggregate
/// statistics (TotalLoops, OuterLoops, MaximumDepth).
void BinaryFunction::calculateLoopInfo() {
  // Discover loops.
  BinaryDominatorTree DomTree;
  DomTree.recalculate(*this);
  BLI.reset(new BinaryLoopInfo());
  BLI->analyze(DomTree);

  // Traverse discovered loops and add depth and profile information.
  // Seed the worklist with all top-level loops; each counts as an outer loop.
  std::stack<BinaryLoop *> St;
  for (auto I = BLI->begin(), E = BLI->end(); I != E; ++I) {
    St.push(*I);
    ++BLI->OuterLoops;
  }

  // Depth-first traversal over the loop forest.
  while (!St.empty()) {
    BinaryLoop *L = St.top();
    St.pop();
    ++BLI->TotalLoops;
    BLI->MaximumDepth = std::max(L->getLoopDepth(), BLI->MaximumDepth);

    // Add nested loops in the stack.
    for (BinaryLoop::iterator I = L->begin(), E = L->end(); I != E; ++I)
      St.push(*I);

    // Skip if no valid profile is found.
    if (!hasValidProfile()) {
      L->EntryCount = COUNT_NO_PROFILE;
      L->ExitCount = COUNT_NO_PROFILE;
      L->TotalBackEdgeCount = COUNT_NO_PROFILE;
      continue;
    }

    // Compute back edge count: sum the counts of latch->header edges.
    SmallVector<BinaryBasicBlock *, 1> Latches;
    L->getLoopLatches(Latches);

    for (BinaryBasicBlock *Latch : Latches) {
      // Branch info entries run parallel to the successor list, so BI is
      // advanced in lockstep with the successors iteration.
      auto BI = Latch->branch_info_begin();
      for (BinaryBasicBlock *Succ : Latch->successors()) {
        if (Succ == L->getHeader()) {
          assert(BI->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
                 "profile data not found");
          L->TotalBackEdgeCount += BI->Count;
        }
        ++BI;
      }
    }

    // Compute entry count: header executions not attributable to back edges.
    L->EntryCount = L->getHeader()->getExecutionCount() - L->TotalBackEdgeCount;

    // Compute exit count: sum the counts of all exiting->exit-target edges.
    SmallVector<BinaryLoop::Edge, 1> ExitEdges;
    L->getExitEdges(ExitEdges);
    for (BinaryLoop::Edge &Exit : ExitEdges) {
      const BinaryBasicBlock *Exiting = Exit.first;
      const BinaryBasicBlock *ExitTarget = Exit.second;
      // Same lockstep iteration of branch info and successors as above.
      auto BI = Exiting->branch_info_begin();
      for (BinaryBasicBlock *Succ : Exiting->successors()) {
        if (Succ == ExitTarget) {
          assert(BI->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
                 "profile data not found");
          L->ExitCount += BI->Count;
        }
        ++BI;
      }
    }
  }
}
4087 
/// Propagate post-emission addresses/sizes from the MC layout back into this
/// function: set the function's output address and size (hot and cold parts,
/// constant islands) and, when needed for debug info, secondary entry points,
/// or address translation, the per-basic-block output address ranges.
void BinaryFunction::updateOutputValues(const MCAsmLayout &Layout) {
  // A function that was never emitted keeps its input address and size.
  if (!isEmitted()) {
    assert(!isInjected() && "injected function should be emitted");
    setOutputAddress(getAddress());
    setOutputSize(getSize());
    return;
  }

  const uint64_t BaseAddress = getCodeSection()->getOutputAddress();
  ErrorOr<BinarySection &> ColdSection = getColdCodeSection();
  // ColdSection is only dereferenced when the function is split.
  const uint64_t ColdBaseAddress =
      isSplit() ? ColdSection->getOutputAddress() : 0;
  if (BC.HasRelocations || isInjected()) {
    // In relocation mode the function may have moved: compute its new
    // address/size from the emitted symbol offsets within the section.
    const uint64_t StartOffset = Layout.getSymbolOffset(*getSymbol());
    const uint64_t EndOffset = Layout.getSymbolOffset(*getFunctionEndLabel());
    setOutputAddress(BaseAddress + StartOffset);
    setOutputSize(EndOffset - StartOffset);
    if (hasConstantIsland()) {
      const uint64_t DataOffset =
          Layout.getSymbolOffset(*getFunctionConstantIslandLabel());
      setOutputDataAddress(BaseAddress + DataOffset);
    }
    if (isSplit()) {
      const MCSymbol *ColdStartSymbol = getColdSymbol();
      assert(ColdStartSymbol && ColdStartSymbol->isDefined() &&
             "split function should have defined cold symbol");
      const MCSymbol *ColdEndSymbol = getFunctionColdEndLabel();
      assert(ColdEndSymbol && ColdEndSymbol->isDefined() &&
             "split function should have defined cold end symbol");
      const uint64_t ColdStartOffset = Layout.getSymbolOffset(*ColdStartSymbol);
      const uint64_t ColdEndOffset = Layout.getSymbolOffset(*ColdEndSymbol);
      cold().setAddress(ColdBaseAddress + ColdStartOffset);
      cold().setImageSize(ColdEndOffset - ColdStartOffset);
      if (hasConstantIsland()) {
        const uint64_t DataOffset =
            Layout.getSymbolOffset(*getFunctionColdConstantIslandLabel());
        setOutputColdDataAddress(ColdBaseAddress + DataOffset);
      }
    }
  } else {
    // Non-relocation mode: the function stays in place, only the size may
    // have changed after emission.
    setOutputAddress(getAddress());
    setOutputSize(Layout.getSymbolOffset(*getFunctionEndLabel()));
  }

  // Update basic block output ranges for the debug info, if we have
  // secondary entry points in the symbol table to update or if writing BAT.
  if (!opts::UpdateDebugSections && !isMultiEntry() &&
      !requiresAddressTranslation())
    return;

  // Output ranges should match the input if the body hasn't changed.
  if (!isSimple() && !BC.HasRelocations)
    return;

  // AArch64 may have functions that only contain a constant island (no code).
  if (layout_begin() == layout_end())
    return;

  // Assign each block its output start address; a block's end address is the
  // next block's start, except across the hot/cold split boundary.
  BinaryBasicBlock *PrevBB = nullptr;
  for (auto BBI = layout_begin(), BBE = layout_end(); BBI != BBE; ++BBI) {
    BinaryBasicBlock *BB = *BBI;
    assert(BB->getLabel()->isDefined() && "symbol should be defined");
    const uint64_t BBBaseAddress = BB->isCold() ? ColdBaseAddress : BaseAddress;
    if (!BC.HasRelocations) {
      // Sanity check: without relocations the base must match the
      // already-computed output addresses.
      if (BB->isCold()) {
        assert(BBBaseAddress == cold().getAddress());
      } else {
        assert(BBBaseAddress == getOutputAddress());
      }
    }
    const uint64_t BBOffset = Layout.getSymbolOffset(*BB->getLabel());
    const uint64_t BBAddress = BBBaseAddress + BBOffset;
    BB->setOutputStartAddress(BBAddress);

    if (PrevBB) {
      uint64_t PrevBBEndAddress = BBAddress;
      // When crossing from hot to cold code, the previous block ends at the
      // end of the hot part, not at the next (cold) block's start.
      if (BB->isCold() != PrevBB->isCold())
        PrevBBEndAddress = getOutputAddress() + getOutputSize();
      PrevBB->setOutputEndAddress(PrevBBEndAddress);
    }
    PrevBB = BB;

    BB->updateOutputValues(Layout);
  }
  // Close off the last block: it ends at the end of its (hot or cold) part.
  PrevBB->setOutputEndAddress(PrevBB->isCold()
                                  ? cold().getAddress() + cold().getImageSize()
                                  : getOutputAddress() + getOutputSize());
}
4176 
4177 DebugAddressRangesVector BinaryFunction::getOutputAddressRanges() const {
4178   DebugAddressRangesVector OutputRanges;
4179 
4180   if (isFolded())
4181     return OutputRanges;
4182 
4183   if (IsFragment)
4184     return OutputRanges;
4185 
4186   OutputRanges.emplace_back(getOutputAddress(),
4187                             getOutputAddress() + getOutputSize());
4188   if (isSplit()) {
4189     assert(isEmitted() && "split function should be emitted");
4190     OutputRanges.emplace_back(cold().getAddress(),
4191                               cold().getAddress() + cold().getImageSize());
4192   }
4193 
4194   if (isSimple())
4195     return OutputRanges;
4196 
4197   for (BinaryFunction *Frag : Fragments) {
4198     assert(!Frag->isSimple() &&
4199            "fragment of non-simple function should also be non-simple");
4200     OutputRanges.emplace_back(Frag->getOutputAddress(),
4201                               Frag->getOutputAddress() + Frag->getOutputSize());
4202   }
4203 
4204   return OutputRanges;
4205 }
4206 
4207 uint64_t BinaryFunction::translateInputToOutputAddress(uint64_t Address) const {
4208   if (isFolded())
4209     return 0;
4210 
4211   // If the function hasn't changed return the same address.
4212   if (!isEmitted())
4213     return Address;
4214 
4215   if (Address < getAddress())
4216     return 0;
4217 
4218   // Check if the address is associated with an instruction that is tracked
4219   // by address translation.
4220   auto KV = InputOffsetToAddressMap.find(Address - getAddress());
4221   if (KV != InputOffsetToAddressMap.end())
4222     return KV->second;
4223 
4224   // FIXME: #18950828 - we rely on relative offsets inside basic blocks to stay
4225   //        intact. Instead we can use pseudo instructions and/or annotations.
4226   const uint64_t Offset = Address - getAddress();
4227   const BinaryBasicBlock *BB = getBasicBlockContainingOffset(Offset);
4228   if (!BB) {
4229     // Special case for address immediately past the end of the function.
4230     if (Offset == getSize())
4231       return getOutputAddress() + getOutputSize();
4232 
4233     return 0;
4234   }
4235 
4236   return std::min(BB->getOutputAddressRange().first + Offset - BB->getOffset(),
4237                   BB->getOutputAddressRange().second);
4238 }
4239 
/// Translate a set of input (pre-BOLT) debug address ranges into output
/// ranges by walking the basic blocks covering each input range and mapping
/// their offsets to output addresses. Returns sorted, merged output ranges.
DebugAddressRangesVector BinaryFunction::translateInputToOutputRanges(
    const DWARFAddressRangesVector &InputRanges) const {
  DebugAddressRangesVector OutputRanges;

  // A folded function has no output ranges.
  if (isFolded())
    return OutputRanges;

  // If the function hasn't changed return the same ranges.
  if (!isEmitted()) {
    OutputRanges.resize(InputRanges.size());
    std::transform(InputRanges.begin(), InputRanges.end(), OutputRanges.begin(),
                   [](const DWARFAddressRange &Range) {
                     return DebugAddressRange(Range.LowPC, Range.HighPC);
                   });
    return OutputRanges;
  }

  // Even though we will merge ranges in a post-processing pass, we attempt to
  // merge them in a main processing loop as it improves the processing time.
  uint64_t PrevEndAddress = 0;
  for (const DWARFAddressRange &Range : InputRanges) {
    // Ranges that fall outside the function are reported and dropped.
    if (!containsAddress(Range.LowPC)) {
      LLVM_DEBUG(
          dbgs() << "BOLT-DEBUG: invalid debug address range detected for "
                 << *this << " : [0x" << Twine::utohexstr(Range.LowPC) << ", 0x"
                 << Twine::utohexstr(Range.HighPC) << "]\n");
      PrevEndAddress = 0;
      continue;
    }
    uint64_t InputOffset = Range.LowPC - getAddress();
    const uint64_t InputEndOffset =
        std::min(Range.HighPC - getAddress(), getSize());

    // Locate the basic block containing the start of the range:
    // upper_bound finds the first block starting after InputOffset, so the
    // previous entry is the candidate containing block.
    auto BBI = std::upper_bound(
        BasicBlockOffsets.begin(), BasicBlockOffsets.end(),
        BasicBlockOffset(InputOffset, nullptr), CompareBasicBlockOffsets());
    --BBI;
    // Walk consecutive blocks until the whole input range is consumed.
    do {
      const BinaryBasicBlock *BB = BBI->second;
      if (InputOffset < BB->getOffset() || InputOffset >= BB->getEndOffset()) {
        LLVM_DEBUG(
            dbgs() << "BOLT-DEBUG: invalid debug address range detected for "
                   << *this << " : [0x" << Twine::utohexstr(Range.LowPC)
                   << ", 0x" << Twine::utohexstr(Range.HighPC) << "]\n");
        PrevEndAddress = 0;
        break;
      }

      // Skip the range if the block was deleted (output start of zero marks a
      // deleted block).
      if (const uint64_t OutputStart = BB->getOutputAddressRange().first) {
        const uint64_t StartAddress =
            OutputStart + InputOffset - BB->getOffset();
        uint64_t EndAddress = BB->getOutputAddressRange().second;
        // If the input range ends inside this block, trim the output range
        // accordingly.
        if (InputEndOffset < BB->getEndOffset())
          EndAddress = StartAddress + InputEndOffset - InputOffset;

        // Merge with the previous range when they are contiguous.
        if (StartAddress == PrevEndAddress) {
          OutputRanges.back().HighPC =
              std::max(OutputRanges.back().HighPC, EndAddress);
        } else {
          OutputRanges.emplace_back(StartAddress,
                                    std::max(StartAddress, EndAddress));
        }
        PrevEndAddress = OutputRanges.back().HighPC;
      }

      InputOffset = BB->getEndOffset();
      ++BBI;
    } while (InputOffset < InputEndOffset);
  }

  // Post-processing pass to sort and merge ranges.
  std::sort(OutputRanges.begin(), OutputRanges.end());
  DebugAddressRangesVector MergedRanges;
  PrevEndAddress = 0;
  for (const DebugAddressRange &Range : OutputRanges) {
    if (Range.LowPC <= PrevEndAddress) {
      MergedRanges.back().HighPC =
          std::max(MergedRanges.back().HighPC, Range.HighPC);
    } else {
      MergedRanges.emplace_back(Range.LowPC, Range.HighPC);
    }
    PrevEndAddress = MergedRanges.back().HighPC;
  }

  return MergedRanges;
}
4327 
4328 MCInst *BinaryFunction::getInstructionAtOffset(uint64_t Offset) {
4329   if (CurrentState == State::Disassembled) {
4330     auto II = Instructions.find(Offset);
4331     return (II == Instructions.end()) ? nullptr : &II->second;
4332   } else if (CurrentState == State::CFG) {
4333     BinaryBasicBlock *BB = getBasicBlockContainingOffset(Offset);
4334     if (!BB)
4335       return nullptr;
4336 
4337     for (MCInst &Inst : *BB) {
4338       constexpr uint32_t InvalidOffset = std::numeric_limits<uint32_t>::max();
4339       if (Offset == BC.MIB->getOffsetWithDefault(Inst, InvalidOffset))
4340         return &Inst;
4341     }
4342 
4343     if (MCInst *LastInstr = BB->getLastNonPseudoInstr()) {
4344       const uint32_t Size =
4345           BC.MIB->getAnnotationWithDefault<uint32_t>(*LastInstr, "Size");
4346       if (BB->getEndOffset() - Offset == Size)
4347         return LastInstr;
4348     }
4349 
4350     return nullptr;
4351   } else {
4352     llvm_unreachable("invalid CFG state to use getInstructionAtOffset()");
4353   }
4354 }
4355 
/// Translate a DWARF location list from input (pre-BOLT) addresses to output
/// addresses. Entries are split per basic block, merged when contiguous with
/// an identical location expression, then sorted and merged once more.
DebugLocationsVector BinaryFunction::translateInputToOutputLocationList(
    const DebugLocationsVector &InputLL) const {
  DebugLocationsVector OutputLL;

  // A folded function has no output locations.
  if (isFolded())
    return OutputLL;

  // If the function hasn't changed - there's nothing to update.
  if (!isEmitted())
    return InputLL;

  uint64_t PrevEndAddress = 0;
  SmallVectorImpl<uint8_t> *PrevExpr = nullptr;
  for (const DebugLocationEntry &Entry : InputLL) {
    const uint64_t Start = Entry.LowPC;
    const uint64_t End = Entry.HighPC;
    // Entries outside the function are reported and dropped.
    if (!containsAddress(Start)) {
      LLVM_DEBUG(dbgs() << "BOLT-DEBUG: invalid debug address range detected "
                           "for "
                        << *this << " : [0x" << Twine::utohexstr(Start)
                        << ", 0x" << Twine::utohexstr(End) << "]\n");
      continue;
    }
    uint64_t InputOffset = Start - getAddress();
    const uint64_t InputEndOffset = std::min(End - getAddress(), getSize());
    // Find the basic block containing the start of the entry: upper_bound
    // gives the first block starting after InputOffset, so step back once.
    auto BBI = std::upper_bound(
        BasicBlockOffsets.begin(), BasicBlockOffsets.end(),
        BasicBlockOffset(InputOffset, nullptr), CompareBasicBlockOffsets());
    --BBI;
    // Walk consecutive blocks until the whole entry range is consumed.
    do {
      const BinaryBasicBlock *BB = BBI->second;
      if (InputOffset < BB->getOffset() || InputOffset >= BB->getEndOffset()) {
        LLVM_DEBUG(dbgs() << "BOLT-DEBUG: invalid debug address range detected "
                             "for "
                          << *this << " : [0x" << Twine::utohexstr(Start)
                          << ", 0x" << Twine::utohexstr(End) << "]\n");
        PrevEndAddress = 0;
        break;
      }

      // Skip the range if the block was deleted (zero output start address).
      if (const uint64_t OutputStart = BB->getOutputAddressRange().first) {
        const uint64_t StartAddress =
            OutputStart + InputOffset - BB->getOffset();
        uint64_t EndAddress = BB->getOutputAddressRange().second;
        if (InputEndOffset < BB->getEndOffset())
          EndAddress = StartAddress + InputEndOffset - InputOffset;

        // Merge with the previous entry only when contiguous AND the
        // location expression matches.
        // NOTE(review): *PrevExpr relies on short-circuiting — PrevExpr is
        // null only while PrevEndAddress is 0, and StartAddress is non-zero
        // here since OutputStart != 0; confirm this invariant holds.
        if (StartAddress == PrevEndAddress && Entry.Expr == *PrevExpr) {
          OutputLL.back().HighPC = std::max(OutputLL.back().HighPC, EndAddress);
        } else {
          OutputLL.emplace_back(DebugLocationEntry{
              StartAddress, std::max(StartAddress, EndAddress), Entry.Expr});
        }
        PrevEndAddress = OutputLL.back().HighPC;
        PrevExpr = &OutputLL.back().Expr;
      }

      ++BBI;
      InputOffset = BB->getEndOffset();
    } while (InputOffset < InputEndOffset);
  }

  // Sort and merge adjacent entries with identical location.
  // stable_sort preserves relative order of entries with equal LowPC.
  std::stable_sort(
      OutputLL.begin(), OutputLL.end(),
      [](const DebugLocationEntry &A, const DebugLocationEntry &B) {
        return A.LowPC < B.LowPC;
      });
  DebugLocationsVector MergedLL;
  PrevEndAddress = 0;
  PrevExpr = nullptr;
  for (const DebugLocationEntry &Entry : OutputLL) {
    // NOTE(review): same short-circuit reliance as above — PrevExpr is null
    // on the first iteration, guarded by PrevEndAddress == 0 and non-zero
    // entry addresses; verify entries can never start at address 0.
    if (Entry.LowPC <= PrevEndAddress && *PrevExpr == Entry.Expr) {
      MergedLL.back().HighPC = std::max(Entry.HighPC, MergedLL.back().HighPC);
    } else {
      // Clip overlapping entries with differing expressions so ranges in the
      // merged list never overlap.
      const uint64_t Begin = std::max(Entry.LowPC, PrevEndAddress);
      const uint64_t End = std::max(Begin, Entry.HighPC);
      MergedLL.emplace_back(DebugLocationEntry{Begin, End, Entry.Expr});
    }
    PrevEndAddress = MergedLL.back().HighPC;
    PrevExpr = &MergedLL.back().Expr;
  }

  return MergedLL;
}
4442 
4443 void BinaryFunction::printLoopInfo(raw_ostream &OS) const {
4444   OS << "Loop Info for Function \"" << *this << "\"";
4445   if (hasValidProfile())
4446     OS << " (count: " << getExecutionCount() << ")";
4447   OS << "\n";
4448 
4449   std::stack<BinaryLoop *> St;
4450   for_each(*BLI, [&](BinaryLoop *L) { St.push(L); });
4451   while (!St.empty()) {
4452     BinaryLoop *L = St.top();
4453     St.pop();
4454 
4455     for_each(*L, [&](BinaryLoop *Inner) { St.push(Inner); });
4456 
4457     if (!hasValidProfile())
4458       continue;
4459 
4460     OS << (L->getLoopDepth() > 1 ? "Nested" : "Outer")
4461        << " loop header: " << L->getHeader()->getName();
4462     OS << "\n";
4463     OS << "Loop basic blocks: ";
4464     const char *Sep = "";
4465     for (auto BI = L->block_begin(), BE = L->block_end(); BI != BE; ++BI) {
4466       OS << Sep << (*BI)->getName();
4467       Sep = ", ";
4468     }
4469     OS << "\n";
4470     if (hasValidProfile()) {
4471       OS << "Total back edge count: " << L->TotalBackEdgeCount << "\n";
4472       OS << "Loop entry count: " << L->EntryCount << "\n";
4473       OS << "Loop exit count: " << L->ExitCount << "\n";
4474       if (L->EntryCount > 0) {
4475         OS << "Average iters per entry: "
4476            << format("%.4lf", (double)L->TotalBackEdgeCount / L->EntryCount)
4477            << "\n";
4478       }
4479     }
4480     OS << "----\n";
4481   }
4482 
4483   OS << "Total number of loops: " << BLI->TotalLoops << "\n";
4484   OS << "Number of outer loops: " << BLI->OuterLoops << "\n";
4485   OS << "Maximum nested loop depth: " << BLI->MaximumDepth << "\n\n";
4486 }
4487 
4488 bool BinaryFunction::isAArch64Veneer() const {
4489   if (BasicBlocks.size() != 1)
4490     return false;
4491 
4492   BinaryBasicBlock &BB = **BasicBlocks.begin();
4493   if (BB.size() != 3)
4494     return false;
4495 
4496   for (MCInst &Inst : BB)
4497     if (!BC.MIB->hasAnnotation(Inst, "AArch64Veneer"))
4498       return false;
4499 
4500   return true;
4501 }
4502 
4503 } // namespace bolt
4504 } // namespace llvm
4505