xref: /llvm-project/bolt/lib/Core/BinaryFunction.cpp (revision 6aa735ceaf63150690923e9e5e0319eace3056f8)
1 //===--- BinaryFunction.cpp - Interface for machine-level function --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //===----------------------------------------------------------------------===//
10 
11 #include "bolt/Core/BinaryFunction.h"
12 #include "bolt/Core/BinaryBasicBlock.h"
13 #include "bolt/Core/DynoStats.h"
14 #include "bolt/Core/MCPlusBuilder.h"
15 #include "bolt/Utils/NameResolver.h"
16 #include "bolt/Utils/NameShortener.h"
17 #include "bolt/Utils/Utils.h"
18 #include "llvm/ADT/SmallSet.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/ADT/edit_distance.h"
21 #include "llvm/Demangle/Demangle.h"
22 #include "llvm/MC/MCAsmInfo.h"
23 #include "llvm/MC/MCAsmLayout.h"
24 #include "llvm/MC/MCContext.h"
25 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
26 #include "llvm/MC/MCExpr.h"
27 #include "llvm/MC/MCInst.h"
28 #include "llvm/MC/MCInstPrinter.h"
29 #include "llvm/MC/MCStreamer.h"
30 #include "llvm/Object/ObjectFile.h"
31 #include "llvm/Support/CommandLine.h"
32 #include "llvm/Support/Debug.h"
33 #include "llvm/Support/GraphWriter.h"
34 #include "llvm/Support/LEB128.h"
35 #include "llvm/Support/Regex.h"
36 #include "llvm/Support/Timer.h"
37 #include "llvm/Support/raw_ostream.h"
38 #include <functional>
39 #include <limits>
40 #include <numeric>
41 #include <string>
42 
43 #undef  DEBUG_TYPE
44 #define DEBUG_TYPE "bolt"
45 
46 using namespace llvm;
47 using namespace bolt;
48 
49 namespace opts {
50 
51 extern cl::OptionCategory BoltCategory;
52 extern cl::OptionCategory BoltOptCategory;
53 extern cl::OptionCategory BoltRelocCategory;
54 
55 extern cl::opt<bool> EnableBAT;
56 extern cl::opt<bool> Instrument;
57 extern cl::opt<bool> StrictMode;
58 extern cl::opt<bool> UpdateDebugSections;
59 extern cl::opt<unsigned> Verbosity;
60 
61 extern bool processAllFunctions();
62 
63 cl::opt<bool>
64 CheckEncoding("check-encoding",
65   cl::desc("perform verification of LLVM instruction encoding/decoding. "
66            "Every instruction in the input is decoded and re-encoded. "
67            "If the resulting bytes do not match the input, a warning message "
68            "is printed."),
69   cl::init(false),
70   cl::ZeroOrMore,
71   cl::Hidden,
72   cl::cat(BoltCategory));
73 
74 static cl::opt<bool>
75 DotToolTipCode("dot-tooltip-code",
76   cl::desc("add basic block instructions as tool tips on nodes"),
77   cl::ZeroOrMore,
78   cl::Hidden,
79   cl::cat(BoltCategory));
80 
81 cl::opt<JumpTableSupportLevel>
82 JumpTables("jump-tables",
83   cl::desc("jump tables support (default=basic)"),
84   cl::init(JTS_BASIC),
85   cl::values(
86       clEnumValN(JTS_NONE, "none",
87                  "do not optimize functions with jump tables"),
88       clEnumValN(JTS_BASIC, "basic",
89                  "optimize functions with jump tables"),
90       clEnumValN(JTS_MOVE, "move",
91                  "move jump tables to a separate section"),
92       clEnumValN(JTS_SPLIT, "split",
93                  "split jump tables section into hot and cold based on "
94                  "function execution frequency"),
95       clEnumValN(JTS_AGGRESSIVE, "aggressive",
96                  "aggressively split jump tables section based on usage "
97                  "of the tables")),
98   cl::ZeroOrMore,
99   cl::cat(BoltOptCategory));
100 
101 static cl::opt<bool>
102 NoScan("no-scan",
103   cl::desc("do not scan cold functions for external references (may result in "
104            "slower binary)"),
105   cl::init(false),
106   cl::ZeroOrMore,
107   cl::Hidden,
108   cl::cat(BoltOptCategory));
109 
110 cl::opt<bool>
111 PreserveBlocksAlignment("preserve-blocks-alignment",
112   cl::desc("try to preserve basic block alignment"),
113   cl::init(false),
114   cl::ZeroOrMore,
115   cl::cat(BoltOptCategory));
116 
117 cl::opt<bool>
118 PrintDynoStats("dyno-stats",
119   cl::desc("print execution info based on profile"),
120   cl::cat(BoltCategory));
121 
122 static cl::opt<bool>
123 PrintDynoStatsOnly("print-dyno-stats-only",
124   cl::desc("while printing functions output dyno-stats and skip instructions"),
125   cl::init(false),
126   cl::Hidden,
127   cl::cat(BoltCategory));
128 
129 static cl::list<std::string>
130 PrintOnly("print-only",
131   cl::CommaSeparated,
132   cl::desc("list of functions to print"),
133   cl::value_desc("func1,func2,func3,..."),
134   cl::Hidden,
135   cl::cat(BoltCategory));
136 
137 cl::opt<bool>
138 TimeBuild("time-build",
139   cl::desc("print time spent constructing binary functions"),
140   cl::ZeroOrMore,
141   cl::Hidden,
142   cl::cat(BoltCategory));
143 
144 cl::opt<bool>
145 TrapOnAVX512("trap-avx512",
146   cl::desc("in relocation mode trap upon entry to any function that uses "
147             "AVX-512 instructions"),
148   cl::init(false),
149   cl::ZeroOrMore,
150   cl::Hidden,
151   cl::cat(BoltCategory));
152 
153 bool shouldPrint(const BinaryFunction &Function) {
154   if (Function.isIgnored())
155     return false;
156 
157   if (PrintOnly.empty())
158     return true;
159 
160   for (std::string &Name : opts::PrintOnly) {
161     if (Function.hasNameRegex(Name)) {
162       return true;
163     }
164   }
165 
166   return false;
167 }
168 
169 } // namespace opts
170 
171 namespace llvm {
172 namespace bolt {
173 
// Out-of-line definition of the static constexpr member, required for
// ODR-uses under pre-C++17 rules (where such members are not implicitly
// inline).
constexpr unsigned BinaryFunction::MinAlign;
175 
176 namespace {
177 
/// Return true iff \p Range contains no elements.
template <typename R>
bool emptyRange(const R &Range) {
  auto First = Range.begin();
  auto Last = Range.end();
  return First == Last;
}
182 
/// Gets debug line information for the instruction located at the given
/// address in the original binary. The SMLoc's pointer is used
/// to point to this information, which is represented by a
/// DebugLineTableRowRef. The returned pointer is null if no debug line
/// information for this instruction was found.
SMLoc findDebugLineInformationForInstructionAt(uint64_t Address,
    DWARFUnit *Unit, const DWARFDebugLine::LineTable *LineTable) {
  // We use the pointer in SMLoc to store an instance of DebugLineTableRowRef,
  // which occupies 64 bits. Thus, we can only proceed if the struct fits into
  // the pointer itself.
  assert(
      sizeof(decltype(SMLoc().getPointer())) >= sizeof(DebugLineTableRowRef) &&
      "Cannot fit instruction debug line information into SMLoc's pointer");

  SMLoc NullResult = DebugLineTableRowRef::NULL_ROW.toSMLoc();
  uint32_t RowIndex = LineTable->lookupAddress(
      {Address, object::SectionedAddress::UndefSection});
  if (RowIndex == LineTable->UnknownRowIndex)
    return NullResult;

  assert(RowIndex < LineTable->Rows.size() &&
         "Line Table lookup returned invalid index.");

  // Write the (unit, row) pair through a DebugLineTableRowRef view of the
  // pointer-sized storage, then smuggle it out as an SMLoc.
  decltype(SMLoc().getPointer()) Ptr;
  DebugLineTableRowRef *InstructionLocation =
    reinterpret_cast<DebugLineTableRowRef *>(&Ptr);

  InstructionLocation->DwCompileUnitIndex = Unit->getOffset();
  // Stored as RowIndex + 1; presumably index 0 is reserved for NULL_ROW —
  // confirm against DebugLineTableRowRef's definition.
  InstructionLocation->RowIndex = RowIndex + 1;

  return SMLoc::getFromPointer(Ptr);
}
215 
216 std::string buildSectionName(StringRef Prefix, StringRef Name,
217                              const BinaryContext &BC) {
218   if (BC.isELF())
219     return (Prefix + Name).str();
220   static NameShortener NS;
221   return (Prefix + Twine(NS.getID(Name))).str();
222 }
223 
224 } // namespace
225 
226 std::string BinaryFunction::buildCodeSectionName(StringRef Name,
227                                                  const BinaryContext &BC) {
228   return buildSectionName(BC.isELF() ? ".local.text." : ".l.text.", Name, BC);
229 }
230 
231 std::string BinaryFunction::buildColdCodeSectionName(StringRef Name,
232                                                      const BinaryContext &BC) {
233   return buildSectionName(BC.isELF() ? ".local.cold.text." : ".l.c.text.", Name,
234                           BC);
235 }
236 
// Definition of the static counter; presumably tracks created
// BinaryFunction instances — see the class declaration for its use.
uint64_t BinaryFunction::Count = 0;
238 
239 Optional<StringRef>
240 BinaryFunction::hasNameRegex(const StringRef Name) const {
241   const std::string RegexName = (Twine("^") + StringRef(Name) + "$").str();
242   Regex MatchName(RegexName);
243   Optional<StringRef> Match = forEachName(
244       [&MatchName](StringRef Name) { return MatchName.match(Name); });
245 
246   return Match;
247 }
248 
249 Optional<StringRef>
250 BinaryFunction::hasRestoredNameRegex(const StringRef Name) const {
251   const std::string RegexName = (Twine("^") + StringRef(Name) + "$").str();
252   Regex MatchName(RegexName);
253   Optional<StringRef> Match = forEachName([&MatchName](StringRef Name) {
254     return MatchName.match(NameResolver::restore(Name));
255   });
256 
257   return Match;
258 }
259 
260 std::string BinaryFunction::getDemangledName() const {
261   StringRef MangledName = NameResolver::restore(getOneName());
262   return demangle(MangledName.str());
263 }
264 
265 BinaryBasicBlock *
266 BinaryFunction::getBasicBlockContainingOffset(uint64_t Offset) {
267   if (Offset > Size)
268     return nullptr;
269 
270   if (BasicBlockOffsets.empty())
271     return nullptr;
272 
273   /*
274    * This is commented out because it makes BOLT too slow.
275    * assert(std::is_sorted(BasicBlockOffsets.begin(),
276    *                       BasicBlockOffsets.end(),
277    *                       CompareBasicBlockOffsets())));
278    */
279   auto I = std::upper_bound(BasicBlockOffsets.begin(),
280                             BasicBlockOffsets.end(),
281                             BasicBlockOffset(Offset, nullptr),
282                             CompareBasicBlockOffsets());
283   assert(I != BasicBlockOffsets.begin() && "first basic block not at offset 0");
284   --I;
285   BinaryBasicBlock *BB = I->second;
286   return (Offset < BB->getOffset() + BB->getOriginalSize()) ? BB : nullptr;
287 }
288 
289 void BinaryFunction::markUnreachableBlocks() {
290   std::stack<BinaryBasicBlock *> Stack;
291 
292   for (BinaryBasicBlock *BB : layout()) {
293     BB->markValid(false);
294   }
295 
296   // Add all entries and landing pads as roots.
297   for (BinaryBasicBlock *BB : BasicBlocks) {
298     if (isEntryPoint(*BB) || BB->isLandingPad()) {
299       Stack.push(BB);
300       BB->markValid(true);
301       continue;
302     }
303     // FIXME:
304     // Also mark BBs with indirect jumps as reachable, since we do not
305     // support removing unused jump tables yet (GH-issue20).
306     for (const MCInst &Inst : *BB) {
307       if (BC.MIB->getJumpTable(Inst)) {
308         Stack.push(BB);
309         BB->markValid(true);
310         break;
311       }
312     }
313   }
314 
315   // Determine reachable BBs from the entry point
316   while (!Stack.empty()) {
317     BinaryBasicBlock *BB = Stack.top();
318     Stack.pop();
319     for (BinaryBasicBlock *Succ : BB->successors()) {
320       if (Succ->isValid())
321         continue;
322       Succ->markValid(true);
323       Stack.push(Succ);
324     }
325   }
326 }
327 
328 // Any unnecessary fallthrough jumps revealed after calling eraseInvalidBBs
329 // will be cleaned up by fixBranches().
330 std::pair<unsigned, uint64_t> BinaryFunction::eraseInvalidBBs() {
331   BasicBlockOrderType NewLayout;
332   unsigned Count = 0;
333   uint64_t Bytes = 0;
334   for (BinaryBasicBlock *BB : layout()) {
335     if (BB->isValid()) {
336       NewLayout.push_back(BB);
337     } else {
338       assert(!isEntryPoint(*BB) && "all entry blocks must be valid");
339       ++Count;
340       Bytes += BC.computeCodeSize(BB->begin(), BB->end());
341     }
342   }
343   BasicBlocksLayout = std::move(NewLayout);
344 
345   BasicBlockListType NewBasicBlocks;
346   for (auto I = BasicBlocks.begin(), E = BasicBlocks.end(); I != E; ++I) {
347     BinaryBasicBlock *BB = *I;
348     if (BB->isValid()) {
349       NewBasicBlocks.push_back(BB);
350     } else {
351       // Make sure the block is removed from the list of predecessors.
352       BB->removeAllSuccessors();
353       DeletedBasicBlocks.push_back(BB);
354     }
355   }
356   BasicBlocks = std::move(NewBasicBlocks);
357 
358   assert(BasicBlocks.size() == BasicBlocksLayout.size());
359 
360   // Update CFG state if needed
361   if (Count > 0)
362     recomputeLandingPads();
363 
364   return std::make_pair(Count, Bytes);
365 }
366 
367 bool BinaryFunction::isForwardCall(const MCSymbol *CalleeSymbol) const {
368   // This function should work properly before and after function reordering.
369   // In order to accomplish this, we use the function index (if it is valid).
370   // If the function indices are not valid, we fall back to the original
371   // addresses.  This should be ok because the functions without valid indices
372   // should have been ordered with a stable sort.
373   const BinaryFunction *CalleeBF = BC.getFunctionForSymbol(CalleeSymbol);
374   if (CalleeBF) {
375     if(CalleeBF->isInjected())
376       return true;
377 
378     if (hasValidIndex() && CalleeBF->hasValidIndex()) {
379       return getIndex() < CalleeBF->getIndex();
380     } else if (hasValidIndex() && !CalleeBF->hasValidIndex()) {
381       return true;
382     } else if (!hasValidIndex() && CalleeBF->hasValidIndex()) {
383       return false;
384     } else {
385       return getAddress() < CalleeBF->getAddress();
386     }
387   } else {
388     // Absolute symbol.
389     ErrorOr<uint64_t> CalleeAddressOrError = BC.getSymbolValue(*CalleeSymbol);
390     assert(CalleeAddressOrError && "unregistered symbol found");
391     return *CalleeAddressOrError > getAddress();
392   }
393 }
394 
/// Dump the function to the debug stream with no annotation; forwards to
/// print().
void BinaryFunction::dump(bool PrintInstructions) const {
  print(dbgs(), "", PrintInstructions);
}
398 
/// Print a human-readable representation of the function to \p OS.
/// \p Annotation is appended to the header line. When \p PrintInstructions
/// is set (and an instruction printer is available), the per-block or
/// pre-CFG instruction listing is emitted as well, followed by EH ranges,
/// jump tables, and CFI instructions.
void BinaryFunction::print(raw_ostream &OS, std::string Annotation,
                           bool PrintInstructions) const {
  // Respect -print-only filtering and the ignored flag.
  if (!opts::shouldPrint(*this))
    return;

  StringRef SectionName =
      OriginSection ? OriginSection->getName() : "<no origin section>";
  OS << "Binary Function \"" << *this << "\" " << Annotation << " {";
  std::vector<StringRef> AllNames = getNames();
  if (AllNames.size() > 1) {
    OS << "\n  All names   : ";
    const char *Sep = "";
    for (const StringRef Name : AllNames) {
      OS << Sep << Name;
      Sep = "\n                ";
    }
  }
  // Fixed header: one "key : value" row per attribute.
  OS << "\n  Number      : "   << FunctionNumber
     << "\n  State       : "   << CurrentState
     << "\n  Address     : 0x" << Twine::utohexstr(Address)
     << "\n  Size        : 0x" << Twine::utohexstr(Size)
     << "\n  MaxSize     : 0x" << Twine::utohexstr(MaxSize)
     << "\n  Offset      : 0x" << Twine::utohexstr(FileOffset)
     << "\n  Section     : "   << SectionName
     << "\n  Orc Section : "   << getCodeSectionName()
     << "\n  LSDA        : 0x" << Twine::utohexstr(getLSDAAddress())
     << "\n  IsSimple    : "   << IsSimple
     << "\n  IsMultiEntry: "   << isMultiEntry()
     << "\n  IsSplit     : "   << isSplit()
     << "\n  BB Count    : "   << size();

  // Optional attributes: only printed when set/applicable.
  if (HasFixedIndirectBranch) {
    OS << "\n  HasFixedIndirectBranch : true";
  }
  if (HasUnknownControlFlow) {
    OS << "\n  Unknown CF  : true";
  }
  if (getPersonalityFunction()) {
    OS << "\n  Personality : " << getPersonalityFunction()->getName();
  }
  if (IsFragment) {
    OS << "\n  IsFragment  : true";
  }
  if (isFolded()) {
    OS << "\n  FoldedInto  : " << *getFoldedIntoFunction();
  }
  for (BinaryFunction *ParentFragment : ParentFragments) {
    OS << "\n  Parent      : " << *ParentFragment;
  }
  if (!Fragments.empty()) {
    OS << "\n  Fragments   : ";
    const char *Sep = "";
    for (BinaryFunction *Frag : Fragments) {
      OS << Sep << *Frag;
      Sep = ", ";
    }
  }
  if (hasCFG()) {
    OS << "\n  Hash        : "   << Twine::utohexstr(computeHash());
  }
  if (isMultiEntry()) {
    OS << "\n  Secondary Entry Points : ";
    const char *Sep = "";
    for (const auto &KV : SecondaryEntryPoints) {
      OS << Sep << KV.second->getName();
      Sep = ", ";
    }
  }
  if (FrameInstructions.size()) {
    OS << "\n  CFI Instrs  : "   << FrameInstructions.size();
  }
  if (BasicBlocksLayout.size()) {
    OS << "\n  BB Layout   : ";
    const char *Sep = "";
    for (BinaryBasicBlock *BB : BasicBlocksLayout) {
      OS << Sep << BB->getName();
      Sep = ", ";
    }
  }
  if (ImageAddress)
    OS << "\n  Image       : 0x" << Twine::utohexstr(ImageAddress);
  if (ExecutionCount != COUNT_NO_PROFILE) {
    OS << "\n  Exec Count  : " << ExecutionCount;
    OS << "\n  Profile Acc : " << format("%.1f%%", ProfileMatchRatio * 100.0f);
  }

  if (opts::PrintDynoStats && !BasicBlocksLayout.empty()) {
    OS << '\n';
    DynoStats dynoStats = getDynoStats(*this);
    OS << dynoStats;
  }

  OS << "\n}\n";

  // Header-only mode: stop before the instruction listing.
  if (opts::PrintDynoStatsOnly || !PrintInstructions || !BC.InstPrinter)
    return;

  // Offset of the instruction in function.
  uint64_t Offset = 0;

  if (BasicBlocks.empty() && !Instructions.empty()) {
    // Print before CFG was built.
    for (const std::pair<const uint32_t, MCInst> &II : Instructions) {
      Offset = II.first;

      // Print label if exists at this offset.
      auto LI = Labels.find(Offset);
      if (LI != Labels.end()) {
        if (const MCSymbol *EntrySymbol =
                getSecondaryEntryPointSymbol(LI->second))
          OS << EntrySymbol->getName() << " (Entry Point):\n";
        OS << LI->second->getName() << ":\n";
      }

      BC.printInstruction(OS, II.second, Offset, this);
    }
  }

  // CFG built: print block by block in layout order.
  for (uint32_t I = 0, E = BasicBlocksLayout.size(); I != E; ++I) {
    BinaryBasicBlock *BB = BasicBlocksLayout[I];
    // Marker between the hot and cold parts of a split function.
    if (I != 0 &&
        BB->isCold() != BasicBlocksLayout[I - 1]->isCold())
      OS << "-------   HOT-COLD SPLIT POINT   -------\n\n";

    OS << BB->getName() << " ("
       << BB->size() << " instructions, align : " << BB->getAlignment()
       << ")\n";

    if (isEntryPoint(*BB)) {
      if (MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(*BB))
        OS << "  Secondary Entry Point: " << EntrySymbol->getName() << '\n';
      else
        OS << "  Entry Point\n";
    }

    if (BB->isLandingPad())
      OS << "  Landing Pad\n";

    uint64_t BBExecCount = BB->getExecutionCount();
    if (hasValidProfile()) {
      OS << "  Exec Count : ";
      if (BB->getExecutionCount() != BinaryBasicBlock::COUNT_NO_PROFILE)
        OS << BBExecCount << '\n';
      else
        OS << "<unknown>\n";
    }
    if (BB->getCFIState() >= 0) {
      OS << "  CFI State : " << BB->getCFIState() << '\n';
    }
    if (opts::EnableBAT) {
      OS << "  Input offset: " << Twine::utohexstr(BB->getInputOffset())
         << "\n";
    }
    if (!BB->pred_empty()) {
      OS << "  Predecessors: ";
      const char *Sep = "";
      for (BinaryBasicBlock *Pred : BB->predecessors()) {
        OS << Sep << Pred->getName();
        Sep = ", ";
      }
      OS << '\n';
    }
    if (!BB->throw_empty()) {
      OS << "  Throwers: ";
      const char *Sep = "";
      for (BinaryBasicBlock *Throw : BB->throwers()) {
        OS << Sep << Throw->getName();
        Sep = ", ";
      }
      OS << '\n';
    }

    Offset = alignTo(Offset, BB->getAlignment());

    // Note: offsets are imprecise since this is happening prior to relaxation.
    Offset = BC.printInstructions(OS, BB->begin(), BB->end(), Offset, this);

    if (!BB->succ_empty()) {
      OS << "  Successors: ";
      // For more than 2 successors, sort them based on frequency.
      std::vector<uint64_t> Indices(BB->succ_size());
      std::iota(Indices.begin(), Indices.end(), 0);
      if (BB->succ_size() > 2 && BB->getKnownExecutionCount()) {
        std::stable_sort(Indices.begin(), Indices.end(),
                         [&](const uint64_t A, const uint64_t B) {
                           return BB->BranchInfo[B] < BB->BranchInfo[A];
                         });
      }
      const char *Sep = "";
      for (unsigned I = 0; I < Indices.size(); ++I) {
        BinaryBasicBlock *Succ = BB->Successors[Indices[I]];
        BinaryBasicBlock::BinaryBranchInfo &BI = BB->BranchInfo[Indices[I]];
        OS << Sep << Succ->getName();
        if (ExecutionCount != COUNT_NO_PROFILE &&
            BI.MispredictedCount != BinaryBasicBlock::COUNT_INFERRED) {
          OS << " (mispreds: " << BI.MispredictedCount
             << ", count: " << BI.Count << ")";
        } else if (ExecutionCount != COUNT_NO_PROFILE &&
                   BI.Count != BinaryBasicBlock::COUNT_NO_PROFILE) {
          OS << " (inferred count: " << BI.Count << ")";
        }
        Sep = ", ";
      }
      OS << '\n';
    }

    if (!BB->lp_empty()) {
      OS << "  Landing Pads: ";
      const char *Sep = "";
      for (BinaryBasicBlock *LP : BB->landing_pads()) {
        OS << Sep << LP->getName();
        if (ExecutionCount != COUNT_NO_PROFILE) {
          OS << " (count: " << LP->getExecutionCount() << ")";
        }
        Sep = ", ";
      }
      OS << '\n';
    }

    // In CFG_Finalized state we can miscalculate CFI state at exit.
    if (CurrentState == State::CFG) {
      const int32_t CFIStateAtExit = BB->getCFIStateAtExit();
      if (CFIStateAtExit >= 0)
        OS << "  CFI State: " << CFIStateAtExit << '\n';
    }

    OS << '\n';
  }

  // Dump new exception ranges for the function.
  if (!CallSites.empty()) {
    OS << "EH table:\n";
    for (const CallSite &CSI : CallSites) {
      OS << "  [" << *CSI.Start << ", " << *CSI.End << ") landing pad : ";
      if (CSI.LP)
        OS << *CSI.LP;
      else
        OS << "0";
      OS << ", action : " << CSI.Action << '\n';
    }
    OS << '\n';
  }

  // Print all jump tables.
  for (const std::pair<const uint64_t, JumpTable *> &JTI : JumpTables) {
    JTI.second->print(OS);
  }

  OS << "DWARF CFI Instructions:\n";
  if (OffsetToCFI.size()) {
    // Pre-buildCFG information
    for (const std::pair<const uint32_t, uint32_t> &Elmt : OffsetToCFI) {
      OS << format("    %08x:\t", Elmt.first);
      assert(Elmt.second < FrameInstructions.size() && "Incorrect CFI offset");
      BinaryContext::printCFI(OS, FrameInstructions[Elmt.second]);
      OS << "\n";
    }
  } else {
    // Post-buildCFG information
    for (uint32_t I = 0, E = FrameInstructions.size(); I != E; ++I) {
      const MCCFIInstruction &CFI = FrameInstructions[I];
      OS << format("    %d:\t", I);
      BinaryContext::printCFI(OS, CFI);
      OS << "\n";
    }
  }
  if (FrameInstructions.empty())
    OS << "    <empty>\n";

  OS << "End of Function \"" << *this << "\"\n\n";
}
670 
671 void BinaryFunction::printRelocations(raw_ostream &OS,
672                                       uint64_t Offset,
673                                       uint64_t Size) const {
674   const char *Sep = " # Relocs: ";
675 
676   auto RI = Relocations.lower_bound(Offset);
677   while (RI != Relocations.end() && RI->first < Offset + Size) {
678     OS << Sep << "(R: " << RI->second << ")";
679     Sep = ", ";
680     ++RI;
681   }
682 }
683 
namespace {
/// Rewrite the target register of a DW_CFA_expression /
/// DW_CFA_val_expression CFI instruction. The raw bytes have the layout
/// [opcode][ULEB128 register][expression ...]; the old register encoding is
/// spliced out and \p NewReg is re-encoded in its place, leaving the opcode
/// and the expression payload untouched.
std::string mutateDWARFExpressionTargetReg(const MCCFIInstruction &Instr,
                                           MCPhysReg NewReg) {
  StringRef ExprBytes = Instr.getValues();
  assert(ExprBytes.size() > 1 && "DWARF expression CFI is too short");
  uint8_t Opcode = ExprBytes[0];
  assert((Opcode == dwarf::DW_CFA_expression ||
          Opcode == dwarf::DW_CFA_val_expression) &&
         "invalid DWARF expression CFI");
  // Decode starting one byte past the opcode.
  const uint8_t *const Start =
      reinterpret_cast<const uint8_t *>(ExprBytes.drop_front(1).data());
  const uint8_t *const End =
      reinterpret_cast<const uint8_t *>(Start + ExprBytes.size() - 1);
  // Size receives the byte length of the old register's ULEB128 encoding.
  unsigned Size = 0;
  decodeULEB128(Start, &Size, End);
  assert(Size > 0 && "Invalid reg encoding for DWARF expression CFI");
  // Encode the replacement register number as ULEB128.
  SmallString<8> Tmp;
  raw_svector_ostream OSE(Tmp);
  encodeULEB128(NewReg, OSE);
  // Result = opcode byte + new register + remainder of the original bytes.
  return Twine(ExprBytes.slice(0, 1))
      .concat(OSE.str())
      .concat(ExprBytes.drop_front(1 + Size))
      .str();
}
} // namespace
709 
/// Replace the register operand of the CFI instruction attached to \p Instr
/// with \p NewReg, preserving the CFI operation kind and its remaining
/// operands (offset, second register, or expression payload).
void BinaryFunction::mutateCFIRegisterFor(const MCInst &Instr,
                                          MCPhysReg NewReg) {
  const MCCFIInstruction *OldCFI = getCFIFor(Instr);
  assert(OldCFI && "invalid CFI instr");
  // Rebuild the CFI with the same operation but the new register; each case
  // carries over whatever other operand the operation has.
  switch (OldCFI->getOperation()) {
  default:
    llvm_unreachable("Unexpected instruction");
  case MCCFIInstruction::OpDefCfa:
    setCFIFor(Instr, MCCFIInstruction::cfiDefCfa(nullptr, NewReg,
                                                 OldCFI->getOffset()));
    break;
  case MCCFIInstruction::OpDefCfaRegister:
    setCFIFor(Instr, MCCFIInstruction::createDefCfaRegister(nullptr, NewReg));
    break;
  case MCCFIInstruction::OpOffset:
    setCFIFor(Instr, MCCFIInstruction::createOffset(nullptr, NewReg,
                                                    OldCFI->getOffset()));
    break;
  case MCCFIInstruction::OpRegister:
    setCFIFor(Instr, MCCFIInstruction::createRegister(nullptr, NewReg,
                                                      OldCFI->getRegister2()));
    break;
  case MCCFIInstruction::OpSameValue:
    setCFIFor(Instr, MCCFIInstruction::createSameValue(nullptr, NewReg));
    break;
  case MCCFIInstruction::OpEscape:
    // DWARF expression CFIs carry the register inside the raw escape bytes;
    // patch those bytes directly.
    setCFIFor(Instr,
              MCCFIInstruction::createEscape(
                  nullptr,
                  StringRef(mutateDWARFExpressionTargetReg(*OldCFI, NewReg))));
    break;
  case MCCFIInstruction::OpRestore:
    setCFIFor(Instr, MCCFIInstruction::createRestore(nullptr, NewReg));
    break;
  case MCCFIInstruction::OpUndefined:
    setCFIFor(Instr, MCCFIInstruction::createUndefined(nullptr, NewReg));
    break;
  }
}
749 
/// Replace the offset operand of the CFI instruction attached to \p Instr
/// with \p NewOffset, preserving the CFI operation kind and register.
/// Returns the updated CFI instruction.
const MCCFIInstruction *BinaryFunction::mutateCFIOffsetFor(const MCInst &Instr,
                                                           int64_t NewOffset) {
  const MCCFIInstruction *OldCFI = getCFIFor(Instr);
  assert(OldCFI && "invalid CFI instr");
  switch (OldCFI->getOperation()) {
  default:
    llvm_unreachable("Unexpected instruction");
  case MCCFIInstruction::OpDefCfaOffset:
    setCFIFor(Instr, MCCFIInstruction::cfiDefCfaOffset(nullptr, NewOffset));
    break;
  case MCCFIInstruction::OpAdjustCfaOffset:
    setCFIFor(Instr,
              MCCFIInstruction::createAdjustCfaOffset(nullptr, NewOffset));
    break;
  case MCCFIInstruction::OpDefCfa:
    // Keep the original register; only the offset changes.
    setCFIFor(Instr, MCCFIInstruction::cfiDefCfa(nullptr, OldCFI->getRegister(),
                                                 NewOffset));
    break;
  case MCCFIInstruction::OpOffset:
    setCFIFor(Instr, MCCFIInstruction::createOffset(
                         nullptr, OldCFI->getRegister(), NewOffset));
    break;
  }
  return getCFIFor(Instr);
}
775 
776 IndirectBranchType
777 BinaryFunction::processIndirectBranch(MCInst &Instruction,
778                                       unsigned Size,
779                                       uint64_t Offset,
780                                       uint64_t &TargetAddress) {
781   const unsigned PtrSize = BC.AsmInfo->getCodePointerSize();
782 
783   // The instruction referencing memory used by the branch instruction.
784   // It could be the branch instruction itself or one of the instructions
785   // setting the value of the register used by the branch.
786   MCInst *MemLocInstr;
787 
788   // Address of the table referenced by MemLocInstr. Could be either an
789   // array of function pointers, or a jump table.
790   uint64_t ArrayStart = 0;
791 
792   unsigned BaseRegNum, IndexRegNum;
793   int64_t DispValue;
794   const MCExpr *DispExpr;
795 
796   // In AArch, identify the instruction adding the PC-relative offset to
797   // jump table entries to correctly decode it.
798   MCInst *PCRelBaseInstr;
799   uint64_t PCRelAddr = 0;
800 
801   auto Begin = Instructions.begin();
802   if (BC.isAArch64()) {
803     PreserveNops = BC.HasRelocations;
804     // Start at the last label as an approximation of the current basic block.
805     // This is a heuristic, since the full set of labels have yet to be
806     // determined
807     for (auto LI = Labels.rbegin(); LI != Labels.rend(); ++LI) {
808       auto II = Instructions.find(LI->first);
809       if (II != Instructions.end()) {
810         Begin = II;
811         break;
812       }
813     }
814   }
815 
816   IndirectBranchType BranchType =
817     BC.MIB->analyzeIndirectBranch(Instruction,
818                                   Begin,
819                                   Instructions.end(),
820                                   PtrSize,
821                                   MemLocInstr,
822                                   BaseRegNum,
823                                   IndexRegNum,
824                                   DispValue,
825                                   DispExpr,
826                                   PCRelBaseInstr);
827 
828   if (BranchType == IndirectBranchType::UNKNOWN && !MemLocInstr)
829     return BranchType;
830 
831   if (MemLocInstr != &Instruction)
832     IndexRegNum = BC.MIB->getNoRegister();
833 
834   if (BC.isAArch64()) {
835     const MCSymbol *Sym = BC.MIB->getTargetSymbol(*PCRelBaseInstr, 1);
836     assert(Sym && "Symbol extraction failed");
837     ErrorOr<uint64_t> SymValueOrError = BC.getSymbolValue(*Sym);
838     if (SymValueOrError) {
839       PCRelAddr = *SymValueOrError;
840     } else {
841       for (std::pair<const uint32_t, MCSymbol *> &Elmt : Labels) {
842         if (Elmt.second == Sym) {
843           PCRelAddr = Elmt.first + getAddress();
844           break;
845         }
846       }
847     }
848     uint64_t InstrAddr = 0;
849     for (auto II = Instructions.rbegin(); II != Instructions.rend(); ++II) {
850       if (&II->second == PCRelBaseInstr) {
851         InstrAddr = II->first + getAddress();
852         break;
853       }
854     }
855     assert(InstrAddr != 0 && "instruction not found");
856     // We do this to avoid spurious references to code locations outside this
857     // function (for example, if the indirect jump lives in the last basic
858     // block of the function, it will create a reference to the next function).
859     // This replaces a symbol reference with an immediate.
860     BC.MIB->replaceMemOperandDisp(*PCRelBaseInstr,
861                                   MCOperand::createImm(PCRelAddr - InstrAddr));
862     // FIXME: Disable full jump table processing for AArch64 until we have a
863     // proper way of determining the jump table limits.
864     return IndirectBranchType::UNKNOWN;
865   }
866 
867   // RIP-relative addressing should be converted to symbol form by now
868   // in processed instructions (but not in jump).
869   if (DispExpr) {
870     const MCSymbol *TargetSym;
871     uint64_t TargetOffset;
872     std::tie(TargetSym, TargetOffset) = BC.MIB->getTargetSymbolInfo(DispExpr);
873     ErrorOr<uint64_t> SymValueOrError = BC.getSymbolValue(*TargetSym);
874     assert(SymValueOrError && "global symbol needs a value");
875     ArrayStart = *SymValueOrError + TargetOffset;
876     BaseRegNum = BC.MIB->getNoRegister();
877     if (BC.isAArch64()) {
878       ArrayStart &= ~0xFFFULL;
879       ArrayStart += DispValue & 0xFFFULL;
880     }
881   } else {
882     ArrayStart = static_cast<uint64_t>(DispValue);
883   }
884 
885   if (BaseRegNum == BC.MRI->getProgramCounter())
886     ArrayStart += getAddress() + Offset + Size;
887 
888   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: addressed memory is 0x"
889                     << Twine::utohexstr(ArrayStart) << '\n');
890 
891   ErrorOr<BinarySection &> Section = BC.getSectionForAddress(ArrayStart);
892   if (!Section) {
893     // No section - possibly an absolute address. Since we don't allow
894     // internal function addresses to escape the function scope - we
895     // consider it a tail call.
896     if (opts::Verbosity >= 1) {
897       errs() << "BOLT-WARNING: no section for address 0x"
898              << Twine::utohexstr(ArrayStart) << " referenced from function "
899              << *this << '\n';
900     }
901     return IndirectBranchType::POSSIBLE_TAIL_CALL;
902   }
903   if (Section->isVirtual()) {
904     // The contents are filled at runtime.
905     return IndirectBranchType::POSSIBLE_TAIL_CALL;
906   }
907 
908   if (BranchType == IndirectBranchType::POSSIBLE_FIXED_BRANCH) {
909     ErrorOr<uint64_t> Value = BC.getPointerAtAddress(ArrayStart);
910     if (!Value)
911       return IndirectBranchType::UNKNOWN;
912 
913     if (!BC.getSectionForAddress(ArrayStart)->isReadOnly())
914       return IndirectBranchType::UNKNOWN;
915 
916     outs() << "BOLT-INFO: fixed indirect branch detected in " << *this
917            << " at 0x" << Twine::utohexstr(getAddress() + Offset)
918            << " referencing data at 0x" << Twine::utohexstr(ArrayStart)
919            << " the destination value is 0x" << Twine::utohexstr(*Value)
920            << '\n';
921 
922     TargetAddress = *Value;
923     return BranchType;
924   }
925 
926   // Check if there's already a jump table registered at this address.
927   MemoryContentsType MemType;
928   if (JumpTable *JT = BC.getJumpTableContainingAddress(ArrayStart)) {
929     switch (JT->Type) {
930     case JumpTable::JTT_NORMAL:
931       MemType = MemoryContentsType::POSSIBLE_JUMP_TABLE;
932       break;
933     case JumpTable::JTT_PIC:
934       MemType = MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE;
935       break;
936     }
937   } else {
938     MemType = BC.analyzeMemoryAt(ArrayStart, *this);
939   }
940 
941   // Check that jump table type in instruction pattern matches memory contents.
942   JumpTable::JumpTableType JTType;
943   if (BranchType == IndirectBranchType::POSSIBLE_PIC_JUMP_TABLE) {
944     if (MemType != MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE)
945       return IndirectBranchType::UNKNOWN;
946     JTType = JumpTable::JTT_PIC;
947   } else {
948     if (MemType == MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE)
949       return IndirectBranchType::UNKNOWN;
950 
951     if (MemType == MemoryContentsType::UNKNOWN)
952       return IndirectBranchType::POSSIBLE_TAIL_CALL;
953 
954     BranchType = IndirectBranchType::POSSIBLE_JUMP_TABLE;
955     JTType = JumpTable::JTT_NORMAL;
956   }
957 
958   // Convert the instruction into jump table branch.
959   const MCSymbol *JTLabel = BC.getOrCreateJumpTable(*this, ArrayStart, JTType);
960   BC.MIB->replaceMemOperandDisp(*MemLocInstr, JTLabel, BC.Ctx.get());
961   BC.MIB->setJumpTable(Instruction, ArrayStart, IndexRegNum);
962 
963   JTSites.emplace_back(Offset, ArrayStart);
964 
965   return BranchType;
966 }
967 
968 MCSymbol *BinaryFunction::getOrCreateLocalLabel(uint64_t Address,
969                                                 bool CreatePastEnd) {
970   const uint64_t Offset = Address - getAddress();
971 
972   if ((Offset == getSize()) && CreatePastEnd)
973     return getFunctionEndLabel();
974 
975   auto LI = Labels.find(Offset);
976   if (LI != Labels.end())
977     return LI->second;
978 
979   // For AArch64, check if this address is part of a constant island.
980   if (BC.isAArch64()) {
981     if (MCSymbol *IslandSym = getOrCreateIslandAccess(Address)) {
982       return IslandSym;
983     }
984   }
985 
986   MCSymbol *Label = BC.Ctx->createNamedTempSymbol();
987   Labels[Offset] = Label;
988 
989   return Label;
990 }
991 
992 ErrorOr<ArrayRef<uint8_t>> BinaryFunction::getData() const {
993   BinarySection &Section = *getOriginSection();
994   assert(Section.containsRange(getAddress(), getMaxSize()) &&
995          "wrong section for function");
996 
997   if (!Section.isText() || Section.isVirtual() || !Section.getSize()) {
998     return std::make_error_code(std::errc::bad_address);
999   }
1000 
1001   StringRef SectionContents = Section.getContents();
1002 
1003   assert(SectionContents.size() == Section.getSize() &&
1004          "section size mismatch");
1005 
1006   // Function offset from the section start.
1007   uint64_t Offset = getAddress() - Section.getAddress();
1008   auto *Bytes = reinterpret_cast<const uint8_t *>(SectionContents.data());
1009   return ArrayRef<uint8_t>(Bytes + Offset, getMaxSize());
1010 }
1011 
1012 size_t BinaryFunction::getSizeOfDataInCodeAt(uint64_t Offset) const {
1013   if (!Islands)
1014     return 0;
1015 
1016   if (Islands->DataOffsets.find(Offset) == Islands->DataOffsets.end())
1017     return 0;
1018 
1019   auto Iter = Islands->CodeOffsets.upper_bound(Offset);
1020   if (Iter != Islands->CodeOffsets.end()) {
1021     return *Iter - Offset;
1022   }
1023   return getSize() - Offset;
1024 }
1025 
1026 bool BinaryFunction::isZeroPaddingAt(uint64_t Offset) const {
1027   ArrayRef<uint8_t> FunctionData = *getData();
1028   uint64_t EndOfCode = getSize();
1029   if (Islands) {
1030     auto Iter = Islands->DataOffsets.upper_bound(Offset);
1031     if (Iter != Islands->DataOffsets.end())
1032       EndOfCode = *Iter;
1033   }
1034   for (uint64_t I = Offset; I < EndOfCode; ++I) {
1035     if (FunctionData[I] != 0) {
1036       return false;
1037     }
1038   }
1039 
1040   return true;
1041 }
1042 
/// Disassemble the function's raw bytes into MCInst instructions, creating
/// labels for intra-function branch targets, converting external jumps into
/// (tail) calls, classifying indirect branches, and applying input-file
/// relocations to instruction operands. On success the function transitions
/// to State::Disassembled and `true` is returned; if the function is deemed
/// non-simple along the way, the partial instruction list is discarded and
/// `false` is returned.
bool BinaryFunction::disassemble() {
  NamedRegionTimer T("disassemble", "Disassemble function", "buildfuncs",
                     "Build Binary Functions", opts::TimeBuild);
  ErrorOr<ArrayRef<uint8_t>> ErrorOrFunctionData = getData();
  assert(ErrorOrFunctionData && "function data is not available");
  ArrayRef<uint8_t> FunctionData = *ErrorOrFunctionData;
  assert(FunctionData.size() == getMaxSize() &&
         "function size does not match raw data size");

  auto &Ctx = BC.Ctx;
  auto &MIB = BC.MIB;

  // Insert a label at the beginning of the function. This will be our first
  // basic block.
  Labels[0] = Ctx->createNamedTempSymbol("BB0");

  // Resolve a PC-relative memory operand to a symbol (+optional offset) and
  // rewrite the displacement to reference it. On evaluation failure, either
  // abort (relocation mode) or mark the function non-simple.
  auto handlePCRelOperand =
      [&](MCInst &Instruction, uint64_t Address, uint64_t Size) {
    uint64_t TargetAddress = 0;
    if (!MIB->evaluateMemOperandTarget(Instruction, TargetAddress, Address,
                                       Size)) {
      errs() << "BOLT-ERROR: PC-relative operand can't be evaluated:\n";
      BC.InstPrinter->printInst(&Instruction, 0, "", *BC.STI, errs());
      errs() << '\n';
      Instruction.dump_pretty(errs(), BC.InstPrinter.get());
      errs() << '\n';
      errs() << "BOLT-ERROR: cannot handle PC-relative operand at 0x"
             << Twine::utohexstr(Address) << ". Skipping function " << *this
             << ".\n";
      // In relocation mode an unevaluated PC-relative operand is fatal:
      // the operand would break once code is moved.
      if (BC.HasRelocations)
        exit(1);
      IsSimple = false;
      return;
    }
    if (TargetAddress == 0 && opts::Verbosity >= 1) {
      outs() << "BOLT-INFO: PC-relative operand is zero in function " << *this
             << '\n';
    }

    const MCSymbol *TargetSymbol;
    uint64_t TargetOffset;
    std::tie(TargetSymbol, TargetOffset) =
      BC.handleAddressRef(TargetAddress, *this, /*IsPCRel*/ true);
    const MCExpr *Expr = MCSymbolRefExpr::create(TargetSymbol,
                                                 MCSymbolRefExpr::VK_None,
                                                 *BC.Ctx);
    // Fold a non-zero offset from the symbol into the expression.
    if (TargetOffset) {
      const MCConstantExpr *Offset =
          MCConstantExpr::create(TargetOffset, *BC.Ctx);
      Expr = MCBinaryExpr::createAdd(Expr, Offset, *BC.Ctx);
    }
    MIB->replaceMemOperandDisp(
        Instruction, MCOperand::createExpr(BC.MIB->getTargetExprFor(
                         Instruction,
                         Expr,
                         *BC.Ctx, 0)));
  };

  // Used to fix the target of linker-generated AArch64 stubs with no relocation
  // info
  auto fixStubTarget = [&](MCInst &LoadLowBits, MCInst &LoadHiBits,
                           uint64_t Target) {
    const MCSymbol *TargetSymbol;
    uint64_t Addend = 0;
    std::tie(TargetSymbol, Addend) = BC.handleAddressRef(Target, *this, true);

    int64_t Val;
    MIB->replaceImmWithSymbolRef(LoadHiBits, TargetSymbol, Addend, Ctx.get(),
                                 Val, ELF::R_AARCH64_ADR_PREL_PG_HI21);
    MIB->replaceImmWithSymbolRef(LoadLowBits, TargetSymbol, Addend, Ctx.get(),
                                 Val, ELF::R_AARCH64_ADD_ABS_LO12_NC);
  };

  // Handle a direct branch/call leaving this function: record the
  // interprocedural reference, convert jumps into (conditional) tail calls,
  // and return the global symbol for the target address. May set IsCall.
  auto handleExternalReference = [&](MCInst &Instruction, uint64_t Size,
                                     uint64_t Offset, uint64_t TargetAddress,
                                     bool &IsCall) -> MCSymbol * {
    const bool IsCondBranch = MIB->isConditionalBranch(Instruction);
    const uint64_t AbsoluteInstrAddr = getAddress() + Offset;
    MCSymbol *TargetSymbol = nullptr;
    InterproceduralReferences.insert(TargetAddress);
    if (opts::Verbosity >= 2 && !IsCall && Size == 2 && !BC.HasRelocations) {
      errs() << "BOLT-WARNING: relaxed tail call detected at 0x"
             << Twine::utohexstr(AbsoluteInstrAddr) << " in function " << *this
             << ". Code size will be increased.\n";
    }

    assert(!MIB->isTailCall(Instruction) &&
           "synthetic tail call instruction found");

    // This is a call regardless of the opcode.
    // Assign proper opcode for tail calls, so that they could be
    // treated as calls.
    if (!IsCall) {
      if (!MIB->convertJmpToTailCall(Instruction)) {
        assert(IsCondBranch && "unknown tail call instruction");
        if (opts::Verbosity >= 2) {
          errs() << "BOLT-WARNING: conditional tail call detected in "
                 << "function " << *this << " at 0x"
                 << Twine::utohexstr(AbsoluteInstrAddr) << ".\n";
        }
      }
      IsCall = true;
    }

    TargetSymbol = BC.getOrCreateGlobalSymbol(TargetAddress, "FUNCat");
    if (opts::Verbosity >= 2 && TargetAddress == 0) {
      // We actually see calls to address 0 in presence of weak
      // symbols originating from libraries. This code is never meant
      // to be executed.
      outs() << "BOLT-INFO: Function " << *this
             << " has a call to address zero.\n";
    }

    return TargetSymbol;
  };

  // Classify an indirect branch (jump table, fixed branch, tail call, or
  // unknown) and transform the instruction accordingly.
  auto handleIndirectBranch = [&](MCInst &Instruction, uint64_t Size,
                                  uint64_t Offset) {
    uint64_t IndirectTarget = 0;
    IndirectBranchType Result =
        processIndirectBranch(Instruction, Size, Offset, IndirectTarget);
    switch (Result) {
    default:
      llvm_unreachable("unexpected result");
    case IndirectBranchType::POSSIBLE_TAIL_CALL: {
      bool Result = MIB->convertJmpToTailCall(Instruction);
      (void)Result;
      assert(Result);
      break;
    }
    case IndirectBranchType::POSSIBLE_JUMP_TABLE:
    case IndirectBranchType::POSSIBLE_PIC_JUMP_TABLE:
      // Without jump table support we cannot safely optimize the function.
      if (opts::JumpTables == JTS_NONE)
        IsSimple = false;
      break;
    case IndirectBranchType::POSSIBLE_FIXED_BRANCH: {
      if (containsAddress(IndirectTarget)) {
        // Fixed branch into this function: rewrite it as a direct
        // unconditional branch to a local label.
        const MCSymbol *TargetSymbol = getOrCreateLocalLabel(IndirectTarget);
        Instruction.clear();
        MIB->createUncondBranch(Instruction, TargetSymbol, BC.Ctx.get());
        TakenBranches.emplace_back(Offset, IndirectTarget - getAddress());
        HasFixedIndirectBranch = true;
      } else {
        MIB->convertJmpToTailCall(Instruction);
        InterproceduralReferences.insert(IndirectTarget);
      }
      break;
    }
    case IndirectBranchType::UNKNOWN:
      // Keep processing. We'll do more checks and fixes in
      // postProcessIndirectBranches().
      UnknownIndirectBranchOffsets.emplace(Offset);
      break;
    }
  };

  // Check for linker veneers, which lack relocations and need manual
  // adjustments.
  auto handleAArch64IndirectCall = [&](MCInst &Instruction, uint64_t Offset) {
    const uint64_t AbsoluteInstrAddr = getAddress() + Offset;
    MCInst *TargetHiBits, *TargetLowBits;
    uint64_t TargetAddress;
    if (MIB->matchLinkerVeneer(Instructions.begin(), Instructions.end(),
                               AbsoluteInstrAddr, Instruction, TargetHiBits,
                               TargetLowBits, TargetAddress)) {
      MIB->addAnnotation(Instruction, "AArch64Veneer", true);

      // Annotate the two preceding instructions that form the veneer.
      uint8_t Counter = 0;
      for (auto It = std::prev(Instructions.end()); Counter != 2;
           --It, ++Counter) {
        MIB->addAnnotation(It->second, "AArch64Veneer", true);
      }

      fixStubTarget(*TargetLowBits, *TargetHiBits, TargetAddress);
    }
  };

  // Main disassembly loop: decode one instruction (or skip one data-in-code
  // region) per iteration, advancing by its size.
  uint64_t Size = 0;  // instruction size
  for (uint64_t Offset = 0; Offset < getSize(); Offset += Size) {
    MCInst Instruction;
    const uint64_t AbsoluteInstrAddr = getAddress() + Offset;

    // Check for data inside code and ignore it
    if (const size_t DataInCodeSize = getSizeOfDataInCodeAt(Offset)) {
      Size = DataInCodeSize;
      continue;
    }

    if (!BC.DisAsm->getInstruction(Instruction,
                                   Size,
                                   FunctionData.slice(Offset),
                                   AbsoluteInstrAddr,
                                   nulls())) {
      // Functions with "soft" boundaries, e.g. coming from assembly source,
      // can have 0-byte padding at the end.
      if (isZeroPaddingAt(Offset))
        break;

      errs() << "BOLT-WARNING: unable to disassemble instruction at offset 0x"
             << Twine::utohexstr(Offset) << " (address 0x"
             << Twine::utohexstr(AbsoluteInstrAddr) << ") in function "
             << *this << '\n';
      // Some AVX-512 instructions could not be disassembled at all.
      if (BC.HasRelocations && opts::TrapOnAVX512 && BC.isX86()) {
        setTrapOnEntry();
        BC.TrappedFunctions.push_back(this);
      } else {
        setIgnored();
      }

      break;
    }

    // Check integrity of LLVM assembler/disassembler.
    if (opts::CheckEncoding && !BC.MIB->isBranch(Instruction) &&
        !BC.MIB->isCall(Instruction) && !BC.MIB->isNoop(Instruction)) {
      if (!BC.validateEncoding(Instruction, FunctionData.slice(Offset, Size))) {
        errs() << "BOLT-WARNING: mismatching LLVM encoding detected in "
               << "function " << *this << " for instruction :\n";
        BC.printInstruction(errs(), Instruction, AbsoluteInstrAddr);
        errs() << '\n';
      }
    }

    // Special handling for AVX-512 instructions.
    if (MIB->hasEVEXEncoding(Instruction)) {
      if (BC.HasRelocations && opts::TrapOnAVX512) {
        setTrapOnEntry();
        BC.TrappedFunctions.push_back(this);
        break;
      }

      // Check if our disassembly is correct and matches the assembler output.
      if (!BC.validateEncoding(Instruction, FunctionData.slice(Offset, Size))) {
        if (opts::Verbosity >= 1) {
          errs() << "BOLT-WARNING: internal assembler/disassembler error "
                    "detected for AVX512 instruction:\n";
          BC.printInstruction(errs(), Instruction, AbsoluteInstrAddr);
          errs() << " in function " << *this << '\n';
        }

        setIgnored();
        break;
      }
    }

    // Check if there's a relocation associated with this instruction.
    bool UsedReloc = false;
    for (auto Itr = Relocations.lower_bound(Offset),
         ItrE = Relocations.lower_bound(Offset + Size); Itr != ItrE; ++Itr) {
      const Relocation &Relocation = Itr->second;

      LLVM_DEBUG(dbgs() << "BOLT-DEBUG: replacing immediate 0x"
                        << Twine::utohexstr(Relocation.Value)
                        << " with relocation"
                           " against "
                        << Relocation.Symbol << "+" << Relocation.Addend
                        << " in function " << *this
                        << " for instruction at offset 0x"
                        << Twine::utohexstr(Offset) << '\n');

      // Process reference to the primary symbol.
      if (!Relocation.isPCRelative())
        BC.handleAddressRef(Relocation.Value - Relocation.Addend,
                            *this,
                            /*IsPCRel*/ false);

      int64_t Value = Relocation.Value;
      const bool Result = BC.MIB->replaceImmWithSymbolRef(
          Instruction, Relocation.Symbol, Relocation.Addend, Ctx.get(), Value,
          Relocation.Type);
      (void)Result;
      assert(Result && "cannot replace immediate with relocation");

      // For aarch, if we replaced an immediate with a symbol from a
      // relocation, we mark it so we do not try to further process a
      // pc-relative operand. All we need is the symbol.
      if (BC.isAArch64())
        UsedReloc = true;

      // Make sure we replaced the correct immediate (instruction
      // can have multiple immediate operands).
      if (BC.isX86()) {
        assert(truncateToSize(static_cast<uint64_t>(Value),
                              Relocation::getSizeForType(Relocation.Type)) ==
               truncateToSize(Relocation.Value,
                              Relocation::getSizeForType(Relocation.Type)) &&
                                "immediate value mismatch in function");
      }
    }

    // Convert instruction to a shorter version that could be relaxed if
    // needed.
    MIB->shortenInstruction(Instruction);

    if (MIB->isBranch(Instruction) || MIB->isCall(Instruction)) {
      uint64_t TargetAddress = 0;
      if (MIB->evaluateBranch(Instruction, AbsoluteInstrAddr, Size,
                              TargetAddress)) {
        // Check if the target is within the same function. Otherwise it's
        // a call, possibly a tail call.
        //
        // If the target *is* the function address it could be either a branch
        // or a recursive call.
        bool IsCall = MIB->isCall(Instruction);
        const bool IsCondBranch = MIB->isConditionalBranch(Instruction);
        MCSymbol *TargetSymbol = nullptr;

        if (BC.MIB->isUnsupportedBranch(Instruction.getOpcode())) {
          // Unsupported branches make both this function and its target
          // unsafe to rewrite.
          setIgnored();
          if (BinaryFunction *TargetFunc =
              BC.getBinaryFunctionContainingAddress(TargetAddress))
            TargetFunc->setIgnored();
        }

        if (IsCall && containsAddress(TargetAddress)) {
          if (TargetAddress == getAddress()) {
            // Recursive call.
            TargetSymbol = getSymbol();
          } else {
            if (BC.isX86()) {
              // Dangerous old-style x86 PIC code. We may need to freeze this
              // function, so preserve the function as is for now.
              PreserveNops = true;
            } else {
              errs() << "BOLT-WARNING: internal call detected at 0x"
                     << Twine::utohexstr(AbsoluteInstrAddr) << " in function "
                     << *this << ". Skipping.\n";
              IsSimple = false;
            }
          }
        }

        if (!TargetSymbol) {
          // Create either local label or external symbol.
          if (containsAddress(TargetAddress)) {
            TargetSymbol = getOrCreateLocalLabel(TargetAddress);
          } else {
            if (TargetAddress == getAddress() + getSize() &&
                TargetAddress < getAddress() + getMaxSize()) {
              // Result of __builtin_unreachable().
              LLVM_DEBUG(dbgs() << "BOLT-DEBUG: jump past end detected at 0x"
                                << Twine::utohexstr(AbsoluteInstrAddr)
                                << " in function " << *this
                                << " : replacing with nop.\n");
              BC.MIB->createNoop(Instruction);
              if (IsCondBranch) {
                // Register branch offset for profile validation.
                IgnoredBranches.emplace_back(Offset, Offset + Size);
              }
              goto add_instruction;
            }
            // May update Instruction and IsCall
            TargetSymbol = handleExternalReference(Instruction, Size, Offset,
                                                   TargetAddress, IsCall);
          }
        }

        if (!IsCall) {
          // Add taken branch info.
          TakenBranches.emplace_back(Offset, TargetAddress - getAddress());
        }
        BC.MIB->replaceBranchTarget(Instruction, TargetSymbol, &*Ctx);

        // Mark CTC.
        if (IsCondBranch && IsCall) {
          MIB->setConditionalTailCall(Instruction, TargetAddress);
        }
      } else {
        // Could not evaluate branch. Should be an indirect call or an
        // indirect branch. Bail out on the latter case.
        if (MIB->isIndirectBranch(Instruction))
          handleIndirectBranch(Instruction, Size, Offset);
        // Indirect call. We only need to fix it if the operand is RIP-relative.
        if (IsSimple && MIB->hasPCRelOperand(Instruction))
          handlePCRelOperand(Instruction, AbsoluteInstrAddr, Size);

        if (BC.isAArch64())
          handleAArch64IndirectCall(Instruction, Offset);
      }
    } else if (MIB->hasPCRelOperand(Instruction) && !UsedReloc)
      handlePCRelOperand(Instruction, AbsoluteInstrAddr, Size);

add_instruction:
    // Attach DWARF line info to the instruction when available.
    if (getDWARFLineTable()) {
      Instruction.setLoc(
          findDebugLineInformationForInstructionAt(AbsoluteInstrAddr,
                                                   getDWARFUnit(),
                                                   getDWARFLineTable()));
    }

    // Record offset of the instruction for profile matching.
    if (BC.keepOffsetForInstruction(Instruction)) {
      MIB->addAnnotation(Instruction, "Offset", static_cast<uint32_t>(Offset));
    }

    addInstruction(Offset, std::move(Instruction));
  }

  // Input relocations are consumed; free the memory.
  clearList(Relocations);

  // A non-simple function cannot be safely rewritten — discard instructions.
  if (!IsSimple) {
    clearList(Instructions);
    return false;
  }

  updateState(State::Disassembled);

  return true;
}
1453 
/// Scan a function that will not be fully processed (e.g. non-simple or
/// skipped) for references to other functions, so those references stay
/// valid after the rest of the binary is rewritten. Creates secondary entry
/// points for mid-function targets and, in relocation mode, emits pending
/// relocations against symbols that may move. Returns true if all external
/// references were successfully converted to relocations.
bool BinaryFunction::scanExternalRefs() {
  bool Success = true;
  bool DisassemblyFailed = false;

  // Ignore pseudo functions.
  if (isPseudo())
    return Success;

  if (opts::NoScan) {
    clearList(Relocations);
    clearList(ExternallyReferencedOffsets);

    return false;
  }

  // List of external references for this function.
  std::vector<Relocation> FunctionRelocations;

  // A dedicated emitter/context so scanning does not pollute the main
  // MCContext with symbols.
  static BinaryContext::IndependentCodeEmitter Emitter =
      BC.createIndependentMCCodeEmitter();

  ErrorOr<ArrayRef<uint8_t>> ErrorOrFunctionData = getData();
  assert(ErrorOrFunctionData && "function data is not available");
  ArrayRef<uint8_t> FunctionData = *ErrorOrFunctionData;
  assert(FunctionData.size() == getMaxSize() &&
         "function size does not match raw data size");

  uint64_t Size = 0;  // instruction size
  for (uint64_t Offset = 0; Offset < getSize(); Offset += Size) {
    // Check for data inside code and ignore it
    if (const size_t DataInCodeSize = getSizeOfDataInCodeAt(Offset)) {
      Size = DataInCodeSize;
      continue;
    }

    const uint64_t AbsoluteInstrAddr = getAddress() + Offset;
    MCInst Instruction;
    if (!BC.DisAsm->getInstruction(Instruction,
                                   Size,
                                   FunctionData.slice(Offset),
                                   AbsoluteInstrAddr,
                                   nulls())) {
      if (opts::Verbosity >= 1 && !isZeroPaddingAt(Offset)) {
        errs() << "BOLT-WARNING: unable to disassemble instruction at offset 0x"
               << Twine::utohexstr(Offset) << " (address 0x"
               << Twine::utohexstr(AbsoluteInstrAddr) << ") in function "
               << *this << '\n';
      }
      Success = false;
      DisassemblyFailed = true;
      break;
    }

    // Return true if we can skip handling the Target function reference.
    auto ignoreFunctionRef = [&](const BinaryFunction &Target) {
      if (&Target == this)
        return true;

      // Note that later we may decide not to emit Target function. In that
      // case, we conservatively create references that will be ignored or
      // resolved to the same function.
      if (!BC.shouldEmit(Target))
        return true;

      return false;
    };

    // Return true if we can ignore reference to the symbol.
    auto ignoreReference = [&](const MCSymbol *TargetSymbol) {
      if (!TargetSymbol)
        return true;

      if (BC.forceSymbolRelocations(TargetSymbol->getName()))
        return false;

      BinaryFunction *TargetFunction = BC.getFunctionForSymbol(TargetSymbol);
      if (!TargetFunction)
        return true;

      return ignoreFunctionRef(*TargetFunction);
    };

    // Detect if the instruction references an address.
    // Without relocations, we can only trust PC-relative address modes.
    uint64_t TargetAddress = 0;
    bool IsPCRel = false;
    bool IsBranch = false;
    if (BC.MIB->hasPCRelOperand(Instruction)) {
      if (BC.MIB->evaluateMemOperandTarget(Instruction, TargetAddress,
                                           AbsoluteInstrAddr, Size)) {
        IsPCRel = true;
      }
    } else if (BC.MIB->isCall(Instruction) || BC.MIB->isBranch(Instruction)) {
      if (BC.MIB->evaluateBranch(Instruction, AbsoluteInstrAddr, Size,
                                 TargetAddress)) {
        IsBranch = true;
      }
    }

    MCSymbol *TargetSymbol = nullptr;

    // Create an entry point at reference address if needed.
    BinaryFunction *TargetFunction =
        BC.getBinaryFunctionContainingAddress(TargetAddress);
    if (TargetFunction && !ignoreFunctionRef(*TargetFunction)) {
      const uint64_t FunctionOffset =
        TargetAddress - TargetFunction->getAddress();
      TargetSymbol = FunctionOffset
                        ? TargetFunction->addEntryPointAtOffset(FunctionOffset)
                        : TargetFunction->getSymbol();
    }

    // Can't find more references and not creating relocations.
    if (!BC.HasRelocations)
      continue;

    // Create a relocation against the TargetSymbol as the symbol might get
    // moved.
    if (TargetSymbol) {
      if (IsBranch) {
        BC.MIB->replaceBranchTarget(Instruction, TargetSymbol,
                                    Emitter.LocalCtx.get());
      } else if (IsPCRel) {
        const MCExpr *Expr = MCSymbolRefExpr::create(TargetSymbol,
                                                     MCSymbolRefExpr::VK_None,
                                                     *Emitter.LocalCtx.get());
        BC.MIB->replaceMemOperandDisp(
            Instruction, MCOperand::createExpr(BC.MIB->getTargetExprFor(
                             Instruction,
                             Expr,
                             *Emitter.LocalCtx.get(), 0)));
      }
    }

    // Create more relocations based on input file relocations.
    bool HasRel = false;
    for (auto Itr = Relocations.lower_bound(Offset),
         ItrE = Relocations.lower_bound(Offset + Size); Itr != ItrE; ++Itr) {
      Relocation &Relocation = Itr->second;
      if (ignoreReference(Relocation.Symbol))
        continue;

      int64_t Value = Relocation.Value;
      const bool Result =
        BC.MIB->replaceImmWithSymbolRef(Instruction,
                                        Relocation.Symbol,
                                        Relocation.Addend,
                                        Emitter.LocalCtx.get(),
                                        Value,
                                        Relocation.Type);
      (void)Result;
      assert(Result && "cannot replace immediate with relocation");

      HasRel = true;
    }

    // Nothing symbolized in this instruction — no relocations to generate.
    if (!TargetSymbol && !HasRel)
      continue;

    // Emit the instruction using temp emitter and generate relocations.
    SmallString<256> Code;
    SmallVector<MCFixup, 4> Fixups;
    raw_svector_ostream VecOS(Code);
    Emitter.MCE->encodeInstruction(Instruction, VecOS, Fixups, *BC.STI);

    // Create relocation for every fixup.
    for (const MCFixup &Fixup : Fixups) {
      Optional<Relocation> Rel = BC.MIB->createRelocation(Fixup, *BC.MAB);
      if (!Rel) {
        Success = false;
        continue;
      }

      if (Relocation::getSizeForType(Rel->Type) < 4) {
        // If the instruction uses a short form, then we might not be able
        // to handle the rewrite without relaxation, and hence cannot reliably
        // create an external reference relocation.
        Success = false;
        continue;
      }
      // Convert the fixup offset (relative to the instruction) into a
      // section-relative relocation offset.
      Rel->Offset +=  getAddress() - getOriginSection()->getAddress() + Offset;
      FunctionRelocations.push_back(*Rel);
    }

    if (!Success)
      break;
  }

  // Add relocations unless disassembly failed for this function.
  if (!DisassemblyFailed) {
    for (Relocation &Rel : FunctionRelocations) {
      getOriginSection()->addPendingRelocation(Rel);
    }
  }

  // Inform BinaryContext that this function symbols will not be defined and
  // relocations should not be created against them.
  if (BC.HasRelocations) {
    for (std::pair<const uint32_t, MCSymbol *> &LI : Labels) {
      BC.UndefinedSymbols.insert(LI.second);
    }
    if (FunctionEndLabel) {
      BC.UndefinedSymbols.insert(FunctionEndLabel);
    }
  }

  clearList(Relocations);
  clearList(ExternallyReferencedOffsets);

  if (Success && BC.HasRelocations) {
    HasExternalRefRelocations = true;
  }

  if (opts::Verbosity >= 1 && !Success) {
    outs() << "BOLT-INFO: failed to scan refs for  " << *this << '\n';
  }

  return Success;
}
1673 
1674 void BinaryFunction::postProcessEntryPoints() {
1675   if (!isSimple())
1676     return;
1677 
1678   for (auto &KV : Labels) {
1679     MCSymbol *Label = KV.second;
1680     if (!getSecondaryEntryPointSymbol(Label))
1681       continue;
1682 
1683     // In non-relocation mode there's potentially an external undetectable
1684     // reference to the entry point and hence we cannot move this entry
1685     // point. Optimizing without moving could be difficult.
1686     if (!BC.HasRelocations)
1687       setSimple(false);
1688 
1689     const uint32_t Offset = KV.first;
1690 
1691     // If we are at Offset 0 and there is no instruction associated with it,
1692     // this means this is an empty function. Just ignore. If we find an
1693     // instruction at this offset, this entry point is valid.
1694     if (!Offset || getInstructionAtOffset(Offset)) {
1695       continue;
1696     }
1697 
1698     // On AArch64 there are legitimate reasons to have references past the
1699     // end of the function, e.g. jump tables.
1700     if (BC.isAArch64() && Offset == getSize()) {
1701       continue;
1702     }
1703 
1704     errs() << "BOLT-WARNING: reference in the middle of instruction "
1705               "detected in function " << *this
1706            << " at offset 0x" << Twine::utohexstr(Offset) << '\n';
1707     if (BC.HasRelocations) {
1708       setIgnored();
1709     }
1710     setSimple(false);
1711     return;
1712   }
1713 }
1714 
/// Finalize jump tables after the initial disassembly pass: materialize
/// labels for table entries, register taken branches originating from jump
/// table sites, and release intermediate data.
void BinaryFunction::postProcessJumpTables() {
  // Create labels for all entries.
  for (auto &JTI : JumpTables) {
    JumpTable &JT = *JTI.second;
    // PIC jump tables store offsets and cannot be emitted in place by the
    // basic strategy; force the "move" strategy globally.
    if (JT.Type == JumpTable::JTT_PIC && opts::JumpTables == JTS_BASIC) {
      opts::JumpTables = JTS_MOVE;
      outs() << "BOLT-INFO: forcing -jump-tables=move as PIC jump table was "
                "detected in function " << *this << '\n';
    }
    // Convert raw offsets into labels; entries may legitimately point one
    // past the end of the function (e.g. __builtin_unreachable()), hence
    // CreatePastEnd.
    for (unsigned I = 0; I < JT.OffsetEntries.size(); ++I) {
      MCSymbol *Label =
          getOrCreateLocalLabel(getAddress() + JT.OffsetEntries[I],
                                /*CreatePastEnd*/ true);
      JT.Entries.push_back(Label);
    }

    // Ensure the BinaryData object covering the table has a size; a zero
    // size means it was not set yet.
    const uint64_t BDSize =
        BC.getBinaryDataAtAddress(JT.getAddress())->getSize();
    if (!BDSize) {
      BC.setBinaryDataSize(JT.getAddress(), JT.getSize());
    } else {
      assert(BDSize >= JT.getSize() &&
             "jump table cannot be larger than the containing object");
    }
  }

  // Add TakenBranches from JumpTables.
  //
  // We want to do it after initial processing since we don't know jump tables'
  // boundaries until we process them all.
  for (auto &JTSite : JTSites) {
    const uint64_t JTSiteOffset = JTSite.first;
    const uint64_t JTAddress = JTSite.second;
    const JumpTable *JT = getJumpTableContainingAddress(JTAddress);
    assert(JT && "cannot find jump table for address");

    // Walk entries starting from the referenced address; the site may point
    // into the middle of a merged table.
    uint64_t EntryOffset = JTAddress - JT->getAddress();
    while (EntryOffset < JT->getSize()) {
      uint64_t TargetOffset = JT->OffsetEntries[EntryOffset / JT->EntrySize];
      // Targets at or past the function end (e.g. unreachable padding) are
      // not recorded as branches.
      if (TargetOffset < getSize()) {
        TakenBranches.emplace_back(JTSiteOffset, TargetOffset);

        if (opts::StrictMode)
          registerReferencedOffset(TargetOffset);
      }

      EntryOffset += JT->EntrySize;

      // A label at the next entry means the end of this jump table.
      if (JT->Labels.count(EntryOffset))
        break;
    }
  }
  clearList(JTSites);

  // Free memory used by jump table offsets.
  for (auto &JTI : JumpTables) {
    JumpTable &JT = *JTI.second;
    clearList(JT.OffsetEntries);
  }

  // Conservatively populate all possible destinations for unknown indirect
  // branches.
  if (opts::StrictMode && hasInternalReference()) {
    for (uint64_t Offset : UnknownIndirectBranchOffsets) {
      for (uint64_t PossibleDestination : ExternallyReferencedOffsets) {
        // Ignore __builtin_unreachable().
        if (PossibleDestination == getSize())
          continue;
        TakenBranches.emplace_back(Offset, PossibleDestination);
      }
    }
  }

  // Remove duplicate branches. We can get a bunch of them from jump tables.
  // Without doing jump table value profiling we don't have use for extra
  // (duplicate) branches.
  std::sort(TakenBranches.begin(), TakenBranches.end());
  auto NewEnd = std::unique(TakenBranches.begin(), TakenBranches.end());
  TakenBranches.erase(NewEnd, TakenBranches.end());
}
1796 
1797 bool BinaryFunction::postProcessIndirectBranches(
1798     MCPlusBuilder::AllocatorIdTy AllocId) {
1799   auto addUnknownControlFlow = [&](BinaryBasicBlock &BB) {
1800     HasUnknownControlFlow = true;
1801     BB.removeAllSuccessors();
1802     for (uint64_t PossibleDestination : ExternallyReferencedOffsets) {
1803       if (BinaryBasicBlock *SuccBB = getBasicBlockAtOffset(PossibleDestination))
1804         BB.addSuccessor(SuccBB);
1805     }
1806   };
1807 
1808   uint64_t NumIndirectJumps = 0;
1809   MCInst *LastIndirectJump = nullptr;
1810   BinaryBasicBlock *LastIndirectJumpBB = nullptr;
1811   uint64_t LastJT = 0;
1812   uint16_t LastJTIndexReg = BC.MIB->getNoRegister();
1813   for (BinaryBasicBlock *BB : layout()) {
1814     for (MCInst &Instr : *BB) {
1815       if (!BC.MIB->isIndirectBranch(Instr))
1816         continue;
1817 
1818       // If there's an indirect branch in a single-block function -
1819       // it must be a tail call.
1820       if (layout_size() == 1) {
1821         BC.MIB->convertJmpToTailCall(Instr);
1822         return true;
1823       }
1824 
1825       ++NumIndirectJumps;
1826 
1827       if (opts::StrictMode && !hasInternalReference()) {
1828         BC.MIB->convertJmpToTailCall(Instr);
1829         break;
1830       }
1831 
1832       // Validate the tail call or jump table assumptions now that we know
1833       // basic block boundaries.
1834       if (BC.MIB->isTailCall(Instr) || BC.MIB->getJumpTable(Instr)) {
1835         const unsigned PtrSize = BC.AsmInfo->getCodePointerSize();
1836         MCInst *MemLocInstr;
1837         unsigned BaseRegNum, IndexRegNum;
1838         int64_t DispValue;
1839         const MCExpr *DispExpr;
1840         MCInst *PCRelBaseInstr;
1841         IndirectBranchType Type = BC.MIB->analyzeIndirectBranch(
1842             Instr, BB->begin(), BB->end(), PtrSize, MemLocInstr, BaseRegNum,
1843             IndexRegNum, DispValue, DispExpr, PCRelBaseInstr);
1844         if (Type != IndirectBranchType::UNKNOWN || MemLocInstr != nullptr)
1845           continue;
1846 
1847         if (!opts::StrictMode)
1848           return false;
1849 
1850         if (BC.MIB->isTailCall(Instr)) {
1851           BC.MIB->convertTailCallToJmp(Instr);
1852         } else {
1853           LastIndirectJump = &Instr;
1854           LastIndirectJumpBB = BB;
1855           LastJT = BC.MIB->getJumpTable(Instr);
1856           LastJTIndexReg = BC.MIB->getJumpTableIndexReg(Instr);
1857           BC.MIB->unsetJumpTable(Instr);
1858 
1859           JumpTable *JT = BC.getJumpTableContainingAddress(LastJT);
1860           if (JT->Type == JumpTable::JTT_NORMAL) {
1861             // Invalidating the jump table may also invalidate other jump table
1862             // boundaries. Until we have/need a support for this, mark the
1863             // function as non-simple.
1864             LLVM_DEBUG(dbgs() << "BOLT-DEBUG: rejected jump table reference"
1865                               << JT->getName() << " in " << *this << '\n');
1866             return false;
1867           }
1868         }
1869 
1870         addUnknownControlFlow(*BB);
1871         continue;
1872       }
1873 
1874       // If this block contains an epilogue code and has an indirect branch,
1875       // then most likely it's a tail call. Otherwise, we cannot tell for sure
1876       // what it is and conservatively reject the function's CFG.
1877       bool IsEpilogue = false;
1878       for (const MCInst &Instr : *BB) {
1879         if (BC.MIB->isLeave(Instr) || BC.MIB->isPop(Instr)) {
1880           IsEpilogue = true;
1881           break;
1882         }
1883       }
1884       if (IsEpilogue) {
1885         BC.MIB->convertJmpToTailCall(Instr);
1886         BB->removeAllSuccessors();
1887         continue;
1888       }
1889 
1890       if (opts::Verbosity >= 2) {
1891         outs() << "BOLT-INFO: rejected potential indirect tail call in "
1892                << "function " << *this << " in basic block "
1893                << BB->getName() << ".\n";
1894         LLVM_DEBUG(BC.printInstructions(dbgs(), BB->begin(), BB->end(),
1895                                         BB->getOffset(), this, true));
1896       }
1897 
1898       if (!opts::StrictMode)
1899         return false;
1900 
1901       addUnknownControlFlow(*BB);
1902     }
1903   }
1904 
1905   if (HasInternalLabelReference)
1906     return false;
1907 
1908   // If there's only one jump table, and one indirect jump, and no other
1909   // references, then we should be able to derive the jump table even if we
1910   // fail to match the pattern.
1911   if (HasUnknownControlFlow && NumIndirectJumps == 1 &&
1912       JumpTables.size() == 1 && LastIndirectJump) {
1913     BC.MIB->setJumpTable(*LastIndirectJump, LastJT, LastJTIndexReg, AllocId);
1914     HasUnknownControlFlow = false;
1915 
1916     // re-populate successors based on the jump table.
1917     std::set<const MCSymbol *> JTLabels;
1918     LastIndirectJumpBB->removeAllSuccessors();
1919     const JumpTable *JT = getJumpTableContainingAddress(LastJT);
1920     for (const MCSymbol *Label : JT->Entries) {
1921       JTLabels.emplace(Label);
1922     }
1923     for (const MCSymbol *Label : JTLabels) {
1924       BinaryBasicBlock *BB = getBasicBlockForLabel(Label);
1925       // Ignore __builtin_unreachable()
1926       if (!BB) {
1927         assert(Label == getFunctionEndLabel() && "if no BB found, must be end");
1928         continue;
1929       }
1930       LastIndirectJumpBB->addSuccessor(BB);
1931     }
1932   }
1933 
1934   if (HasFixedIndirectBranch)
1935     return false;
1936 
1937   if (HasUnknownControlFlow && !BC.HasRelocations)
1938     return false;
1939 
1940   return true;
1941 }
1942 
1943 void BinaryFunction::recomputeLandingPads() {
1944   updateBBIndices(0);
1945 
1946   for (BinaryBasicBlock *BB : BasicBlocks) {
1947     BB->LandingPads.clear();
1948     BB->Throwers.clear();
1949   }
1950 
1951   for (BinaryBasicBlock *BB : BasicBlocks) {
1952     std::unordered_set<const BinaryBasicBlock *> BBLandingPads;
1953     for (MCInst &Instr : *BB) {
1954       if (!BC.MIB->isInvoke(Instr))
1955         continue;
1956 
1957       const Optional<MCPlus::MCLandingPad> EHInfo = BC.MIB->getEHInfo(Instr);
1958       if (!EHInfo || !EHInfo->first)
1959         continue;
1960 
1961       BinaryBasicBlock *LPBlock = getBasicBlockForLabel(EHInfo->first);
1962       if (!BBLandingPads.count(LPBlock)) {
1963         BBLandingPads.insert(LPBlock);
1964         BB->LandingPads.emplace_back(LPBlock);
1965         LPBlock->Throwers.emplace_back(BB);
1966       }
1967     }
1968   }
1969 }
1970 
/// Build the control-flow graph from the disassembled instruction stream.
///
/// Partitions Instructions into basic blocks, connects taken and
/// fall-through edges, attaches CFI pseudo-instructions and landing-pad
/// info, and transitions the function to State::CFG.
///
/// \param AllocatorId annotator allocator used for "Offset" annotations.
/// \returns true if a CFG was successfully constructed.
bool BinaryFunction::buildCFG(MCPlusBuilder::AllocatorIdTy AllocatorId) {
  auto &MIB = BC.MIB;

  // Non-simple functions cannot be processed; in relocation mode such
  // functions must not exist at this point.
  if (!isSimple()) {
    assert(!BC.HasRelocations &&
           "cannot process file with non-simple function in relocs mode");
    return false;
  }

  if (CurrentState != State::Disassembled)
    return false;

  assert(BasicBlocks.empty() && "basic block list should be empty");
  assert((Labels.find(0) != Labels.end()) &&
         "first instruction should always have a label");

  // Create basic blocks in the original layout order:
  //
  //  * Every instruction with associated label marks
  //    the beginning of a basic block.
  //  * Conditional instruction marks the end of a basic block,
  //    except when the following instruction is an
  //    unconditional branch, and the unconditional branch is not
  //    a destination of another branch. In the latter case, the
  //    basic block will consist of a single unconditional branch
  //    (missed "double-jump" optimization).
  //
  // Created basic blocks are sorted in layout order since they are
  // created in the same order as instructions, and instructions are
  // sorted by offsets.
  BinaryBasicBlock *InsertBB = nullptr;
  BinaryBasicBlock *PrevBB = nullptr;
  bool IsLastInstrNop = false;
  uint64_t LastInstrOffset = 0;

  // Append all CFI pseudos registered at CFIOffset to the end of InsertBB.
  auto addCFIPlaceholders =
      [this](uint64_t CFIOffset, BinaryBasicBlock *InsertBB) {
        for (auto FI = OffsetToCFI.lower_bound(CFIOffset),
                  FE = OffsetToCFI.upper_bound(CFIOffset);
             FI != FE; ++FI) {
          addCFIPseudo(InsertBB, InsertBB->end(), FI->second);
        }
      };

  // For profiling purposes we need to save the offset of the last instruction
  // in the basic block. But in certain cases we don't if the instruction was
  // the last one, and we have to go back and update its offset.
  auto updateOffset = [&](uint64_t Offset) {
    assert(PrevBB && PrevBB != InsertBB && "invalid previous block");
    MCInst *PrevInstr = PrevBB->getLastNonPseudoInstr();
    if (PrevInstr && !MIB->hasAnnotation(*PrevInstr, "Offset"))
      MIB->addAnnotation(*PrevInstr, "Offset", static_cast<uint32_t>(Offset),
                         AllocatorId);
  };

  for (auto I = Instructions.begin(), E = Instructions.end(); I != E; ++I) {
    const uint32_t Offset = I->first;
    MCInst &Instr = I->second;

    auto LI = Labels.find(Offset);
    if (LI != Labels.end()) {
      // Always create new BB at branch destination.
      PrevBB = InsertBB;
      InsertBB = addBasicBlock(LI->first, LI->second,
                               opts::PreserveBlocksAlignment && IsLastInstrNop);
      if (PrevBB)
        updateOffset(LastInstrOffset);
    }

    // SDT/LK markers are nops carrying semantic meaning; keep track of their
    // original offsets via annotations.
    const uint64_t InstrInputAddr = I->first + Address;
    bool IsSDTMarker =
        MIB->isNoop(Instr) && BC.SDTMarkers.count(InstrInputAddr);
    bool IsLKMarker = BC.LKMarkers.count(InstrInputAddr);
    if (IsSDTMarker || IsLKMarker) {
      HasSDTMarker = true;
      LLVM_DEBUG(dbgs() << "SDTMarker or LKMarker detected in the input at : "
                        << utohexstr(InstrInputAddr) << "\n");
      if (!MIB->hasAnnotation(Instr, "Offset")) {
        MIB->addAnnotation(Instr, "Offset", static_cast<uint32_t>(Offset),
                           AllocatorId);
      }
    }

    // Ignore nops except SDT markers. We use nops to derive alignment of the
    // next basic block. It will not always work, as some blocks are naturally
    // aligned, but it's just part of heuristic for block alignment.
    if (MIB->isNoop(Instr) && !PreserveNops && !IsSDTMarker && !IsLKMarker) {
      IsLastInstrNop = true;
      continue;
    }
    if (!InsertBB) {
      // It must be a fallthrough or unreachable code. Create a new block unless
      // we see an unconditional branch following a conditional one. The latter
      // should not be a conditional tail call.
      assert(PrevBB && "no previous basic block for a fall through");
      MCInst *PrevInstr = PrevBB->getLastNonPseudoInstr();
      assert(PrevInstr && "no previous instruction for a fall through");
      if (MIB->isUnconditionalBranch(Instr) &&
          !MIB->isUnconditionalBranch(*PrevInstr) &&
          !MIB->getConditionalTailCall(*PrevInstr)) {
        // Temporarily restore inserter basic block.
        InsertBB = PrevBB;
      } else {
        MCSymbol *Label;
        {
          // Symbol creation requires exclusive access to the context.
          auto L = BC.scopeLock();
          Label = BC.Ctx->createNamedTempSymbol("FT");
        }
        InsertBB = addBasicBlock(
            Offset, Label, opts::PreserveBlocksAlignment && IsLastInstrNop);
        updateOffset(LastInstrOffset);
      }
    }
    if (Offset == 0) {
      // Add associated CFI pseudos in the first offset (0)
      addCFIPlaceholders(0, InsertBB);
    }

    const bool IsBlockEnd = MIB->isTerminator(Instr);
    IsLastInstrNop = MIB->isNoop(Instr);
    LastInstrOffset = Offset;
    InsertBB->addInstruction(std::move(Instr));

    // Add associated CFI instrs. We always add the CFI instruction that is
    // located immediately after this instruction, since the next CFI
    // instruction reflects the change in state caused by this instruction.
    auto NextInstr = std::next(I);
    uint64_t CFIOffset;
    if (NextInstr != E)
      CFIOffset = NextInstr->first;
    else
      CFIOffset = getSize();

    // Note: this potentially invalidates instruction pointers/iterators.
    addCFIPlaceholders(CFIOffset, InsertBB);

    if (IsBlockEnd) {
      PrevBB = InsertBB;
      InsertBB = nullptr;
    }
  }

  if (BasicBlocks.empty()) {
    setSimple(false);
    return false;
  }

  // Intermediate dump.
  LLVM_DEBUG(print(dbgs(), "after creating basic blocks"));

  // TODO: handle properly calls to no-return functions,
  // e.g. exit(3), etc. Otherwise we'll see a false fall-through
  // blocks.

  // Connect CFG edges for explicitly-taken branches recorded during
  // disassembly/jump table processing.
  for (std::pair<uint32_t, uint32_t> &Branch : TakenBranches) {
    LLVM_DEBUG(dbgs() << "registering branch [0x"
                      << Twine::utohexstr(Branch.first) << "] -> [0x"
                      << Twine::utohexstr(Branch.second) << "]\n");
    BinaryBasicBlock *FromBB = getBasicBlockContainingOffset(Branch.first);
    BinaryBasicBlock *ToBB = getBasicBlockAtOffset(Branch.second);
    if (!FromBB || !ToBB) {
      if (!FromBB)
        errs() << "BOLT-ERROR: cannot find BB containing the branch.\n";
      if (!ToBB)
        errs() << "BOLT-ERROR: cannot find BB containing branch destination.\n";
      BC.exitWithBugReport("disassembly failed - inconsistent branch found.",
                           *this);
    }

    FromBB->addSuccessor(ToBB);
  }

  // Add fall-through branches.
  PrevBB = nullptr;
  bool IsPrevFT = false; // Is previous block a fall-through.
  for (BinaryBasicBlock *BB : BasicBlocks) {
    if (IsPrevFT) {
      PrevBB->addSuccessor(BB);
    }
    if (BB->empty()) {
      // Empty (pseudo-only) blocks always fall through.
      IsPrevFT = true;
      PrevBB = BB;
      continue;
    }

    MCInst *LastInstr = BB->getLastNonPseudoInstr();
    assert(LastInstr &&
           "should have non-pseudo instruction in non-empty block");

    if (BB->succ_size() == 0) {
      // Since there's no existing successors, we know the last instruction is
      // not a conditional branch. Thus if it's a terminator, it shouldn't be a
      // fall-through.
      //
      // Conditional tail call is a special case since we don't add a taken
      // branch successor for it.
      IsPrevFT = !MIB->isTerminator(*LastInstr) ||
                 MIB->getConditionalTailCall(*LastInstr);
    } else if (BB->succ_size() == 1) {
      // A single successor with a conditional branch implies the not-taken
      // path falls through to the next block.
      IsPrevFT = MIB->isConditionalBranch(*LastInstr);
    } else {
      IsPrevFT = false;
    }

    PrevBB = BB;
  }

  // Assign landing pads and throwers info.
  recomputeLandingPads();

  // Assign CFI information to each BB entry.
  annotateCFIState();

  // Annotate invoke instructions with GNU_args_size data.
  propagateGnuArgsSizeInfo(AllocatorId);

  // Set the basic block layout to the original order and set end offsets.
  PrevBB = nullptr;
  for (BinaryBasicBlock *BB : BasicBlocks) {
    BasicBlocksLayout.emplace_back(BB);
    if (PrevBB)
      PrevBB->setEndOffset(BB->getOffset());
    PrevBB = BB;
  }
  PrevBB->setEndOffset(getSize());

  updateLayoutIndices();

  normalizeCFIState();

  // Clean-up memory taken by intermediate structures.
  //
  // NB: don't clear Labels list as we may need them if we mark the function
  //     as non-simple later in the process of discovering extra entry points.
  clearList(Instructions);
  clearList(OffsetToCFI);
  clearList(TakenBranches);

  // Update the state.
  CurrentState = State::CFG;

  // Make any necessary adjustments for indirect branches.
  if (!postProcessIndirectBranches(AllocatorId)) {
    if (opts::Verbosity) {
      errs() << "BOLT-WARNING: failed to post-process indirect branches for "
             << *this << '\n';
    }
    // In relocation mode we want to keep processing the function but avoid
    // optimizing it.
    setSimple(false);
  }

  clearList(ExternallyReferencedOffsets);
  clearList(UnknownIndirectBranchOffsets);

  return true;
}
2228 
2229 void BinaryFunction::postProcessCFG() {
2230   if (isSimple() && !BasicBlocks.empty()) {
2231     // Convert conditional tail call branches to conditional branches that jump
2232     // to a tail call.
2233     removeConditionalTailCalls();
2234 
2235     postProcessProfile();
2236 
2237     // Eliminate inconsistencies between branch instructions and CFG.
2238     postProcessBranches();
2239   }
2240 
2241   calculateMacroOpFusionStats();
2242 
2243   // The final cleanup of intermediate structures.
2244   clearList(IgnoredBranches);
2245 
2246   // Remove "Offset" annotations, unless we need an address-translation table
2247   // later. This has no cost, since annotations are allocated by a bumpptr
2248   // allocator and won't be released anyway until late in the pipeline.
2249   if (!requiresAddressTranslation() && !opts::Instrument)
2250     for (BinaryBasicBlock *BB : layout())
2251       for (MCInst &Inst : *BB)
2252         BC.MIB->removeAnnotation(Inst, "Offset");
2253 
2254   assert((!isSimple() || validateCFG()) &&
2255          "invalid CFG detected after post-processing");
2256 }
2257 
2258 void BinaryFunction::calculateMacroOpFusionStats() {
2259   if (!getBinaryContext().isX86())
2260     return;
2261   for (BinaryBasicBlock *BB : layout()) {
2262     auto II = BB->getMacroOpFusionPair();
2263     if (II == BB->end())
2264       continue;
2265 
2266     // Check offset of the second instruction.
2267     // FIXME: arch-specific.
2268     const uint32_t Offset =
2269         BC.MIB->getAnnotationWithDefault<uint32_t>(*std::next(II), "Offset", 0);
2270     if (!Offset || (getAddress() + Offset) % 64)
2271       continue;
2272 
2273     LLVM_DEBUG(dbgs() << "\nmissed macro-op fusion at address 0x"
2274                       << Twine::utohexstr(getAddress() + Offset)
2275                       << " in function " << *this << "; executed "
2276                       << BB->getKnownExecutionCount() << " times.\n");
2277     ++BC.MissedMacroFusionPairs;
2278     BC.MissedMacroFusionExecCount += BB->getKnownExecutionCount();
2279   }
2280 }
2281 
2282 void BinaryFunction::removeTagsFromProfile() {
2283   for (BinaryBasicBlock *BB : BasicBlocks) {
2284     if (BB->ExecutionCount == BinaryBasicBlock::COUNT_NO_PROFILE)
2285       BB->ExecutionCount = 0;
2286     for (BinaryBasicBlock::BinaryBranchInfo &BI : BB->branch_info()) {
2287       if (BI.Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
2288           BI.MispredictedCount != BinaryBasicBlock::COUNT_NO_PROFILE)
2289         continue;
2290       BI.Count = 0;
2291       BI.MispredictedCount = 0;
2292     }
2293   }
2294 }
2295 
/// Rewrite every conditional tail call (CTC) as a conditional branch to a
/// newly created basic block containing an unconditional tail call, updating
/// CFG edges and profile counts accordingly.
void BinaryFunction::removeConditionalTailCalls() {
  // Blocks to be appended at the end.
  std::vector<std::unique_ptr<BinaryBasicBlock>> NewBlocks;

  for (auto BBI = begin(); BBI != end(); ++BBI) {
    BinaryBasicBlock &BB = *BBI;
    MCInst *CTCInstr = BB.getLastNonPseudoInstr();
    if (!CTCInstr)
      continue;

    // Only process blocks that end in a conditional tail call.
    Optional<uint64_t> TargetAddressOrNone =
        BC.MIB->getConditionalTailCall(*CTCInstr);
    if (!TargetAddressOrNone)
      continue;

    // Gather all necessary information about CTC instruction before
    // annotations are destroyed.
    const int32_t CFIStateBeforeCTC = BB.getCFIStateAtInstr(CTCInstr);
    uint64_t CTCTakenCount = BinaryBasicBlock::COUNT_NO_PROFILE;
    uint64_t CTCMispredCount = BinaryBasicBlock::COUNT_NO_PROFILE;
    if (hasValidProfile()) {
      CTCTakenCount =
        BC.MIB->getAnnotationWithDefault<uint64_t>(*CTCInstr, "CTCTakenCount");
      CTCMispredCount =
        BC.MIB->getAnnotationWithDefault<uint64_t>(*CTCInstr,
                                                   "CTCMispredCount");
    }

    // Assert that the tail call does not throw.
    assert(!BC.MIB->getEHInfo(*CTCInstr) &&
           "found tail call with associated landing pad");

    // Create a basic block with an unconditional tail call instruction using
    // the same destination.
    const MCSymbol *CTCTargetLabel = BC.MIB->getTargetSymbol(*CTCInstr);
    assert(CTCTargetLabel && "symbol expected for conditional tail call");
    MCInst TailCallInstr;
    BC.MIB->createTailCall(TailCallInstr, CTCTargetLabel, BC.Ctx.get());
    // Link new BBs to the original input offset of the BB where the CTC
    // is, so we can map samples recorded in new BBs back to the original BB
    // seem in the input binary (if using BAT)
    std::unique_ptr<BinaryBasicBlock> TailCallBB = createBasicBlock(
        BB.getInputOffset(), BC.Ctx->createNamedTempSymbol("TC"));
    TailCallBB->addInstruction(TailCallInstr);
    TailCallBB->setCFIState(CFIStateBeforeCTC);

    // Add CFG edge with profile info from BB to TailCallBB.
    BB.addSuccessor(TailCallBB.get(), CTCTakenCount, CTCMispredCount);

    // Add execution count for the block.
    TailCallBB->setExecutionCount(CTCTakenCount);

    // Downgrade the CTC to a plain conditional branch targeting the new
    // block.
    BC.MIB->convertTailCallToJmp(*CTCInstr);

    BC.MIB->replaceBranchTarget(*CTCInstr, TailCallBB->getLabel(),
                                BC.Ctx.get());

    // Add basic block to the list that will be added to the end.
    NewBlocks.emplace_back(std::move(TailCallBB));

    // Swap edges as the TailCallBB corresponds to the taken branch.
    BB.swapConditionalSuccessors();

    // This branch is no longer a conditional tail call.
    BC.MIB->unsetConditionalTailCall(*CTCInstr);
  }

  // Append the new tail-call blocks to the end of the layout; CFI state was
  // already set per-block above.
  insertBasicBlocks(std::prev(end()),
                    std::move(NewBlocks),
                    /* UpdateLayout */ true,
                    /* UpdateCFIState */ false);
}
2368 
2369 uint64_t BinaryFunction::getFunctionScore() const {
2370   if (FunctionScore != -1)
2371     return FunctionScore;
2372 
2373   if (!isSimple() || !hasValidProfile()) {
2374     FunctionScore = 0;
2375     return FunctionScore;
2376   }
2377 
2378   uint64_t TotalScore = 0ULL;
2379   for (BinaryBasicBlock *BB : layout()) {
2380     uint64_t BBExecCount = BB->getExecutionCount();
2381     if (BBExecCount == BinaryBasicBlock::COUNT_NO_PROFILE)
2382       continue;
2383     TotalScore += BBExecCount;
2384   }
2385   FunctionScore = TotalScore;
2386   return FunctionScore;
2387 }
2388 
/// Annotate each basic block with the CFI state at its entry, where a state
/// is an index into the FDE CFI program. Tracks RememberState/RestoreState
/// pairs so later CFI re-synthesis can produce short replay sequences.
void BinaryFunction::annotateCFIState() {
  assert(CurrentState == State::Disassembled && "unexpected function state");
  assert(!BasicBlocks.empty() && "basic block list should not be empty");

  // This is an index of the last processed CFI in FDE CFI program.
  uint32_t State = 0;

  // This is an index of RememberState CFI reflecting effective state right
  // after execution of RestoreState CFI.
  //
  // It differs from State iff the CFI at (State-1)
  // was RestoreState (modulo GNU_args_size CFIs, which are ignored).
  //
  // This allows us to generate shorter replay sequences when producing new
  // CFI programs.
  uint32_t EffectiveState = 0;

  // For tracking RememberState/RestoreState sequences.
  std::stack<uint32_t> StateStack;

  for (BinaryBasicBlock *BB : BasicBlocks) {
    // Record the state in effect when control enters this block.
    BB->setCFIState(EffectiveState);

    for (const MCInst &Instr : *BB) {
      const MCCFIInstruction *CFI = getCFIFor(Instr);
      if (!CFI)
        continue;

      ++State;

      switch (CFI->getOperation()) {
      case MCCFIInstruction::OpRememberState:
        // Push the state to be restored by the matching RestoreState.
        StateStack.push(EffectiveState);
        EffectiveState = State;
        break;
      case MCCFIInstruction::OpRestoreState:
        assert(!StateStack.empty() && "corrupt CFI stack");
        EffectiveState = StateStack.top();
        StateStack.pop();
        break;
      case MCCFIInstruction::OpGnuArgsSize:
        // OpGnuArgsSize CFIs do not affect the CFI state.
        break;
      default:
        // Any other CFI updates the state.
        EffectiveState = State;
        break;
      }
    }
  }

  // Every RememberState must have been matched by a RestoreState.
  assert(StateStack.empty() && "corrupt CFI stack");
}
2442 
2443 namespace {
2444 
/// Our full interpretation of a DWARF CFI machine state at a given point
struct CFISnapshot {
  /// CFA register number and offset defining the canonical frame at this
  /// point, or the number of a rule (CFI state) that computes it with a
  /// DWARF expression. This number will be negative if it refers to a CFI
  /// located in the CIE instead of the FDE.
  uint32_t CFAReg;
  int32_t CFAOffset;
  int32_t CFARule;
  /// Mapping of rules (CFI states) that define the location of each
  /// register. If absent, no rule defining the location of such register
  /// was ever read. This number will be negative if it refers to a CFI
  /// located in the CIE instead of the FDE.
  DenseMap<int32_t, int32_t> RegRule;

  /// References to CIE, FDE and expanded instructions after a restore state
  const BinaryFunction::CFIInstrMapType &CIE;
  const BinaryFunction::CFIInstrMapType &FDE;
  const DenseMap<int32_t, SmallVector<int32_t, 4>> &FrameRestoreEquivalents;

  /// Current FDE CFI number representing the state where the snapshot is at
  int32_t CurState;

  /// Used when we don't have information about which state/rule to apply
  /// to recover the location of either the CFA or a specific register
  constexpr static int32_t UNKNOWN = std::numeric_limits<int32_t>::min();

private:
  /// Update our snapshot by executing a single CFI.
  ///
  /// \p RuleNumber identifies \p Instr: its index in the FDE program, or a
  /// non-positive number when the instruction came from the CIE.
  void update(const MCCFIInstruction &Instr, int32_t RuleNumber) {
    switch (Instr.getOperation()) {
    case MCCFIInstruction::OpSameValue:
    case MCCFIInstruction::OpRelOffset:
    case MCCFIInstruction::OpOffset:
    case MCCFIInstruction::OpRestore:
    case MCCFIInstruction::OpUndefined:
    case MCCFIInstruction::OpRegister:
      // Register-location rules: the latest definition wins for its register.
      RegRule[Instr.getRegister()] = RuleNumber;
      break;
    case MCCFIInstruction::OpDefCfaRegister:
      CFAReg = Instr.getRegister();
      CFARule = UNKNOWN;
      break;
    case MCCFIInstruction::OpDefCfaOffset:
      CFAOffset = Instr.getOffset();
      CFARule = UNKNOWN;
      break;
    case MCCFIInstruction::OpDefCfa:
      // Full CFA definition supersedes any expression-based rule.
      CFAReg = Instr.getRegister();
      CFAOffset = Instr.getOffset();
      CFARule = UNKNOWN;
      break;
    case MCCFIInstruction::OpEscape: {
      Optional<uint8_t> Reg = readDWARFExpressionTargetReg(Instr.getValues());
      // Handle DW_CFA_def_cfa_expression
      if (!Reg) {
        // The raw expression redefines the CFA; remember which rule did it.
        CFARule = RuleNumber;
        break;
      }
      // Expression targets a specific register (e.g. DW_CFA_expression).
      RegRule[*Reg] = RuleNumber;
      break;
    }
    case MCCFIInstruction::OpAdjustCfaOffset:
    case MCCFIInstruction::OpWindowSave:
    case MCCFIInstruction::OpNegateRAState:
    case MCCFIInstruction::OpLLVMDefAspaceCfa:
      llvm_unreachable("unsupported CFI opcode");
      break;
    case MCCFIInstruction::OpRememberState:
    case MCCFIInstruction::OpRestoreState:
    case MCCFIInstruction::OpGnuArgsSize:
      // do not affect CFI state
      break;
    }
  }

public:
  /// Advance state reading FDE CFI instructions up to State number
  void advanceTo(int32_t State) {
    for (int32_t I = CurState, E = State; I != E; ++I) {
      const MCCFIInstruction &Instr = FDE[I];
      if (Instr.getOperation() != MCCFIInstruction::OpRestoreState) {
        update(Instr, I);
        continue;
      }
      // If restore state instruction, fetch the equivalent CFIs that have
      // the same effect of this restore. This is used to ensure remember-
      // restore pairs are completely removed.
      auto Iter = FrameRestoreEquivalents.find(I);
      if (Iter == FrameRestoreEquivalents.end())
        continue;
      for (int32_t RuleNumber : Iter->second) {
        update(FDE[RuleNumber], RuleNumber);
      }
    }

    assert(((CFAReg != (uint32_t)UNKNOWN && CFAOffset != UNKNOWN) ||
            CFARule != UNKNOWN) &&
           "CIE did not define default CFA?");

    CurState = State;
  }

  /// Interpret all CIE and FDE instructions up until CFI State number and
  /// populate this snapshot
  CFISnapshot(
      const BinaryFunction::CFIInstrMapType &CIE,
      const BinaryFunction::CFIInstrMapType &FDE,
      const DenseMap<int32_t, SmallVector<int32_t, 4>> &FrameRestoreEquivalents,
      int32_t State)
      : CIE(CIE), FDE(FDE), FrameRestoreEquivalents(FrameRestoreEquivalents) {
    CFAReg = UNKNOWN;
    CFAOffset = UNKNOWN;
    CFARule = UNKNOWN;
    CurState = 0;

    // CIE rules are recorded with non-positive rule numbers (-I).
    // NOTE(review): the CIE instruction at index 0 gets rule number -0 == 0,
    // which is indistinguishable from FDE rule 0 when a rule is later read
    // back -- confirm CIE index 0 never carries a register/CFA rule.
    for (int32_t I = 0, E = CIE.size(); I != E; ++I) {
      const MCCFIInstruction &Instr = CIE[I];
      update(Instr, -I);
    }

    advanceTo(State);
  }
};
2569 
/// A CFI snapshot with the capability of checking if incremental additions to
/// it are redundant. This is used to ensure we do not emit two CFI instructions
/// back-to-back that are doing the same state change, or to avoid emitting a
/// CFI at all when the state at that point would not be modified after that CFI
struct CFISnapshotDiff : public CFISnapshot {
  // Flags recording that a (non-redundant) definition for the CFA register /
  // CFA offset / a given register has already been accepted since this diff
  // was created; any later definition of the same entity is dead.
  bool RestoredCFAReg{false};
  bool RestoredCFAOffset{false};
  DenseMap<int32_t, bool> RestoredRegs;

  CFISnapshotDiff(const CFISnapshot &S) : CFISnapshot(S) {}

  CFISnapshotDiff(
      const BinaryFunction::CFIInstrMapType &CIE,
      const BinaryFunction::CFIInstrMapType &FDE,
      const DenseMap<int32_t, SmallVector<int32_t, 4>> &FrameRestoreEquivalents,
      int32_t State)
      : CFISnapshot(CIE, FDE, FrameRestoreEquivalents, State) {}

  /// Return true if applying Instr to this state is redundant and can be
  /// dismissed.
  bool isRedundant(const MCCFIInstruction &Instr) {
    switch (Instr.getOperation()) {
    case MCCFIInstruction::OpSameValue:
    case MCCFIInstruction::OpRelOffset:
    case MCCFIInstruction::OpOffset:
    case MCCFIInstruction::OpRestore:
    case MCCFIInstruction::OpUndefined:
    case MCCFIInstruction::OpRegister:
    case MCCFIInstruction::OpEscape: {
      // Determine which register this CFI affects.
      uint32_t Reg;
      if (Instr.getOperation() != MCCFIInstruction::OpEscape) {
        Reg = Instr.getRegister();
      } else {
        Optional<uint8_t> R = readDWARFExpressionTargetReg(Instr.getValues());
        // Handle DW_CFA_def_cfa_expression
        if (!R) {
          // Expression redefines the whole CFA: treat like OpDefCfa below.
          if (RestoredCFAReg && RestoredCFAOffset)
            return true;
          RestoredCFAReg = true;
          RestoredCFAOffset = true;
          return false;
        }
        Reg = *R;
      }
      // Only the first definition seen for a register can matter; later ones
      // are overwritten by it (we replay in reverse) and are redundant.
      if (RestoredRegs[Reg])
        return true;
      RestoredRegs[Reg] = true;
      const int32_t CurRegRule =
          RegRule.find(Reg) != RegRule.end() ? RegRule[Reg] : UNKNOWN;
      if (CurRegRule == UNKNOWN) {
        // No rule recorded: restoring the initial state is a no-op.
        if (Instr.getOperation() == MCCFIInstruction::OpRestore ||
            Instr.getOperation() == MCCFIInstruction::OpSameValue)
          return true;
        return false;
      }
      const MCCFIInstruction &LastDef =
          CurRegRule < 0 ? CIE[-CurRegRule] : FDE[CurRegRule];
      // Redundant iff it repeats the register's last effective definition.
      return LastDef == Instr;
    }
    case MCCFIInstruction::OpDefCfaRegister:
      if (RestoredCFAReg)
        return true;
      RestoredCFAReg = true;
      return CFAReg == Instr.getRegister();
    case MCCFIInstruction::OpDefCfaOffset:
      if (RestoredCFAOffset)
        return true;
      RestoredCFAOffset = true;
      return CFAOffset == Instr.getOffset();
    case MCCFIInstruction::OpDefCfa:
      if (RestoredCFAReg && RestoredCFAOffset)
        return true;
      RestoredCFAReg = true;
      RestoredCFAOffset = true;
      return CFAReg == Instr.getRegister() && CFAOffset == Instr.getOffset();
    case MCCFIInstruction::OpAdjustCfaOffset:
    case MCCFIInstruction::OpWindowSave:
    case MCCFIInstruction::OpNegateRAState:
    case MCCFIInstruction::OpLLVMDefAspaceCfa:
      llvm_unreachable("unsupported CFI opcode");
      return false;
    case MCCFIInstruction::OpRememberState:
    case MCCFIInstruction::OpRestoreState:
    case MCCFIInstruction::OpGnuArgsSize:
      // do not affect CFI state
      return true;
    }
    return false;
  }
};
2660 
2661 } // end anonymous namespace
2662 
2663 bool BinaryFunction::replayCFIInstrs(int32_t FromState, int32_t ToState,
2664                                      BinaryBasicBlock *InBB,
2665                                      BinaryBasicBlock::iterator InsertIt) {
2666   if (FromState == ToState)
2667     return true;
2668   assert(FromState < ToState && "can only replay CFIs forward");
2669 
2670   CFISnapshotDiff CFIDiff(CIEFrameInstructions, FrameInstructions,
2671                           FrameRestoreEquivalents, FromState);
2672 
2673   std::vector<uint32_t> NewCFIs;
2674   for (int32_t CurState = FromState; CurState < ToState; ++CurState) {
2675     MCCFIInstruction *Instr = &FrameInstructions[CurState];
2676     if (Instr->getOperation() == MCCFIInstruction::OpRestoreState) {
2677       auto Iter = FrameRestoreEquivalents.find(CurState);
2678       assert(Iter != FrameRestoreEquivalents.end());
2679       NewCFIs.insert(NewCFIs.end(), Iter->second.begin(),
2680                      Iter->second.end());
2681       // RestoreState / Remember will be filtered out later by CFISnapshotDiff,
2682       // so we might as well fall-through here.
2683     }
2684     NewCFIs.push_back(CurState);
2685     continue;
2686   }
2687 
2688   // Replay instructions while avoiding duplicates
2689   for (auto I = NewCFIs.rbegin(), E = NewCFIs.rend(); I != E; ++I) {
2690     if (CFIDiff.isRedundant(FrameInstructions[*I]))
2691       continue;
2692     InsertIt = addCFIPseudo(InBB, InsertIt, *I);
2693   }
2694 
2695   return true;
2696 }
2697 
/// Insert CFIs at \p InsertIt in \p InBB that undo the effect of all FDE CFIs
/// in the range [ToState, FromState), i.e. rewind the CFI state from
/// FromState back to ToState.
///
/// For every CFA/register definition being undone, the definition that was
/// effective at ToState is re-emitted: either by referencing an existing FDE
/// instruction, copying a CIE instruction into FrameInstructions, or
/// synthesizing a new instruction. Returns the list of FDE CFI indices that
/// were emitted (this is what FrameRestoreEquivalents stores for a
/// RestoreState instruction).
SmallVector<int32_t, 4>
BinaryFunction::unwindCFIState(int32_t FromState, int32_t ToState,
                               BinaryBasicBlock *InBB,
                               BinaryBasicBlock::iterator &InsertIt) {
  SmallVector<int32_t, 4> NewStates;

  // Snapshot of the target (earlier) state we want to recreate.
  CFISnapshot ToCFITable(CIEFrameInstructions, FrameInstructions,
                                FrameRestoreEquivalents, ToState);
  // Diff tracker seeded at the later state, used to skip CFIs that would not
  // change anything at the insertion point.
  CFISnapshotDiff FromCFITable(ToCFITable);
  FromCFITable.advanceTo(FromState);

  // Re-emit the CFA definition that was effective at ToState.
  auto undoStateDefCfa = [&]() {
    if (ToCFITable.CFARule == CFISnapshot::UNKNOWN) {
      // CFA is a plain register+offset: synthesize a fresh def_cfa.
      FrameInstructions.emplace_back(MCCFIInstruction::cfiDefCfa(
          nullptr, ToCFITable.CFAReg, ToCFITable.CFAOffset));
      if (FromCFITable.isRedundant(FrameInstructions.back())) {
        FrameInstructions.pop_back();
        return;
      }
      NewStates.push_back(FrameInstructions.size() - 1);
      InsertIt = addCFIPseudo(InBB, InsertIt, FrameInstructions.size() - 1);
      ++InsertIt;
    } else if (ToCFITable.CFARule < 0) {
      // CFA rule lives in the CIE: copy it into FrameInstructions.
      if (FromCFITable.isRedundant(CIEFrameInstructions[-ToCFITable.CFARule]))
        return;
      NewStates.push_back(FrameInstructions.size());
      InsertIt = addCFIPseudo(InBB, InsertIt, FrameInstructions.size());
      ++InsertIt;
      FrameInstructions.emplace_back(CIEFrameInstructions[-ToCFITable.CFARule]);
    } else if (!FromCFITable.isRedundant(
                   FrameInstructions[ToCFITable.CFARule])) {
      // CFA rule is an existing FDE instruction: reference it directly.
      NewStates.push_back(ToCFITable.CFARule);
      InsertIt = addCFIPseudo(InBB, InsertIt, ToCFITable.CFARule);
      ++InsertIt;
    }
  };

  // Undo the effect of a single CFI by re-emitting, for whatever the CFI
  // modified, the rule that was effective at ToState.
  auto undoState = [&](const MCCFIInstruction &Instr) {
    switch (Instr.getOperation()) {
    case MCCFIInstruction::OpRememberState:
    case MCCFIInstruction::OpRestoreState:
      break;
    case MCCFIInstruction::OpSameValue:
    case MCCFIInstruction::OpRelOffset:
    case MCCFIInstruction::OpOffset:
    case MCCFIInstruction::OpRestore:
    case MCCFIInstruction::OpUndefined:
    case MCCFIInstruction::OpEscape:
    case MCCFIInstruction::OpRegister: {
      // Determine which register's rule must be restored.
      uint32_t Reg;
      if (Instr.getOperation() != MCCFIInstruction::OpEscape) {
        Reg = Instr.getRegister();
      } else {
        Optional<uint8_t> R = readDWARFExpressionTargetReg(Instr.getValues());
        // Handle DW_CFA_def_cfa_expression
        if (!R) {
          undoStateDefCfa();
          return;
        }
        Reg = *R;
      }

      if (ToCFITable.RegRule.find(Reg) == ToCFITable.RegRule.end()) {
        // No rule at ToState: restore the register to its initial state.
        FrameInstructions.emplace_back(
            MCCFIInstruction::createRestore(nullptr, Reg));
        if (FromCFITable.isRedundant(FrameInstructions.back())) {
          FrameInstructions.pop_back();
          break;
        }
        NewStates.push_back(FrameInstructions.size() - 1);
        InsertIt = addCFIPseudo(InBB, InsertIt, FrameInstructions.size() - 1);
        ++InsertIt;
        break;
      }
      const int32_t Rule = ToCFITable.RegRule[Reg];
      if (Rule < 0) {
        // Rule comes from the CIE: copy it into FrameInstructions.
        if (FromCFITable.isRedundant(CIEFrameInstructions[-Rule]))
          break;
        NewStates.push_back(FrameInstructions.size());
        InsertIt = addCFIPseudo(InBB, InsertIt, FrameInstructions.size());
        ++InsertIt;
        FrameInstructions.emplace_back(CIEFrameInstructions[-Rule]);
        break;
      }
      // Rule is an existing FDE instruction: reference it directly.
      if (FromCFITable.isRedundant(FrameInstructions[Rule]))
        break;
      NewStates.push_back(Rule);
      InsertIt = addCFIPseudo(InBB, InsertIt, Rule);
      ++InsertIt;
      break;
    }
    case MCCFIInstruction::OpDefCfaRegister:
    case MCCFIInstruction::OpDefCfaOffset:
    case MCCFIInstruction::OpDefCfa:
      undoStateDefCfa();
      break;
    case MCCFIInstruction::OpAdjustCfaOffset:
    case MCCFIInstruction::OpWindowSave:
    case MCCFIInstruction::OpNegateRAState:
    case MCCFIInstruction::OpLLVMDefAspaceCfa:
      llvm_unreachable("unsupported CFI opcode");
      break;
    case MCCFIInstruction::OpGnuArgsSize:
      // do not affect CFI state
      break;
    }
  };

  // Undo all modifications from ToState to FromState
  for (int32_t I = ToState, E = FromState; I != E; ++I) {
    const MCCFIInstruction &Instr = FrameInstructions[I];
    if (Instr.getOperation() != MCCFIInstruction::OpRestoreState) {
      undoState(Instr);
      continue;
    }
    // Expand a RestoreState into its equivalent sequence and undo those.
    auto Iter = FrameRestoreEquivalents.find(I);
    if (Iter == FrameRestoreEquivalents.end())
      continue;
    for (int32_t State : Iter->second)
      undoState(FrameInstructions[State]);
  }

  return NewStates;
}
2822 
2823 void BinaryFunction::normalizeCFIState() {
2824   // Reordering blocks with remember-restore state instructions can be specially
2825   // tricky. When rewriting the CFI, we omit remember-restore state instructions
2826   // entirely. For restore state, we build a map expanding each restore to the
2827   // equivalent unwindCFIState sequence required at that point to achieve the
2828   // same effect of the restore. All remember state are then just ignored.
2829   std::stack<int32_t> Stack;
2830   for (BinaryBasicBlock *CurBB : BasicBlocksLayout) {
2831     for (auto II = CurBB->begin(); II != CurBB->end(); ++II) {
2832       if (const MCCFIInstruction *CFI = getCFIFor(*II)) {
2833         if (CFI->getOperation() == MCCFIInstruction::OpRememberState) {
2834           Stack.push(II->getOperand(0).getImm());
2835           continue;
2836         }
2837         if (CFI->getOperation() == MCCFIInstruction::OpRestoreState) {
2838           const int32_t RememberState = Stack.top();
2839           const int32_t CurState = II->getOperand(0).getImm();
2840           FrameRestoreEquivalents[CurState] =
2841               unwindCFIState(CurState, RememberState, CurBB, II);
2842           Stack.pop();
2843         }
2844       }
2845     }
2846   }
2847 }
2848 
/// Reconcile CFI state across basic-block boundaries after reordering.
///
/// Walks blocks in layout order; whenever the state expected at a block's
/// entry differs from the state at the end of the previous block, CFIs are
/// inserted at the block start: unwinding when the state went backwards,
/// replaying when it went forward. Finally strips all RememberState /
/// RestoreState pseudos (their effects were expanded earlier, see
/// normalizeCFIState). Returns false if replaying failed.
bool BinaryFunction::finalizeCFIState() {
  LLVM_DEBUG(
      dbgs() << "Trying to fix CFI states for each BB after reordering.\n");
  LLVM_DEBUG(dbgs() << "This is the list of CFI states for each BB of " << *this
                    << ": ");

  int32_t State = 0;
  bool SeenCold = false;
  const char *Sep = "";
  (void)Sep;
  for (BinaryBasicBlock *BB : BasicBlocksLayout) {
    const int32_t CFIStateAtExit = BB->getCFIStateAtExit();

    // Hot-cold border: check if this is the first BB to be allocated in a cold
    // region (with a different FDE). If yes, we need to reset the CFI state.
    if (!SeenCold && BB->isCold()) {
      State = 0;
      SeenCold = true;
    }

    // We need to recover the correct state if it doesn't match expected
    // state at BB entry point.
    if (BB->getCFIState() < State) {
      // In this case, State is currently higher than what this BB expect it
      // to be. To solve this, we need to insert CFI instructions to undo
      // the effect of all CFI from BB's state to current State.
      auto InsertIt = BB->begin();
      unwindCFIState(State, BB->getCFIState(), BB, InsertIt);
    } else if (BB->getCFIState() > State) {
      // If BB's CFI state is greater than State, it means we are behind in the
      // state. Just emit all instructions to reach this state at the
      // beginning of this BB. If this sequence of instructions involve
      // remember state or restore state, bail out.
      if (!replayCFIInstrs(State, BB->getCFIState(), BB, BB->begin()))
        return false;
    }

    State = CFIStateAtExit;
    LLVM_DEBUG(dbgs() << Sep << State; Sep = ", ");
  }
  LLVM_DEBUG(dbgs() << "\n");

  // Remove all remember/restore pseudos; they are no longer needed since
  // their effects have been materialized as plain CFIs above.
  for (BinaryBasicBlock *BB : BasicBlocksLayout) {
    for (auto II = BB->begin(); II != BB->end(); ) {
      const MCCFIInstruction *CFI = getCFIFor(*II);
      if (CFI &&
          (CFI->getOperation() == MCCFIInstruction::OpRememberState ||
           CFI->getOperation() == MCCFIInstruction::OpRestoreState)) {
        II = BB->eraseInstruction(II);
      } else {
        ++II;
      }
    }
  }

  return true;
}
2906 
2907 bool BinaryFunction::requiresAddressTranslation() const {
2908   return opts::EnableBAT || hasSDTMarker() || hasPseudoProbe();
2909 }
2910 
2911 uint64_t BinaryFunction::getInstructionCount() const {
2912   uint64_t Count = 0;
2913   for (BinaryBasicBlock *const &Block : BasicBlocksLayout) {
2914     Count += Block->getNumNonPseudos();
2915   }
2916   return Count;
2917 }
2918 
/// Return true if the basic-block layout of this function was modified
/// (tracked by the ModifiedLayout flag).
bool BinaryFunction::hasLayoutChanged() const {
  return ModifiedLayout;
}
2922 
/// Compute the edit distance between the previous basic-block layout and the
/// current one, quantifying how much the layout was reordered.
uint64_t BinaryFunction::getEditDistance() const {
  return ComputeEditDistance<BinaryBasicBlock *>(BasicBlocksPreviousLayout,
                                                 BasicBlocksLayout);
}
2927 
2928 void BinaryFunction::clearDisasmState() {
2929   clearList(Instructions);
2930   clearList(IgnoredBranches);
2931   clearList(TakenBranches);
2932   clearList(InterproceduralReferences);
2933 
2934   if (BC.HasRelocations) {
2935     for (std::pair<const uint32_t, MCSymbol *> &LI : Labels) {
2936       BC.UndefinedSymbols.insert(LI.second);
2937     }
2938     if (FunctionEndLabel) {
2939       BC.UndefinedSymbols.insert(FunctionEndLabel);
2940     }
2941   }
2942 }
2943 
2944 void BinaryFunction::setTrapOnEntry() {
2945   clearDisasmState();
2946 
2947   auto addTrapAtOffset = [&](uint64_t Offset) {
2948     MCInst TrapInstr;
2949     BC.MIB->createTrap(TrapInstr);
2950     addInstruction(Offset, std::move(TrapInstr));
2951   };
2952 
2953   addTrapAtOffset(0);
2954   for (const std::pair<const uint32_t, MCSymbol *> &KV : getLabels()) {
2955     if (getSecondaryEntryPointSymbol(KV.second)) {
2956       addTrapAtOffset(KV.first);
2957     }
2958   }
2959 
2960   TrapsOnEntry = true;
2961 }
2962 
2963 void BinaryFunction::setIgnored() {
2964   if (opts::processAllFunctions()) {
2965     // We can accept ignored functions before they've been disassembled.
2966     // In that case, they would still get disassembled and emited, but not
2967     // optimized.
2968     assert(CurrentState == State::Empty &&
2969            "cannot ignore non-empty functions in current mode");
2970     IsIgnored = true;
2971     return;
2972   }
2973 
2974   clearDisasmState();
2975 
2976   // Clear CFG state too.
2977   if (hasCFG()) {
2978     releaseCFG();
2979 
2980     for (BinaryBasicBlock *BB : BasicBlocks) {
2981       delete BB;
2982     }
2983     clearList(BasicBlocks);
2984 
2985     for (BinaryBasicBlock *BB : DeletedBasicBlocks) {
2986       delete BB;
2987     }
2988     clearList(DeletedBasicBlocks);
2989 
2990     clearList(BasicBlocksLayout);
2991     clearList(BasicBlocksPreviousLayout);
2992   }
2993 
2994   CurrentState = State::Empty;
2995 
2996   IsIgnored = true;
2997   IsSimple = false;
2998   LLVM_DEBUG(dbgs() << "Ignoring " << getPrintName() << '\n');
2999 }
3000 
3001 void BinaryFunction::duplicateConstantIslands() {
3002   assert(Islands && "function expected to have constant islands");
3003 
3004   for (BinaryBasicBlock *BB : layout()) {
3005     if (!BB->isCold())
3006       continue;
3007 
3008     for (MCInst &Inst : *BB) {
3009       int OpNum = 0;
3010       for (MCOperand &Operand : Inst) {
3011         if (!Operand.isExpr()) {
3012           ++OpNum;
3013           continue;
3014         }
3015         const MCSymbol *Symbol = BC.MIB->getTargetSymbol(Inst, OpNum);
3016         // Check if this is an island symbol
3017         if (!Islands->Symbols.count(Symbol) &&
3018             !Islands->ProxySymbols.count(Symbol))
3019           continue;
3020 
3021         // Create cold symbol, if missing
3022         auto ISym = Islands->ColdSymbols.find(Symbol);
3023         MCSymbol *ColdSymbol;
3024         if (ISym != Islands->ColdSymbols.end()) {
3025           ColdSymbol = ISym->second;
3026         } else {
3027           ColdSymbol = BC.Ctx->getOrCreateSymbol(Symbol->getName() + ".cold");
3028           Islands->ColdSymbols[Symbol] = ColdSymbol;
3029           // Check if this is a proxy island symbol and update owner proxy map
3030           if (Islands->ProxySymbols.count(Symbol)) {
3031             BinaryFunction *Owner = Islands->ProxySymbols[Symbol];
3032             auto IProxiedSym = Owner->Islands->Proxies[this].find(Symbol);
3033             Owner->Islands->ColdProxies[this][IProxiedSym->second] = ColdSymbol;
3034           }
3035         }
3036 
3037         // Update instruction reference
3038         Operand = MCOperand::createExpr(BC.MIB->getTargetExprFor(
3039             Inst,
3040             MCSymbolRefExpr::create(ColdSymbol, MCSymbolRefExpr::VK_None,
3041                                     *BC.Ctx),
3042             *BC.Ctx, 0));
3043         ++OpNum;
3044       }
3045     }
3046   }
3047 }
3048 
3049 namespace {
3050 
3051 #ifndef MAX_PATH
3052 #define MAX_PATH 255
3053 #endif
3054 
3055 std::string constructFilename(std::string Filename,
3056                               std::string Annotation,
3057                               std::string Suffix) {
3058   std::replace(Filename.begin(), Filename.end(), '/', '-');
3059   if (!Annotation.empty()) {
3060     Annotation.insert(0, "-");
3061   }
3062   if (Filename.size() + Annotation.size() + Suffix.size() > MAX_PATH) {
3063     assert(Suffix.size() + Annotation.size() <= MAX_PATH);
3064     if (opts::Verbosity >= 1) {
3065       errs() << "BOLT-WARNING: Filename \"" << Filename << Annotation << Suffix
3066              << "\" exceeds the " << MAX_PATH << " size limit, truncating.\n";
3067     }
3068     Filename.resize(MAX_PATH - (Suffix.size() + Annotation.size()));
3069   }
3070   Filename += Annotation;
3071   Filename += Suffix;
3072   return Filename;
3073 }
3074 
/// Sanitize a string for use inside a double-quoted Graphviz DOT attribute:
/// newlines become the "&#13;" entity and double quotes are dropped.
std::string formatEscapes(const std::string& Str) {
  std::string Result;
  Result.reserve(Str.size());
  for (const char C : Str) {
    if (C == '\n')
      Result += "&#13;";
    else if (C != '"')
      Result += C;
  }
  return Result;
}
3092 
3093 }
3094 
/// Print the CFG of this function to \p OS in Graphviz DOT format.
void BinaryFunction::dumpGraph(raw_ostream& OS) const {
  OS << "strict digraph \"" << getPrintName() << "\" {\n";
  uint64_t Offset = Address;
  for (BinaryBasicBlock *BB : BasicBlocks) {
    // Position of this block in the current layout (for the "L:" field).
    auto LayoutPos = std::find(BasicBlocksLayout.begin(),
                               BasicBlocksLayout.end(),
                               BB);
    unsigned Layout = LayoutPos - BasicBlocksLayout.begin();
    const char* ColdStr = BB->isCold() ? " (cold)" : "";
    // Node label: name, execution count (C), input offset (O), block index
    // (I), layout position (L) and CFI state.
    OS << format("\"%s\" [label=\"%s%s\\n(C:%lu,O:%lu,I:%u,L:%u:CFI:%u)\"]\n",
                 BB->getName().data(),
                 BB->getName().data(),
                 ColdStr,
                 (BB->ExecutionCount != BinaryBasicBlock::COUNT_NO_PROFILE
                  ? BB->ExecutionCount
                  : 0),
                 BB->getOffset(),
                 getIndex(BB),
                 Layout,
                 BB->getCFIState());
    OS << format("\"%s\" [shape=box]\n", BB->getName().data());
    if (opts::DotToolTipCode) {
      // Attach the block's disassembly as a node tooltip.
      std::string Str;
      raw_string_ostream CS(Str);
      Offset = BC.printInstructions(CS, BB->begin(), BB->end(), Offset, this);
      const std::string Code = formatEscapes(CS.str());
      OS << format("\"%s\" [tooltip=\"%s\"]\n",
                   BB->getName().data(),
                   Code.c_str());
    }

    // analyzeBranch is just used to get the names of the branch
    // opcodes.
    const MCSymbol *TBB = nullptr;
    const MCSymbol *FBB = nullptr;
    MCInst *CondBranch = nullptr;
    MCInst *UncondBranch = nullptr;
    const bool Success = BB->analyzeBranch(TBB,
                                           FBB,
                                           CondBranch,
                                           UncondBranch);

    const MCInst *LastInstr = BB->getLastNonPseudoInstr();
    const bool IsJumpTable = LastInstr && BC.MIB->getJumpTable(*LastInstr);

    auto BI = BB->branch_info_begin();
    for (BinaryBasicBlock *Succ : BB->successors()) {
      // Edge label: the branch opcode name when available, otherwise
      // TB/FB (taken/false branch), FT (fall-through) or JT (jump table).
      std::string Branch;
      if (Success) {
        if (Succ == BB->getConditionalSuccessor(true)) {
          Branch = CondBranch ? std::string(BC.InstPrinter->getOpcodeName(
                                    CondBranch->getOpcode()))
                              : "TB";
        } else if (Succ == BB->getConditionalSuccessor(false)) {
          Branch = UncondBranch ? std::string(BC.InstPrinter->getOpcodeName(
                                      UncondBranch->getOpcode()))
                                : "FB";
        } else {
          Branch = "FT";
        }
      }
      if (IsJumpTable) {
        Branch = "JT";
      }
      OS << format("\"%s\" -> \"%s\" [label=\"%s",
                   BB->getName().data(),
                   Succ->getName().data(),
                   Branch.c_str());

      // Append edge profile: count and mispredictions when measured, or the
      // inferred count otherwise.
      if (BB->getExecutionCount() != COUNT_NO_PROFILE &&
          BI->MispredictedCount != BinaryBasicBlock::COUNT_INFERRED) {
        OS << "\\n(C:" << BI->Count << ",M:" << BI->MispredictedCount << ")";
      } else if (ExecutionCount != COUNT_NO_PROFILE &&
                 BI->Count != BinaryBasicBlock::COUNT_NO_PROFILE) {
        OS << "\\n(IC:" << BI->Count << ")";
      }
      OS << "\"]\n";

      ++BI;
    }
    // Dashed, non-constraining edges for exception control flow.
    for (BinaryBasicBlock *LP : BB->landing_pads()) {
      OS << format("\"%s\" -> \"%s\" [constraint=false style=dashed]\n",
                   BB->getName().data(),
                   LP->getName().data());
    }
  }
  OS << "}\n";
}
3183 
3184 void BinaryFunction::viewGraph() const {
3185   SmallString<MAX_PATH> Filename;
3186   if (std::error_code EC =
3187           sys::fs::createTemporaryFile("bolt-cfg", "dot", Filename)) {
3188     errs() << "BOLT-ERROR: " << EC.message() << ", unable to create "
3189            << " bolt-cfg-XXXXX.dot temporary file.\n";
3190     return;
3191   }
3192   dumpGraphToFile(std::string(Filename));
3193   if (DisplayGraph(Filename)) {
3194     errs() << "BOLT-ERROR: Can't display " << Filename << " with graphviz.\n";
3195   }
3196   if (std::error_code EC = sys::fs::remove(Filename)) {
3197     errs() << "BOLT-WARNING: " << EC.message() << ", failed to remove "
3198            << Filename << "\n";
3199   }
3200 }
3201 
3202 void BinaryFunction::dumpGraphForPass(std::string Annotation) const {
3203   std::string Filename = constructFilename(getPrintName(), Annotation, ".dot");
3204   outs() << "BOLT-DEBUG: Dumping CFG to " << Filename << "\n";
3205   dumpGraphToFile(Filename);
3206 }
3207 
3208 void BinaryFunction::dumpGraphToFile(std::string Filename) const {
3209   std::error_code EC;
3210   raw_fd_ostream of(Filename, EC, sys::fs::OF_None);
3211   if (EC) {
3212     if (opts::Verbosity >= 1) {
3213       errs() << "BOLT-WARNING: " << EC.message() << ", unable to open "
3214              << Filename << " for output.\n";
3215     }
3216     return;
3217   }
3218   dumpGraph(of);
3219 }
3220 
/// Verify internal consistency of the CFG: per-block successor invariants,
/// absence of deleted blocks in any adjacency list, no duplicate landing
/// pads or throwers, and symmetry of the landing-pad/thrower relation.
/// Prints BOLT-ERROR diagnostics and returns false on the first violation.
bool BinaryFunction::validateCFG() const {
  bool Valid = true;
  for (BinaryBasicBlock *BB : BasicBlocks) {
    Valid &= BB->validateSuccessorInvariants();
  }

  if (!Valid)
    return Valid;

  // Make sure all blocks in CFG are valid.
  auto validateBlock = [this](const BinaryBasicBlock *BB, StringRef Desc) {
    if (!BB->isValid()) {
      errs() << "BOLT-ERROR: deleted " << Desc << " " << BB->getName()
             << " detected in:\n";
      this->dump();
      return false;
    }
    return true;
  };
  for (const BinaryBasicBlock *BB : BasicBlocks) {
    if (!validateBlock(BB, "block"))
      return false;
    for (const BinaryBasicBlock *PredBB : BB->predecessors())
      if (!validateBlock(PredBB, "predecessor"))
        return false;
    for (const BinaryBasicBlock *SuccBB : BB->successors())
      if (!validateBlock(SuccBB, "successor"))
        return false;
    for (const BinaryBasicBlock *LP : BB->landing_pads())
      if (!validateBlock(LP, "landing pad"))
        return false;
    for (const BinaryBasicBlock *Thrower : BB->throwers())
      if (!validateBlock(Thrower, "thrower"))
        return false;
  }

  for (const BinaryBasicBlock *BB : BasicBlocks) {
    // No duplicates allowed in the landing-pad list ...
    std::unordered_set<const BinaryBasicBlock *> BBLandingPads;
    for (const BinaryBasicBlock *LP : BB->landing_pads()) {
      if (BBLandingPads.count(LP)) {
        errs() << "BOLT-ERROR: duplicate landing pad detected in"
               << BB->getName() << " in function " << *this << '\n';
        return false;
      }
      BBLandingPads.insert(LP);
    }

    // ... nor in the throwers list.
    std::unordered_set<const BinaryBasicBlock *> BBThrowers;
    for (const BinaryBasicBlock *Thrower : BB->throwers()) {
      if (BBThrowers.count(Thrower)) {
        errs() << "BOLT-ERROR: duplicate thrower detected in"
               << BB->getName() << " in function " << *this << '\n';
        return false;
      }
      BBThrowers.insert(Thrower);
    }

    // Landing-pad and thrower links must be symmetric in both directions.
    for (const BinaryBasicBlock *LPBlock : BB->landing_pads()) {
      if (std::find(LPBlock->throw_begin(), LPBlock->throw_end(), BB)
            == LPBlock->throw_end()) {
        errs() << "BOLT-ERROR: inconsistent landing pad detected in "
               << *this << ": " << BB->getName()
               << " is in LandingPads but not in " << LPBlock->getName()
               << " Throwers\n";
        return false;
      }
    }
    for (const BinaryBasicBlock *Thrower : BB->throwers()) {
      if (std::find(Thrower->lp_begin(), Thrower->lp_end(), BB)
            == Thrower->lp_end()) {
        errs() << "BOLT-ERROR: inconsistent thrower detected in "
               << *this << ": " << BB->getName()
               << " is in Throwers list but not in " << Thrower->getName()
               << " LandingPads\n";
        return false;
      }
    }
  }

  return Valid;
}
3302 
/// Make the branch instructions terminating each basic block agree with the
/// CFG successors and the final basic block layout: remove redundant
/// branches, reverse conditions to exploit fall-throughs, and insert
/// unconditional branches where the layout requires them.
void BinaryFunction::fixBranches() {
  auto &MIB = BC.MIB;
  MCContext *Ctx = BC.Ctx.get();

  for (unsigned I = 0, E = BasicBlocksLayout.size(); I != E; ++I) {
    BinaryBasicBlock *BB = BasicBlocksLayout[I];
    const MCSymbol *TBB = nullptr;
    const MCSymbol *FBB = nullptr;
    MCInst *CondBranch = nullptr;
    MCInst *UncondBranch = nullptr;
    // Skip blocks whose terminator sequence cannot be analyzed.
    if (!BB->analyzeBranch(TBB, FBB, CondBranch, UncondBranch))
      continue;

    // We will create unconditional branch with correct destination if needed.
    if (UncondBranch)
      BB->eraseInstruction(BB->findInstruction(UncondBranch));

    // Basic block that follows the current one in the final layout.
    // Only a block in the same (hot/cold) region can act as a fall-through.
    const BinaryBasicBlock *NextBB = nullptr;
    if (I + 1 != E && BB->isCold() == BasicBlocksLayout[I + 1]->isCold())
      NextBB = BasicBlocksLayout[I + 1];

    if (BB->succ_size() == 1) {
      // __builtin_unreachable() could create a conditional branch that
      // falls-through into the next function - hence the block will have only
      // one valid successor. Since behaviour is undefined - we replace
      // the conditional branch with an unconditional if required.
      if (CondBranch)
        BB->eraseInstruction(BB->findInstruction(CondBranch));
      // The successor is the layout fall-through: no branch needed.
      if (BB->getSuccessor() == NextBB)
        continue;
      BB->addBranchInstruction(BB->getSuccessor());
    } else if (BB->succ_size() == 2) {
      assert(CondBranch && "conditional branch expected");
      const BinaryBasicBlock *TSuccessor = BB->getConditionalSuccessor(true);
      const BinaryBasicBlock *FSuccessor = BB->getConditionalSuccessor(false);
      // Check whether we support reversing this branch direction
      const bool IsSupported =
          !MIB->isUnsupportedBranch(CondBranch->getOpcode());
      // If the taken successor is the fall-through, reverse the condition so
      // the fall-through path carries no branch.
      if (NextBB && NextBB == TSuccessor && IsSupported) {
        std::swap(TSuccessor, FSuccessor);
        {
          auto L = BC.scopeLock();
          MIB->reverseBranchCondition(*CondBranch, TSuccessor->getLabel(), Ctx);
        }
        BB->swapConditionalSuccessors();
      } else {
        // Otherwise just make sure the conditional branch targets the taken
        // successor's label.
        auto L = BC.scopeLock();
        MIB->replaceBranchTarget(*CondBranch, TSuccessor->getLabel(), Ctx);
      }
      // Both edges lead to the same block: keep a single successor edge.
      if (TSuccessor == FSuccessor) {
        BB->removeDuplicateConditionalSuccessor(CondBranch);
      }
      // The false successor is not the fall-through: an explicit
      // unconditional branch to it is required.
      if (!NextBB ||
          ((NextBB != TSuccessor || !IsSupported) && NextBB != FSuccessor)) {
        // If one of the branches is guaranteed to be "long" while the other
        // could be "short", then prioritize short for "taken". This will
        // generate a sequence 1 byte shorter on x86.
        if (IsSupported && BC.isX86() &&
            TSuccessor->isCold() != FSuccessor->isCold() &&
            BB->isCold() != TSuccessor->isCold()) {
          std::swap(TSuccessor, FSuccessor);
          {
            auto L = BC.scopeLock();
            MIB->reverseBranchCondition(*CondBranch, TSuccessor->getLabel(),
                                        Ctx);
          }
          BB->swapConditionalSuccessors();
        }
        BB->addBranchInstruction(FSuccessor);
      }
    }
    // Cases where the number of successors is 0 (block ends with a
    // terminator) or more than 2 (switch table) don't require branch
    // instruction adjustments.
  }
  assert((!isSimple() || validateCFG())
         && "Invalid CFG detected after fixing branches");
}
3382 
/// Propagate DW_CFA_GNU_args_size values to invoke instructions and drop the
/// corresponding CFI pseudo-instructions.
///
/// \param AllocId annotation allocator to use when attaching the
///        GNU_args_size operand to invoke instructions.
void BinaryFunction::propagateGnuArgsSizeInfo(
    MCPlusBuilder::AllocatorIdTy AllocId) {
  assert(CurrentState == State::Disassembled && "unexpected function state");

  // Nothing to do for functions without EH ranges or without GNU_args_size.
  if (!hasEHRanges() || !usesGnuArgsSize())
    return;

  // The current value of DW_CFA_GNU_args_size affects all following
  // invoke instructions until the next CFI overrides it.
  // It is important to iterate basic blocks in the original order when
  // assigning the value.
  uint64_t CurrentGnuArgsSize = 0;
  for (BinaryBasicBlock *BB : BasicBlocks) {
    // Manual iterator control: erasing advances II, so ++II happens only on
    // the non-erase path.
    for (auto II = BB->begin(); II != BB->end(); ) {
      MCInst &Instr = *II;
      if (BC.MIB->isCFI(Instr)) {
        const MCCFIInstruction *CFI = getCFIFor(Instr);
        if (CFI->getOperation() == MCCFIInstruction::OpGnuArgsSize) {
          CurrentGnuArgsSize = CFI->getOffset();
          // Delete DW_CFA_GNU_args_size instructions and only regenerate
          // during the final code emission. The information is embedded
          // inside call instructions.
          II = BB->erasePseudoInstruction(II);
          continue;
        }
      } else if (BC.MIB->isInvoke(Instr)) {
        // Add the value of GNU_args_size as an extra operand to invokes.
        BC.MIB->addGnuArgsSize(Instr, CurrentGnuArgsSize, AllocId);
      }
      ++II;
    }
  }
}
3416 
/// Clean up terminators after CFG construction: remove conditional branches
/// from single-successor blocks and append a return to reachable blocks that
/// have no successors and no terminating instruction.
void BinaryFunction::postProcessBranches() {
  // Only simple (fully processed) functions are adjusted.
  if (!isSimple())
    return;
  for (BinaryBasicBlock *BB : BasicBlocksLayout) {
    auto LastInstrRI = BB->getLastNonPseudo();
    if (BB->succ_size() == 1) {
      if (LastInstrRI != BB->rend() &&
          BC.MIB->isConditionalBranch(*LastInstrRI)) {
        // __builtin_unreachable() could create a conditional branch that
        // falls-through into the next function - hence the block will have only
        // one valid successor. Such behaviour is undefined and thus we remove
        // the conditional branch while leaving a valid successor.
        BB->eraseInstruction(std::prev(LastInstrRI.base()));
        LLVM_DEBUG(dbgs() << "BOLT-DEBUG: erasing conditional branch in "
                          << BB->getName() << " in function " << *this << '\n');
      }
    } else if (BB->succ_size() == 0) {
      // Ignore unreachable basic blocks.
      if (BB->pred_size() == 0 || BB->isLandingPad())
        continue;

      // If it's the basic block that does not end up with a terminator - we
      // insert a return instruction unless it's a call instruction.
      if (LastInstrRI == BB->rend()) {
        LLVM_DEBUG(
            dbgs() << "BOLT-DEBUG: at least one instruction expected in BB "
                   << BB->getName() << " in function " << *this << '\n');
        continue;
      }
      if (!BC.MIB->isTerminator(*LastInstrRI) &&
          !BC.MIB->isCall(*LastInstrRI)) {
        LLVM_DEBUG(dbgs() << "BOLT-DEBUG: adding return to basic block "
                          << BB->getName() << " in function " << *this << '\n');
        MCInst ReturnInstr;
        BC.MIB->createReturn(ReturnInstr);
        BB->addInstruction(ReturnInstr);
      }
    }
  }
  assert(validateCFG() && "invalid CFG");
}
3458 
3459 MCSymbol *BinaryFunction::addEntryPointAtOffset(uint64_t Offset) {
3460   assert(Offset && "cannot add primary entry point");
3461   assert(CurrentState == State::Empty || CurrentState == State::Disassembled);
3462 
3463   const uint64_t EntryPointAddress = getAddress() + Offset;
3464   MCSymbol *LocalSymbol = getOrCreateLocalLabel(EntryPointAddress);
3465 
3466   MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(LocalSymbol);
3467   if (EntrySymbol)
3468     return EntrySymbol;
3469 
3470   if (BinaryData *EntryBD = BC.getBinaryDataAtAddress(EntryPointAddress)) {
3471     EntrySymbol = EntryBD->getSymbol();
3472   } else {
3473     EntrySymbol =
3474       BC.getOrCreateGlobalSymbol(EntryPointAddress,
3475                                  Twine("__ENTRY_") + getOneName() + "@");
3476   }
3477   SecondaryEntryPoints[LocalSymbol] = EntrySymbol;
3478 
3479   BC.setSymbolToFunctionMap(EntrySymbol, this);
3480 
3481   return EntrySymbol;
3482 }
3483 
3484 MCSymbol *BinaryFunction::addEntryPoint(const BinaryBasicBlock &BB) {
3485   assert(CurrentState == State::CFG &&
3486          "basic block can be added as an entry only in a function with CFG");
3487 
3488   if (&BB == BasicBlocks.front())
3489     return getSymbol();
3490 
3491   MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(BB);
3492   if (EntrySymbol)
3493     return EntrySymbol;
3494 
3495   EntrySymbol =
3496     BC.Ctx->getOrCreateSymbol("__ENTRY_" + BB.getLabel()->getName());
3497 
3498   SecondaryEntryPoints[BB.getLabel()] = EntrySymbol;
3499 
3500   BC.setSymbolToFunctionMap(EntrySymbol, this);
3501 
3502   return EntrySymbol;
3503 }
3504 
3505 MCSymbol *BinaryFunction::getSymbolForEntryID(uint64_t EntryID) {
3506   if (EntryID == 0)
3507     return getSymbol();
3508 
3509   if (!isMultiEntry())
3510     return nullptr;
3511 
3512   uint64_t NumEntries = 0;
3513   if (hasCFG()) {
3514     for (BinaryBasicBlock *BB : BasicBlocks) {
3515       MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(*BB);
3516       if (!EntrySymbol)
3517         continue;
3518       if (NumEntries == EntryID)
3519         return EntrySymbol;
3520       ++NumEntries;
3521     }
3522   } else {
3523     for (std::pair<const uint32_t, MCSymbol *> &KV : Labels) {
3524       MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(KV.second);
3525       if (!EntrySymbol)
3526         continue;
3527       if (NumEntries == EntryID)
3528         return EntrySymbol;
3529       ++NumEntries;
3530     }
3531   }
3532 
3533   return nullptr;
3534 }
3535 
3536 uint64_t BinaryFunction::getEntryIDForSymbol(const MCSymbol *Symbol) const {
3537   if (!isMultiEntry())
3538     return 0;
3539 
3540   for (const MCSymbol *FunctionSymbol : getSymbols())
3541     if (FunctionSymbol == Symbol)
3542       return 0;
3543 
3544   // Check all secondary entries available as either basic blocks or lables.
3545   uint64_t NumEntries = 0;
3546   for (const BinaryBasicBlock *BB : BasicBlocks) {
3547     MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(*BB);
3548     if (!EntrySymbol)
3549       continue;
3550     if (EntrySymbol == Symbol)
3551       return NumEntries;
3552     ++NumEntries;
3553   }
3554   NumEntries = 0;
3555   for (const std::pair<const uint32_t, MCSymbol *> &KV : Labels) {
3556     MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(KV.second);
3557     if (!EntrySymbol)
3558       continue;
3559     if (EntrySymbol == Symbol)
3560       return NumEntries;
3561     ++NumEntries;
3562   }
3563 
3564   llvm_unreachable("symbol not found");
3565 }
3566 
3567 bool BinaryFunction::forEachEntryPoint(EntryPointCallbackTy Callback) const {
3568   bool Status = Callback(0, getSymbol());
3569   if (!isMultiEntry())
3570     return Status;
3571 
3572   for (const std::pair<const uint32_t, MCSymbol *> &KV : Labels) {
3573     if (!Status)
3574       break;
3575 
3576     MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(KV.second);
3577     if (!EntrySymbol)
3578       continue;
3579 
3580     Status = Callback(KV.first, EntrySymbol);
3581   }
3582 
3583   return Status;
3584 }
3585 
/// Return the function's basic blocks in depth-first order, starting from the
/// entry points. As a side effect, each block's layout index is overwritten
/// with its DFS position.
BinaryFunction::BasicBlockOrderType BinaryFunction::dfs() const {
  BasicBlockOrderType DFS;
  unsigned Index = 0;
  std::stack<BinaryBasicBlock *> Stack;

  // Push entry points to the stack in reverse order.
  //
  // NB: we rely on the original order of entries to match.
  for (auto BBI = layout_rbegin(); BBI != layout_rend(); ++BBI) {
    BinaryBasicBlock *BB = *BBI;
    if (isEntryPoint(*BB))
      Stack.push(BB);
    // Invalidate the index so visited blocks can be recognized below.
    BB->setLayoutIndex(BinaryBasicBlock::InvalidIndex);
  }

  while (!Stack.empty()) {
    BinaryBasicBlock *BB = Stack.top();
    Stack.pop();

    // Already visited.
    if (BB->getLayoutIndex() != BinaryBasicBlock::InvalidIndex)
      continue;

    BB->setLayoutIndex(Index++);
    DFS.push_back(BB);

    for (BinaryBasicBlock *SuccBB : BB->landing_pads()) {
      Stack.push(SuccBB);
    }

    const MCSymbol *TBB = nullptr;
    const MCSymbol *FBB = nullptr;
    MCInst *CondBranch = nullptr;
    MCInst *UncondBranch = nullptr;
    // For two-way conditional blocks, push successors in an order determined
    // by the branch's canonical condition code, so the resulting DFS order is
    // stable regardless of how the branch was previously reversed.
    if (BB->analyzeBranch(TBB, FBB, CondBranch, UncondBranch) &&
        CondBranch && BB->succ_size() == 2) {
      if (BC.MIB->getCanonicalBranchCondCode(BC.MIB->getCondCode(
              *CondBranch)) == BC.MIB->getCondCode(*CondBranch)) {
        Stack.push(BB->getConditionalSuccessor(true));
        Stack.push(BB->getConditionalSuccessor(false));
      } else {
        Stack.push(BB->getConditionalSuccessor(false));
        Stack.push(BB->getConditionalSuccessor(true));
      }
    } else {
      for (BinaryBasicBlock *SuccBB : BB->successors()) {
        Stack.push(SuccBB);
      }
    }
  }

  return DFS;
}
3638 
3639 size_t BinaryFunction::computeHash(bool UseDFS,
3640                                    OperandHashFuncTy OperandHashFunc) const {
3641   if (size() == 0)
3642     return 0;
3643 
3644   assert(hasCFG() && "function is expected to have CFG");
3645 
3646   const BasicBlockOrderType &Order = UseDFS ? dfs() : BasicBlocksLayout;
3647 
3648   // The hash is computed by creating a string of all instruction opcodes and
3649   // possibly their operands and then hashing that string with std::hash.
3650   std::string HashString;
3651   for (const BinaryBasicBlock *BB : Order) {
3652     for (const MCInst &Inst : *BB) {
3653       unsigned Opcode = Inst.getOpcode();
3654 
3655       if (BC.MIB->isPseudo(Inst))
3656         continue;
3657 
3658       // Ignore unconditional jumps since we check CFG consistency by processing
3659       // basic blocks in order and do not rely on branches to be in-sync with
3660       // CFG. Note that we still use condition code of conditional jumps.
3661       if (BC.MIB->isUnconditionalBranch(Inst))
3662         continue;
3663 
3664       if (Opcode == 0)
3665         HashString.push_back(0);
3666 
3667       while (Opcode) {
3668         uint8_t LSB = Opcode & 0xff;
3669         HashString.push_back(LSB);
3670         Opcode = Opcode >> 8;
3671       }
3672 
3673       for (unsigned I = 0, E = MCPlus::getNumPrimeOperands(Inst); I != E; ++I) {
3674         HashString.append(OperandHashFunc(Inst.getOperand(I)));
3675       }
3676     }
3677   }
3678 
3679   return Hash = std::hash<std::string>{}(HashString);
3680 }
3681 
/// Insert \p NewBBs into the function immediately after \p Start (or at the
/// beginning when \p Start is null), taking ownership of the blocks and
/// optionally updating the layout and CFI state.
void BinaryFunction::insertBasicBlocks(
    BinaryBasicBlock *Start,
    std::vector<std::unique_ptr<BinaryBasicBlock>> &&NewBBs,
    const bool UpdateLayout,
    const bool UpdateCFIState,
    const bool RecomputeLandingPads) {
  // With a null Start, StartIndex is -1 so StartIndex + 1 selects position 0.
  // NOTE(review): if getIndex() returns an unsigned type, this relies on
  // wrap-around of the deduced type — TODO confirm.
  const auto StartIndex = Start ? getIndex(Start) : -1;
  const size_t NumNewBlocks = NewBBs.size();

  // Make room for the new blocks, then move ownership of each one in.
  BasicBlocks.insert(BasicBlocks.begin() + (StartIndex + 1),
                     NumNewBlocks,
                     nullptr);

  auto I = StartIndex + 1;
  for (std::unique_ptr<BinaryBasicBlock> &BB : NewBBs) {
    assert(!BasicBlocks[I]);
    BasicBlocks[I++] = BB.release();
  }

  // recomputeLandingPads() refreshes indices itself; otherwise renumber all
  // blocks explicitly.
  if (RecomputeLandingPads) {
    recomputeLandingPads();
  } else {
    updateBBIndices(0);
  }

  if (UpdateLayout) {
    updateLayout(Start, NumNewBlocks);
  }

  if (UpdateCFIState) {
    updateCFIState(Start, NumNewBlocks);
  }
}
3715 
/// Iterator-based variant: insert \p NewBBs immediately after \p StartBB and
/// return an iterator to the first inserted block.
BinaryFunction::iterator BinaryFunction::insertBasicBlocks(
    BinaryFunction::iterator StartBB,
    std::vector<std::unique_ptr<BinaryBasicBlock>> &&NewBBs,
    const bool UpdateLayout,
    const bool UpdateCFIState,
    const bool RecomputeLandingPads) {
  const unsigned StartIndex = getIndex(&*StartBB);
  const size_t NumNewBlocks = NewBBs.size();

  // Make room for the new blocks and remember where they begin.
  BasicBlocks.insert(BasicBlocks.begin() + StartIndex + 1, NumNewBlocks,
                     nullptr);
  auto RetIter = BasicBlocks.begin() + StartIndex + 1;

  // Move ownership of each block into the function.
  unsigned I = StartIndex + 1;
  for (std::unique_ptr<BinaryBasicBlock> &BB : NewBBs) {
    assert(!BasicBlocks[I]);
    BasicBlocks[I++] = BB.release();
  }

  // recomputeLandingPads() refreshes indices itself; otherwise renumber all
  // blocks explicitly.
  if (RecomputeLandingPads) {
    recomputeLandingPads();
  } else {
    updateBBIndices(0);
  }

  // *std::prev(RetIter) is the block the insertion happened after.
  if (UpdateLayout) {
    updateLayout(*std::prev(RetIter), NumNewBlocks);
  }

  if (UpdateCFIState) {
    updateCFIState(*std::prev(RetIter), NumNewBlocks);
  }

  return RetIter;
}
3751 
3752 void BinaryFunction::updateBBIndices(const unsigned StartIndex) {
3753   for (unsigned I = StartIndex; I < BasicBlocks.size(); ++I) {
3754     BasicBlocks[I]->Index = I;
3755   }
3756 }
3757 
3758 void BinaryFunction::updateCFIState(BinaryBasicBlock *Start,
3759                                     const unsigned NumNewBlocks) {
3760   const int32_t CFIState = Start->getCFIStateAtExit();
3761   const unsigned StartIndex = getIndex(Start) + 1;
3762   for (unsigned I = 0; I < NumNewBlocks; ++I) {
3763     BasicBlocks[StartIndex + I]->setCFIState(CFIState);
3764   }
3765 }
3766 
/// Insert the \p NumNewBlocks blocks that follow \p Start in BasicBlocks into
/// the layout right after Start (or at the front when Start is null), then
/// refresh all layout indices.
void BinaryFunction::updateLayout(BinaryBasicBlock *Start,
                                  const unsigned NumNewBlocks) {
  // If start not provided insert new blocks at the beginning
  if (!Start) {
    BasicBlocksLayout.insert(layout_begin(), BasicBlocks.begin(),
                             BasicBlocks.begin() + NumNewBlocks);
    updateLayoutIndices();
    return;
  }

  // Insert new blocks in the layout immediately after Start.
  auto Pos = std::find(layout_begin(), layout_end(), Start);
  assert(Pos != layout_end());
  // The new blocks occupy positions [getIndex(Start)+1, +NumNewBlocks] in
  // BasicBlocks (see insertBasicBlocks).
  BasicBlockListType::iterator Begin =
      std::next(BasicBlocks.begin(), getIndex(Start) + 1);
  BasicBlockListType::iterator End =
      std::next(BasicBlocks.begin(), getIndex(Start) + NumNewBlocks + 1);
  BasicBlocksLayout.insert(Pos + 1, Begin, End);
  updateLayoutIndices();
}
3787 
3788 bool BinaryFunction::checkForAmbiguousJumpTables() {
3789   SmallSet<uint64_t, 4> JumpTables;
3790   for (BinaryBasicBlock *&BB : BasicBlocks) {
3791     for (MCInst &Inst : *BB) {
3792       if (!BC.MIB->isIndirectBranch(Inst))
3793         continue;
3794       uint64_t JTAddress = BC.MIB->getJumpTable(Inst);
3795       if (!JTAddress)
3796         continue;
3797       // This address can be inside another jump table, but we only consider
3798       // it ambiguous when the same start address is used, not the same JT
3799       // object.
3800       if (!JumpTables.count(JTAddress)) {
3801         JumpTables.insert(JTAddress);
3802         continue;
3803       }
3804       return true;
3805     }
3806   }
3807   return false;
3808 }
3809 
/// Ensure every indirect branch uses its own jump table object: when two
/// branches share one JT, duplicate the table and patch the instruction that
/// references it. Instruction patterns are recognized with MCPlus matchers:
/// a plain scale-8 indirect jump, a load+jump pair, and a PIC-style scale-4
/// sequence.
void BinaryFunction::disambiguateJumpTables(
    MCPlusBuilder::AllocatorIdTy AllocId) {
  assert((opts::JumpTables != JTS_BASIC && isSimple()) || !BC.HasRelocations);
  // Tracks jump tables already claimed by an earlier branch.
  SmallPtrSet<JumpTable *, 4> JumpTables;
  for (BinaryBasicBlock *&BB : BasicBlocks) {
    for (MCInst &Inst : *BB) {
      if (!BC.MIB->isIndirectBranch(Inst))
        continue;
      JumpTable *JT = getJumpTable(Inst);
      if (!JT)
        continue;
      // First branch to use this JT keeps it.
      auto Iter = JumpTables.find(JT);
      if (Iter == JumpTables.end()) {
        JumpTables.insert(JT);
        continue;
      }
      // This instruction is an indirect jump using a jump table, but it is
      // using the same jump table of another jump. Try all our tricks to
      // extract the jump table symbol and make it point to a new, duplicated JT
      MCPhysReg BaseReg1;
      uint64_t Scale;
      const MCSymbol *Target;
      // In case we match if our first matcher, first instruction is the one to
      // patch
      MCInst *JTLoadInst = &Inst;
      // Try a standard indirect jump matcher, scale 8
      std::unique_ptr<MCPlusBuilder::MCInstMatcher> IndJmpMatcher =
          BC.MIB->matchIndJmp(BC.MIB->matchReg(BaseReg1),
                              BC.MIB->matchImm(Scale), BC.MIB->matchReg(),
                              /*Offset=*/BC.MIB->matchSymbol(Target));
      if (!IndJmpMatcher->match(
              *BC.MRI, *BC.MIB,
              MutableArrayRef<MCInst>(&*BB->begin(), &Inst + 1), -1) ||
          BaseReg1 != BC.MIB->getNoRegister() ||
          Scale != 8) {
        MCPhysReg BaseReg2;
        uint64_t Offset;
        // Standard JT matching failed. Trying now:
        //     movq  "jt.2397/1"(,%rax,8), %rax
        //     jmpq  *%rax
        std::unique_ptr<MCPlusBuilder::MCInstMatcher> LoadMatcherOwner =
            BC.MIB->matchLoad(BC.MIB->matchReg(BaseReg1),
                              BC.MIB->matchImm(Scale), BC.MIB->matchReg(),
                              /*Offset=*/BC.MIB->matchSymbol(Target));
        MCPlusBuilder::MCInstMatcher *LoadMatcher = LoadMatcherOwner.get();
        std::unique_ptr<MCPlusBuilder::MCInstMatcher> IndJmpMatcher2 =
            BC.MIB->matchIndJmp(std::move(LoadMatcherOwner));
        if (!IndJmpMatcher2->match(
                *BC.MRI, *BC.MIB,
                MutableArrayRef<MCInst>(&*BB->begin(), &Inst + 1), -1) ||
            BaseReg1 != BC.MIB->getNoRegister() || Scale != 8) {
          // JT matching failed. Trying now:
          // PIC-style matcher, scale 4
          //    addq    %rdx, %rsi
          //    addq    %rdx, %rdi
          //    leaq    DATAat0x402450(%rip), %r11
          //    movslq  (%r11,%rdx,4), %rcx
          //    addq    %r11, %rcx
          //    jmpq    *%rcx # JUMPTABLE @0x402450
          std::unique_ptr<MCPlusBuilder::MCInstMatcher> PICIndJmpMatcher =
              BC.MIB->matchIndJmp(BC.MIB->matchAdd(
                  BC.MIB->matchReg(BaseReg1),
                  BC.MIB->matchLoad(BC.MIB->matchReg(BaseReg2),
                                    BC.MIB->matchImm(Scale), BC.MIB->matchReg(),
                                    BC.MIB->matchImm(Offset))));
          std::unique_ptr<MCPlusBuilder::MCInstMatcher> LEAMatcherOwner =
              BC.MIB->matchLoadAddr(BC.MIB->matchSymbol(Target));
          MCPlusBuilder::MCInstMatcher *LEAMatcher = LEAMatcherOwner.get();
          std::unique_ptr<MCPlusBuilder::MCInstMatcher> PICBaseAddrMatcher =
              BC.MIB->matchIndJmp(BC.MIB->matchAdd(std::move(LEAMatcherOwner),
                                                   BC.MIB->matchAnyOperand()));
          if (!PICIndJmpMatcher->match(
                  *BC.MRI, *BC.MIB,
                  MutableArrayRef<MCInst>(&*BB->begin(), &Inst + 1), -1) ||
              Scale != 4 || BaseReg1 != BaseReg2 || Offset != 0 ||
              !PICBaseAddrMatcher->match(
                  *BC.MRI, *BC.MIB,
                  MutableArrayRef<MCInst>(&*BB->begin(), &Inst + 1), -1)) {
            llvm_unreachable("Failed to extract jump table base");
            continue;
          }
          // Matched PIC, identify the instruction with the reference to the JT
          JTLoadInst = LEAMatcher->CurInst;
        } else {
          // Matched non-PIC
          JTLoadInst = LoadMatcher->CurInst;
        }
      }

      // Clone the shared jump table and redirect this branch's reference to
      // the fresh copy.
      uint64_t NewJumpTableID = 0;
      const MCSymbol *NewJTLabel;
      std::tie(NewJumpTableID, NewJTLabel) =
          BC.duplicateJumpTable(*this, JT, Target);
      {
        auto L = BC.scopeLock();
        BC.MIB->replaceMemOperandDisp(*JTLoadInst, NewJTLabel, BC.Ctx.get());
      }
      // We use a unique ID with the high bit set as address for this "injected"
      // jump table (not originally in the input binary).
      BC.MIB->setJumpTable(Inst, NewJumpTableID, 0, AllocId);
    }
  }
}
3913 
3914 bool BinaryFunction::replaceJumpTableEntryIn(BinaryBasicBlock *BB,
3915                                              BinaryBasicBlock *OldDest,
3916                                              BinaryBasicBlock *NewDest) {
3917   MCInst *Instr = BB->getLastNonPseudoInstr();
3918   if (!Instr || !BC.MIB->isIndirectBranch(*Instr))
3919     return false;
3920   uint64_t JTAddress = BC.MIB->getJumpTable(*Instr);
3921   assert(JTAddress && "Invalid jump table address");
3922   JumpTable *JT = getJumpTableContainingAddress(JTAddress);
3923   assert(JT && "No jump table structure for this indirect branch");
3924   bool Patched = JT->replaceDestination(JTAddress, OldDest->getLabel(),
3925                                         NewDest->getLabel());
3926   (void)Patched;
3927   assert(Patched && "Invalid entry to be replaced in jump table");
3928   return true;
3929 }
3930 
/// Split the CFG edge From->To by inserting a new intermediate block that
/// inherits the edge's profile counts. Returns the new block.
BinaryBasicBlock *BinaryFunction::splitEdge(BinaryBasicBlock *From,
                                            BinaryBasicBlock *To) {
  // Create intermediate BB
  MCSymbol *Tmp;
  {
    auto L = BC.scopeLock();
    Tmp = BC.Ctx->createNamedTempSymbol("SplitEdge");
  }
  // Link new BBs to the original input offset of the From BB, so we can map
  // samples recorded in new BBs back to the original BB seem in the input
  // binary (if using BAT)
  std::unique_ptr<BinaryBasicBlock> NewBB =
      createBasicBlock(From->getInputOffset(), Tmp);
  BinaryBasicBlock *NewBBPtr = NewBB.get();

  // Update "From" BB
  // Locate the branch info for the From->To edge to carry its counts over.
  auto I = From->succ_begin();
  auto BI = From->branch_info_begin();
  for (; I != From->succ_end(); ++I) {
    if (*I == To)
      break;
    ++BI;
  }
  assert(I != From->succ_end() && "Invalid CFG edge in splitEdge!");
  uint64_t OrigCount = BI->Count;
  uint64_t OrigMispreds = BI->MispredictedCount;
  // Patch the jump table (if any) before rewiring the successor edge.
  replaceJumpTableEntryIn(From, To, NewBBPtr);
  From->replaceSuccessor(To, NewBBPtr, OrigCount, OrigMispreds);

  NewBB->addSuccessor(To, OrigCount, OrigMispreds);
  NewBB->setExecutionCount(OrigCount);
  NewBB->setIsCold(From->isCold());

  // Update CFI and BB layout with new intermediate BB
  std::vector<std::unique_ptr<BinaryBasicBlock>> NewBBs;
  NewBBs.emplace_back(std::move(NewBB));
  insertBasicBlocks(From, std::move(NewBBs), true, true,
                    /*RecomputeLandingPads=*/false);
  return NewBBPtr;
}
3971 
/// Aggressively remove CFG edges that are likely wrong: fall-throughs into a
/// landing pad, and fall-throughs after calls to known no-return PLT entries.
void BinaryFunction::deleteConservativeEdges() {
  // Our goal is to aggressively remove edges from the CFG that we believe are
  // wrong. This is used for instrumentation, where it is safe to remove
  // fallthrough edges because we won't reorder blocks.
  for (auto I = BasicBlocks.begin(), E = BasicBlocks.end(); I != E; ++I) {
    BinaryBasicBlock *BB = *I;
    // Only single-successor, non-empty blocks are candidates.
    if (BB->succ_size() != 1 || BB->size() == 0)
      continue;

    auto NextBB = std::next(I);
    MCInst* Last = BB->getLastNonPseudoInstr();
    // Fallthrough is a landing pad? Delete this edge (as long as we don't
    // have a direct jump to it)
    if ((*BB->succ_begin())->isLandingPad() && NextBB != E &&
        *BB->succ_begin() == *NextBB && Last && !BC.MIB->isBranch(*Last)) {
      BB->removeAllSuccessors();
      continue;
    }

    // Look for suspicious calls at the end of BB where gcc may optimize it and
    // remove the jump to the epilogue when it knows the call won't return.
    if (!Last || !BC.MIB->isCall(*Last))
      continue;

    const MCSymbol *CalleeSymbol = BC.MIB->getTargetSymbol(*Last);
    if (!CalleeSymbol)
      continue;

    // Known no-return callees: a fall-through after these cannot execute.
    StringRef CalleeName = CalleeSymbol->getName();
    if (CalleeName != "__cxa_throw@PLT" &&
        CalleeName != "_Unwind_Resume@PLT" &&
        CalleeName != "__cxa_rethrow@PLT" &&
        CalleeName != "exit@PLT" &&
        CalleeName != "abort@PLT" )
      continue;

    BB->removeAllSuccessors();
  }
}
4011 
4012 bool BinaryFunction::isDataMarker(const SymbolRef &Symbol,
4013                                   uint64_t SymbolSize) const {
4014   // For aarch64, the ABI defines mapping symbols so we identify data in the
4015   // code section (see IHI0056B). $d identifies a symbol starting data contents.
4016   if (BC.isAArch64() && Symbol.getType() &&
4017       cantFail(Symbol.getType()) == SymbolRef::ST_Unknown && SymbolSize == 0 &&
4018       Symbol.getName() &&
4019       (cantFail(Symbol.getName()) == "$d" ||
4020        cantFail(Symbol.getName()).startswith("$d.")))
4021     return true;
4022   return false;
4023 }
4024 
4025 bool BinaryFunction::isCodeMarker(const SymbolRef &Symbol,
4026                                   uint64_t SymbolSize) const {
4027   // For aarch64, the ABI defines mapping symbols so we identify data in the
4028   // code section (see IHI0056B). $x identifies a symbol starting code or the
4029   // end of a data chunk inside code.
4030   if (BC.isAArch64() && Symbol.getType() &&
4031       cantFail(Symbol.getType()) == SymbolRef::ST_Unknown && SymbolSize == 0 &&
4032       Symbol.getName() &&
4033       (cantFail(Symbol.getName()) == "$x" ||
4034        cantFail(Symbol.getName()).startswith("$x.")))
4035     return true;
4036   return false;
4037 }
4038 
4039 bool BinaryFunction::isSymbolValidInScope(const SymbolRef &Symbol,
4040                                           uint64_t SymbolSize) const {
4041   // If this symbol is in a different section from the one where the
4042   // function symbol is, don't consider it as valid.
4043   if (!getOriginSection()->containsAddress(
4044           cantFail(Symbol.getAddress(), "cannot get symbol address")))
4045     return false;
4046 
4047   // Some symbols are tolerated inside function bodies, others are not.
4048   // The real function boundaries may not be known at this point.
4049   if (isDataMarker(Symbol, SymbolSize) || isCodeMarker(Symbol, SymbolSize))
4050     return true;
4051 
4052   // It's okay to have a zero-sized symbol in the middle of non-zero-sized
4053   // function.
4054   if (SymbolSize == 0 && containsAddress(cantFail(Symbol.getAddress())))
4055     return true;
4056 
4057   if (cantFail(Symbol.getType()) != SymbolRef::ST_Unknown)
4058     return false;
4059 
4060   if (cantFail(Symbol.getFlags()) & SymbolRef::SF_Global)
4061     return false;
4062 
4063   return true;
4064 }
4065 
4066 void BinaryFunction::adjustExecutionCount(uint64_t Count) {
4067   if (getKnownExecutionCount() == 0 || Count == 0)
4068     return;
4069 
4070   if (ExecutionCount < Count)
4071     Count = ExecutionCount;
4072 
4073   double AdjustmentRatio = ((double) ExecutionCount - Count) / ExecutionCount;
4074   if (AdjustmentRatio < 0.0)
4075     AdjustmentRatio = 0.0;
4076 
4077   for (BinaryBasicBlock *&BB : layout())
4078     BB->adjustExecutionCount(AdjustmentRatio);
4079 
4080   ExecutionCount -= Count;
4081 }
4082 
4083 BinaryFunction::~BinaryFunction() {
4084   for (BinaryBasicBlock *BB : BasicBlocks) {
4085     delete BB;
4086   }
4087   for (BinaryBasicBlock *BB : DeletedBasicBlocks) {
4088     delete BB;
4089   }
4090 }
4091 
void BinaryFunction::calculateLoopInfo() {
  // Build the natural-loop nest for this function's CFG and annotate each
  // loop with its depth and, when a valid profile is present, entry, exit,
  // and back-edge execution counts. Results are stored in BLI.

  // Discover loops.
  BinaryDominatorTree DomTree;
  DomTree.recalculate(*this);
  BLI.reset(new BinaryLoopInfo());
  BLI->analyze(DomTree);

  // Traverse discovered loops and add depth and profile information.
  // Top-level loops seed the worklist and are counted as outer loops.
  std::stack<BinaryLoop *> St;
  for (auto I = BLI->begin(), E = BLI->end(); I != E; ++I) {
    St.push(*I);
    ++BLI->OuterLoops;
  }

  while (!St.empty()) {
    BinaryLoop *L = St.top();
    St.pop();
    ++BLI->TotalLoops;
    BLI->MaximumDepth = std::max(L->getLoopDepth(), BLI->MaximumDepth);

    // Add nested loops in the stack.
    for (BinaryLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
      St.push(*I);
    }

    // Skip if no valid profile is found.
    if (!hasValidProfile()) {
      L->EntryCount = COUNT_NO_PROFILE;
      L->ExitCount = COUNT_NO_PROFILE;
      L->TotalBackEdgeCount = COUNT_NO_PROFILE;
      continue;
    }

    // Compute back edge count: sum of branch counts on latch->header edges.
    SmallVector<BinaryBasicBlock *, 1> Latches;
    L->getLoopLatches(Latches);

    for (BinaryBasicBlock *Latch : Latches) {
      // BI advances in lockstep with the successor iterator so that it
      // always refers to the branch info of the current successor edge.
      auto BI = Latch->branch_info_begin();
      for (BinaryBasicBlock *Succ : Latch->successors()) {
        if (Succ == L->getHeader()) {
          assert(BI->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
                 "profile data not found");
          L->TotalBackEdgeCount += BI->Count;
        }
        ++BI;
      }
    }

    // Compute entry count: header executions not attributable to back edges.
    L->EntryCount = L->getHeader()->getExecutionCount() - L->TotalBackEdgeCount;

    // Compute exit count: sum of branch counts on edges leaving the loop.
    SmallVector<BinaryLoop::Edge, 1> ExitEdges;
    L->getExitEdges(ExitEdges);
    for (BinaryLoop::Edge &Exit : ExitEdges) {
      const BinaryBasicBlock *Exiting = Exit.first;
      const BinaryBasicBlock *ExitTarget = Exit.second;
      // Same lockstep iteration as above to find the exiting edge's count.
      auto BI = Exiting->branch_info_begin();
      for (BinaryBasicBlock *Succ : Exiting->successors()) {
        if (Succ == ExitTarget) {
          assert(BI->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
                 "profile data not found");
          L->ExitCount += BI->Count;
        }
        ++BI;
      }
    }
  }
}
4162 
void BinaryFunction::updateOutputValues(const MCAsmLayout &Layout) {
  // Propagate post-emission addresses and sizes from the MC layout back into
  // this function: the hot and (if split) cold ranges, constant islands, and
  // per-basic-block output address ranges used for debug info and BAT.
  if (!isEmitted()) {
    assert(!isInjected() && "injected function should be emitted");
    // Function body was not emitted: output values match the input.
    setOutputAddress(getAddress());
    setOutputSize(getSize());
    return;
  }

  const uint64_t BaseAddress = getCodeSection()->getOutputAddress();
  ErrorOr<BinarySection &> ColdSection = getColdCodeSection();
  // ColdSection is only dereferenced when the function is split.
  const uint64_t ColdBaseAddress =
      isSplit() ? ColdSection->getOutputAddress() : 0;
  if (BC.HasRelocations || isInjected()) {
    // The function may have moved: derive new address/size from the offsets
    // of its start/end symbols within the output section.
    const uint64_t StartOffset = Layout.getSymbolOffset(*getSymbol());
    const uint64_t EndOffset = Layout.getSymbolOffset(*getFunctionEndLabel());
    setOutputAddress(BaseAddress + StartOffset);
    setOutputSize(EndOffset - StartOffset);
    if (hasConstantIsland()) {
      const uint64_t DataOffset =
          Layout.getSymbolOffset(*getFunctionConstantIslandLabel());
      setOutputDataAddress(BaseAddress + DataOffset);
    }
    if (isSplit()) {
      // Cold fragment range comes from its own start/end labels.
      const MCSymbol *ColdStartSymbol = getColdSymbol();
      assert(ColdStartSymbol && ColdStartSymbol->isDefined() &&
             "split function should have defined cold symbol");
      const MCSymbol *ColdEndSymbol = getFunctionColdEndLabel();
      assert(ColdEndSymbol && ColdEndSymbol->isDefined() &&
             "split function should have defined cold end symbol");
      const uint64_t ColdStartOffset = Layout.getSymbolOffset(*ColdStartSymbol);
      const uint64_t ColdEndOffset = Layout.getSymbolOffset(*ColdEndSymbol);
      cold().setAddress(ColdBaseAddress + ColdStartOffset);
      cold().setImageSize(ColdEndOffset - ColdStartOffset);
      if (hasConstantIsland()) {
        const uint64_t DataOffset =
            Layout.getSymbolOffset(*getFunctionColdConstantIslandLabel());
        setOutputColdDataAddress(ColdBaseAddress + DataOffset);
      }
    }
  } else {
    // Non-relocation mode: the function stays in place; only the size may
    // have changed (taken from the end label's offset).
    setOutputAddress(getAddress());
    setOutputSize(
        Layout.getSymbolOffset(*getFunctionEndLabel()));
  }

  // Update basic block output ranges for the debug info, if we have
  // secondary entry points in the symbol table to update or if writing BAT.
  if (!opts::UpdateDebugSections && !isMultiEntry() &&
      !requiresAddressTranslation())
    return;

  // Output ranges should match the input if the body hasn't changed.
  if (!isSimple() && !BC.HasRelocations)
    return;

  // AArch64 may have functions that only contains a constant island (no code).
  if (layout_begin() == layout_end())
    return;

  BinaryBasicBlock *PrevBB = nullptr;
  for (auto BBI = layout_begin(), BBE = layout_end(); BBI != BBE; ++BBI) {
    BinaryBasicBlock *BB = *BBI;
    assert(BB->getLabel()->isDefined() && "symbol should be defined");
    const uint64_t BBBaseAddress = BB->isCold() ? ColdBaseAddress : BaseAddress;
    if (!BC.HasRelocations) {
      // Sanity check: in non-relocation mode the section base must match the
      // function's own output address (hot or cold).
      if (BB->isCold()) {
        assert(BBBaseAddress == cold().getAddress());
      } else {
        assert(BBBaseAddress == getOutputAddress());
      }
    }
    const uint64_t BBOffset = Layout.getSymbolOffset(*BB->getLabel());
    const uint64_t BBAddress = BBBaseAddress + BBOffset;
    BB->setOutputStartAddress(BBAddress);

    if (PrevBB) {
      // The previous block ends where this one starts, unless we just
      // crossed the hot/cold boundary - then it ends at the hot part's end.
      uint64_t PrevBBEndAddress = BBAddress;
      if (BB->isCold() != PrevBB->isCold()) {
        PrevBBEndAddress =
            getOutputAddress() + getOutputSize();
      }
      PrevBB->setOutputEndAddress(PrevBBEndAddress);
    }
    PrevBB = BB;

    BB->updateOutputValues(Layout);
  }
  // The layout is non-empty (checked above), so PrevBB is valid here: close
  // off the last block at the end of its (hot or cold) range.
  PrevBB->setOutputEndAddress(PrevBB->isCold() ?
      cold().getAddress() + cold().getImageSize() :
      getOutputAddress() + getOutputSize());
}
4254 
4255 DebugAddressRangesVector BinaryFunction::getOutputAddressRanges() const {
4256   DebugAddressRangesVector OutputRanges;
4257 
4258   if (isFolded())
4259     return OutputRanges;
4260 
4261   if (IsFragment)
4262     return OutputRanges;
4263 
4264   OutputRanges.emplace_back(getOutputAddress(),
4265                             getOutputAddress() + getOutputSize());
4266   if (isSplit()) {
4267     assert(isEmitted() && "split function should be emitted");
4268     OutputRanges.emplace_back(cold().getAddress(),
4269                               cold().getAddress() + cold().getImageSize());
4270   }
4271 
4272   if (isSimple())
4273     return OutputRanges;
4274 
4275   for (BinaryFunction *Frag : Fragments) {
4276     assert(!Frag->isSimple() &&
4277            "fragment of non-simple function should also be non-simple");
4278     OutputRanges.emplace_back(Frag->getOutputAddress(),
4279                               Frag->getOutputAddress() + Frag->getOutputSize());
4280   }
4281 
4282   return OutputRanges;
4283 }
4284 
4285 uint64_t BinaryFunction::translateInputToOutputAddress(uint64_t Address) const {
4286   if (isFolded())
4287     return 0;
4288 
4289   // If the function hasn't changed return the same address.
4290   if (!isEmitted())
4291     return Address;
4292 
4293   if (Address < getAddress())
4294     return 0;
4295 
4296   // Check if the address is associated with an instruction that is tracked
4297   // by address translation.
4298   auto KV = InputOffsetToAddressMap.find(Address - getAddress());
4299   if (KV != InputOffsetToAddressMap.end()) {
4300     return KV->second;
4301   }
4302 
4303   // FIXME: #18950828 - we rely on relative offsets inside basic blocks to stay
4304   //        intact. Instead we can use pseudo instructions and/or annotations.
4305   const uint64_t Offset = Address - getAddress();
4306   const BinaryBasicBlock *BB = getBasicBlockContainingOffset(Offset);
4307   if (!BB) {
4308     // Special case for address immediately past the end of the function.
4309     if (Offset == getSize())
4310       return getOutputAddress() + getOutputSize();
4311 
4312     return 0;
4313   }
4314 
4315   return std::min(BB->getOutputAddressRange().first + Offset - BB->getOffset(),
4316                   BB->getOutputAddressRange().second);
4317 }
4318 
DebugAddressRangesVector BinaryFunction::translateInputToOutputRanges(
    const DWARFAddressRangesVector &InputRanges) const {
  // Translate debug address ranges from input to output addresses by walking
  // the basic blocks each input range covers. Ranges over deleted blocks are
  // dropped; the result is sorted and merged before returning.
  DebugAddressRangesVector OutputRanges;

  if (isFolded())
    return OutputRanges;

  // If the function hasn't changed return the same ranges.
  if (!isEmitted()) {
    OutputRanges.resize(InputRanges.size());
    std::transform(InputRanges.begin(), InputRanges.end(),
                   OutputRanges.begin(),
                   [](const DWARFAddressRange &Range) {
                     return DebugAddressRange(Range.LowPC, Range.HighPC);
                   });
    return OutputRanges;
  }

  // Even though we will merge ranges in a post-processing pass, we attempt to
  // merge them in a main processing loop as it improves the processing time.
  uint64_t PrevEndAddress = 0;
  for (const DWARFAddressRange &Range : InputRanges) {
    if (!containsAddress(Range.LowPC)) {
      LLVM_DEBUG(
          dbgs() << "BOLT-DEBUG: invalid debug address range detected for "
                 << *this << " : [0x" << Twine::utohexstr(Range.LowPC) << ", 0x"
                 << Twine::utohexstr(Range.HighPC) << "]\n");
      PrevEndAddress = 0;
      continue;
    }
    uint64_t InputOffset = Range.LowPC - getAddress();
    const uint64_t InputEndOffset =
        std::min(Range.HighPC - getAddress(), getSize());

    // Find the basic block containing InputOffset: upper_bound returns the
    // first block starting after it, so step back one entry.
    auto BBI = std::upper_bound(BasicBlockOffsets.begin(),
                                BasicBlockOffsets.end(),
                                BasicBlockOffset(InputOffset, nullptr),
                                CompareBasicBlockOffsets());
    --BBI;
    // Walk consecutive blocks until the whole input range is consumed.
    do {
      const BinaryBasicBlock *BB = BBI->second;
      if (InputOffset < BB->getOffset() || InputOffset >= BB->getEndOffset()) {
        LLVM_DEBUG(
            dbgs() << "BOLT-DEBUG: invalid debug address range detected for "
                   << *this << " : [0x" << Twine::utohexstr(Range.LowPC)
                   << ", 0x" << Twine::utohexstr(Range.HighPC) << "]\n");
        PrevEndAddress = 0;
        break;
      }

      // Skip the range if the block was deleted (output start of 0).
      if (const uint64_t OutputStart = BB->getOutputAddressRange().first) {
        const uint64_t StartAddress =
            OutputStart + InputOffset - BB->getOffset();
        uint64_t EndAddress = BB->getOutputAddressRange().second;
        if (InputEndOffset < BB->getEndOffset())
          EndAddress = StartAddress + InputEndOffset - InputOffset;

        // Extend the previous output range when contiguous, else start a new
        // one (HighPC clamped so it never precedes LowPC).
        if (StartAddress == PrevEndAddress) {
          OutputRanges.back().HighPC = std::max(OutputRanges.back().HighPC,
                                                EndAddress);
        } else {
          OutputRanges.emplace_back(StartAddress,
                                    std::max(StartAddress, EndAddress));
        }
        PrevEndAddress = OutputRanges.back().HighPC;
      }

      InputOffset = BB->getEndOffset();
      ++BBI;
    } while (InputOffset < InputEndOffset);
  }

  // Post-processing pass to sort and merge ranges.
  std::sort(OutputRanges.begin(), OutputRanges.end());
  DebugAddressRangesVector MergedRanges;
  PrevEndAddress = 0;
  for (const DebugAddressRange &Range : OutputRanges) {
    if (Range.LowPC <= PrevEndAddress) {
      MergedRanges.back().HighPC = std::max(MergedRanges.back().HighPC,
                                            Range.HighPC);
    } else {
      MergedRanges.emplace_back(Range.LowPC, Range.HighPC);
    }
    PrevEndAddress = MergedRanges.back().HighPC;
  }

  return MergedRanges;
}
4408 
4409 MCInst *BinaryFunction::getInstructionAtOffset(uint64_t Offset) {
4410   if (CurrentState == State::Disassembled) {
4411     auto II = Instructions.find(Offset);
4412     return (II == Instructions.end()) ? nullptr : &II->second;
4413   } else if (CurrentState == State::CFG) {
4414     BinaryBasicBlock *BB = getBasicBlockContainingOffset(Offset);
4415     if (!BB)
4416       return nullptr;
4417 
4418     for (MCInst &Inst : *BB) {
4419       constexpr uint32_t InvalidOffset = std::numeric_limits<uint32_t>::max();
4420       if (Offset == BC.MIB->getAnnotationWithDefault<uint32_t>(Inst, "Offset",
4421                                                                InvalidOffset))
4422         return &Inst;
4423     }
4424 
4425     return nullptr;
4426   } else {
4427     llvm_unreachable("invalid CFG state to use getInstructionAtOffset()");
4428   }
4429 }
4430 
DebugLocationsVector BinaryFunction::translateInputToOutputLocationList(
    const DebugLocationsVector &InputLL) const {
  // Translate a DWARF location list from input to output addresses, walking
  // the basic blocks each entry covers. Adjacent entries with the identical
  // location expression are merged both during the walk and in a final
  // sort-and-merge pass.
  DebugLocationsVector OutputLL;

  if (isFolded()) {
    return OutputLL;
  }

  // If the function hasn't changed - there's nothing to update.
  if (!isEmitted()) {
    return InputLL;
  }

  uint64_t PrevEndAddress = 0;
  // Expression of the last entry appended to OutputLL; used to decide
  // whether a contiguous entry can be merged into it.
  SmallVectorImpl<uint8_t> *PrevExpr = nullptr;
  for (const DebugLocationEntry &Entry : InputLL) {
    const uint64_t Start = Entry.LowPC;
    const uint64_t End = Entry.HighPC;
    if (!containsAddress(Start)) {
      LLVM_DEBUG(dbgs() << "BOLT-DEBUG: invalid debug address range detected "
                            "for "
                        << *this << " : [0x" << Twine::utohexstr(Start)
                        << ", 0x" << Twine::utohexstr(End) << "]\n");
      continue;
    }
    uint64_t InputOffset = Start - getAddress();
    const uint64_t InputEndOffset = std::min(End - getAddress(), getSize());
    // Find the basic block containing InputOffset: upper_bound returns the
    // first block starting after it, so step back one entry.
    auto BBI = std::upper_bound(BasicBlockOffsets.begin(),
                                BasicBlockOffsets.end(),
                                BasicBlockOffset(InputOffset, nullptr),
                                CompareBasicBlockOffsets());
    --BBI;
    // Walk consecutive blocks until the whole entry's range is consumed.
    do {
      const BinaryBasicBlock *BB = BBI->second;
      if (InputOffset < BB->getOffset() || InputOffset >= BB->getEndOffset()) {
        LLVM_DEBUG(dbgs() << "BOLT-DEBUG: invalid debug address range detected "
                             "for "
                          << *this << " : [0x" << Twine::utohexstr(Start)
                          << ", 0x" << Twine::utohexstr(End) << "]\n");
        PrevEndAddress = 0;
        break;
      }

      // Skip the range if the block was deleted (output start of 0).
      if (const uint64_t OutputStart = BB->getOutputAddressRange().first) {
        const uint64_t StartAddress =
            OutputStart + InputOffset - BB->getOffset();
        uint64_t EndAddress = BB->getOutputAddressRange().second;
        if (InputEndOffset < BB->getEndOffset())
          EndAddress = StartAddress + InputEndOffset - InputOffset;

        // NOTE(review): *PrevExpr is only reached when StartAddress equals
        // PrevEndAddress; StartAddress is nonzero here (OutputStart != 0), so
        // PrevExpr should be set by then - verify if PrevEndAddress can ever
        // legitimately match on the first appended entry.
        if (StartAddress == PrevEndAddress && Entry.Expr == *PrevExpr) {
          OutputLL.back().HighPC = std::max(OutputLL.back().HighPC, EndAddress);
        } else {
          // HighPC is clamped so it never precedes LowPC.
          OutputLL.emplace_back(
              DebugLocationEntry{StartAddress,
                                 std::max(StartAddress, EndAddress),
                                 Entry.Expr});
        }
        PrevEndAddress = OutputLL.back().HighPC;
        PrevExpr = &OutputLL.back().Expr;
      }

      ++BBI;
      InputOffset = BB->getEndOffset();
    } while (InputOffset < InputEndOffset);
  }

  // Sort and merge adjacent entries with identical location.
  std::stable_sort(OutputLL.begin(), OutputLL.end(),
      [] (const DebugLocationEntry &A, const DebugLocationEntry &B) {
        return A.LowPC < B.LowPC;
      });
  DebugLocationsVector MergedLL;
  PrevEndAddress = 0;
  PrevExpr = nullptr;
  for (const DebugLocationEntry &Entry : OutputLL) {
    // Merge overlapping/adjacent entries that share the same expression;
    // otherwise trim the start so entries never overlap.
    if (Entry.LowPC <= PrevEndAddress && *PrevExpr == Entry.Expr) {
      MergedLL.back().HighPC = std::max(Entry.HighPC, MergedLL.back().HighPC);
    } else {
      const uint64_t Begin = std::max(Entry.LowPC, PrevEndAddress);
      const uint64_t End = std::max(Begin, Entry.HighPC);
      MergedLL.emplace_back(DebugLocationEntry{Begin, End, Entry.Expr});
    }
    PrevEndAddress = MergedLL.back().HighPC;
    PrevExpr = &MergedLL.back().Expr;
  }

  return MergedLL;
}
4521 
4522 void BinaryFunction::printLoopInfo(raw_ostream &OS) const {
4523   OS << "Loop Info for Function \"" << *this << "\"";
4524   if (hasValidProfile()) {
4525     OS << " (count: " << getExecutionCount() << ")";
4526   }
4527   OS << "\n";
4528 
4529   std::stack<BinaryLoop *> St;
4530   for (auto I = BLI->begin(), E = BLI->end(); I != E; ++I) {
4531     St.push(*I);
4532   }
4533   while (!St.empty()) {
4534     BinaryLoop *L = St.top();
4535     St.pop();
4536 
4537     for (BinaryLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
4538       St.push(*I);
4539     }
4540 
4541     if (!hasValidProfile())
4542       continue;
4543 
4544     OS << (L->getLoopDepth() > 1 ? "Nested" : "Outer") << " loop header: "
4545        << L->getHeader()->getName();
4546     OS << "\n";
4547     OS << "Loop basic blocks: ";
4548     const char *Sep = "";
4549     for (auto BI = L->block_begin(), BE = L->block_end(); BI != BE; ++BI) {
4550       OS << Sep << (*BI)->getName();
4551       Sep = ", ";
4552     }
4553     OS << "\n";
4554     if (hasValidProfile()) {
4555       OS << "Total back edge count: " << L->TotalBackEdgeCount << "\n";
4556       OS << "Loop entry count: " << L->EntryCount << "\n";
4557       OS << "Loop exit count: " << L->ExitCount << "\n";
4558       if (L->EntryCount > 0) {
4559         OS << "Average iters per entry: "
4560            << format("%.4lf", (double)L->TotalBackEdgeCount / L->EntryCount)
4561            << "\n";
4562       }
4563     }
4564     OS << "----\n";
4565   }
4566 
4567   OS << "Total number of loops: "<< BLI->TotalLoops << "\n";
4568   OS << "Number of outer loops: " << BLI->OuterLoops << "\n";
4569   OS << "Maximum nested loop depth: " << BLI->MaximumDepth << "\n\n";
4570 }
4571 
4572 bool BinaryFunction::isAArch64Veneer() const {
4573   if (BasicBlocks.size() != 1)
4574     return false;
4575 
4576   BinaryBasicBlock &BB = **BasicBlocks.begin();
4577   if (BB.size() != 3)
4578     return false;
4579 
4580   for (MCInst &Inst : BB) {
4581     if (!BC.MIB->hasAnnotation(Inst, "AArch64Veneer"))
4582       return false;
4583   }
4584 
4585   return true;
4586 }
4587 
4588 } // namespace bolt
4589 } // namespace llvm
4590