xref: /llvm-project/llvm/lib/CodeGen/MachineOutliner.cpp (revision 141574bacb2c10b795490d0fa5ea31acbc5d8c6e)
1 //===---- MachineOutliner.cpp - Outline instructions -----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// Replaces repeated sequences of instructions with function calls.
11 ///
12 /// This works by placing every instruction from every basic block in a
13 /// suffix tree, and repeatedly querying that tree for repeated sequences of
14 /// instructions. If a sequence of instructions appears often, then it ought
15 /// to be beneficial to pull out into a function.
16 ///
17 /// The MachineOutliner communicates with a given target using hooks defined in
18 /// TargetInstrInfo.h. The target supplies the outliner with information on how
19 /// a specific sequence of instructions should be outlined. This information
20 /// is used to deduce the number of instructions necessary to
21 ///
22 /// * Create an outlined function
23 /// * Call that outlined function
24 ///
25 /// Targets must implement
26 ///   * getOutliningCandidateInfo
27 ///   * buildOutlinedFrame
28 ///   * insertOutlinedCall
29 ///   * isFunctionSafeToOutlineFrom
30 ///
31 /// in order to make use of the MachineOutliner.
32 ///
33 /// This was originally presented at the 2016 LLVM Developers' Meeting in the
34 /// talk "Reducing Code Size Using Outlining". For a high-level overview of
35 /// how this pass works, the talk is available on YouTube at
36 ///
37 /// https://www.youtube.com/watch?v=yorld-WSOeU
38 ///
39 /// The slides for the talk are available at
40 ///
41 /// http://www.llvm.org/devmtg/2016-11/Slides/Paquette-Outliner.pdf
42 ///
43 /// The talk provides an overview of how the outliner finds candidates and
44 /// ultimately outlines them. It describes how the main data structure for this
45 /// pass, the suffix tree, is queried and purged for candidates. It also gives
46 /// a simplified suffix tree construction algorithm for suffix trees based off
47 /// of the algorithm actually used here, Ukkonen's algorithm.
48 ///
49 /// For the original RFC for this pass, please see
50 ///
51 /// http://lists.llvm.org/pipermail/llvm-dev/2016-August/104170.html
52 ///
53 /// For more information on the suffix tree data structure, please see
54 /// https://www.cs.helsinki.fi/u/ukkonen/SuffixT1withFigs.pdf
55 ///
56 //===----------------------------------------------------------------------===//
57 #include "llvm/CodeGen/MachineOutliner.h"
58 #include "llvm/ADT/DenseMap.h"
59 #include "llvm/ADT/SmallSet.h"
60 #include "llvm/ADT/Statistic.h"
61 #include "llvm/ADT/Twine.h"
62 #include "llvm/Analysis/ModuleSummaryAnalysis.h"
63 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
64 #include "llvm/CGData/CodeGenDataReader.h"
65 #include "llvm/CodeGen/LivePhysRegs.h"
66 #include "llvm/CodeGen/MachineModuleInfo.h"
67 #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
68 #include "llvm/CodeGen/Passes.h"
69 #include "llvm/CodeGen/TargetInstrInfo.h"
70 #include "llvm/CodeGen/TargetSubtargetInfo.h"
71 #include "llvm/IR/DIBuilder.h"
72 #include "llvm/IR/IRBuilder.h"
73 #include "llvm/IR/Mangler.h"
74 #include "llvm/IR/Module.h"
75 #include "llvm/InitializePasses.h"
76 #include "llvm/Support/CommandLine.h"
77 #include "llvm/Support/Debug.h"
78 #include "llvm/Support/SuffixTree.h"
79 #include "llvm/Support/raw_ostream.h"
80 #include "llvm/Transforms/Utils/ModuleUtils.h"
81 #include <functional>
82 #include <tuple>
83 #include <vector>
84 
85 #define DEBUG_TYPE "machine-outliner"
86 
87 using namespace llvm;
88 using namespace ore;
89 using namespace outliner;
90 
91 // Statistics for outlined functions.
92 STATISTIC(NumOutlined, "Number of candidates outlined");
93 STATISTIC(FunctionsCreated, "Number of functions created");
94 
95 // Statistics for instruction mapping.
96 STATISTIC(NumLegalInUnsignedVec, "Outlinable instructions mapped");
97 STATISTIC(NumIllegalInUnsignedVec,
98           "Unoutlinable instructions mapped + number of sentinel values");
99 STATISTIC(NumSentinels, "Sentinel values inserted during mapping");
100 STATISTIC(NumInvisible,
101           "Invisible instructions skipped during mapping");
102 STATISTIC(UnsignedVecSize,
103           "Total number of instructions mapped and saved to mapping vector");
104 STATISTIC(StableHashAttempts,
105           "Count of hashing attempts made for outlined functions");
106 STATISTIC(StableHashDropped,
107           "Count of unsuccessful hashing attempts for outlined functions");
108 
109 // Set to true if the user wants the outliner to run on linkonceodr linkage
110 // functions. This is false by default because the linker can dedupe linkonceodr
111 // functions. Since the outliner is confined to a single module (modulo LTO),
112 // this is off by default. It should, however, be the default behaviour in
113 // LTO.
114 static cl::opt<bool> EnableLinkOnceODROutlining(
115     "enable-linkonceodr-outlining", cl::Hidden,
116     cl::desc("Enable the machine outliner on linkonceodr functions"),
117     cl::init(false));
118 
119 /// Number of times to re-run the outliner. This is not the total number of runs
120 /// as the outliner will run at least one time. The default value is set to 0,
121 /// meaning the outliner will run one time and rerun zero times after that.
122 static cl::opt<unsigned> OutlinerReruns(
123     "machine-outliner-reruns", cl::init(0), cl::Hidden,
124     cl::desc(
125         "Number of times to rerun the outliner after the initial outline"));
126 
127 static cl::opt<unsigned> OutlinerBenefitThreshold(
128     "outliner-benefit-threshold", cl::init(1), cl::Hidden,
129     cl::desc(
130         "The minimum size in bytes before an outlining candidate is accepted"));
131 
132 static cl::opt<bool> OutlinerLeafDescendants(
133     "outliner-leaf-descendants", cl::init(true), cl::Hidden,
134     cl::desc("Consider all leaf descendants of internal nodes of the suffix "
135              "tree as candidates for outlining (if false, only leaf children "
136              "are considered)"));
137 
138 static cl::opt<bool>
139     DisableGlobalOutlining("disable-global-outlining", cl::Hidden,
140                            cl::desc("Disable global outlining only by ignoring "
141                                     "the codegen data generation or use"),
142                            cl::init(false));
143 
144 static cl::opt<bool> AppendContentHashToOutlinedName(
145     "append-content-hash-outlined-name", cl::Hidden,
146     cl::desc("This appends the content hash to the globally outlined function "
147              "name. It's beneficial for enhancing the precision of the stable "
148              "hash and for ordering the outlined functions."),
149     cl::init(true));
150 
151 namespace {
152 
153 /// Maps \p MachineInstrs to unsigned integers and stores the mappings.
154 struct InstructionMapper {
155   const MachineModuleInfo &MMI;
156 
157   /// The next available integer to assign to a \p MachineInstr that
158   /// cannot be outlined.
159   ///
160   /// Set to -3 for compatability with \p DenseMapInfo<unsigned>.
161   unsigned IllegalInstrNumber = -3;
162 
163   /// The next available integer to assign to a \p MachineInstr that can
164   /// be outlined.
165   unsigned LegalInstrNumber = 0;
166 
167   /// Correspondence from \p MachineInstrs to unsigned integers.
168   DenseMap<MachineInstr *, unsigned, MachineInstrExpressionTrait>
169       InstructionIntegerMap;
170 
171   /// Correspondence between \p MachineBasicBlocks and target-defined flags.
172   DenseMap<MachineBasicBlock *, unsigned> MBBFlagsMap;
173 
174   /// The vector of unsigned integers that the module is mapped to.
175   SmallVector<unsigned> UnsignedVec;
176 
177   /// Stores the location of the instruction associated with the integer
178   /// at index i in \p UnsignedVec for each index i.
179   SmallVector<MachineBasicBlock::iterator> InstrList;
180 
181   // Set if we added an illegal number in the previous step.
182   // Since each illegal number is unique, we only need one of them between
183   // each range of legal numbers. This lets us make sure we don't add more
184   // than one illegal number per range.
185   bool AddedIllegalLastTime = false;
186 
187   /// Maps \p *It to a legal integer.
188   ///
189   /// Updates \p CanOutlineWithPrevInstr, \p HaveLegalRange, \p InstrListForMBB,
190   /// \p UnsignedVecForMBB, \p InstructionIntegerMap, and \p LegalInstrNumber.
191   ///
192   /// \returns The integer that \p *It was mapped to.
193   unsigned mapToLegalUnsigned(
194       MachineBasicBlock::iterator &It, bool &CanOutlineWithPrevInstr,
195       bool &HaveLegalRange, unsigned &NumLegalInBlock,
196       SmallVector<unsigned> &UnsignedVecForMBB,
197       SmallVector<MachineBasicBlock::iterator> &InstrListForMBB) {
198     // We added something legal, so we should unset the AddedLegalLastTime
199     // flag.
200     AddedIllegalLastTime = false;
201 
202     // If we have at least two adjacent legal instructions (which may have
203     // invisible instructions in between), remember that.
204     if (CanOutlineWithPrevInstr)
205       HaveLegalRange = true;
206     CanOutlineWithPrevInstr = true;
207 
208     // Keep track of the number of legal instructions we insert.
209     NumLegalInBlock++;
210 
211     // Get the integer for this instruction or give it the current
212     // LegalInstrNumber.
213     InstrListForMBB.push_back(It);
214     MachineInstr &MI = *It;
215     bool WasInserted;
216     DenseMap<MachineInstr *, unsigned, MachineInstrExpressionTrait>::iterator
217         ResultIt;
218     std::tie(ResultIt, WasInserted) =
219         InstructionIntegerMap.insert(std::make_pair(&MI, LegalInstrNumber));
220     unsigned MINumber = ResultIt->second;
221 
222     // There was an insertion.
223     if (WasInserted)
224       LegalInstrNumber++;
225 
226     UnsignedVecForMBB.push_back(MINumber);
227 
228     // Make sure we don't overflow or use any integers reserved by the DenseMap.
229     if (LegalInstrNumber >= IllegalInstrNumber)
230       report_fatal_error("Instruction mapping overflow!");
231 
232     assert(LegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() &&
233            "Tried to assign DenseMap tombstone or empty key to instruction.");
234     assert(LegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() &&
235            "Tried to assign DenseMap tombstone or empty key to instruction.");
236 
237     // Statistics.
238     ++NumLegalInUnsignedVec;
239     return MINumber;
240   }
241 
242   /// Maps \p *It to an illegal integer.
243   ///
244   /// Updates \p InstrListForMBB, \p UnsignedVecForMBB, and \p
245   /// IllegalInstrNumber.
246   ///
247   /// \returns The integer that \p *It was mapped to.
248   unsigned mapToIllegalUnsigned(
249       MachineBasicBlock::iterator &It, bool &CanOutlineWithPrevInstr,
250       SmallVector<unsigned> &UnsignedVecForMBB,
251       SmallVector<MachineBasicBlock::iterator> &InstrListForMBB) {
252     // Can't outline an illegal instruction. Set the flag.
253     CanOutlineWithPrevInstr = false;
254 
255     // Only add one illegal number per range of legal numbers.
256     if (AddedIllegalLastTime)
257       return IllegalInstrNumber;
258 
259     // Remember that we added an illegal number last time.
260     AddedIllegalLastTime = true;
261     unsigned MINumber = IllegalInstrNumber;
262 
263     InstrListForMBB.push_back(It);
264     UnsignedVecForMBB.push_back(IllegalInstrNumber);
265     IllegalInstrNumber--;
266     // Statistics.
267     ++NumIllegalInUnsignedVec;
268 
269     assert(LegalInstrNumber < IllegalInstrNumber &&
270            "Instruction mapping overflow!");
271 
272     assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() &&
273            "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
274 
275     assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() &&
276            "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
277 
278     return MINumber;
279   }
280 
281   /// Transforms a \p MachineBasicBlock into a \p vector of \p unsigneds
282   /// and appends it to \p UnsignedVec and \p InstrList.
283   ///
284   /// Two instructions are assigned the same integer if they are identical.
285   /// If an instruction is deemed unsafe to outline, then it will be assigned an
286   /// unique integer. The resulting mapping is placed into a suffix tree and
287   /// queried for candidates.
288   ///
289   /// \param MBB The \p MachineBasicBlock to be translated into integers.
290   /// \param TII \p TargetInstrInfo for the function.
291   void convertToUnsignedVec(MachineBasicBlock &MBB,
292                             const TargetInstrInfo &TII) {
293     LLVM_DEBUG(dbgs() << "*** Converting MBB '" << MBB.getName()
294                       << "' to unsigned vector ***\n");
295     unsigned Flags = 0;
296 
297     // Don't even map in this case.
298     if (!TII.isMBBSafeToOutlineFrom(MBB, Flags))
299       return;
300 
301     auto OutlinableRanges = TII.getOutlinableRanges(MBB, Flags);
302     LLVM_DEBUG(dbgs() << MBB.getName() << ": " << OutlinableRanges.size()
303                       << " outlinable range(s)\n");
304     if (OutlinableRanges.empty())
305       return;
306 
307     // Store info for the MBB for later outlining.
308     MBBFlagsMap[&MBB] = Flags;
309 
310     MachineBasicBlock::iterator It = MBB.begin();
311 
312     // The number of instructions in this block that will be considered for
313     // outlining.
314     unsigned NumLegalInBlock = 0;
315 
316     // True if we have at least two legal instructions which aren't separated
317     // by an illegal instruction.
318     bool HaveLegalRange = false;
319 
320     // True if we can perform outlining given the last mapped (non-invisible)
321     // instruction. This lets us know if we have a legal range.
322     bool CanOutlineWithPrevInstr = false;
323 
324     // FIXME: Should this all just be handled in the target, rather than using
325     // repeated calls to getOutliningType?
326     SmallVector<unsigned> UnsignedVecForMBB;
327     SmallVector<MachineBasicBlock::iterator> InstrListForMBB;
328 
329     LLVM_DEBUG(dbgs() << "*** Mapping outlinable ranges ***\n");
330     for (auto &OutlinableRange : OutlinableRanges) {
331       auto OutlinableRangeBegin = OutlinableRange.first;
332       auto OutlinableRangeEnd = OutlinableRange.second;
333 #ifndef NDEBUG
334       LLVM_DEBUG(
335           dbgs() << "Mapping "
336                  << std::distance(OutlinableRangeBegin, OutlinableRangeEnd)
337                  << " instruction range\n");
338       // Everything outside of an outlinable range is illegal.
339       unsigned NumSkippedInRange = 0;
340 #endif
341       for (; It != OutlinableRangeBegin; ++It) {
342 #ifndef NDEBUG
343         ++NumSkippedInRange;
344 #endif
345         mapToIllegalUnsigned(It, CanOutlineWithPrevInstr, UnsignedVecForMBB,
346                              InstrListForMBB);
347       }
348 #ifndef NDEBUG
349       LLVM_DEBUG(dbgs() << "Skipped " << NumSkippedInRange
350                         << " instructions outside outlinable range\n");
351 #endif
352       assert(It != MBB.end() && "Should still have instructions?");
353       // `It` is now positioned at the beginning of a range of instructions
354       // which may be outlinable. Check if each instruction is known to be safe.
355       for (; It != OutlinableRangeEnd; ++It) {
356         // Keep track of where this instruction is in the module.
357         switch (TII.getOutliningType(MMI, It, Flags)) {
358         case InstrType::Illegal:
359           mapToIllegalUnsigned(It, CanOutlineWithPrevInstr, UnsignedVecForMBB,
360                                InstrListForMBB);
361           break;
362 
363         case InstrType::Legal:
364           mapToLegalUnsigned(It, CanOutlineWithPrevInstr, HaveLegalRange,
365                              NumLegalInBlock, UnsignedVecForMBB,
366                              InstrListForMBB);
367           break;
368 
369         case InstrType::LegalTerminator:
370           mapToLegalUnsigned(It, CanOutlineWithPrevInstr, HaveLegalRange,
371                              NumLegalInBlock, UnsignedVecForMBB,
372                              InstrListForMBB);
373           // The instruction also acts as a terminator, so we have to record
374           // that in the string.
375           mapToIllegalUnsigned(It, CanOutlineWithPrevInstr, UnsignedVecForMBB,
376                                InstrListForMBB);
377           break;
378 
379         case InstrType::Invisible:
380           // Normally this is set by mapTo(Blah)Unsigned, but we just want to
381           // skip this instruction. So, unset the flag here.
382           ++NumInvisible;
383           AddedIllegalLastTime = false;
384           break;
385         }
386       }
387     }
388 
389     LLVM_DEBUG(dbgs() << "HaveLegalRange = " << HaveLegalRange << "\n");
390 
391     // Are there enough legal instructions in the block for outlining to be
392     // possible?
393     if (HaveLegalRange) {
394       // After we're done every insertion, uniquely terminate this part of the
395       // "string". This makes sure we won't match across basic block or function
396       // boundaries since the "end" is encoded uniquely and thus appears in no
397       // repeated substring.
398       mapToIllegalUnsigned(It, CanOutlineWithPrevInstr, UnsignedVecForMBB,
399                            InstrListForMBB);
400       ++NumSentinels;
401       append_range(InstrList, InstrListForMBB);
402       append_range(UnsignedVec, UnsignedVecForMBB);
403     }
404   }
405 
406   InstructionMapper(const MachineModuleInfo &MMI_) : MMI(MMI_) {
407     // Make sure that the implementation of DenseMapInfo<unsigned> hasn't
408     // changed.
409     assert(DenseMapInfo<unsigned>::getEmptyKey() == (unsigned)-1 &&
410            "DenseMapInfo<unsigned>'s empty key isn't -1!");
411     assert(DenseMapInfo<unsigned>::getTombstoneKey() == (unsigned)-2 &&
412            "DenseMapInfo<unsigned>'s tombstone key isn't -2!");
413   }
414 };
415 
416 /// An interprocedural pass which finds repeated sequences of
417 /// instructions and replaces them with calls to functions.
418 ///
419 /// Each instruction is mapped to an unsigned integer and placed in a string.
420 /// The resulting mapping is then placed in a \p SuffixTree. The \p SuffixTree
421 /// is then repeatedly queried for repeated sequences of instructions. Each
422 /// non-overlapping repeated sequence is then placed in its own
423 /// \p MachineFunction and each instance is then replaced with a call to that
424 /// function.
425 struct MachineOutliner : public ModulePass {
426 
427   static char ID;
428 
429   MachineModuleInfo *MMI = nullptr;
430 
431   /// Set to true if the outliner should consider functions with
432   /// linkonceodr linkage.
433   bool OutlineFromLinkOnceODRs = false;
434 
435   /// The current repeat number of machine outlining.
436   unsigned OutlineRepeatedNum = 0;
437 
438   /// Set to true if the outliner should run on all functions in the module
439   /// considered safe for outlining.
440   /// Set to true by default for compatibility with llc's -run-pass option.
441   /// Set when the pass is constructed in TargetPassConfig.
442   bool RunOnAllFunctions = true;
443 
444   /// This is a compact representation of hash sequences of outlined functions.
445   /// It is used when OutlinerMode = CGDataMode::Write.
446   /// The resulting hash tree will be emitted into __llvm_outlined section
447   /// which will be dead-stripped not going to the final binary.
448   /// A post-process using llvm-cgdata, lld, or ThinLTO can merge them into
449   /// a global oulined hash tree for the subsequent codegen.
450   std::unique_ptr<OutlinedHashTree> LocalHashTree;
451 
452   /// The mode of the outliner.
453   /// When is's CGDataMode::None, candidates are populated with the suffix tree
454   /// within a module and outlined.
455   /// When it's CGDataMode::Write, in addition to CGDataMode::None, the hash
456   /// sequences of outlined functions are published into LocalHashTree.
457   /// When it's CGDataMode::Read, candidates are populated with the global
458   /// outlined hash tree that has been built by the previous codegen.
459   CGDataMode OutlinerMode = CGDataMode::None;
460 
461   StringRef getPassName() const override { return "Machine Outliner"; }
462 
463   void getAnalysisUsage(AnalysisUsage &AU) const override {
464     AU.addRequired<MachineModuleInfoWrapperPass>();
465     AU.addPreserved<MachineModuleInfoWrapperPass>();
466     AU.addUsedIfAvailable<ImmutableModuleSummaryIndexWrapperPass>();
467     AU.setPreservesAll();
468     ModulePass::getAnalysisUsage(AU);
469   }
470 
471   MachineOutliner() : ModulePass(ID) {
472     initializeMachineOutlinerPass(*PassRegistry::getPassRegistry());
473   }
474 
475   /// Remark output explaining that not outlining a set of candidates would be
476   /// better than outlining that set.
477   void emitNotOutliningCheaperRemark(
478       unsigned StringLen, std::vector<Candidate> &CandidatesForRepeatedSeq,
479       OutlinedFunction &OF);
480 
481   /// Remark output explaining that a function was outlined.
482   void emitOutlinedFunctionRemark(OutlinedFunction &OF);
483 
484   /// Find all repeated substrings that satisfy the outlining cost model by
485   /// constructing a suffix tree.
486   ///
487   /// If a substring appears at least twice, then it must be represented by
488   /// an internal node which appears in at least two suffixes. Each suffix
489   /// is represented by a leaf node. To do this, we visit each internal node
490   /// in the tree, using the leaf children of each internal node. If an
491   /// internal node represents a beneficial substring, then we use each of
492   /// its leaf children to find the locations of its substring.
493   ///
494   /// \param Mapper Contains outlining mapping information.
495   /// \param[out] FunctionList Filled with a list of \p OutlinedFunctions
496   /// each type of candidate.
497   void
498   findCandidates(InstructionMapper &Mapper,
499                  std::vector<std::unique_ptr<OutlinedFunction>> &FunctionList);
500 
501   /// Find all repeated substrings that match in the global outlined hash
502   /// tree built from the previous codegen.
503   ///
504   /// \param Mapper Contains outlining mapping information.
505   /// \param[out] FunctionList Filled with a list of \p OutlinedFunctions
506   /// each type of candidate.
507   void findGlobalCandidates(
508       InstructionMapper &Mapper,
509       std::vector<std::unique_ptr<OutlinedFunction>> &FunctionList);
510 
511   /// Replace the sequences of instructions represented by \p OutlinedFunctions
512   /// with calls to functions.
513   ///
514   /// \param M The module we are outlining from.
515   /// \param FunctionList A list of functions to be inserted into the module.
516   /// \param Mapper Contains the instruction mappings for the module.
517   /// \param[out] OutlinedFunctionNum The outlined function number.
518   bool outline(Module &M,
519                std::vector<std::unique_ptr<OutlinedFunction>> &FunctionList,
520                InstructionMapper &Mapper, unsigned &OutlinedFunctionNum);
521 
522   /// Creates a function for \p OF and inserts it into the module.
523   MachineFunction *createOutlinedFunction(Module &M, OutlinedFunction &OF,
524                                           InstructionMapper &Mapper,
525                                           unsigned Name);
526 
527   /// Compute and publish the stable hash sequence of instructions in the
528   /// outlined function, \p MF. The parameter \p CandSize represents the number
529   /// of candidates that have identical instruction sequences to \p MF.
530   void computeAndPublishHashSequence(MachineFunction &MF, unsigned CandSize);
531 
532   /// Initialize the outliner mode.
533   void initializeOutlinerMode(const Module &M);
534 
535   /// Emit the outlined hash tree into __llvm_outline section.
536   void emitOutlinedHashTree(Module &M);
537 
538   /// Calls 'doOutline()' 1 + OutlinerReruns times.
539   bool runOnModule(Module &M) override;
540 
541   /// Construct a suffix tree on the instructions in \p M and outline repeated
542   /// strings from that tree.
543   bool doOutline(Module &M, unsigned &OutlinedFunctionNum);
544 
545   /// Return a DISubprogram for OF if one exists, and null otherwise. Helper
546   /// function for remark emission.
547   DISubprogram *getSubprogramOrNull(const OutlinedFunction &OF) {
548     for (const Candidate &C : OF.Candidates)
549       if (MachineFunction *MF = C.getMF())
550         if (DISubprogram *SP = MF->getFunction().getSubprogram())
551           return SP;
552     return nullptr;
553   }
554 
555   /// Populate and \p InstructionMapper with instruction-to-integer mappings.
556   /// These are used to construct a suffix tree.
557   void populateMapper(InstructionMapper &Mapper, Module &M);
558 
559   /// Initialize information necessary to output a size remark.
560   /// FIXME: This should be handled by the pass manager, not the outliner.
561   /// FIXME: This is nearly identical to the initSizeRemarkInfo in the legacy
562   /// pass manager.
563   void initSizeRemarkInfo(const Module &M,
564                           StringMap<unsigned> &FunctionToInstrCount);
565 
566   /// Emit the remark.
567   // FIXME: This should be handled by the pass manager, not the outliner.
568   void
569   emitInstrCountChangedRemark(const Module &M,
570                               const StringMap<unsigned> &FunctionToInstrCount);
571 };
572 } // Anonymous namespace.
573 
574 char MachineOutliner::ID = 0;
575 
576 namespace llvm {
577 ModulePass *createMachineOutlinerPass(bool RunOnAllFunctions) {
578   MachineOutliner *OL = new MachineOutliner();
579   OL->RunOnAllFunctions = RunOnAllFunctions;
580   return OL;
581 }
582 
583 } // namespace llvm
584 
585 INITIALIZE_PASS(MachineOutliner, DEBUG_TYPE, "Machine Function Outliner", false,
586                 false)
587 
588 void MachineOutliner::emitNotOutliningCheaperRemark(
589     unsigned StringLen, std::vector<Candidate> &CandidatesForRepeatedSeq,
590     OutlinedFunction &OF) {
591   // FIXME: Right now, we arbitrarily choose some Candidate from the
592   // OutlinedFunction. This isn't necessarily fixed, nor does it have to be.
593   // We should probably sort these by function name or something to make sure
594   // the remarks are stable.
595   Candidate &C = CandidatesForRepeatedSeq.front();
596   MachineOptimizationRemarkEmitter MORE(*(C.getMF()), nullptr);
597   MORE.emit([&]() {
598     MachineOptimizationRemarkMissed R(DEBUG_TYPE, "NotOutliningCheaper",
599                                       C.front().getDebugLoc(), C.getMBB());
600     R << "Did not outline " << NV("Length", StringLen) << " instructions"
601       << " from " << NV("NumOccurrences", CandidatesForRepeatedSeq.size())
602       << " locations."
603       << " Bytes from outlining all occurrences ("
604       << NV("OutliningCost", OF.getOutliningCost()) << ")"
605       << " >= Unoutlined instruction bytes ("
606       << NV("NotOutliningCost", OF.getNotOutlinedCost()) << ")"
607       << " (Also found at: ";
608 
609     // Tell the user the other places the candidate was found.
610     for (unsigned i = 1, e = CandidatesForRepeatedSeq.size(); i < e; i++) {
611       R << NV((Twine("OtherStartLoc") + Twine(i)).str(),
612               CandidatesForRepeatedSeq[i].front().getDebugLoc());
613       if (i != e - 1)
614         R << ", ";
615     }
616 
617     R << ")";
618     return R;
619   });
620 }
621 
622 void MachineOutliner::emitOutlinedFunctionRemark(OutlinedFunction &OF) {
623   MachineBasicBlock *MBB = &*OF.MF->begin();
624   MachineOptimizationRemarkEmitter MORE(*OF.MF, nullptr);
625   MachineOptimizationRemark R(DEBUG_TYPE, "OutlinedFunction",
626                               MBB->findDebugLoc(MBB->begin()), MBB);
627   R << "Saved " << NV("OutliningBenefit", OF.getBenefit()) << " bytes by "
628     << "outlining " << NV("Length", OF.getNumInstrs()) << " instructions "
629     << "from " << NV("NumOccurrences", OF.getOccurrenceCount())
630     << " locations. "
631     << "(Found at: ";
632 
633   // Tell the user the other places the candidate was found.
634   for (size_t i = 0, e = OF.Candidates.size(); i < e; i++) {
635 
636     R << NV((Twine("StartLoc") + Twine(i)).str(),
637             OF.Candidates[i].front().getDebugLoc());
638     if (i != e - 1)
639       R << ", ";
640   }
641 
642   R << ")";
643 
644   MORE.emit(R);
645 }
646 
647 struct MatchedEntry {
648   unsigned StartIdx;
649   unsigned EndIdx;
650   unsigned Count;
651   MatchedEntry(unsigned StartIdx, unsigned EndIdx, unsigned Count)
652       : StartIdx(StartIdx), EndIdx(EndIdx), Count(Count) {}
653   MatchedEntry() = delete;
654 };
655 
656 // Find all matches in the global outlined hash tree.
657 // It's quadratic complexity in theory, but it's nearly linear in practice
658 // since the length of outlined sequences are small within a block.
659 static SmallVector<MatchedEntry> getMatchedEntries(InstructionMapper &Mapper) {
660   auto &InstrList = Mapper.InstrList;
661   auto &UnsignedVec = Mapper.UnsignedVec;
662 
663   SmallVector<MatchedEntry> MatchedEntries;
664   auto Size = UnsignedVec.size();
665 
666   // Get the global outlined hash tree built from the previous run.
667   assert(cgdata::hasOutlinedHashTree());
668   const auto *RootNode = cgdata::getOutlinedHashTree()->getRoot();
669 
670   auto getValidInstr = [&](unsigned Index) -> const MachineInstr * {
671     if (UnsignedVec[Index] >= Mapper.LegalInstrNumber)
672       return nullptr;
673     return &(*InstrList[Index]);
674   };
675 
676   auto getStableHashAndFollow =
677       [](const MachineInstr &MI, const HashNode *CurrNode) -> const HashNode * {
678     stable_hash StableHash = stableHashValue(MI);
679     if (!StableHash)
680       return nullptr;
681     auto It = CurrNode->Successors.find(StableHash);
682     return (It == CurrNode->Successors.end()) ? nullptr : It->second.get();
683   };
684 
685   for (unsigned I = 0; I < Size; ++I) {
686     const MachineInstr *MI = getValidInstr(I);
687     if (!MI || MI->isDebugInstr())
688       continue;
689     const HashNode *CurrNode = getStableHashAndFollow(*MI, RootNode);
690     if (!CurrNode)
691       continue;
692 
693     for (unsigned J = I + 1; J < Size; ++J) {
694       const MachineInstr *MJ = getValidInstr(J);
695       if (!MJ)
696         break;
697       // Skip debug instructions as we did for the outlined function.
698       if (MJ->isDebugInstr())
699         continue;
700       CurrNode = getStableHashAndFollow(*MJ, CurrNode);
701       if (!CurrNode)
702         break;
703       // Even with a match ending with a terminal, we continue finding
704       // matches to populate all candidates.
705       if (auto Count = CurrNode->Terminals)
706         MatchedEntries.emplace_back(I, J, *Count);
707     }
708   }
709 
710   return MatchedEntries;
711 }
712 
713 void MachineOutliner::findGlobalCandidates(
714     InstructionMapper &Mapper,
715     std::vector<std::unique_ptr<OutlinedFunction>> &FunctionList) {
716   FunctionList.clear();
717   auto &InstrList = Mapper.InstrList;
718   auto &MBBFlagsMap = Mapper.MBBFlagsMap;
719 
720   std::vector<Candidate> CandidatesForRepeatedSeq;
721   for (auto &ME : getMatchedEntries(Mapper)) {
722     CandidatesForRepeatedSeq.clear();
723     MachineBasicBlock::iterator StartIt = InstrList[ME.StartIdx];
724     MachineBasicBlock::iterator EndIt = InstrList[ME.EndIdx];
725     auto Length = ME.EndIdx - ME.StartIdx + 1;
726     MachineBasicBlock *MBB = StartIt->getParent();
727     CandidatesForRepeatedSeq.emplace_back(ME.StartIdx, Length, StartIt, EndIt,
728                                           MBB, FunctionList.size(),
729                                           MBBFlagsMap[MBB]);
730     const TargetInstrInfo *TII =
731         MBB->getParent()->getSubtarget().getInstrInfo();
732     unsigned MinRepeats = 1;
733     std::optional<std::unique_ptr<OutlinedFunction>> OF =
734         TII->getOutliningCandidateInfo(*MMI, CandidatesForRepeatedSeq,
735                                        MinRepeats);
736     if (!OF.has_value() || OF.value()->Candidates.empty())
737       continue;
738     // We create a global candidate for each match.
739     assert(OF.value()->Candidates.size() == MinRepeats);
740     FunctionList.emplace_back(std::make_unique<GlobalOutlinedFunction>(
741         std::move(OF.value()), ME.Count));
742   }
743 }
744 
745 void MachineOutliner::findCandidates(
746     InstructionMapper &Mapper,
747     std::vector<std::unique_ptr<OutlinedFunction>> &FunctionList) {
748   FunctionList.clear();
749   SuffixTree ST(Mapper.UnsignedVec, OutlinerLeafDescendants);
750 
751   // First, find all of the repeated substrings in the tree of minimum length
752   // 2.
753   std::vector<Candidate> CandidatesForRepeatedSeq;
754   LLVM_DEBUG(dbgs() << "*** Discarding overlapping candidates *** \n");
755   LLVM_DEBUG(
756       dbgs() << "Searching for overlaps in all repeated sequences...\n");
757   for (SuffixTree::RepeatedSubstring &RS : ST) {
758     CandidatesForRepeatedSeq.clear();
759     unsigned StringLen = RS.Length;
760     LLVM_DEBUG(dbgs() << "  Sequence length: " << StringLen << "\n");
761     // Debug code to keep track of how many candidates we removed.
762 #ifndef NDEBUG
763     unsigned NumDiscarded = 0;
764     unsigned NumKept = 0;
765 #endif
766     // Sort the start indices so that we can efficiently check if candidates
767     // overlap with the ones we've already found for this sequence.
768     llvm::sort(RS.StartIndices);
769     for (const unsigned &StartIdx : RS.StartIndices) {
770       // Trick: Discard some candidates that would be incompatible with the
771       // ones we've already found for this sequence. This will save us some
772       // work in candidate selection.
773       //
774       // If two candidates overlap, then we can't outline them both. This
775       // happens when we have candidates that look like, say
776       //
777       // AA (where each "A" is an instruction).
778       //
779       // We might have some portion of the module that looks like this:
780       // AAAAAA (6 A's)
781       //
782       // In this case, there are 5 different copies of "AA" in this range, but
783       // at most 3 can be outlined. If only outlining 3 of these is going to
784       // be unbeneficial, then we ought to not bother.
785       //
786       // Note that two things DON'T overlap when they look like this:
787       // start1...end1 .... start2...end2
788       // That is, one must either
789       // * End before the other starts
790       // * Start after the other ends
791       unsigned EndIdx = StartIdx + StringLen - 1;
792       if (!CandidatesForRepeatedSeq.empty() &&
793           StartIdx <= CandidatesForRepeatedSeq.back().getEndIdx()) {
794 #ifndef NDEBUG
795         ++NumDiscarded;
796         LLVM_DEBUG(dbgs() << "    .. DISCARD candidate @ [" << StartIdx << ", "
797                           << EndIdx << "]; overlaps with candidate @ ["
798                           << CandidatesForRepeatedSeq.back().getStartIdx()
799                           << ", " << CandidatesForRepeatedSeq.back().getEndIdx()
800                           << "]\n");
801 #endif
802         continue;
803       }
804       // It doesn't overlap with anything, so we can outline it.
805       // Each sequence is over [StartIt, EndIt].
806       // Save the candidate and its location.
807 #ifndef NDEBUG
808       ++NumKept;
809 #endif
810       MachineBasicBlock::iterator StartIt = Mapper.InstrList[StartIdx];
811       MachineBasicBlock::iterator EndIt = Mapper.InstrList[EndIdx];
812       MachineBasicBlock *MBB = StartIt->getParent();
813       CandidatesForRepeatedSeq.emplace_back(StartIdx, StringLen, StartIt, EndIt,
814                                             MBB, FunctionList.size(),
815                                             Mapper.MBBFlagsMap[MBB]);
816     }
817 #ifndef NDEBUG
818     LLVM_DEBUG(dbgs() << "    Candidates discarded: " << NumDiscarded
819                       << "\n");
820     LLVM_DEBUG(dbgs() << "    Candidates kept: " << NumKept << "\n\n");
821 #endif
822     unsigned MinRepeats = 2;
823 
824     // We've found something we might want to outline.
825     // Create an OutlinedFunction to store it and check if it'd be beneficial
826     // to outline.
827     if (CandidatesForRepeatedSeq.size() < MinRepeats)
828       continue;
829 
830     // Arbitrarily choose a TII from the first candidate.
831     // FIXME: Should getOutliningCandidateInfo move to TargetMachine?
832     const TargetInstrInfo *TII =
833         CandidatesForRepeatedSeq[0].getMF()->getSubtarget().getInstrInfo();
834 
835     std::optional<std::unique_ptr<OutlinedFunction>> OF =
836         TII->getOutliningCandidateInfo(*MMI, CandidatesForRepeatedSeq,
837                                        MinRepeats);
838 
839     // If we deleted too many candidates, then there's nothing worth outlining.
840     // FIXME: This should take target-specified instruction sizes into account.
841     if (!OF.has_value() || OF.value()->Candidates.size() < MinRepeats)
842       continue;
843 
844     // Is it better to outline this candidate than not?
845     if (OF.value()->getBenefit() < OutlinerBenefitThreshold) {
846       emitNotOutliningCheaperRemark(StringLen, CandidatesForRepeatedSeq,
847                                     *OF.value());
848       continue;
849     }
850 
851     FunctionList.emplace_back(std::move(OF.value()));
852   }
853 }
854 
855 void MachineOutliner::computeAndPublishHashSequence(MachineFunction &MF,
856                                                     unsigned CandSize) {
857   // Compute the hash sequence for the outlined function.
858   SmallVector<stable_hash> OutlinedHashSequence;
859   for (auto &MBB : MF) {
860     for (auto &NewMI : MBB) {
861       stable_hash Hash = stableHashValue(NewMI);
862       if (!Hash) {
863         OutlinedHashSequence.clear();
864         break;
865       }
866       OutlinedHashSequence.push_back(Hash);
867     }
868   }
869 
870   // Append a unique name based on the non-empty hash sequence.
871   if (AppendContentHashToOutlinedName && !OutlinedHashSequence.empty()) {
872     auto CombinedHash = stable_hash_combine(OutlinedHashSequence);
873     auto NewName =
874         MF.getName().str() + ".content." + std::to_string(CombinedHash);
875     MF.getFunction().setName(NewName);
876   }
877 
878   // Publish the non-empty hash sequence to the local hash tree.
879   if (OutlinerMode == CGDataMode::Write) {
880     StableHashAttempts++;
881     if (!OutlinedHashSequence.empty())
882       LocalHashTree->insert({OutlinedHashSequence, CandSize});
883     else
884       StableHashDropped++;
885   }
886 }
887 
888 MachineFunction *MachineOutliner::createOutlinedFunction(
889     Module &M, OutlinedFunction &OF, InstructionMapper &Mapper, unsigned Name) {
890 
891   // Create the function name. This should be unique.
892   // FIXME: We should have a better naming scheme. This should be stable,
893   // regardless of changes to the outliner's cost model/traversal order.
894   std::string FunctionName = "OUTLINED_FUNCTION_";
895   if (OutlineRepeatedNum > 0)
896     FunctionName += std::to_string(OutlineRepeatedNum + 1) + "_";
897   FunctionName += std::to_string(Name);
898   LLVM_DEBUG(dbgs() << "NEW FUNCTION: " << FunctionName << "\n");
899 
900   // Create the function using an IR-level function.
901   LLVMContext &C = M.getContext();
902   Function *F = Function::Create(FunctionType::get(Type::getVoidTy(C), false),
903                                  Function::ExternalLinkage, FunctionName, M);
904 
905   // NOTE: If this is linkonceodr, then we can take advantage of linker deduping
906   // which gives us better results when we outline from linkonceodr functions.
907   F->setLinkage(GlobalValue::InternalLinkage);
908   F->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
909 
910   // Set optsize/minsize, so we don't insert padding between outlined
911   // functions.
912   F->addFnAttr(Attribute::OptimizeForSize);
913   F->addFnAttr(Attribute::MinSize);
914 
915   Candidate &FirstCand = OF.Candidates.front();
916   const TargetInstrInfo &TII =
917       *FirstCand.getMF()->getSubtarget().getInstrInfo();
918 
919   TII.mergeOutliningCandidateAttributes(*F, OF.Candidates);
920 
921   // Set uwtable, so we generate eh_frame.
922   UWTableKind UW = std::accumulate(
923       OF.Candidates.cbegin(), OF.Candidates.cend(), UWTableKind::None,
924       [](UWTableKind K, const outliner::Candidate &C) {
925         return std::max(K, C.getMF()->getFunction().getUWTableKind());
926       });
927   F->setUWTableKind(UW);
928 
929   BasicBlock *EntryBB = BasicBlock::Create(C, "entry", F);
930   IRBuilder<> Builder(EntryBB);
931   Builder.CreateRetVoid();
932 
933   MachineModuleInfo &MMI = getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
934   MachineFunction &MF = MMI.getOrCreateMachineFunction(*F);
935   MF.setIsOutlined(true);
936   MachineBasicBlock &MBB = *MF.CreateMachineBasicBlock();
937 
938   // Insert the new function into the module.
939   MF.insert(MF.begin(), &MBB);
940 
941   MachineFunction *OriginalMF = FirstCand.front().getMF();
942   const std::vector<MCCFIInstruction> &Instrs =
943       OriginalMF->getFrameInstructions();
944   for (auto &MI : FirstCand) {
945     if (MI.isDebugInstr())
946       continue;
947 
948     // Don't keep debug information for outlined instructions.
949     auto DL = DebugLoc();
950     if (MI.isCFIInstruction()) {
951       unsigned CFIIndex = MI.getOperand(0).getCFIIndex();
952       MCCFIInstruction CFI = Instrs[CFIIndex];
953       BuildMI(MBB, MBB.end(), DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
954           .addCFIIndex(MF.addFrameInst(CFI));
955     } else {
956       MachineInstr &NewMI = TII.duplicate(MBB, MBB.end(), MI);
957       NewMI.dropMemRefs(MF);
958       NewMI.setDebugLoc(DL);
959     }
960   }
961 
962   if (OutlinerMode != CGDataMode::None)
963     computeAndPublishHashSequence(MF, OF.Candidates.size());
964 
965   // Set normal properties for a late MachineFunction.
966   MF.getProperties().reset(MachineFunctionProperties::Property::IsSSA);
967   MF.getProperties().set(MachineFunctionProperties::Property::NoPHIs);
968   MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs);
969   MF.getProperties().set(MachineFunctionProperties::Property::TracksLiveness);
970   MF.getRegInfo().freezeReservedRegs();
971 
972   // Compute live-in set for outlined fn
973   const MachineRegisterInfo &MRI = MF.getRegInfo();
974   const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
975   LivePhysRegs LiveIns(TRI);
976   for (auto &Cand : OF.Candidates) {
977     // Figure out live-ins at the first instruction.
978     MachineBasicBlock &OutlineBB = *Cand.front().getParent();
979     LivePhysRegs CandLiveIns(TRI);
980     CandLiveIns.addLiveOuts(OutlineBB);
981     for (const MachineInstr &MI :
982          reverse(make_range(Cand.begin(), OutlineBB.end())))
983       CandLiveIns.stepBackward(MI);
984 
985     // The live-in set for the outlined function is the union of the live-ins
986     // from all the outlining points.
987     for (MCPhysReg Reg : CandLiveIns)
988       LiveIns.addReg(Reg);
989   }
990   addLiveIns(MBB, LiveIns);
991 
992   TII.buildOutlinedFrame(MBB, MF, OF);
993 
994   // If there's a DISubprogram associated with this outlined function, then
995   // emit debug info for the outlined function.
996   if (DISubprogram *SP = getSubprogramOrNull(OF)) {
997     // We have a DISubprogram. Get its DICompileUnit.
998     DICompileUnit *CU = SP->getUnit();
999     DIBuilder DB(M, true, CU);
1000     DIFile *Unit = SP->getFile();
1001     Mangler Mg;
1002     // Get the mangled name of the function for the linkage name.
1003     std::string Dummy;
1004     raw_string_ostream MangledNameStream(Dummy);
1005     Mg.getNameWithPrefix(MangledNameStream, F, false);
1006 
1007     DISubprogram *OutlinedSP = DB.createFunction(
1008         Unit /* Context */, F->getName(), StringRef(Dummy), Unit /* File */,
1009         0 /* Line 0 is reserved for compiler-generated code. */,
1010         DB.createSubroutineType(DB.getOrCreateTypeArray({})), /* void type */
1011         0, /* Line 0 is reserved for compiler-generated code. */
1012         DINode::DIFlags::FlagArtificial /* Compiler-generated code. */,
1013         /* Outlined code is optimized code by definition. */
1014         DISubprogram::SPFlagDefinition | DISubprogram::SPFlagOptimized);
1015 
1016     // Don't add any new variables to the subprogram.
1017     DB.finalizeSubprogram(OutlinedSP);
1018 
1019     // Attach subprogram to the function.
1020     F->setSubprogram(OutlinedSP);
1021     // We're done with the DIBuilder.
1022     DB.finalize();
1023   }
1024 
1025   return &MF;
1026 }
1027 
1028 bool MachineOutliner::outline(
1029     Module &M, std::vector<std::unique_ptr<OutlinedFunction>> &FunctionList,
1030     InstructionMapper &Mapper, unsigned &OutlinedFunctionNum) {
1031   LLVM_DEBUG(dbgs() << "*** Outlining ***\n");
1032   LLVM_DEBUG(dbgs() << "NUMBER OF POTENTIAL FUNCTIONS: " << FunctionList.size()
1033                     << "\n");
1034   bool OutlinedSomething = false;
1035 
1036   // Sort by priority where priority := getNotOutlinedCost / getOutliningCost.
1037   // The function with highest priority should be outlined first.
1038   stable_sort(FunctionList, [](const std::unique_ptr<OutlinedFunction> &LHS,
1039                                const std::unique_ptr<OutlinedFunction> &RHS) {
1040     return LHS->getNotOutlinedCost() * RHS->getOutliningCost() >
1041            RHS->getNotOutlinedCost() * LHS->getOutliningCost();
1042   });
1043 
1044   // Walk over each function, outlining them as we go along. Functions are
1045   // outlined greedily, based off the sort above.
1046   auto *UnsignedVecBegin = Mapper.UnsignedVec.begin();
1047   LLVM_DEBUG(dbgs() << "WALKING FUNCTION LIST\n");
1048   for (auto &OF : FunctionList) {
1049 #ifndef NDEBUG
1050     auto NumCandidatesBefore = OF->Candidates.size();
1051 #endif
1052     // If we outlined something that overlapped with a candidate in a previous
1053     // step, then we can't outline from it.
1054     erase_if(OF->Candidates, [&UnsignedVecBegin](Candidate &C) {
1055       return std::any_of(UnsignedVecBegin + C.getStartIdx(),
1056                          UnsignedVecBegin + C.getEndIdx() + 1, [](unsigned I) {
1057                            return I == static_cast<unsigned>(-1);
1058                          });
1059     });
1060 
1061 #ifndef NDEBUG
1062     auto NumCandidatesAfter = OF->Candidates.size();
1063     LLVM_DEBUG(dbgs() << "PRUNED: " << NumCandidatesBefore - NumCandidatesAfter
1064                       << "/" << NumCandidatesBefore << " candidates\n");
1065 #endif
1066 
1067     // If we made it unbeneficial to outline this function, skip it.
1068     if (OF->getBenefit() < OutlinerBenefitThreshold) {
1069       LLVM_DEBUG(dbgs() << "SKIP: Expected benefit (" << OF->getBenefit()
1070                         << " B) < threshold (" << OutlinerBenefitThreshold
1071                         << " B)\n");
1072       continue;
1073     }
1074 
1075     LLVM_DEBUG(dbgs() << "OUTLINE: Expected benefit (" << OF->getBenefit()
1076                       << " B) > threshold (" << OutlinerBenefitThreshold
1077                       << " B)\n");
1078 
1079     // It's beneficial. Create the function and outline its sequence's
1080     // occurrences.
1081     OF->MF = createOutlinedFunction(M, *OF, Mapper, OutlinedFunctionNum);
1082     emitOutlinedFunctionRemark(*OF);
1083     FunctionsCreated++;
1084     OutlinedFunctionNum++; // Created a function, move to the next name.
1085     MachineFunction *MF = OF->MF;
1086     const TargetSubtargetInfo &STI = MF->getSubtarget();
1087     const TargetInstrInfo &TII = *STI.getInstrInfo();
1088 
1089     // Replace occurrences of the sequence with calls to the new function.
1090     LLVM_DEBUG(dbgs() << "CREATE OUTLINED CALLS\n");
1091     for (Candidate &C : OF->Candidates) {
1092       MachineBasicBlock &MBB = *C.getMBB();
1093       MachineBasicBlock::iterator StartIt = C.begin();
1094       MachineBasicBlock::iterator EndIt = std::prev(C.end());
1095 
1096       // Insert the call.
1097       auto CallInst = TII.insertOutlinedCall(M, MBB, StartIt, *MF, C);
1098 // Insert the call.
1099 #ifndef NDEBUG
1100       auto MBBBeingOutlinedFromName =
1101           MBB.getName().empty() ? "<unknown>" : MBB.getName().str();
1102       auto MFBeingOutlinedFromName = MBB.getParent()->getName().empty()
1103                                          ? "<unknown>"
1104                                          : MBB.getParent()->getName().str();
1105       LLVM_DEBUG(dbgs() << "  CALL: " << MF->getName() << " in "
1106                         << MFBeingOutlinedFromName << ":"
1107                         << MBBBeingOutlinedFromName << "\n");
1108       LLVM_DEBUG(dbgs() << "   .. " << *CallInst);
1109 #endif
1110 
1111       // If the caller tracks liveness, then we need to make sure that
1112       // anything we outline doesn't break liveness assumptions. The outlined
1113       // functions themselves currently don't track liveness, but we should
1114       // make sure that the ranges we yank things out of aren't wrong.
1115       if (MBB.getParent()->getProperties().hasProperty(
1116               MachineFunctionProperties::Property::TracksLiveness)) {
1117         // The following code is to add implicit def operands to the call
1118         // instruction. It also updates call site information for moved
1119         // code.
1120         SmallSet<Register, 2> UseRegs, DefRegs;
1121         // Copy over the defs in the outlined range.
1122         // First inst in outlined range <-- Anything that's defined in this
1123         // ...                           .. range has to be added as an
1124         // implicit Last inst in outlined range  <-- def to the call
1125         // instruction. Also remove call site information for outlined block
1126         // of code. The exposed uses need to be copied in the outlined range.
1127         for (MachineBasicBlock::reverse_iterator
1128                  Iter = EndIt.getReverse(),
1129                  Last = std::next(CallInst.getReverse());
1130              Iter != Last; Iter++) {
1131           MachineInstr *MI = &*Iter;
1132           SmallSet<Register, 2> InstrUseRegs;
1133           for (MachineOperand &MOP : MI->operands()) {
1134             // Skip over anything that isn't a register.
1135             if (!MOP.isReg())
1136               continue;
1137 
1138             if (MOP.isDef()) {
1139               // Introduce DefRegs set to skip the redundant register.
1140               DefRegs.insert(MOP.getReg());
1141               if (UseRegs.count(MOP.getReg()) &&
1142                   !InstrUseRegs.count(MOP.getReg()))
1143                 // Since the regiester is modeled as defined,
1144                 // it is not necessary to be put in use register set.
1145                 UseRegs.erase(MOP.getReg());
1146             } else if (!MOP.isUndef()) {
1147               // Any register which is not undefined should
1148               // be put in the use register set.
1149               UseRegs.insert(MOP.getReg());
1150               InstrUseRegs.insert(MOP.getReg());
1151             }
1152           }
1153           if (MI->isCandidateForCallSiteEntry())
1154             MI->getMF()->eraseCallSiteInfo(MI);
1155         }
1156 
1157         for (const Register &I : DefRegs)
1158           // If it's a def, add it to the call instruction.
1159           CallInst->addOperand(
1160               MachineOperand::CreateReg(I, true, /* isDef = true */
1161                                         true /* isImp = true */));
1162 
1163         for (const Register &I : UseRegs)
1164           // If it's a exposed use, add it to the call instruction.
1165           CallInst->addOperand(
1166               MachineOperand::CreateReg(I, false, /* isDef = false */
1167                                         true /* isImp = true */));
1168       }
1169 
1170       // Erase from the point after where the call was inserted up to, and
1171       // including, the final instruction in the sequence.
1172       // Erase needs one past the end, so we need std::next there too.
1173       MBB.erase(std::next(StartIt), std::next(EndIt));
1174 
1175       // Keep track of what we removed by marking them all as -1.
1176       for (unsigned &I : make_range(UnsignedVecBegin + C.getStartIdx(),
1177                                     UnsignedVecBegin + C.getEndIdx() + 1))
1178         I = static_cast<unsigned>(-1);
1179       OutlinedSomething = true;
1180 
1181       // Statistics.
1182       NumOutlined++;
1183     }
1184   }
1185 
1186   LLVM_DEBUG(dbgs() << "OutlinedSomething = " << OutlinedSomething << "\n";);
1187   return OutlinedSomething;
1188 }
1189 
1190 void MachineOutliner::populateMapper(InstructionMapper &Mapper, Module &M) {
1191   // Build instruction mappings for each function in the module. Start by
1192   // iterating over each Function in M.
1193   LLVM_DEBUG(dbgs() << "*** Populating mapper ***\n");
1194   for (Function &F : M) {
1195     LLVM_DEBUG(dbgs() << "MAPPING FUNCTION: " << F.getName() << "\n");
1196 
1197     if (F.hasFnAttribute("nooutline")) {
1198       LLVM_DEBUG(dbgs() << "SKIP: Function has nooutline attribute\n");
1199       continue;
1200     }
1201 
1202     // There's something in F. Check if it has a MachineFunction associated with
1203     // it.
1204     MachineFunction *MF = MMI->getMachineFunction(F);
1205 
1206     // If it doesn't, then there's nothing to outline from. Move to the next
1207     // Function.
1208     if (!MF) {
1209       LLVM_DEBUG(dbgs() << "SKIP: Function does not have a MachineFunction\n");
1210       continue;
1211     }
1212 
1213     const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
1214     if (!RunOnAllFunctions && !TII->shouldOutlineFromFunctionByDefault(*MF)) {
1215       LLVM_DEBUG(dbgs() << "SKIP: Target does not want to outline from "
1216                            "function by default\n");
1217       continue;
1218     }
1219 
1220     // We have a MachineFunction. Ask the target if it's suitable for outlining.
1221     // If it isn't, then move on to the next Function in the module.
1222     if (!TII->isFunctionSafeToOutlineFrom(*MF, OutlineFromLinkOnceODRs)) {
1223       LLVM_DEBUG(dbgs() << "SKIP: " << MF->getName()
1224                         << ": unsafe to outline from\n");
1225       continue;
1226     }
1227 
1228     // We have a function suitable for outlining. Iterate over every
1229     // MachineBasicBlock in MF and try to map its instructions to a list of
1230     // unsigned integers.
1231     const unsigned MinMBBSize = 2;
1232 
1233     for (MachineBasicBlock &MBB : *MF) {
1234       LLVM_DEBUG(dbgs() << "  MAPPING MBB: '" << MBB.getName() << "'\n");
1235       // If there isn't anything in MBB, then there's no point in outlining from
1236       // it.
1237       // If there are fewer than 2 instructions in the MBB, then it can't ever
1238       // contain something worth outlining.
1239       // FIXME: This should be based off of the maximum size in B of an outlined
1240       // call versus the size in B of the MBB.
1241       if (MBB.size() < MinMBBSize) {
1242         LLVM_DEBUG(dbgs() << "    SKIP: MBB size less than minimum size of "
1243                           << MinMBBSize << "\n");
1244         continue;
1245       }
1246 
1247       // Check if MBB could be the target of an indirect branch. If it is, then
1248       // we don't want to outline from it.
1249       if (MBB.hasAddressTaken()) {
1250         LLVM_DEBUG(dbgs() << "    SKIP: MBB's address is taken\n");
1251         continue;
1252       }
1253 
1254       // MBB is suitable for outlining. Map it to a list of unsigneds.
1255       Mapper.convertToUnsignedVec(MBB, *TII);
1256     }
1257   }
1258   // Statistics.
1259   UnsignedVecSize = Mapper.UnsignedVec.size();
1260 }
1261 
1262 void MachineOutliner::initSizeRemarkInfo(
1263     const Module &M, StringMap<unsigned> &FunctionToInstrCount) {
1264   // Collect instruction counts for every function. We'll use this to emit
1265   // per-function size remarks later.
1266   for (const Function &F : M) {
1267     MachineFunction *MF = MMI->getMachineFunction(F);
1268 
1269     // We only care about MI counts here. If there's no MachineFunction at this
1270     // point, then there won't be after the outliner runs, so let's move on.
1271     if (!MF)
1272       continue;
1273     FunctionToInstrCount[F.getName().str()] = MF->getInstructionCount();
1274   }
1275 }
1276 
1277 void MachineOutliner::emitInstrCountChangedRemark(
1278     const Module &M, const StringMap<unsigned> &FunctionToInstrCount) {
1279   // Iterate over each function in the module and emit remarks.
1280   // Note that we won't miss anything by doing this, because the outliner never
1281   // deletes functions.
1282   for (const Function &F : M) {
1283     MachineFunction *MF = MMI->getMachineFunction(F);
1284 
1285     // The outliner never deletes functions. If we don't have a MF here, then we
1286     // didn't have one prior to outlining either.
1287     if (!MF)
1288       continue;
1289 
1290     std::string Fname = std::string(F.getName());
1291     unsigned FnCountAfter = MF->getInstructionCount();
1292     unsigned FnCountBefore = 0;
1293 
1294     // Check if the function was recorded before.
1295     auto It = FunctionToInstrCount.find(Fname);
1296 
1297     // Did we have a previously-recorded size? If yes, then set FnCountBefore
1298     // to that.
1299     if (It != FunctionToInstrCount.end())
1300       FnCountBefore = It->second;
1301 
1302     // Compute the delta and emit a remark if there was a change.
1303     int64_t FnDelta = static_cast<int64_t>(FnCountAfter) -
1304                       static_cast<int64_t>(FnCountBefore);
1305     if (FnDelta == 0)
1306       continue;
1307 
1308     MachineOptimizationRemarkEmitter MORE(*MF, nullptr);
1309     MORE.emit([&]() {
1310       MachineOptimizationRemarkAnalysis R("size-info", "FunctionMISizeChange",
1311                                           DiagnosticLocation(), &MF->front());
1312       R << DiagnosticInfoOptimizationBase::Argument("Pass", "Machine Outliner")
1313         << ": Function: "
1314         << DiagnosticInfoOptimizationBase::Argument("Function", F.getName())
1315         << ": MI instruction count changed from "
1316         << DiagnosticInfoOptimizationBase::Argument("MIInstrsBefore",
1317                                                     FnCountBefore)
1318         << " to "
1319         << DiagnosticInfoOptimizationBase::Argument("MIInstrsAfter",
1320                                                     FnCountAfter)
1321         << "; Delta: "
1322         << DiagnosticInfoOptimizationBase::Argument("Delta", FnDelta);
1323       return R;
1324     });
1325   }
1326 }
1327 
1328 void MachineOutliner::initializeOutlinerMode(const Module &M) {
1329   if (DisableGlobalOutlining)
1330     return;
1331 
1332   if (auto *IndexWrapperPass =
1333           getAnalysisIfAvailable<ImmutableModuleSummaryIndexWrapperPass>()) {
1334     auto *TheIndex = IndexWrapperPass->getIndex();
1335     // (Full)LTO module does not have functions added to the index.
1336     // In this case, we run the outliner without using codegen data as usual.
1337     if (TheIndex && !TheIndex->hasExportedFunctions(M))
1338       return;
1339   }
1340 
1341   // When codegen data write is enabled, we want to write the local outlined
1342   // hash tree to the custom section, `__llvm_outline`.
1343   // When the outlined hash tree is available from the previous codegen data,
1344   // we want to read it to optimistically create global outlining candidates.
1345   if (cgdata::emitCGData()) {
1346     OutlinerMode = CGDataMode::Write;
1347     // Create a local outlined hash tree to be published.
1348     LocalHashTree = std::make_unique<OutlinedHashTree>();
1349     // We don't need to read the outlined hash tree from the previous codegen
1350   } else if (cgdata::hasOutlinedHashTree())
1351     OutlinerMode = CGDataMode::Read;
1352 }
1353 
1354 void MachineOutliner::emitOutlinedHashTree(Module &M) {
1355   assert(LocalHashTree);
1356   if (!LocalHashTree->empty()) {
1357     LLVM_DEBUG({
1358       dbgs() << "Emit outlined hash tree. Size: " << LocalHashTree->size()
1359              << "\n";
1360     });
1361     SmallVector<char> Buf;
1362     raw_svector_ostream OS(Buf);
1363 
1364     OutlinedHashTreeRecord HTR(std::move(LocalHashTree));
1365     HTR.serialize(OS);
1366 
1367     llvm::StringRef Data(Buf.data(), Buf.size());
1368     std::unique_ptr<MemoryBuffer> Buffer =
1369         MemoryBuffer::getMemBuffer(Data, "in-memory outlined hash tree", false);
1370 
1371     Triple TT(M.getTargetTriple());
1372     embedBufferInModule(
1373         M, *Buffer,
1374         getCodeGenDataSectionName(CG_outline, TT.getObjectFormat()));
1375   }
1376 }
1377 
1378 bool MachineOutliner::runOnModule(Module &M) {
1379   // Check if there's anything in the module. If it's empty, then there's
1380   // nothing to outline.
1381   if (M.empty())
1382     return false;
1383 
1384   // Initialize the outliner mode.
1385   initializeOutlinerMode(M);
1386 
1387   MMI = &getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
1388 
1389   // Number to append to the current outlined function.
1390   unsigned OutlinedFunctionNum = 0;
1391 
1392   OutlineRepeatedNum = 0;
1393   if (!doOutline(M, OutlinedFunctionNum))
1394     return false;
1395 
1396   for (unsigned I = 0; I < OutlinerReruns; ++I) {
1397     OutlinedFunctionNum = 0;
1398     OutlineRepeatedNum++;
1399     if (!doOutline(M, OutlinedFunctionNum)) {
1400       LLVM_DEBUG({
1401         dbgs() << "Did not outline on iteration " << I + 2 << " out of "
1402                << OutlinerReruns + 1 << "\n";
1403       });
1404       break;
1405     }
1406   }
1407 
1408   if (OutlinerMode == CGDataMode::Write)
1409     emitOutlinedHashTree(M);
1410 
1411   return true;
1412 }
1413 
1414 bool MachineOutliner::doOutline(Module &M, unsigned &OutlinedFunctionNum) {
1415   // If the user passed -enable-machine-outliner=always or
1416   // -enable-machine-outliner, the pass will run on all functions in the module.
1417   // Otherwise, if the target supports default outlining, it will run on all
1418   // functions deemed by the target to be worth outlining from by default. Tell
1419   // the user how the outliner is running.
1420   LLVM_DEBUG({
1421     dbgs() << "Machine Outliner: Running on ";
1422     if (RunOnAllFunctions)
1423       dbgs() << "all functions";
1424     else
1425       dbgs() << "target-default functions";
1426     dbgs() << "\n";
1427   });
1428 
1429   // If the user specifies that they want to outline from linkonceodrs, set
1430   // it here.
1431   OutlineFromLinkOnceODRs = EnableLinkOnceODROutlining;
1432   InstructionMapper Mapper(*MMI);
1433 
1434   // Prepare instruction mappings for the suffix tree.
1435   populateMapper(Mapper, M);
1436   std::vector<std::unique_ptr<OutlinedFunction>> FunctionList;
1437 
1438   // Find all of the outlining candidates.
1439   if (OutlinerMode == CGDataMode::Read)
1440     findGlobalCandidates(Mapper, FunctionList);
1441   else
1442     findCandidates(Mapper, FunctionList);
1443 
1444   // If we've requested size remarks, then collect the MI counts of every
1445   // function before outlining, and the MI counts after outlining.
1446   // FIXME: This shouldn't be in the outliner at all; it should ultimately be
1447   // the pass manager's responsibility.
1448   // This could pretty easily be placed in outline instead, but because we
1449   // really ultimately *don't* want this here, it's done like this for now
1450   // instead.
1451 
1452   // Check if we want size remarks.
1453   bool ShouldEmitSizeRemarks = M.shouldEmitInstrCountChangedRemark();
1454   StringMap<unsigned> FunctionToInstrCount;
1455   if (ShouldEmitSizeRemarks)
1456     initSizeRemarkInfo(M, FunctionToInstrCount);
1457 
1458   // Outline each of the candidates and return true if something was outlined.
1459   bool OutlinedSomething =
1460       outline(M, FunctionList, Mapper, OutlinedFunctionNum);
1461 
1462   // If we outlined something, we definitely changed the MI count of the
1463   // module. If we've asked for size remarks, then output them.
1464   // FIXME: This should be in the pass manager.
1465   if (ShouldEmitSizeRemarks && OutlinedSomething)
1466     emitInstrCountChangedRemark(M, FunctionToInstrCount);
1467 
1468   LLVM_DEBUG({
1469     if (!OutlinedSomething)
1470       dbgs() << "Stopped outlining at iteration " << OutlineRepeatedNum
1471              << " because no changes were found.\n";
1472   });
1473 
1474   return OutlinedSomething;
1475 }
1476