xref: /freebsd-src/contrib/llvm-project/llvm/lib/Transforms/IPO/IROutliner.cpp (revision 4824e7fd18a1223177218d4aec1b3c6c5c4a444e)
1 //===- IROutliner.cpp -- Outline Similar Regions ----------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 // Implementation for the IROutliner which is used by the IROutliner Pass.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/Transforms/IPO/IROutliner.h"
15 #include "llvm/Analysis/IRSimilarityIdentifier.h"
16 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
17 #include "llvm/Analysis/TargetTransformInfo.h"
18 #include "llvm/IR/Attributes.h"
19 #include "llvm/IR/DebugInfoMetadata.h"
20 #include "llvm/IR/DIBuilder.h"
21 #include "llvm/IR/Dominators.h"
22 #include "llvm/IR/Mangler.h"
23 #include "llvm/IR/PassManager.h"
24 #include "llvm/InitializePasses.h"
25 #include "llvm/Pass.h"
26 #include "llvm/Support/CommandLine.h"
27 #include "llvm/Transforms/IPO.h"
28 #include <map>
29 #include <set>
30 #include <vector>
31 
32 #define DEBUG_TYPE "iroutliner"
33 
34 using namespace llvm;
35 using namespace IRSimilarity;
36 
37 // A command flag to be used for debugging to exclude branches from similarity
38 // matching and outlining.
39 extern cl::opt<bool> DisableBranches;
40 
41 // Set to true if the user wants the ir outliner to run on linkonceodr linkage
42 // functions. This is false by default because the linker can dedupe linkonceodr
43 // functions. Since the outliner is confined to a single module (modulo LTO),
44 // this is off by default. It should, however, be the default behavior in
45 // LTO.
46 static cl::opt<bool> EnableLinkOnceODRIROutlining(
47     "enable-linkonceodr-ir-outlining", cl::Hidden,
48     cl::desc("Enable the IR outliner on linkonceodr functions"),
49     cl::init(false));
50 
51 // This is a debug option to test small pieces of code to ensure that outlining
52 // works correctly.
53 static cl::opt<bool> NoCostModel(
54     "ir-outlining-no-cost", cl::init(false), cl::ReallyHidden,
55     cl::desc("Debug option to outline greedily, without restriction that "
56              "calculated benefit outweighs cost"));
57 
58 /// The OutlinableGroup holds all the overarching information for outlining
59 /// a set of regions that are structurally similar to one another, such as the
60 /// types of the overall function, the output blocks, the sets of stores needed
61 /// and a list of the different regions. This information is used in the
62 /// deduplication of extracted regions with the same structure.
63 struct OutlinableGroup {
64   /// The sections that could be outlined
65   std::vector<OutlinableRegion *> Regions;
66 
67   /// The argument types for the function created as the overall function to
68   /// replace the extracted function for each region.
69   std::vector<Type *> ArgumentTypes;
70   /// The FunctionType for the overall function.
71   FunctionType *OutlinedFunctionType = nullptr;
72   /// The Function for the collective overall function.
73   Function *OutlinedFunction = nullptr;
74 
75   /// Flag for whether we should not consider this group of OutlinableRegions
76   /// for extraction.
77   bool IgnoreGroup = false;
78 
79   /// The return blocks for the overall function.
80   DenseMap<Value *, BasicBlock *> EndBBs;
81 
82   /// The PHIBlocks with their corresponding return block based on the return
83   /// value as the key.
84   DenseMap<Value *, BasicBlock *> PHIBlocks;
85 
86   /// A set containing the different GVN store sets needed. Each array contains
87   /// a sorted list of the different values that need to be stored into output
88   /// registers.
89   DenseSet<ArrayRef<unsigned>> OutputGVNCombinations;
90 
91   /// Flag for whether the \ref ArgumentTypes have been defined after the
92   /// extraction of the first region.
93   bool InputTypesSet = false;
94 
95   /// The number of input values in \ref ArgumentTypes.  Anything after this
96   /// index in ArgumentTypes is an output argument.
97   unsigned NumAggregateInputs = 0;
98 
99   /// The mapping of the canonical numbering of the values in outlined sections
100   /// to specific arguments.
101   DenseMap<unsigned, unsigned> CanonicalNumberToAggArg;
102 
103   /// The number of branches in the region target a basic block that is outside
104   /// of the region.
105   unsigned BranchesToOutside = 0;
106 
107   /// The number of instructions that will be outlined by extracting \ref
108   /// Regions.
109   InstructionCost Benefit = 0;
110   /// The number of added instructions needed for the outlining of the \ref
111   /// Regions.
112   InstructionCost Cost = 0;
113 
114   /// The argument that needs to be marked with the swifterr attribute.  If not
115   /// needed, there is no value.
116   Optional<unsigned> SwiftErrorArgument;
117 
118   /// For the \ref Regions, we look at every Value.  If it is a constant,
119   /// we check whether it is the same in Region.
120   ///
121   /// \param [in,out] NotSame contains the global value numbers where the
122   /// constant is not always the same, and must be passed in as an argument.
123   void findSameConstants(DenseSet<unsigned> &NotSame);
124 
125   /// For the regions, look at each set of GVN stores needed and account for
126   /// each combination.  Add an argument to the argument types if there is
127   /// more than one combination.
128   ///
129   /// \param [in] M - The module we are outlining from.
130   void collectGVNStoreSets(Module &M);
131 };
132 
133 /// Move the contents of \p SourceBB to before the last instruction of \p
134 /// TargetBB.
135 /// \param SourceBB - the BasicBlock to pull Instructions from.
136 /// \param TargetBB - the BasicBlock to put Instruction into.
137 static void moveBBContents(BasicBlock &SourceBB, BasicBlock &TargetBB) {
138   for (Instruction &I : llvm::make_early_inc_range(SourceBB))
139     I.moveBefore(TargetBB, TargetBB.end());
140 }
141 
142 /// A function to sort the keys of \p Map, which must be a mapping of constant
143 /// values to basic blocks and return it in \p SortedKeys
144 ///
145 /// \param SortedKeys - The vector the keys will be return in and sorted.
146 /// \param Map - The DenseMap containing keys to sort.
147 static void getSortedConstantKeys(std::vector<Value *> &SortedKeys,
148                                   DenseMap<Value *, BasicBlock *> &Map) {
149   for (auto &VtoBB : Map)
150     SortedKeys.push_back(VtoBB.first);
151 
152   stable_sort(SortedKeys, [](const Value *LHS, const Value *RHS) {
153     const ConstantInt *LHSC = dyn_cast<ConstantInt>(LHS);
154     const ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS);
155     assert(RHSC && "Not a constant integer in return value?");
156     assert(LHSC && "Not a constant integer in return value?");
157 
158     return LHSC->getLimitedValue() < RHSC->getLimitedValue();
159   });
160 }
161 
162 Value *OutlinableRegion::findCorrespondingValueIn(const OutlinableRegion &Other,
163                                                   Value *V) {
164   Optional<unsigned> GVN = Candidate->getGVN(V);
165   assert(GVN.hasValue() && "No GVN for incoming value");
166   Optional<unsigned> CanonNum = Candidate->getCanonicalNum(*GVN);
167   Optional<unsigned> FirstGVN = Other.Candidate->fromCanonicalNum(*CanonNum);
168   Optional<Value *> FoundValueOpt = Other.Candidate->fromGVN(*FirstGVN);
169   return FoundValueOpt.getValueOr(nullptr);
170 }
171 
172 void OutlinableRegion::splitCandidate() {
173   assert(!CandidateSplit && "Candidate already split!");
174 
175   Instruction *BackInst = Candidate->backInstruction();
176 
177   Instruction *EndInst = nullptr;
178   // Check whether the last instruction is a terminator, if it is, we do
179   // not split on the following instruction. We leave the block as it is.  We
180   // also check that this is not the last instruction in the Module, otherwise
181   // the check for whether the current following instruction matches the
182   // previously recorded instruction will be incorrect.
183   if (!BackInst->isTerminator() ||
184       BackInst->getParent() != &BackInst->getFunction()->back()) {
185     EndInst = Candidate->end()->Inst;
186     assert(EndInst && "Expected an end instruction?");
187   }
188 
189   // We check if the current instruction following the last instruction in the
190   // region is the same as the recorded instruction following the last
191   // instruction. If they do not match, there could be problems in rewriting
192   // the program after outlining, so we ignore it.
193   if (!BackInst->isTerminator() &&
194       EndInst != BackInst->getNextNonDebugInstruction())
195     return;
196 
197   Instruction *StartInst = (*Candidate->begin()).Inst;
198   assert(StartInst && "Expected a start instruction?");
199   StartBB = StartInst->getParent();
200   PrevBB = StartBB;
201 
202   // The basic block gets split like so:
203   // block:                 block:
204   //   inst1                  inst1
205   //   inst2                  inst2
206   //   region1               br block_to_outline
207   //   region2              block_to_outline:
208   //   region3          ->    region1
209   //   region4                region2
210   //   inst3                  region3
211   //   inst4                  region4
212   //                          br block_after_outline
213   //                        block_after_outline:
214   //                          inst3
215   //                          inst4
216 
217   std::string OriginalName = PrevBB->getName().str();
218 
219   StartBB = PrevBB->splitBasicBlock(StartInst, OriginalName + "_to_outline");
220   PrevBB->replaceSuccessorsPhiUsesWith(PrevBB, StartBB);
221 
222   CandidateSplit = true;
223   if (!BackInst->isTerminator()) {
224     EndBB = EndInst->getParent();
225     FollowBB = EndBB->splitBasicBlock(EndInst, OriginalName + "_after_outline");
226     EndBB->replaceSuccessorsPhiUsesWith(EndBB, FollowBB);
227     FollowBB->replaceSuccessorsPhiUsesWith(PrevBB, FollowBB);
228     return;
229   }
230 
231   EndBB = BackInst->getParent();
232   EndsInBranch = true;
233   FollowBB = nullptr;
234 }
235 
236 void OutlinableRegion::reattachCandidate() {
237   assert(CandidateSplit && "Candidate is not split!");
238 
239   // The basic block gets reattached like so:
240   // block:                        block:
241   //   inst1                         inst1
242   //   inst2                         inst2
243   //   br block_to_outline           region1
244   // block_to_outline:        ->     region2
245   //   region1                       region3
246   //   region2                       region4
247   //   region3                       inst3
248   //   region4                       inst4
249   //   br block_after_outline
250   // block_after_outline:
251   //   inst3
252   //   inst4
253   assert(StartBB != nullptr && "StartBB for Candidate is not defined!");
254 
255   // StartBB should only have one predecessor since we put an unconditional
256   // branch at the end of PrevBB when we split the BasicBlock.
257   PrevBB = StartBB->getSinglePredecessor();
258   assert(PrevBB != nullptr &&
259          "No Predecessor for the region start basic block!");
260 
261   assert(PrevBB->getTerminator() && "Terminator removed from PrevBB!");
262   PrevBB->getTerminator()->eraseFromParent();
263 
264   moveBBContents(*StartBB, *PrevBB);
265 
266   BasicBlock *PlacementBB = PrevBB;
267   if (StartBB != EndBB)
268     PlacementBB = EndBB;
269   if (!EndsInBranch && PlacementBB->getUniqueSuccessor() != nullptr) {
270     assert(FollowBB != nullptr && "FollowBB for Candidate is not defined!");
271     assert(PlacementBB->getTerminator() && "Terminator removed from EndBB!");
272     PlacementBB->getTerminator()->eraseFromParent();
273     moveBBContents(*FollowBB, *PlacementBB);
274     PlacementBB->replaceSuccessorsPhiUsesWith(FollowBB, PlacementBB);
275     FollowBB->eraseFromParent();
276   }
277 
278   PrevBB->replaceSuccessorsPhiUsesWith(StartBB, PrevBB);
279   StartBB->eraseFromParent();
280 
281   // Make sure to save changes back to the StartBB.
282   StartBB = PrevBB;
283   EndBB = nullptr;
284   PrevBB = nullptr;
285   FollowBB = nullptr;
286 
287   CandidateSplit = false;
288 }
289 
290 /// Find whether \p V matches the Constants previously found for the \p GVN.
291 ///
292 /// \param V - The value to check for consistency.
293 /// \param GVN - The global value number assigned to \p V.
294 /// \param GVNToConstant - The mapping of global value number to Constants.
295 /// \returns true if the Value matches the Constant mapped to by V and false if
296 /// it \p V is a Constant but does not match.
297 /// \returns None if \p V is not a Constant.
298 static Optional<bool>
299 constantMatches(Value *V, unsigned GVN,
300                 DenseMap<unsigned, Constant *> &GVNToConstant) {
301   // See if we have a constants
302   Constant *CST = dyn_cast<Constant>(V);
303   if (!CST)
304     return None;
305 
306   // Holds a mapping from a global value number to a Constant.
307   DenseMap<unsigned, Constant *>::iterator GVNToConstantIt;
308   bool Inserted;
309 
310 
311   // If we have a constant, try to make a new entry in the GVNToConstant.
312   std::tie(GVNToConstantIt, Inserted) =
313       GVNToConstant.insert(std::make_pair(GVN, CST));
314   // If it was found and is not equal, it is not the same. We do not
315   // handle this case yet, and exit early.
316   if (Inserted || (GVNToConstantIt->second == CST))
317     return true;
318 
319   return false;
320 }
321 
322 InstructionCost OutlinableRegion::getBenefit(TargetTransformInfo &TTI) {
323   InstructionCost Benefit = 0;
324 
325   // Estimate the benefit of outlining a specific sections of the program.  We
326   // delegate mostly this task to the TargetTransformInfo so that if the target
327   // has specific changes, we can have a more accurate estimate.
328 
329   // However, getInstructionCost delegates the code size calculation for
330   // arithmetic instructions to getArithmeticInstrCost in
331   // include/Analysis/TargetTransformImpl.h, where it always estimates that the
332   // code size for a division and remainder instruction to be equal to 4, and
333   // everything else to 1.  This is not an accurate representation of the
334   // division instruction for targets that have a native division instruction.
335   // To be overly conservative, we only add 1 to the number of instructions for
336   // each division instruction.
337   for (IRInstructionData &ID : *Candidate) {
338     Instruction *I = ID.Inst;
339     switch (I->getOpcode()) {
340     case Instruction::FDiv:
341     case Instruction::FRem:
342     case Instruction::SDiv:
343     case Instruction::SRem:
344     case Instruction::UDiv:
345     case Instruction::URem:
346       Benefit += 1;
347       break;
348     default:
349       Benefit += TTI.getInstructionCost(I, TargetTransformInfo::TCK_CodeSize);
350       break;
351     }
352   }
353 
354   return Benefit;
355 }
356 
357 /// Find whether \p Region matches the global value numbering to Constant
358 /// mapping found so far.
359 ///
360 /// \param Region - The OutlinableRegion we are checking for constants
361 /// \param GVNToConstant - The mapping of global value number to Constants.
362 /// \param NotSame - The set of global value numbers that do not have the same
363 /// constant in each region.
364 /// \returns true if all Constants are the same in every use of a Constant in \p
365 /// Region and false if not
366 static bool
367 collectRegionsConstants(OutlinableRegion &Region,
368                         DenseMap<unsigned, Constant *> &GVNToConstant,
369                         DenseSet<unsigned> &NotSame) {
370   bool ConstantsTheSame = true;
371 
372   IRSimilarityCandidate &C = *Region.Candidate;
373   for (IRInstructionData &ID : C) {
374 
375     // Iterate over the operands in an instruction. If the global value number,
376     // assigned by the IRSimilarityCandidate, has been seen before, we check if
377     // the the number has been found to be not the same value in each instance.
378     for (Value *V : ID.OperVals) {
379       Optional<unsigned> GVNOpt = C.getGVN(V);
380       assert(GVNOpt.hasValue() && "Expected a GVN for operand?");
381       unsigned GVN = GVNOpt.getValue();
382 
383       // Check if this global value has been found to not be the same already.
384       if (NotSame.contains(GVN)) {
385         if (isa<Constant>(V))
386           ConstantsTheSame = false;
387         continue;
388       }
389 
390       // If it has been the same so far, we check the value for if the
391       // associated Constant value match the previous instances of the same
392       // global value number.  If the global value does not map to a Constant,
393       // it is considered to not be the same value.
394       Optional<bool> ConstantMatches = constantMatches(V, GVN, GVNToConstant);
395       if (ConstantMatches.hasValue()) {
396         if (ConstantMatches.getValue())
397           continue;
398         else
399           ConstantsTheSame = false;
400       }
401 
402       // While this value is a register, it might not have been previously,
403       // make sure we don't already have a constant mapped to this global value
404       // number.
405       if (GVNToConstant.find(GVN) != GVNToConstant.end())
406         ConstantsTheSame = false;
407 
408       NotSame.insert(GVN);
409     }
410   }
411 
412   return ConstantsTheSame;
413 }
414 
415 void OutlinableGroup::findSameConstants(DenseSet<unsigned> &NotSame) {
416   DenseMap<unsigned, Constant *> GVNToConstant;
417 
418   for (OutlinableRegion *Region : Regions)
419     collectRegionsConstants(*Region, GVNToConstant, NotSame);
420 }
421 
422 void OutlinableGroup::collectGVNStoreSets(Module &M) {
423   for (OutlinableRegion *OS : Regions)
424     OutputGVNCombinations.insert(OS->GVNStores);
425 
426   // We are adding an extracted argument to decide between which output path
427   // to use in the basic block.  It is used in a switch statement and only
428   // needs to be an integer.
429   if (OutputGVNCombinations.size() > 1)
430     ArgumentTypes.push_back(Type::getInt32Ty(M.getContext()));
431 }
432 
433 /// Get the subprogram if it exists for one of the outlined regions.
434 ///
435 /// \param [in] Group - The set of regions to find a subprogram for.
436 /// \returns the subprogram if it exists, or nullptr.
437 static DISubprogram *getSubprogramOrNull(OutlinableGroup &Group) {
438   for (OutlinableRegion *OS : Group.Regions)
439     if (Function *F = OS->Call->getFunction())
440       if (DISubprogram *SP = F->getSubprogram())
441         return SP;
442 
443   return nullptr;
444 }
445 
446 Function *IROutliner::createFunction(Module &M, OutlinableGroup &Group,
447                                      unsigned FunctionNameSuffix) {
448   assert(!Group.OutlinedFunction && "Function is already defined!");
449 
450   Type *RetTy = Type::getVoidTy(M.getContext());
451   // All extracted functions _should_ have the same return type at this point
452   // since the similarity identifier ensures that all branches outside of the
453   // region occur in the same place.
454 
455   // NOTE: Should we ever move to the model that uses a switch at every point
456   // needed, meaning that we could branch within the region or out, it is
457   // possible that we will need to switch to using the most general case all of
458   // the time.
459   for (OutlinableRegion *R : Group.Regions) {
460     Type *ExtractedFuncType = R->ExtractedFunction->getReturnType();
461     if ((RetTy->isVoidTy() && !ExtractedFuncType->isVoidTy()) ||
462         (RetTy->isIntegerTy(1) && ExtractedFuncType->isIntegerTy(16)))
463       RetTy = ExtractedFuncType;
464   }
465 
466   Group.OutlinedFunctionType = FunctionType::get(
467       RetTy, Group.ArgumentTypes, false);
468 
469   // These functions will only be called from within the same module, so
470   // we can set an internal linkage.
471   Group.OutlinedFunction = Function::Create(
472       Group.OutlinedFunctionType, GlobalValue::InternalLinkage,
473       "outlined_ir_func_" + std::to_string(FunctionNameSuffix), M);
474 
475   // Transfer the swifterr attribute to the correct function parameter.
476   if (Group.SwiftErrorArgument.hasValue())
477     Group.OutlinedFunction->addParamAttr(Group.SwiftErrorArgument.getValue(),
478                                          Attribute::SwiftError);
479 
480   Group.OutlinedFunction->addFnAttr(Attribute::OptimizeForSize);
481   Group.OutlinedFunction->addFnAttr(Attribute::MinSize);
482 
483   // If there's a DISubprogram associated with this outlined function, then
484   // emit debug info for the outlined function.
485   if (DISubprogram *SP = getSubprogramOrNull(Group)) {
486     Function *F = Group.OutlinedFunction;
487     // We have a DISubprogram. Get its DICompileUnit.
488     DICompileUnit *CU = SP->getUnit();
489     DIBuilder DB(M, true, CU);
490     DIFile *Unit = SP->getFile();
491     Mangler Mg;
492     // Get the mangled name of the function for the linkage name.
493     std::string Dummy;
494     llvm::raw_string_ostream MangledNameStream(Dummy);
495     Mg.getNameWithPrefix(MangledNameStream, F, false);
496 
497     DISubprogram *OutlinedSP = DB.createFunction(
498         Unit /* Context */, F->getName(), MangledNameStream.str(),
499         Unit /* File */,
500         0 /* Line 0 is reserved for compiler-generated code. */,
501         DB.createSubroutineType(DB.getOrCreateTypeArray(None)), /* void type */
502         0, /* Line 0 is reserved for compiler-generated code. */
503         DINode::DIFlags::FlagArtificial /* Compiler-generated code. */,
504         /* Outlined code is optimized code by definition. */
505         DISubprogram::SPFlagDefinition | DISubprogram::SPFlagOptimized);
506 
507     // Don't add any new variables to the subprogram.
508     DB.finalizeSubprogram(OutlinedSP);
509 
510     // Attach subprogram to the function.
511     F->setSubprogram(OutlinedSP);
512     // We're done with the DIBuilder.
513     DB.finalize();
514   }
515 
516   return Group.OutlinedFunction;
517 }
518 
519 /// Move each BasicBlock in \p Old to \p New.
520 ///
521 /// \param [in] Old - The function to move the basic blocks from.
522 /// \param [in] New - The function to move the basic blocks to.
523 /// \param [out] NewEnds - The return blocks of the new overall function.
524 static void moveFunctionData(Function &Old, Function &New,
525                              DenseMap<Value *, BasicBlock *> &NewEnds) {
526   for (BasicBlock &CurrBB : llvm::make_early_inc_range(Old)) {
527     CurrBB.removeFromParent();
528     CurrBB.insertInto(&New);
529     Instruction *I = CurrBB.getTerminator();
530 
531     // For each block we find a return instruction is, it is a potential exit
532     // path for the function.  We keep track of each block based on the return
533     // value here.
534     if (ReturnInst *RI = dyn_cast<ReturnInst>(I))
535       NewEnds.insert(std::make_pair(RI->getReturnValue(), &CurrBB));
536 
537     std::vector<Instruction *> DebugInsts;
538 
539     for (Instruction &Val : CurrBB) {
540       // We must handle the scoping of called functions differently than
541       // other outlined instructions.
542       if (!isa<CallInst>(&Val)) {
543         // Remove the debug information for outlined functions.
544         Val.setDebugLoc(DebugLoc());
545         continue;
546       }
547 
548       // From this point we are only handling call instructions.
549       CallInst *CI = cast<CallInst>(&Val);
550 
551       // We add any debug statements here, to be removed after.  Since the
552       // instructions originate from many different locations in the program,
553       // it will cause incorrect reporting from a debugger if we keep the
554       // same debug instructions.
555       if (isa<DbgInfoIntrinsic>(CI)) {
556         DebugInsts.push_back(&Val);
557         continue;
558       }
559 
560       // Edit the scope of called functions inside of outlined functions.
561       if (DISubprogram *SP = New.getSubprogram()) {
562         DILocation *DI = DILocation::get(New.getContext(), 0, 0, SP);
563         Val.setDebugLoc(DI);
564       }
565     }
566 
567     for (Instruction *I : DebugInsts)
568       I->eraseFromParent();
569   }
570 
571   assert(NewEnds.size() > 0 && "No return instruction for new function?");
572 }
573 
574 /// Find the the constants that will need to be lifted into arguments
575 /// as they are not the same in each instance of the region.
576 ///
577 /// \param [in] C - The IRSimilarityCandidate containing the region we are
578 /// analyzing.
579 /// \param [in] NotSame - The set of global value numbers that do not have a
580 /// single Constant across all OutlinableRegions similar to \p C.
581 /// \param [out] Inputs - The list containing the global value numbers of the
582 /// arguments needed for the region of code.
583 static void findConstants(IRSimilarityCandidate &C, DenseSet<unsigned> &NotSame,
584                           std::vector<unsigned> &Inputs) {
585   DenseSet<unsigned> Seen;
586   // Iterate over the instructions, and find what constants will need to be
587   // extracted into arguments.
588   for (IRInstructionDataList::iterator IDIt = C.begin(), EndIDIt = C.end();
589        IDIt != EndIDIt; IDIt++) {
590     for (Value *V : (*IDIt).OperVals) {
591       // Since these are stored before any outlining, they will be in the
592       // global value numbering.
593       unsigned GVN = C.getGVN(V).getValue();
594       if (isa<Constant>(V))
595         if (NotSame.contains(GVN) && !Seen.contains(GVN)) {
596           Inputs.push_back(GVN);
597           Seen.insert(GVN);
598         }
599     }
600   }
601 }
602 
603 /// Find the GVN for the inputs that have been found by the CodeExtractor.
604 ///
605 /// \param [in] C - The IRSimilarityCandidate containing the region we are
606 /// analyzing.
607 /// \param [in] CurrentInputs - The set of inputs found by the
608 /// CodeExtractor.
609 /// \param [in] OutputMappings - The mapping of values that have been replaced
610 /// by a new output value.
611 /// \param [out] EndInputNumbers - The global value numbers for the extracted
612 /// arguments.
613 static void mapInputsToGVNs(IRSimilarityCandidate &C,
614                             SetVector<Value *> &CurrentInputs,
615                             const DenseMap<Value *, Value *> &OutputMappings,
616                             std::vector<unsigned> &EndInputNumbers) {
617   // Get the Global Value Number for each input.  We check if the Value has been
618   // replaced by a different value at output, and use the original value before
619   // replacement.
620   for (Value *Input : CurrentInputs) {
621     assert(Input && "Have a nullptr as an input");
622     if (OutputMappings.find(Input) != OutputMappings.end())
623       Input = OutputMappings.find(Input)->second;
624     assert(C.getGVN(Input).hasValue() &&
625            "Could not find a numbering for the given input");
626     EndInputNumbers.push_back(C.getGVN(Input).getValue());
627   }
628 }
629 
630 /// Find the original value for the \p ArgInput values if any one of them was
631 /// replaced during a previous extraction.
632 ///
633 /// \param [in] ArgInputs - The inputs to be extracted by the code extractor.
634 /// \param [in] OutputMappings - The mapping of values that have been replaced
635 /// by a new output value.
636 /// \param [out] RemappedArgInputs - The remapped values according to
637 /// \p OutputMappings that will be extracted.
638 static void
639 remapExtractedInputs(const ArrayRef<Value *> ArgInputs,
640                      const DenseMap<Value *, Value *> &OutputMappings,
641                      SetVector<Value *> &RemappedArgInputs) {
642   // Get the global value number for each input that will be extracted as an
643   // argument by the code extractor, remapping if needed for reloaded values.
644   for (Value *Input : ArgInputs) {
645     if (OutputMappings.find(Input) != OutputMappings.end())
646       Input = OutputMappings.find(Input)->second;
647     RemappedArgInputs.insert(Input);
648   }
649 }
650 
651 /// Find the input GVNs and the output values for a region of Instructions.
652 /// Using the code extractor, we collect the inputs to the extracted function.
653 ///
654 /// The \p Region can be identified as needing to be ignored in this function.
655 /// It should be checked whether it should be ignored after a call to this
656 /// function.
657 ///
658 /// \param [in,out] Region - The region of code to be analyzed.
659 /// \param [out] InputGVNs - The global value numbers for the extracted
660 /// arguments.
661 /// \param [in] NotSame - The global value numbers in the region that do not
662 /// have the same constant value in the regions structurally similar to
663 /// \p Region.
664 /// \param [in] OutputMappings - The mapping of values that have been replaced
665 /// by a new output value after extraction.
666 /// \param [out] ArgInputs - The values of the inputs to the extracted function.
667 /// \param [out] Outputs - The set of values extracted by the CodeExtractor
668 /// as outputs.
669 static void getCodeExtractorArguments(
670     OutlinableRegion &Region, std::vector<unsigned> &InputGVNs,
671     DenseSet<unsigned> &NotSame, DenseMap<Value *, Value *> &OutputMappings,
672     SetVector<Value *> &ArgInputs, SetVector<Value *> &Outputs) {
673   IRSimilarityCandidate &C = *Region.Candidate;
674 
675   // OverallInputs are the inputs to the region found by the CodeExtractor,
676   // SinkCands and HoistCands are used by the CodeExtractor to find sunken
677   // allocas of values whose lifetimes are contained completely within the
678   // outlined region. PremappedInputs are the arguments found by the
679   // CodeExtractor, removing conditions such as sunken allocas, but that
680   // may need to be remapped due to the extracted output values replacing
681   // the original values. We use DummyOutputs for this first run of finding
682   // inputs and outputs since the outputs could change during findAllocas,
683   // the correct set of extracted outputs will be in the final Outputs ValueSet.
684   SetVector<Value *> OverallInputs, PremappedInputs, SinkCands, HoistCands,
685       DummyOutputs;
686 
687   // Use the code extractor to get the inputs and outputs, without sunken
688   // allocas or removing llvm.assumes.
689   CodeExtractor *CE = Region.CE;
690   CE->findInputsOutputs(OverallInputs, DummyOutputs, SinkCands);
691   assert(Region.StartBB && "Region must have a start BasicBlock!");
692   Function *OrigF = Region.StartBB->getParent();
693   CodeExtractorAnalysisCache CEAC(*OrigF);
694   BasicBlock *Dummy = nullptr;
695 
696   // The region may be ineligible due to VarArgs in the parent function. In this
697   // case we ignore the region.
698   if (!CE->isEligible()) {
699     Region.IgnoreRegion = true;
700     return;
701   }
702 
703   // Find if any values are going to be sunk into the function when extracted
704   CE->findAllocas(CEAC, SinkCands, HoistCands, Dummy);
705   CE->findInputsOutputs(PremappedInputs, Outputs, SinkCands);
706 
707   // TODO: Support regions with sunken allocas: values whose lifetimes are
708   // contained completely within the outlined region.  These are not guaranteed
709   // to be the same in every region, so we must elevate them all to arguments
710   // when they appear.  If these values are not equal, it means there is some
711   // Input in OverallInputs that was removed for ArgInputs.
712   if (OverallInputs.size() != PremappedInputs.size()) {
713     Region.IgnoreRegion = true;
714     return;
715   }
716 
717   findConstants(C, NotSame, InputGVNs);
718 
719   mapInputsToGVNs(C, OverallInputs, OutputMappings, InputGVNs);
720 
721   remapExtractedInputs(PremappedInputs.getArrayRef(), OutputMappings,
722                        ArgInputs);
723 
724   // Sort the GVNs, since we now have constants included in the \ref InputGVNs
725   // we need to make sure they are in a deterministic order.
726   stable_sort(InputGVNs);
727 }
728 
729 /// Look over the inputs and map each input argument to an argument in the
730 /// overall function for the OutlinableRegions.  This creates a way to replace
731 /// the arguments of the extracted function with the arguments of the new
732 /// overall function.
733 ///
734 /// \param [in,out] Region - The region of code to be analyzed.
735 /// \param [in] InputGVNs - The global value numbering of the input values
736 /// collected.
737 /// \param [in] ArgInputs - The values of the arguments to the extracted
738 /// function.
739 static void
740 findExtractedInputToOverallInputMapping(OutlinableRegion &Region,
741                                         std::vector<unsigned> &InputGVNs,
742                                         SetVector<Value *> &ArgInputs) {
743 
744   IRSimilarityCandidate &C = *Region.Candidate;
745   OutlinableGroup &Group = *Region.Parent;
746 
747   // This counts the argument number in the overall function.
748   unsigned TypeIndex = 0;
749 
750   // This counts the argument number in the extracted function.
751   unsigned OriginalIndex = 0;
752 
753   // Find the mapping of the extracted arguments to the arguments for the
754   // overall function. Since there may be extra arguments in the overall
755   // function to account for the extracted constants, we have two different
756   // counters as we find extracted arguments, and as we come across overall
757   // arguments.
758 
759   // Additionally, in our first pass, for the first extracted function,
760   // we find argument locations for the canonical value numbering.  This
761   // numbering overrides any discovered location for the extracted code.
762   for (unsigned InputVal : InputGVNs) {
763     Optional<unsigned> CanonicalNumberOpt = C.getCanonicalNum(InputVal);
764     assert(CanonicalNumberOpt.hasValue() && "Canonical number not found?");
765     unsigned CanonicalNumber = CanonicalNumberOpt.getValue();
766 
767     Optional<Value *> InputOpt = C.fromGVN(InputVal);
768     assert(InputOpt.hasValue() && "Global value number not found?");
769     Value *Input = InputOpt.getValue();
770 
771     DenseMap<unsigned, unsigned>::iterator AggArgIt =
772         Group.CanonicalNumberToAggArg.find(CanonicalNumber);
773 
774     if (!Group.InputTypesSet) {
775       Group.ArgumentTypes.push_back(Input->getType());
776       // If the input value has a swifterr attribute, make sure to mark the
777       // argument in the overall function.
778       if (Input->isSwiftError()) {
779         assert(
780             !Group.SwiftErrorArgument.hasValue() &&
781             "Argument already marked with swifterr for this OutlinableGroup!");
782         Group.SwiftErrorArgument = TypeIndex;
783       }
784     }
785 
786     // Check if we have a constant. If we do add it to the overall argument
787     // number to Constant map for the region, and continue to the next input.
788     if (Constant *CST = dyn_cast<Constant>(Input)) {
789       if (AggArgIt != Group.CanonicalNumberToAggArg.end())
790         Region.AggArgToConstant.insert(std::make_pair(AggArgIt->second, CST));
791       else {
792         Group.CanonicalNumberToAggArg.insert(
793             std::make_pair(CanonicalNumber, TypeIndex));
794         Region.AggArgToConstant.insert(std::make_pair(TypeIndex, CST));
795       }
796       TypeIndex++;
797       continue;
798     }
799 
800     // It is not a constant, we create the mapping from extracted argument list
801     // to the overall argument list, using the canonical location, if it exists.
802     assert(ArgInputs.count(Input) && "Input cannot be found!");
803 
804     if (AggArgIt != Group.CanonicalNumberToAggArg.end()) {
805       if (OriginalIndex != AggArgIt->second)
806         Region.ChangedArgOrder = true;
807       Region.ExtractedArgToAgg.insert(
808           std::make_pair(OriginalIndex, AggArgIt->second));
809       Region.AggArgToExtracted.insert(
810           std::make_pair(AggArgIt->second, OriginalIndex));
811     } else {
812       Group.CanonicalNumberToAggArg.insert(
813           std::make_pair(CanonicalNumber, TypeIndex));
814       Region.ExtractedArgToAgg.insert(std::make_pair(OriginalIndex, TypeIndex));
815       Region.AggArgToExtracted.insert(std::make_pair(TypeIndex, OriginalIndex));
816     }
817     OriginalIndex++;
818     TypeIndex++;
819   }
820 
821   // If the function type definitions for the OutlinableGroup holding the region
822   // have not been set, set the length of the inputs here.  We should have the
823   // same inputs for all of the different regions contained in the
824   // OutlinableGroup since they are all structurally similar to one another.
825   if (!Group.InputTypesSet) {
826     Group.NumAggregateInputs = TypeIndex;
827     Group.InputTypesSet = true;
828   }
829 
830   Region.NumExtractedInputs = OriginalIndex;
831 }
832 
833 /// Create a mapping of the output arguments for the \p Region to the output
834 /// arguments of the overall outlined function.
835 ///
836 /// \param [in,out] Region - The region of code to be analyzed.
837 /// \param [in] Outputs - The values found by the code extractor.
838 static void
839 findExtractedOutputToOverallOutputMapping(OutlinableRegion &Region,
840                                           SetVector<Value *> &Outputs) {
841   OutlinableGroup &Group = *Region.Parent;
842   IRSimilarityCandidate &C = *Region.Candidate;
843 
844   SmallVector<BasicBlock *> BE;
845   DenseSet<BasicBlock *> BBSet;
846   C.getBasicBlocks(BBSet, BE);
847 
848   // Find the exits to the region.
849   SmallPtrSet<BasicBlock *, 1> Exits;
850   for (BasicBlock *Block : BE)
851     for (BasicBlock *Succ : successors(Block))
852       if (!BBSet.contains(Succ))
853         Exits.insert(Succ);
854 
855   // After determining which blocks exit to PHINodes, we add these PHINodes to
856   // the set of outputs to be processed.  We also check the incoming values of
857   // the PHINodes for whether they should no longer be considered outputs.
858   for (BasicBlock *ExitBB : Exits) {
859     for (PHINode &PN : ExitBB->phis()) {
860       // Find all incoming values from the outlining region.
861       SmallVector<unsigned, 2> IncomingVals;
862       for (unsigned Idx = 0; Idx < PN.getNumIncomingValues(); ++Idx)
863         if (BBSet.contains(PN.getIncomingBlock(Idx)))
864           IncomingVals.push_back(Idx);
865 
866       // Do not process PHI if there is one (or fewer) predecessor from region.
867       if (IncomingVals.size() <= 1)
868         continue;
869 
870       Region.IgnoreRegion = true;
871       return;
872     }
873   }
874 
875   // This counts the argument number in the extracted function.
876   unsigned OriginalIndex = Region.NumExtractedInputs;
877 
878   // This counts the argument number in the overall function.
879   unsigned TypeIndex = Group.NumAggregateInputs;
880   bool TypeFound;
881   DenseSet<unsigned> AggArgsUsed;
882 
883   // Iterate over the output types and identify if there is an aggregate pointer
884   // type whose base type matches the current output type. If there is, we mark
885   // that we will use this output register for this value. If not we add another
886   // type to the overall argument type list. We also store the GVNs used for
887   // stores to identify which values will need to be moved into an special
888   // block that holds the stores to the output registers.
889   for (Value *Output : Outputs) {
890     TypeFound = false;
891     // We can do this since it is a result value, and will have a number
892     // that is necessarily the same. BUT if in the future, the instructions
893     // do not have to be in same order, but are functionally the same, we will
894     // have to use a different scheme, as one-to-one correspondence is not
895     // guaranteed.
896     unsigned GlobalValue = C.getGVN(Output).getValue();
897     unsigned ArgumentSize = Group.ArgumentTypes.size();
898 
899     for (unsigned Jdx = TypeIndex; Jdx < ArgumentSize; Jdx++) {
900       if (Group.ArgumentTypes[Jdx] != PointerType::getUnqual(Output->getType()))
901         continue;
902 
903       if (AggArgsUsed.contains(Jdx))
904         continue;
905 
906       TypeFound = true;
907       AggArgsUsed.insert(Jdx);
908       Region.ExtractedArgToAgg.insert(std::make_pair(OriginalIndex, Jdx));
909       Region.AggArgToExtracted.insert(std::make_pair(Jdx, OriginalIndex));
910       Region.GVNStores.push_back(GlobalValue);
911       break;
912     }
913 
914     // We were unable to find an unused type in the output type set that matches
915     // the output, so we add a pointer type to the argument types of the overall
916     // function to handle this output and create a mapping to it.
917     if (!TypeFound) {
918       Group.ArgumentTypes.push_back(PointerType::getUnqual(Output->getType()));
919       AggArgsUsed.insert(Group.ArgumentTypes.size() - 1);
920       Region.ExtractedArgToAgg.insert(
921           std::make_pair(OriginalIndex, Group.ArgumentTypes.size() - 1));
922       Region.AggArgToExtracted.insert(
923           std::make_pair(Group.ArgumentTypes.size() - 1, OriginalIndex));
924       Region.GVNStores.push_back(GlobalValue);
925     }
926 
927     stable_sort(Region.GVNStores);
928     OriginalIndex++;
929     TypeIndex++;
930   }
931 }
932 
933 void IROutliner::findAddInputsOutputs(Module &M, OutlinableRegion &Region,
934                                       DenseSet<unsigned> &NotSame) {
935   std::vector<unsigned> Inputs;
936   SetVector<Value *> ArgInputs, Outputs;
937 
938   getCodeExtractorArguments(Region, Inputs, NotSame, OutputMappings, ArgInputs,
939                             Outputs);
940 
941   if (Region.IgnoreRegion)
942     return;
943 
944   // Map the inputs found by the CodeExtractor to the arguments found for
945   // the overall function.
946   findExtractedInputToOverallInputMapping(Region, Inputs, ArgInputs);
947 
948   // Map the outputs found by the CodeExtractor to the arguments found for
949   // the overall function.
950   findExtractedOutputToOverallOutputMapping(Region, Outputs);
951 }
952 
953 /// Replace the extracted function in the Region with a call to the overall
954 /// function constructed from the deduplicated similar regions, replacing and
955 /// remapping the values passed to the extracted function as arguments to the
956 /// new arguments of the overall function.
957 ///
958 /// \param [in] M - The module to outline from.
959 /// \param [in] Region - The regions of extracted code to be replaced with a new
960 /// function.
961 /// \returns a call instruction with the replaced function.
962 CallInst *replaceCalledFunction(Module &M, OutlinableRegion &Region) {
963   std::vector<Value *> NewCallArgs;
964   DenseMap<unsigned, unsigned>::iterator ArgPair;
965 
966   OutlinableGroup &Group = *Region.Parent;
967   CallInst *Call = Region.Call;
968   assert(Call && "Call to replace is nullptr?");
969   Function *AggFunc = Group.OutlinedFunction;
970   assert(AggFunc && "Function to replace with is nullptr?");
971 
972   // If the arguments are the same size, there are not values that need to be
973   // made into an argument, the argument ordering has not been change, or
974   // different output registers to handle.  We can simply replace the called
975   // function in this case.
976   if (!Region.ChangedArgOrder && AggFunc->arg_size() == Call->arg_size()) {
977     LLVM_DEBUG(dbgs() << "Replace call to " << *Call << " with call to "
978                       << *AggFunc << " with same number of arguments\n");
979     Call->setCalledFunction(AggFunc);
980     return Call;
981   }
982 
983   // We have a different number of arguments than the new function, so
984   // we need to use our previously mappings off extracted argument to overall
985   // function argument, and constants to overall function argument to create the
986   // new argument list.
987   for (unsigned AggArgIdx = 0; AggArgIdx < AggFunc->arg_size(); AggArgIdx++) {
988 
989     if (AggArgIdx == AggFunc->arg_size() - 1 &&
990         Group.OutputGVNCombinations.size() > 1) {
991       // If we are on the last argument, and we need to differentiate between
992       // output blocks, add an integer to the argument list to determine
993       // what block to take
994       LLVM_DEBUG(dbgs() << "Set switch block argument to "
995                         << Region.OutputBlockNum << "\n");
996       NewCallArgs.push_back(ConstantInt::get(Type::getInt32Ty(M.getContext()),
997                                              Region.OutputBlockNum));
998       continue;
999     }
1000 
1001     ArgPair = Region.AggArgToExtracted.find(AggArgIdx);
1002     if (ArgPair != Region.AggArgToExtracted.end()) {
1003       Value *ArgumentValue = Call->getArgOperand(ArgPair->second);
1004       // If we found the mapping from the extracted function to the overall
1005       // function, we simply add it to the argument list.  We use the same
1006       // value, it just needs to honor the new order of arguments.
1007       LLVM_DEBUG(dbgs() << "Setting argument " << AggArgIdx << " to value "
1008                         << *ArgumentValue << "\n");
1009       NewCallArgs.push_back(ArgumentValue);
1010       continue;
1011     }
1012 
1013     // If it is a constant, we simply add it to the argument list as a value.
1014     if (Region.AggArgToConstant.find(AggArgIdx) !=
1015         Region.AggArgToConstant.end()) {
1016       Constant *CST = Region.AggArgToConstant.find(AggArgIdx)->second;
1017       LLVM_DEBUG(dbgs() << "Setting argument " << AggArgIdx << " to value "
1018                         << *CST << "\n");
1019       NewCallArgs.push_back(CST);
1020       continue;
1021     }
1022 
1023     // Add a nullptr value if the argument is not found in the extracted
1024     // function.  If we cannot find a value, it means it is not in use
1025     // for the region, so we should not pass anything to it.
1026     LLVM_DEBUG(dbgs() << "Setting argument " << AggArgIdx << " to nullptr\n");
1027     NewCallArgs.push_back(ConstantPointerNull::get(
1028         static_cast<PointerType *>(AggFunc->getArg(AggArgIdx)->getType())));
1029   }
1030 
1031   LLVM_DEBUG(dbgs() << "Replace call to " << *Call << " with call to "
1032                     << *AggFunc << " with new set of arguments\n");
1033   // Create the new call instruction and erase the old one.
1034   Call = CallInst::Create(AggFunc->getFunctionType(), AggFunc, NewCallArgs, "",
1035                           Call);
1036 
1037   // It is possible that the call to the outlined function is either the first
1038   // instruction is in the new block, the last instruction, or both.  If either
1039   // of these is the case, we need to make sure that we replace the instruction
1040   // in the IRInstructionData struct with the new call.
1041   CallInst *OldCall = Region.Call;
1042   if (Region.NewFront->Inst == OldCall)
1043     Region.NewFront->Inst = Call;
1044   if (Region.NewBack->Inst == OldCall)
1045     Region.NewBack->Inst = Call;
1046 
1047   // Transfer any debug information.
1048   Call->setDebugLoc(Region.Call->getDebugLoc());
1049   // Since our output may determine which branch we go to, we make sure to
1050   // propogate this new call value through the module.
1051   OldCall->replaceAllUsesWith(Call);
1052 
1053   // Remove the old instruction.
1054   OldCall->eraseFromParent();
1055   Region.Call = Call;
1056 
1057   // Make sure that the argument in the new function has the SwiftError
1058   // argument.
1059   if (Group.SwiftErrorArgument.hasValue())
1060     Call->addParamAttr(Group.SwiftErrorArgument.getValue(),
1061                        Attribute::SwiftError);
1062 
1063   return Call;
1064 }
1065 
1066 // Within an extracted function, replace the argument uses of the extracted
1067 // region with the arguments of the function for an OutlinableGroup.
1068 //
1069 /// \param [in] Region - The region of extracted code to be changed.
1070 /// \param [in,out] OutputBBs - The BasicBlock for the output stores for this
1071 /// region.
1072 /// \param [in] FirstFunction - A flag to indicate whether we are using this
1073 /// function to define the overall outlined function for all the regions, or
1074 /// if we are operating on one of the following regions.
1075 static void
1076 replaceArgumentUses(OutlinableRegion &Region,
1077                     DenseMap<Value *, BasicBlock *> &OutputBBs,
1078                     bool FirstFunction = false) {
1079   OutlinableGroup &Group = *Region.Parent;
1080   assert(Region.ExtractedFunction && "Region has no extracted function?");
1081 
1082   Function *DominatingFunction = Region.ExtractedFunction;
1083   if (FirstFunction)
1084     DominatingFunction = Group.OutlinedFunction;
1085   DominatorTree DT(*DominatingFunction);
1086 
1087   for (unsigned ArgIdx = 0; ArgIdx < Region.ExtractedFunction->arg_size();
1088        ArgIdx++) {
1089     assert(Region.ExtractedArgToAgg.find(ArgIdx) !=
1090                Region.ExtractedArgToAgg.end() &&
1091            "No mapping from extracted to outlined?");
1092     unsigned AggArgIdx = Region.ExtractedArgToAgg.find(ArgIdx)->second;
1093     Argument *AggArg = Group.OutlinedFunction->getArg(AggArgIdx);
1094     Argument *Arg = Region.ExtractedFunction->getArg(ArgIdx);
1095     // The argument is an input, so we can simply replace it with the overall
1096     // argument value
1097     if (ArgIdx < Region.NumExtractedInputs) {
1098       LLVM_DEBUG(dbgs() << "Replacing uses of input " << *Arg << " in function "
1099                         << *Region.ExtractedFunction << " with " << *AggArg
1100                         << " in function " << *Group.OutlinedFunction << "\n");
1101       Arg->replaceAllUsesWith(AggArg);
1102       continue;
1103     }
1104 
1105     // If we are replacing an output, we place the store value in its own
1106     // block inside the overall function before replacing the use of the output
1107     // in the function.
1108     assert(Arg->hasOneUse() && "Output argument can only have one use");
1109     User *InstAsUser = Arg->user_back();
1110     assert(InstAsUser && "User is nullptr!");
1111 
1112     Instruction *I = cast<Instruction>(InstAsUser);
1113     BasicBlock *BB = I->getParent();
1114     SmallVector<BasicBlock *, 4> Descendants;
1115     DT.getDescendants(BB, Descendants);
1116     bool EdgeAdded = false;
1117     if (Descendants.size() == 0) {
1118       EdgeAdded = true;
1119       DT.insertEdge(&DominatingFunction->getEntryBlock(), BB);
1120       DT.getDescendants(BB, Descendants);
1121     }
1122 
1123     // Iterate over the following blocks, looking for return instructions,
1124     // if we find one, find the corresponding output block for the return value
1125     // and move our store instruction there.
1126     for (BasicBlock *DescendBB : Descendants) {
1127       ReturnInst *RI = dyn_cast<ReturnInst>(DescendBB->getTerminator());
1128       if (!RI)
1129         continue;
1130       Value *RetVal = RI->getReturnValue();
1131       auto VBBIt = OutputBBs.find(RetVal);
1132       assert(VBBIt != OutputBBs.end() && "Could not find output value!");
1133 
1134       // If this is storing a PHINode, we must make sure it is included in the
1135       // overall function.
1136       StoreInst *SI = cast<StoreInst>(I);
1137 
1138       Value *ValueOperand = SI->getValueOperand();
1139 
1140       StoreInst *NewI = cast<StoreInst>(I->clone());
1141       NewI->setDebugLoc(DebugLoc());
1142       BasicBlock *OutputBB = VBBIt->second;
1143       OutputBB->getInstList().push_back(NewI);
1144       LLVM_DEBUG(dbgs() << "Move store for instruction " << *I << " to "
1145                         << *OutputBB << "\n");
1146 
1147       if (FirstFunction)
1148         continue;
1149       Value *CorrVal =
1150           Region.findCorrespondingValueIn(*Group.Regions[0], ValueOperand);
1151       assert(CorrVal && "Value is nullptr?");
1152       NewI->setOperand(0, CorrVal);
1153     }
1154 
1155     // If we added an edge for basic blocks without a predecessor, we remove it
1156     // here.
1157     if (EdgeAdded)
1158       DT.deleteEdge(&DominatingFunction->getEntryBlock(), BB);
1159     I->eraseFromParent();
1160 
1161     LLVM_DEBUG(dbgs() << "Replacing uses of output " << *Arg << " in function "
1162                       << *Region.ExtractedFunction << " with " << *AggArg
1163                       << " in function " << *Group.OutlinedFunction << "\n");
1164     Arg->replaceAllUsesWith(AggArg);
1165   }
1166 }
1167 
1168 /// Within an extracted function, replace the constants that need to be lifted
1169 /// into arguments with the actual argument.
1170 ///
1171 /// \param Region [in] - The region of extracted code to be changed.
1172 void replaceConstants(OutlinableRegion &Region) {
1173   OutlinableGroup &Group = *Region.Parent;
1174   // Iterate over the constants that need to be elevated into arguments
1175   for (std::pair<unsigned, Constant *> &Const : Region.AggArgToConstant) {
1176     unsigned AggArgIdx = Const.first;
1177     Function *OutlinedFunction = Group.OutlinedFunction;
1178     assert(OutlinedFunction && "Overall Function is not defined?");
1179     Constant *CST = Const.second;
1180     Argument *Arg = Group.OutlinedFunction->getArg(AggArgIdx);
1181     // Identify the argument it will be elevated to, and replace instances of
1182     // that constant in the function.
1183 
1184     // TODO: If in the future constants do not have one global value number,
1185     // i.e. a constant 1 could be mapped to several values, this check will
1186     // have to be more strict.  It cannot be using only replaceUsesWithIf.
1187 
1188     LLVM_DEBUG(dbgs() << "Replacing uses of constant " << *CST
1189                       << " in function " << *OutlinedFunction << " with "
1190                       << *Arg << "\n");
1191     CST->replaceUsesWithIf(Arg, [OutlinedFunction](Use &U) {
1192       if (Instruction *I = dyn_cast<Instruction>(U.getUser()))
1193         return I->getFunction() == OutlinedFunction;
1194       return false;
1195     });
1196   }
1197 }
1198 
1199 /// It is possible that there is a basic block that already performs the same
1200 /// stores. This returns a duplicate block, if it exists
1201 ///
1202 /// \param OutputBBs [in] the blocks we are looking for a duplicate of.
1203 /// \param OutputStoreBBs [in] The existing output blocks.
1204 /// \returns an optional value with the number output block if there is a match.
1205 Optional<unsigned> findDuplicateOutputBlock(
1206     DenseMap<Value *, BasicBlock *> &OutputBBs,
1207     std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs) {
1208 
1209   bool Mismatch = false;
1210   unsigned MatchingNum = 0;
1211   // We compare the new set output blocks to the other sets of output blocks.
1212   // If they are the same number, and have identical instructions, they are
1213   // considered to be the same.
1214   for (DenseMap<Value *, BasicBlock *> &CompBBs : OutputStoreBBs) {
1215     Mismatch = false;
1216     for (std::pair<Value *, BasicBlock *> &VToB : CompBBs) {
1217       DenseMap<Value *, BasicBlock *>::iterator OutputBBIt =
1218           OutputBBs.find(VToB.first);
1219       if (OutputBBIt == OutputBBs.end()) {
1220         Mismatch = true;
1221         break;
1222       }
1223 
1224       BasicBlock *CompBB = VToB.second;
1225       BasicBlock *OutputBB = OutputBBIt->second;
1226       if (CompBB->size() - 1 != OutputBB->size()) {
1227         Mismatch = true;
1228         break;
1229       }
1230 
1231       BasicBlock::iterator NIt = OutputBB->begin();
1232       for (Instruction &I : *CompBB) {
1233         if (isa<BranchInst>(&I))
1234           continue;
1235 
1236         if (!I.isIdenticalTo(&(*NIt))) {
1237           Mismatch = true;
1238           break;
1239         }
1240 
1241         NIt++;
1242       }
1243     }
1244 
1245     if (!Mismatch)
1246       return MatchingNum;
1247 
1248     MatchingNum++;
1249   }
1250 
1251   return None;
1252 }
1253 
1254 /// Remove empty output blocks from the outlined region.
1255 ///
1256 /// \param BlocksToPrune - Mapping of return values output blocks for the \p
1257 /// Region.
1258 /// \param Region - The OutlinableRegion we are analyzing.
1259 static bool
1260 analyzeAndPruneOutputBlocks(DenseMap<Value *, BasicBlock *> &BlocksToPrune,
1261                             OutlinableRegion &Region) {
1262   bool AllRemoved = true;
1263   Value *RetValueForBB;
1264   BasicBlock *NewBB;
1265   SmallVector<Value *, 4> ToRemove;
1266   // Iterate over the output blocks created in the outlined section.
1267   for (std::pair<Value *, BasicBlock *> &VtoBB : BlocksToPrune) {
1268     RetValueForBB = VtoBB.first;
1269     NewBB = VtoBB.second;
1270 
1271     // If there are no instructions, we remove it from the module, and also
1272     // mark the value for removal from the return value to output block mapping.
1273     if (NewBB->size() == 0) {
1274       NewBB->eraseFromParent();
1275       ToRemove.push_back(RetValueForBB);
1276       continue;
1277     }
1278 
1279     // Mark that we could not remove all the blocks since they were not all
1280     // empty.
1281     AllRemoved = false;
1282   }
1283 
1284   // Remove the return value from the mapping.
1285   for (Value *V : ToRemove)
1286     BlocksToPrune.erase(V);
1287 
1288   // Mark the region as having the no output scheme.
1289   if (AllRemoved)
1290     Region.OutputBlockNum = -1;
1291 
1292   return AllRemoved;
1293 }
1294 
1295 /// For the outlined section, move needed the StoreInsts for the output
1296 /// registers into their own block. Then, determine if there is a duplicate
1297 /// output block already created.
1298 ///
1299 /// \param [in] OG - The OutlinableGroup of regions to be outlined.
1300 /// \param [in] Region - The OutlinableRegion that is being analyzed.
1301 /// \param [in,out] OutputBBs - the blocks that stores for this region will be
1302 /// placed in.
1303 /// \param [in] EndBBs - the final blocks of the extracted function.
1304 /// \param [in] OutputMappings - OutputMappings the mapping of values that have
1305 /// been replaced by a new output value.
1306 /// \param [in,out] OutputStoreBBs - The existing output blocks.
1307 static void alignOutputBlockWithAggFunc(
1308     OutlinableGroup &OG, OutlinableRegion &Region,
1309     DenseMap<Value *, BasicBlock *> &OutputBBs,
1310     DenseMap<Value *, BasicBlock *> &EndBBs,
1311     const DenseMap<Value *, Value *> &OutputMappings,
1312     std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs) {
1313   // If none of the output blocks have any instructions, this means that we do
1314   // not have to determine if it matches any of the other output schemes, and we
1315   // don't have to do anything else.
1316   if (analyzeAndPruneOutputBlocks(OutputBBs, Region))
1317     return;
1318 
1319   // Determine is there is a duplicate set of blocks.
1320   Optional<unsigned> MatchingBB =
1321       findDuplicateOutputBlock(OutputBBs, OutputStoreBBs);
1322 
1323   // If there is, we remove the new output blocks.  If it does not,
1324   // we add it to our list of sets of output blocks.
1325   if (MatchingBB.hasValue()) {
1326     LLVM_DEBUG(dbgs() << "Set output block for region in function"
1327                       << Region.ExtractedFunction << " to "
1328                       << MatchingBB.getValue());
1329 
1330     Region.OutputBlockNum = MatchingBB.getValue();
1331     for (std::pair<Value *, BasicBlock *> &VtoBB : OutputBBs)
1332       VtoBB.second->eraseFromParent();
1333     return;
1334   }
1335 
1336   Region.OutputBlockNum = OutputStoreBBs.size();
1337 
1338   Value *RetValueForBB;
1339   BasicBlock *NewBB;
1340   OutputStoreBBs.push_back(DenseMap<Value *, BasicBlock *>());
1341   for (std::pair<Value *, BasicBlock *> &VtoBB : OutputBBs) {
1342     RetValueForBB = VtoBB.first;
1343     NewBB = VtoBB.second;
1344     DenseMap<Value *, BasicBlock *>::iterator VBBIt =
1345         EndBBs.find(RetValueForBB);
1346     LLVM_DEBUG(dbgs() << "Create output block for region in"
1347                       << Region.ExtractedFunction << " to "
1348                       << *NewBB);
1349     BranchInst::Create(VBBIt->second, NewBB);
1350     OutputStoreBBs.back().insert(std::make_pair(RetValueForBB, NewBB));
1351   }
1352 }
1353 
1354 /// Takes in a mapping, \p OldMap of ConstantValues to BasicBlocks, sorts keys,
1355 /// before creating a basic block for each \p NewMap, and inserting into the new
1356 /// block. Each BasicBlock is named with the scheme "<basename>_<key_idx>".
1357 ///
1358 /// \param OldMap [in] - The mapping to base the new mapping off of.
1359 /// \param NewMap [out] - The output mapping using the keys of \p OldMap.
1360 /// \param ParentFunc [in] - The function to put the new basic block in.
1361 /// \param BaseName [in] - The start of the BasicBlock names to be appended to
1362 /// by an index value.
1363 static void createAndInsertBasicBlocks(DenseMap<Value *, BasicBlock *> &OldMap,
1364                                        DenseMap<Value *, BasicBlock *> &NewMap,
1365                                        Function *ParentFunc, Twine BaseName) {
1366   unsigned Idx = 0;
1367   std::vector<Value *> SortedKeys;
1368 
1369   getSortedConstantKeys(SortedKeys, OldMap);
1370 
1371   for (Value *RetVal : SortedKeys) {
1372     BasicBlock *NewBB = BasicBlock::Create(
1373         ParentFunc->getContext(),
1374         Twine(BaseName) + Twine("_") + Twine(static_cast<unsigned>(Idx++)),
1375         ParentFunc);
1376     NewMap.insert(std::make_pair(RetVal, NewBB));
1377   }
1378 }
1379 
1380 /// Create the switch statement for outlined function to differentiate between
1381 /// all the output blocks.
1382 ///
1383 /// For the outlined section, determine if an outlined block already exists that
1384 /// matches the needed stores for the extracted section.
1385 /// \param [in] M - The module we are outlining from.
1386 /// \param [in] OG - The group of regions to be outlined.
1387 /// \param [in] EndBBs - The final blocks of the extracted function.
1388 /// \param [in,out] OutputStoreBBs - The existing output blocks.
1389 void createSwitchStatement(
1390     Module &M, OutlinableGroup &OG, DenseMap<Value *, BasicBlock *> &EndBBs,
1391     std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs) {
1392   // We only need the switch statement if there is more than one store
1393   // combination.
1394   if (OG.OutputGVNCombinations.size() > 1) {
1395     Function *AggFunc = OG.OutlinedFunction;
1396     // Create a final block for each different return block.
1397     DenseMap<Value *, BasicBlock *> ReturnBBs;
1398     createAndInsertBasicBlocks(OG.EndBBs, ReturnBBs, AggFunc, "final_block");
1399 
1400     for (std::pair<Value *, BasicBlock *> &RetBlockPair : ReturnBBs) {
1401       std::pair<Value *, BasicBlock *> &OutputBlock =
1402           *OG.EndBBs.find(RetBlockPair.first);
1403       BasicBlock *ReturnBlock = RetBlockPair.second;
1404       BasicBlock *EndBB = OutputBlock.second;
1405       Instruction *Term = EndBB->getTerminator();
1406       // Move the return value to the final block instead of the original exit
1407       // stub.
1408       Term->moveBefore(*ReturnBlock, ReturnBlock->end());
1409       // Put the switch statement in the old end basic block for the function
1410       // with a fall through to the new return block.
1411       LLVM_DEBUG(dbgs() << "Create switch statement in " << *AggFunc << " for "
1412                         << OutputStoreBBs.size() << "\n");
1413       SwitchInst *SwitchI =
1414           SwitchInst::Create(AggFunc->getArg(AggFunc->arg_size() - 1),
1415                              ReturnBlock, OutputStoreBBs.size(), EndBB);
1416 
1417       unsigned Idx = 0;
1418       for (DenseMap<Value *, BasicBlock *> &OutputStoreBB : OutputStoreBBs) {
1419         DenseMap<Value *, BasicBlock *>::iterator OSBBIt =
1420             OutputStoreBB.find(OutputBlock.first);
1421 
1422         if (OSBBIt == OutputStoreBB.end())
1423           continue;
1424 
1425         BasicBlock *BB = OSBBIt->second;
1426         SwitchI->addCase(
1427             ConstantInt::get(Type::getInt32Ty(M.getContext()), Idx), BB);
1428         Term = BB->getTerminator();
1429         Term->setSuccessor(0, ReturnBlock);
1430         Idx++;
1431       }
1432     }
1433     return;
1434   }
1435 
1436   // If there needs to be stores, move them from the output blocks to their
1437   // corresponding ending block.
1438   if (OutputStoreBBs.size() == 1) {
1439     LLVM_DEBUG(dbgs() << "Move store instructions to the end block in "
1440                       << *OG.OutlinedFunction << "\n");
1441     DenseMap<Value *, BasicBlock *> OutputBlocks = OutputStoreBBs[0];
1442     for (std::pair<Value *, BasicBlock *> &VBPair : OutputBlocks) {
1443       DenseMap<Value *, BasicBlock *>::iterator EndBBIt =
1444           EndBBs.find(VBPair.first);
1445       assert(EndBBIt != EndBBs.end() && "Could not find end block");
1446       BasicBlock *EndBB = EndBBIt->second;
1447       BasicBlock *OutputBB = VBPair.second;
1448       Instruction *Term = OutputBB->getTerminator();
1449       Term->eraseFromParent();
1450       Term = EndBB->getTerminator();
1451       moveBBContents(*OutputBB, *EndBB);
1452       Term->moveBefore(*EndBB, EndBB->end());
1453       OutputBB->eraseFromParent();
1454     }
1455   }
1456 }
1457 
1458 /// Fill the new function that will serve as the replacement function for all of
1459 /// the extracted regions of a certain structure from the first region in the
1460 /// list of regions.  Replace this first region's extracted function with the
1461 /// new overall function.
1462 ///
1463 /// \param [in] M - The module we are outlining from.
1464 /// \param [in] CurrentGroup - The group of regions to be outlined.
1465 /// \param [in,out] OutputStoreBBs - The output blocks for each different
1466 /// set of stores needed for the different functions.
1467 /// \param [in,out] FuncsToRemove - Extracted functions to erase from module
1468 /// once outlining is complete.
1469 static void fillOverallFunction(
1470     Module &M, OutlinableGroup &CurrentGroup,
1471     std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs,
1472     std::vector<Function *> &FuncsToRemove) {
1473   OutlinableRegion *CurrentOS = CurrentGroup.Regions[0];
1474 
1475   // Move first extracted function's instructions into new function.
1476   LLVM_DEBUG(dbgs() << "Move instructions from "
1477                     << *CurrentOS->ExtractedFunction << " to instruction "
1478                     << *CurrentGroup.OutlinedFunction << "\n");
1479   moveFunctionData(*CurrentOS->ExtractedFunction,
1480                    *CurrentGroup.OutlinedFunction, CurrentGroup.EndBBs);
1481 
1482   // Transfer the attributes from the function to the new function.
1483   for (Attribute A : CurrentOS->ExtractedFunction->getAttributes().getFnAttrs())
1484     CurrentGroup.OutlinedFunction->addFnAttr(A);
1485 
1486   // Create a new set of output blocks for the first extracted function.
1487   DenseMap<Value *, BasicBlock *> NewBBs;
1488   createAndInsertBasicBlocks(CurrentGroup.EndBBs, NewBBs,
1489                              CurrentGroup.OutlinedFunction, "output_block_0");
1490   CurrentOS->OutputBlockNum = 0;
1491 
1492   replaceArgumentUses(*CurrentOS, NewBBs, true);
1493   replaceConstants(*CurrentOS);
1494 
1495   // We first identify if any output blocks are empty, if they are we remove
1496   // them. We then create a branch instruction to the basic block to the return
1497   // block for the function for each non empty output block.
1498   if (!analyzeAndPruneOutputBlocks(NewBBs, *CurrentOS)) {
1499     OutputStoreBBs.push_back(DenseMap<Value *, BasicBlock *>());
1500     for (std::pair<Value *, BasicBlock *> &VToBB : NewBBs) {
1501       DenseMap<Value *, BasicBlock *>::iterator VBBIt =
1502           CurrentGroup.EndBBs.find(VToBB.first);
1503       BasicBlock *EndBB = VBBIt->second;
1504       BranchInst::Create(EndBB, VToBB.second);
1505       OutputStoreBBs.back().insert(VToBB);
1506     }
1507   }
1508 
1509   // Replace the call to the extracted function with the outlined function.
1510   CurrentOS->Call = replaceCalledFunction(M, *CurrentOS);
1511 
1512   // We only delete the extracted functions at the end since we may need to
1513   // reference instructions contained in them for mapping purposes.
1514   FuncsToRemove.push_back(CurrentOS->ExtractedFunction);
1515 }
1516 
1517 void IROutliner::deduplicateExtractedSections(
1518     Module &M, OutlinableGroup &CurrentGroup,
1519     std::vector<Function *> &FuncsToRemove, unsigned &OutlinedFunctionNum) {
1520   createFunction(M, CurrentGroup, OutlinedFunctionNum);
1521 
1522   std::vector<DenseMap<Value *, BasicBlock *>> OutputStoreBBs;
1523 
1524   OutlinableRegion *CurrentOS;
1525 
1526   fillOverallFunction(M, CurrentGroup, OutputStoreBBs, FuncsToRemove);
1527 
1528   std::vector<Value *> SortedKeys;
1529   for (unsigned Idx = 1; Idx < CurrentGroup.Regions.size(); Idx++) {
1530     CurrentOS = CurrentGroup.Regions[Idx];
1531     AttributeFuncs::mergeAttributesForOutlining(*CurrentGroup.OutlinedFunction,
1532                                                *CurrentOS->ExtractedFunction);
1533 
1534     // Create a set of BasicBlocks, one for each return block, to hold the
1535     // needed store instructions.
1536     DenseMap<Value *, BasicBlock *> NewBBs;
1537     createAndInsertBasicBlocks(
1538         CurrentGroup.EndBBs, NewBBs, CurrentGroup.OutlinedFunction,
1539         "output_block_" + Twine(static_cast<unsigned>(Idx)));
1540 
1541     replaceArgumentUses(*CurrentOS, NewBBs);
1542     alignOutputBlockWithAggFunc(CurrentGroup, *CurrentOS, NewBBs,
1543                                 CurrentGroup.EndBBs, OutputMappings,
1544                                 OutputStoreBBs);
1545 
1546     CurrentOS->Call = replaceCalledFunction(M, *CurrentOS);
1547     FuncsToRemove.push_back(CurrentOS->ExtractedFunction);
1548   }
1549 
1550   // Create a switch statement to handle the different output schemes.
1551   createSwitchStatement(M, CurrentGroup, CurrentGroup.EndBBs, OutputStoreBBs);
1552 
1553   OutlinedFunctionNum++;
1554 }
1555 
1556 /// Checks that the next instruction in the InstructionDataList matches the
1557 /// next instruction in the module.  If they do not, there could be the
1558 /// possibility that extra code has been inserted, and we must ignore it.
1559 ///
1560 /// \param ID - The IRInstructionData to check the next instruction of.
1561 /// \returns true if the InstructionDataList and actual instruction match.
1562 static bool nextIRInstructionDataMatchesNextInst(IRInstructionData &ID) {
1563   // We check if there is a discrepancy between the InstructionDataList
1564   // and the actual next instruction in the module.  If there is, it means
1565   // that an extra instruction was added, likely by the CodeExtractor.
1566 
1567   // Since we do not have any similarity data about this particular
1568   // instruction, we cannot confidently outline it, and must discard this
1569   // candidate.
1570   IRInstructionDataList::iterator NextIDIt = std::next(ID.getIterator());
1571   Instruction *NextIDLInst = NextIDIt->Inst;
1572   Instruction *NextModuleInst = nullptr;
1573   if (!ID.Inst->isTerminator())
1574     NextModuleInst = ID.Inst->getNextNonDebugInstruction();
1575   else if (NextIDLInst != nullptr)
1576     NextModuleInst =
1577         &*NextIDIt->Inst->getParent()->instructionsWithoutDebug().begin();
1578 
1579   if (NextIDLInst && NextIDLInst != NextModuleInst)
1580     return false;
1581 
1582   return true;
1583 }
1584 
1585 bool IROutliner::isCompatibleWithAlreadyOutlinedCode(
1586     const OutlinableRegion &Region) {
1587   IRSimilarityCandidate *IRSC = Region.Candidate;
1588   unsigned StartIdx = IRSC->getStartIdx();
1589   unsigned EndIdx = IRSC->getEndIdx();
1590 
1591   // A check to make sure that we are not about to attempt to outline something
1592   // that has already been outlined.
1593   for (unsigned Idx = StartIdx; Idx <= EndIdx; Idx++)
1594     if (Outlined.contains(Idx))
1595       return false;
1596 
1597   // We check if the recorded instruction matches the actual next instruction,
1598   // if it does not, we fix it in the InstructionDataList.
1599   if (!Region.Candidate->backInstruction()->isTerminator()) {
1600     Instruction *NewEndInst =
1601         Region.Candidate->backInstruction()->getNextNonDebugInstruction();
1602     assert(NewEndInst && "Next instruction is a nullptr?");
1603     if (Region.Candidate->end()->Inst != NewEndInst) {
1604       IRInstructionDataList *IDL = Region.Candidate->front()->IDL;
1605       IRInstructionData *NewEndIRID = new (InstDataAllocator.Allocate())
1606           IRInstructionData(*NewEndInst,
1607                             InstructionClassifier.visit(*NewEndInst), *IDL);
1608 
1609       // Insert the first IRInstructionData of the new region after the
1610       // last IRInstructionData of the IRSimilarityCandidate.
1611       IDL->insert(Region.Candidate->end(), *NewEndIRID);
1612     }
1613   }
1614 
1615   return none_of(*IRSC, [this](IRInstructionData &ID) {
1616     if (!nextIRInstructionDataMatchesNextInst(ID))
1617       return true;
1618 
1619     return !this->InstructionClassifier.visit(ID.Inst);
1620   });
1621 }
1622 
1623 void IROutliner::pruneIncompatibleRegions(
1624     std::vector<IRSimilarityCandidate> &CandidateVec,
1625     OutlinableGroup &CurrentGroup) {
1626   bool PreviouslyOutlined;
1627 
1628   // Sort from beginning to end, so the IRSimilarityCandidates are in order.
1629   stable_sort(CandidateVec, [](const IRSimilarityCandidate &LHS,
1630                                const IRSimilarityCandidate &RHS) {
1631     return LHS.getStartIdx() < RHS.getStartIdx();
1632   });
1633 
1634   IRSimilarityCandidate &FirstCandidate = CandidateVec[0];
1635   // Since outlining a call and a branch instruction will be the same as only
1636   // outlinining a call instruction, we ignore it as a space saving.
1637   if (FirstCandidate.getLength() == 2) {
1638     if (isa<CallInst>(FirstCandidate.front()->Inst) &&
1639         isa<BranchInst>(FirstCandidate.back()->Inst))
1640         return;
1641   }
1642 
1643   unsigned CurrentEndIdx = 0;
1644   for (IRSimilarityCandidate &IRSC : CandidateVec) {
1645     PreviouslyOutlined = false;
1646     unsigned StartIdx = IRSC.getStartIdx();
1647     unsigned EndIdx = IRSC.getEndIdx();
1648 
1649     for (unsigned Idx = StartIdx; Idx <= EndIdx; Idx++)
1650       if (Outlined.contains(Idx)) {
1651         PreviouslyOutlined = true;
1652         break;
1653       }
1654 
1655     if (PreviouslyOutlined)
1656       continue;
1657 
1658     // Check over the instructions, and if the basic block has its address
1659     // taken for use somewhere else, we do not outline that block.
1660     bool BBHasAddressTaken = any_of(IRSC, [](IRInstructionData &ID){
1661       return ID.Inst->getParent()->hasAddressTaken();
1662     });
1663 
1664     if (BBHasAddressTaken)
1665       continue;
1666 
1667     if (IRSC.front()->Inst->getFunction()->hasLinkOnceODRLinkage() &&
1668         !OutlineFromLinkODRs)
1669       continue;
1670 
1671     // Greedily prune out any regions that will overlap with already chosen
1672     // regions.
1673     if (CurrentEndIdx != 0 && StartIdx <= CurrentEndIdx)
1674       continue;
1675 
1676     bool BadInst = any_of(IRSC, [this](IRInstructionData &ID) {
1677       if (!nextIRInstructionDataMatchesNextInst(ID))
1678         return true;
1679 
1680       return !this->InstructionClassifier.visit(ID.Inst);
1681     });
1682 
1683     if (BadInst)
1684       continue;
1685 
1686     OutlinableRegion *OS = new (RegionAllocator.Allocate())
1687         OutlinableRegion(IRSC, CurrentGroup);
1688     CurrentGroup.Regions.push_back(OS);
1689 
1690     CurrentEndIdx = EndIdx;
1691   }
1692 }
1693 
1694 InstructionCost
1695 IROutliner::findBenefitFromAllRegions(OutlinableGroup &CurrentGroup) {
1696   InstructionCost RegionBenefit = 0;
1697   for (OutlinableRegion *Region : CurrentGroup.Regions) {
1698     TargetTransformInfo &TTI = getTTI(*Region->StartBB->getParent());
1699     // We add the number of instructions in the region to the benefit as an
1700     // estimate as to how much will be removed.
1701     RegionBenefit += Region->getBenefit(TTI);
1702     LLVM_DEBUG(dbgs() << "Adding: " << RegionBenefit
1703                       << " saved instructions to overfall benefit.\n");
1704   }
1705 
1706   return RegionBenefit;
1707 }
1708 
1709 InstructionCost
1710 IROutliner::findCostOutputReloads(OutlinableGroup &CurrentGroup) {
1711   InstructionCost OverallCost = 0;
1712   for (OutlinableRegion *Region : CurrentGroup.Regions) {
1713     TargetTransformInfo &TTI = getTTI(*Region->StartBB->getParent());
1714 
1715     // Each output incurs a load after the call, so we add that to the cost.
1716     for (unsigned OutputGVN : Region->GVNStores) {
1717       Optional<Value *> OV = Region->Candidate->fromGVN(OutputGVN);
1718       assert(OV.hasValue() && "Could not find value for GVN?");
1719       Value *V = OV.getValue();
1720       InstructionCost LoadCost =
1721           TTI.getMemoryOpCost(Instruction::Load, V->getType(), Align(1), 0,
1722                               TargetTransformInfo::TCK_CodeSize);
1723 
1724       LLVM_DEBUG(dbgs() << "Adding: " << LoadCost
1725                         << " instructions to cost for output of type "
1726                         << *V->getType() << "\n");
1727       OverallCost += LoadCost;
1728     }
1729   }
1730 
1731   return OverallCost;
1732 }
1733 
1734 /// Find the extra instructions needed to handle any output values for the
1735 /// region.
1736 ///
1737 /// \param [in] M - The Module to outline from.
1738 /// \param [in] CurrentGroup - The collection of OutlinableRegions to analyze.
1739 /// \param [in] TTI - The TargetTransformInfo used to collect information for
1740 /// new instruction costs.
1741 /// \returns the additional cost to handle the outputs.
1742 static InstructionCost findCostForOutputBlocks(Module &M,
1743                                                OutlinableGroup &CurrentGroup,
1744                                                TargetTransformInfo &TTI) {
1745   InstructionCost OutputCost = 0;
1746   unsigned NumOutputBranches = 0;
1747 
1748   IRSimilarityCandidate &Candidate = *CurrentGroup.Regions[0]->Candidate;
1749   DenseSet<BasicBlock *> CandidateBlocks;
1750   Candidate.getBasicBlocks(CandidateBlocks);
1751 
1752   // Count the number of different output branches that point to blocks outside
1753   // of the region.
1754   DenseSet<BasicBlock *> FoundBlocks;
1755   for (IRInstructionData &ID : Candidate) {
1756     if (!isa<BranchInst>(ID.Inst))
1757       continue;
1758 
1759     for (Value *V : ID.OperVals) {
1760       BasicBlock *BB = static_cast<BasicBlock *>(V);
1761       DenseSet<BasicBlock *>::iterator CBIt = CandidateBlocks.find(BB);
1762       if (CBIt != CandidateBlocks.end() || FoundBlocks.contains(BB))
1763         continue;
1764       FoundBlocks.insert(BB);
1765       NumOutputBranches++;
1766     }
1767   }
1768 
1769   CurrentGroup.BranchesToOutside = NumOutputBranches;
1770 
1771   for (const ArrayRef<unsigned> &OutputUse :
1772        CurrentGroup.OutputGVNCombinations) {
1773     for (unsigned GVN : OutputUse) {
1774       Optional<Value *> OV = Candidate.fromGVN(GVN);
1775       assert(OV.hasValue() && "Could not find value for GVN?");
1776       Value *V = OV.getValue();
1777       InstructionCost StoreCost =
1778           TTI.getMemoryOpCost(Instruction::Load, V->getType(), Align(1), 0,
1779                               TargetTransformInfo::TCK_CodeSize);
1780 
1781       // An instruction cost is added for each store set that needs to occur for
1782       // various output combinations inside the function, plus a branch to
1783       // return to the exit block.
1784       LLVM_DEBUG(dbgs() << "Adding: " << StoreCost
1785                         << " instructions to cost for output of type "
1786                         << *V->getType() << "\n");
1787       OutputCost += StoreCost * NumOutputBranches;
1788     }
1789 
1790     InstructionCost BranchCost =
1791         TTI.getCFInstrCost(Instruction::Br, TargetTransformInfo::TCK_CodeSize);
1792     LLVM_DEBUG(dbgs() << "Adding " << BranchCost << " to the current cost for"
1793                       << " a branch instruction\n");
1794     OutputCost += BranchCost * NumOutputBranches;
1795   }
1796 
1797   // If there is more than one output scheme, we must have a comparison and
1798   // branch for each different item in the switch statement.
1799   if (CurrentGroup.OutputGVNCombinations.size() > 1) {
1800     InstructionCost ComparisonCost = TTI.getCmpSelInstrCost(
1801         Instruction::ICmp, Type::getInt32Ty(M.getContext()),
1802         Type::getInt32Ty(M.getContext()), CmpInst::BAD_ICMP_PREDICATE,
1803         TargetTransformInfo::TCK_CodeSize);
1804     InstructionCost BranchCost =
1805         TTI.getCFInstrCost(Instruction::Br, TargetTransformInfo::TCK_CodeSize);
1806 
1807     unsigned DifferentBlocks = CurrentGroup.OutputGVNCombinations.size();
1808     InstructionCost TotalCost = ComparisonCost * BranchCost * DifferentBlocks;
1809 
1810     LLVM_DEBUG(dbgs() << "Adding: " << TotalCost
1811                       << " instructions for each switch case for each different"
1812                       << " output path in a function\n");
1813     OutputCost += TotalCost * NumOutputBranches;
1814   }
1815 
1816   return OutputCost;
1817 }
1818 
1819 void IROutliner::findCostBenefit(Module &M, OutlinableGroup &CurrentGroup) {
1820   InstructionCost RegionBenefit = findBenefitFromAllRegions(CurrentGroup);
1821   CurrentGroup.Benefit += RegionBenefit;
1822   LLVM_DEBUG(dbgs() << "Current Benefit: " << CurrentGroup.Benefit << "\n");
1823 
1824   InstructionCost OutputReloadCost = findCostOutputReloads(CurrentGroup);
1825   CurrentGroup.Cost += OutputReloadCost;
1826   LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
1827 
1828   InstructionCost AverageRegionBenefit =
1829       RegionBenefit / CurrentGroup.Regions.size();
1830   unsigned OverallArgumentNum = CurrentGroup.ArgumentTypes.size();
1831   unsigned NumRegions = CurrentGroup.Regions.size();
1832   TargetTransformInfo &TTI =
1833       getTTI(*CurrentGroup.Regions[0]->Candidate->getFunction());
1834 
1835   // We add one region to the cost once, to account for the instructions added
1836   // inside of the newly created function.
1837   LLVM_DEBUG(dbgs() << "Adding: " << AverageRegionBenefit
1838                     << " instructions to cost for body of new function.\n");
1839   CurrentGroup.Cost += AverageRegionBenefit;
1840   LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
1841 
1842   // For each argument, we must add an instruction for loading the argument
1843   // out of the register and into a value inside of the newly outlined function.
1844   LLVM_DEBUG(dbgs() << "Adding: " << OverallArgumentNum
1845                     << " instructions to cost for each argument in the new"
1846                     << " function.\n");
1847   CurrentGroup.Cost +=
1848       OverallArgumentNum * TargetTransformInfo::TCC_Basic;
1849   LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
1850 
1851   // Each argument needs to either be loaded into a register or onto the stack.
1852   // Some arguments will only be loaded into the stack once the argument
1853   // registers are filled.
1854   LLVM_DEBUG(dbgs() << "Adding: " << OverallArgumentNum
1855                     << " instructions to cost for each argument in the new"
1856                     << " function " << NumRegions << " times for the "
1857                     << "needed argument handling at the call site.\n");
1858   CurrentGroup.Cost +=
1859       2 * OverallArgumentNum * TargetTransformInfo::TCC_Basic * NumRegions;
1860   LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
1861 
1862   CurrentGroup.Cost += findCostForOutputBlocks(M, CurrentGroup, TTI);
1863   LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
1864 }
1865 
1866 void IROutliner::updateOutputMapping(OutlinableRegion &Region,
1867                                      ArrayRef<Value *> Outputs,
1868                                      LoadInst *LI) {
1869   // For and load instructions following the call
1870   Value *Operand = LI->getPointerOperand();
1871   Optional<unsigned> OutputIdx = None;
1872   // Find if the operand it is an output register.
1873   for (unsigned ArgIdx = Region.NumExtractedInputs;
1874        ArgIdx < Region.Call->arg_size(); ArgIdx++) {
1875     if (Operand == Region.Call->getArgOperand(ArgIdx)) {
1876       OutputIdx = ArgIdx - Region.NumExtractedInputs;
1877       break;
1878     }
1879   }
1880 
1881   // If we found an output register, place a mapping of the new value
1882   // to the original in the mapping.
1883   if (!OutputIdx.hasValue())
1884     return;
1885 
1886   if (OutputMappings.find(Outputs[OutputIdx.getValue()]) ==
1887       OutputMappings.end()) {
1888     LLVM_DEBUG(dbgs() << "Mapping extracted output " << *LI << " to "
1889                       << *Outputs[OutputIdx.getValue()] << "\n");
1890     OutputMappings.insert(std::make_pair(LI, Outputs[OutputIdx.getValue()]));
1891   } else {
1892     Value *Orig = OutputMappings.find(Outputs[OutputIdx.getValue()])->second;
1893     LLVM_DEBUG(dbgs() << "Mapping extracted output " << *Orig << " to "
1894                       << *Outputs[OutputIdx.getValue()] << "\n");
1895     OutputMappings.insert(std::make_pair(LI, Orig));
1896   }
1897 }
1898 
1899 bool IROutliner::extractSection(OutlinableRegion &Region) {
1900   SetVector<Value *> ArgInputs, Outputs, SinkCands;
1901   assert(Region.StartBB && "StartBB for the OutlinableRegion is nullptr!");
1902   BasicBlock *InitialStart = Region.StartBB;
1903   Function *OrigF = Region.StartBB->getParent();
1904   CodeExtractorAnalysisCache CEAC(*OrigF);
1905   Region.ExtractedFunction =
1906       Region.CE->extractCodeRegion(CEAC, ArgInputs, Outputs);
1907 
1908   // If the extraction was successful, find the BasicBlock, and reassign the
1909   // OutlinableRegion blocks
1910   if (!Region.ExtractedFunction) {
1911     LLVM_DEBUG(dbgs() << "CodeExtractor failed to outline " << Region.StartBB
1912                       << "\n");
1913     Region.reattachCandidate();
1914     return false;
1915   }
1916 
1917   // Get the block containing the called branch, and reassign the blocks as
1918   // necessary.  If the original block still exists, it is because we ended on
1919   // a branch instruction, and so we move the contents into the block before
1920   // and assign the previous block correctly.
1921   User *InstAsUser = Region.ExtractedFunction->user_back();
1922   BasicBlock *RewrittenBB = cast<Instruction>(InstAsUser)->getParent();
1923   Region.PrevBB = RewrittenBB->getSinglePredecessor();
1924   assert(Region.PrevBB && "PrevBB is nullptr?");
1925   if (Region.PrevBB == InitialStart) {
1926     BasicBlock *NewPrev = InitialStart->getSinglePredecessor();
1927     Instruction *BI = NewPrev->getTerminator();
1928     BI->eraseFromParent();
1929     moveBBContents(*InitialStart, *NewPrev);
1930     Region.PrevBB = NewPrev;
1931     InitialStart->eraseFromParent();
1932   }
1933 
1934   Region.StartBB = RewrittenBB;
1935   Region.EndBB = RewrittenBB;
1936 
1937   // The sequences of outlinable regions has now changed.  We must fix the
1938   // IRInstructionDataList for consistency.  Although they may not be illegal
1939   // instructions, they should not be compared with anything else as they
1940   // should not be outlined in this round.  So marking these as illegal is
1941   // allowed.
1942   IRInstructionDataList *IDL = Region.Candidate->front()->IDL;
1943   Instruction *BeginRewritten = &*RewrittenBB->begin();
1944   Instruction *EndRewritten = &*RewrittenBB->begin();
1945   Region.NewFront = new (InstDataAllocator.Allocate()) IRInstructionData(
1946       *BeginRewritten, InstructionClassifier.visit(*BeginRewritten), *IDL);
1947   Region.NewBack = new (InstDataAllocator.Allocate()) IRInstructionData(
1948       *EndRewritten, InstructionClassifier.visit(*EndRewritten), *IDL);
1949 
1950   // Insert the first IRInstructionData of the new region in front of the
1951   // first IRInstructionData of the IRSimilarityCandidate.
1952   IDL->insert(Region.Candidate->begin(), *Region.NewFront);
1953   // Insert the first IRInstructionData of the new region after the
1954   // last IRInstructionData of the IRSimilarityCandidate.
1955   IDL->insert(Region.Candidate->end(), *Region.NewBack);
1956   // Remove the IRInstructionData from the IRSimilarityCandidate.
1957   IDL->erase(Region.Candidate->begin(), std::prev(Region.Candidate->end()));
1958 
1959   assert(RewrittenBB != nullptr &&
1960          "Could not find a predecessor after extraction!");
1961 
1962   // Iterate over the new set of instructions to find the new call
1963   // instruction.
1964   for (Instruction &I : *RewrittenBB)
1965     if (CallInst *CI = dyn_cast<CallInst>(&I)) {
1966       if (Region.ExtractedFunction == CI->getCalledFunction())
1967         Region.Call = CI;
1968     } else if (LoadInst *LI = dyn_cast<LoadInst>(&I))
1969       updateOutputMapping(Region, Outputs.getArrayRef(), LI);
1970   Region.reattachCandidate();
1971   return true;
1972 }
1973 
1974 unsigned IROutliner::doOutline(Module &M) {
1975   // Find the possible similarity sections.
1976   InstructionClassifier.EnableBranches = !DisableBranches;
1977   IRSimilarityIdentifier &Identifier = getIRSI(M);
1978   SimilarityGroupList &SimilarityCandidates = *Identifier.getSimilarity();
1979 
1980   // Sort them by size of extracted sections
1981   unsigned OutlinedFunctionNum = 0;
1982   // If we only have one SimilarityGroup in SimilarityCandidates, we do not have
1983   // to sort them by the potential number of instructions to be outlined
1984   if (SimilarityCandidates.size() > 1)
1985     llvm::stable_sort(SimilarityCandidates,
1986                       [](const std::vector<IRSimilarityCandidate> &LHS,
1987                          const std::vector<IRSimilarityCandidate> &RHS) {
1988                         return LHS[0].getLength() * LHS.size() >
1989                                RHS[0].getLength() * RHS.size();
1990                       });
1991   // Creating OutlinableGroups for each SimilarityCandidate to be used in
1992   // each of the following for loops to avoid making an allocator.
1993   std::vector<OutlinableGroup> PotentialGroups(SimilarityCandidates.size());
1994 
1995   DenseSet<unsigned> NotSame;
1996   std::vector<OutlinableGroup *> NegativeCostGroups;
1997   std::vector<OutlinableRegion *> OutlinedRegions;
1998   // Iterate over the possible sets of similarity.
1999   unsigned PotentialGroupIdx = 0;
2000   for (SimilarityGroup &CandidateVec : SimilarityCandidates) {
2001     OutlinableGroup &CurrentGroup = PotentialGroups[PotentialGroupIdx++];
2002 
2003     // Remove entries that were previously outlined
2004     pruneIncompatibleRegions(CandidateVec, CurrentGroup);
2005 
2006     // We pruned the number of regions to 0 to 1, meaning that it's not worth
2007     // trying to outlined since there is no compatible similar instance of this
2008     // code.
2009     if (CurrentGroup.Regions.size() < 2)
2010       continue;
2011 
2012     // Determine if there are any values that are the same constant throughout
2013     // each section in the set.
2014     NotSame.clear();
2015     CurrentGroup.findSameConstants(NotSame);
2016 
2017     if (CurrentGroup.IgnoreGroup)
2018       continue;
2019 
2020     // Create a CodeExtractor for each outlinable region. Identify inputs and
2021     // outputs for each section using the code extractor and create the argument
2022     // types for the Aggregate Outlining Function.
2023     OutlinedRegions.clear();
2024     for (OutlinableRegion *OS : CurrentGroup.Regions) {
2025       // Break the outlinable region out of its parent BasicBlock into its own
2026       // BasicBlocks (see function implementation).
2027       OS->splitCandidate();
2028 
2029       // There's a chance that when the region is split, extra instructions are
2030       // added to the region. This makes the region no longer viable
2031       // to be split, so we ignore it for outlining.
2032       if (!OS->CandidateSplit)
2033         continue;
2034 
2035       SmallVector<BasicBlock *> BE;
2036       DenseSet<BasicBlock *> BBSet;
2037       OS->Candidate->getBasicBlocks(BBSet, BE);
2038       OS->CE = new (ExtractorAllocator.Allocate())
2039           CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false,
2040                         false, "outlined");
2041       findAddInputsOutputs(M, *OS, NotSame);
2042       if (!OS->IgnoreRegion)
2043         OutlinedRegions.push_back(OS);
2044 
2045       // We recombine the blocks together now that we have gathered all the
2046       // needed information.
2047       OS->reattachCandidate();
2048     }
2049 
2050     CurrentGroup.Regions = std::move(OutlinedRegions);
2051 
2052     if (CurrentGroup.Regions.empty())
2053       continue;
2054 
2055     CurrentGroup.collectGVNStoreSets(M);
2056 
2057     if (CostModel)
2058       findCostBenefit(M, CurrentGroup);
2059 
2060     // If we are adhering to the cost model, skip those groups where the cost
2061     // outweighs the benefits.
2062     if (CurrentGroup.Cost >= CurrentGroup.Benefit && CostModel) {
2063       OptimizationRemarkEmitter &ORE =
2064           getORE(*CurrentGroup.Regions[0]->Candidate->getFunction());
2065       ORE.emit([&]() {
2066         IRSimilarityCandidate *C = CurrentGroup.Regions[0]->Candidate;
2067         OptimizationRemarkMissed R(DEBUG_TYPE, "WouldNotDecreaseSize",
2068                                    C->frontInstruction());
2069         R << "did not outline "
2070           << ore::NV(std::to_string(CurrentGroup.Regions.size()))
2071           << " regions due to estimated increase of "
2072           << ore::NV("InstructionIncrease",
2073                      CurrentGroup.Cost - CurrentGroup.Benefit)
2074           << " instructions at locations ";
2075         interleave(
2076             CurrentGroup.Regions.begin(), CurrentGroup.Regions.end(),
2077             [&R](OutlinableRegion *Region) {
2078               R << ore::NV(
2079                   "DebugLoc",
2080                   Region->Candidate->frontInstruction()->getDebugLoc());
2081             },
2082             [&R]() { R << " "; });
2083         return R;
2084       });
2085       continue;
2086     }
2087 
2088     NegativeCostGroups.push_back(&CurrentGroup);
2089   }
2090 
2091   ExtractorAllocator.DestroyAll();
2092 
2093   if (NegativeCostGroups.size() > 1)
2094     stable_sort(NegativeCostGroups,
2095                 [](const OutlinableGroup *LHS, const OutlinableGroup *RHS) {
2096                   return LHS->Benefit - LHS->Cost > RHS->Benefit - RHS->Cost;
2097                 });
2098 
2099   std::vector<Function *> FuncsToRemove;
2100   for (OutlinableGroup *CG : NegativeCostGroups) {
2101     OutlinableGroup &CurrentGroup = *CG;
2102 
2103     OutlinedRegions.clear();
2104     for (OutlinableRegion *Region : CurrentGroup.Regions) {
2105       // We check whether our region is compatible with what has already been
2106       // outlined, and whether we need to ignore this item.
2107       if (!isCompatibleWithAlreadyOutlinedCode(*Region))
2108         continue;
2109       OutlinedRegions.push_back(Region);
2110     }
2111 
2112     if (OutlinedRegions.size() < 2)
2113       continue;
2114 
2115     // Reestimate the cost and benefit of the OutlinableGroup. Continue only if
2116     // we are still outlining enough regions to make up for the added cost.
2117     CurrentGroup.Regions = std::move(OutlinedRegions);
2118     if (CostModel) {
2119       CurrentGroup.Benefit = 0;
2120       CurrentGroup.Cost = 0;
2121       findCostBenefit(M, CurrentGroup);
2122       if (CurrentGroup.Cost >= CurrentGroup.Benefit)
2123         continue;
2124     }
2125     OutlinedRegions.clear();
2126     for (OutlinableRegion *Region : CurrentGroup.Regions) {
2127       Region->splitCandidate();
2128       if (!Region->CandidateSplit)
2129         continue;
2130       OutlinedRegions.push_back(Region);
2131     }
2132 
2133     CurrentGroup.Regions = std::move(OutlinedRegions);
2134     if (CurrentGroup.Regions.size() < 2) {
2135       for (OutlinableRegion *R : CurrentGroup.Regions)
2136         R->reattachCandidate();
2137       continue;
2138     }
2139 
2140     LLVM_DEBUG(dbgs() << "Outlining regions with cost " << CurrentGroup.Cost
2141                       << " and benefit " << CurrentGroup.Benefit << "\n");
2142 
2143     // Create functions out of all the sections, and mark them as outlined.
2144     OutlinedRegions.clear();
2145     for (OutlinableRegion *OS : CurrentGroup.Regions) {
2146       SmallVector<BasicBlock *> BE;
2147       DenseSet<BasicBlock *> BBSet;
2148       OS->Candidate->getBasicBlocks(BBSet, BE);
2149       OS->CE = new (ExtractorAllocator.Allocate())
2150           CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false,
2151                         false, "outlined");
2152       bool FunctionOutlined = extractSection(*OS);
2153       if (FunctionOutlined) {
2154         unsigned StartIdx = OS->Candidate->getStartIdx();
2155         unsigned EndIdx = OS->Candidate->getEndIdx();
2156         for (unsigned Idx = StartIdx; Idx <= EndIdx; Idx++)
2157           Outlined.insert(Idx);
2158 
2159         OutlinedRegions.push_back(OS);
2160       }
2161     }
2162 
2163     LLVM_DEBUG(dbgs() << "Outlined " << OutlinedRegions.size()
2164                       << " with benefit " << CurrentGroup.Benefit
2165                       << " and cost " << CurrentGroup.Cost << "\n");
2166 
2167     CurrentGroup.Regions = std::move(OutlinedRegions);
2168 
2169     if (CurrentGroup.Regions.empty())
2170       continue;
2171 
2172     OptimizationRemarkEmitter &ORE =
2173         getORE(*CurrentGroup.Regions[0]->Call->getFunction());
2174     ORE.emit([&]() {
2175       IRSimilarityCandidate *C = CurrentGroup.Regions[0]->Candidate;
2176       OptimizationRemark R(DEBUG_TYPE, "Outlined", C->front()->Inst);
2177       R << "outlined " << ore::NV(std::to_string(CurrentGroup.Regions.size()))
2178         << " regions with decrease of "
2179         << ore::NV("Benefit", CurrentGroup.Benefit - CurrentGroup.Cost)
2180         << " instructions at locations ";
2181       interleave(
2182           CurrentGroup.Regions.begin(), CurrentGroup.Regions.end(),
2183           [&R](OutlinableRegion *Region) {
2184             R << ore::NV("DebugLoc",
2185                          Region->Candidate->frontInstruction()->getDebugLoc());
2186           },
2187           [&R]() { R << " "; });
2188       return R;
2189     });
2190 
2191     deduplicateExtractedSections(M, CurrentGroup, FuncsToRemove,
2192                                  OutlinedFunctionNum);
2193   }
2194 
2195   for (Function *F : FuncsToRemove)
2196     F->eraseFromParent();
2197 
2198   return OutlinedFunctionNum;
2199 }
2200 
2201 bool IROutliner::run(Module &M) {
2202   CostModel = !NoCostModel;
2203   OutlineFromLinkODRs = EnableLinkOnceODRIROutlining;
2204 
2205   return doOutline(M) > 0;
2206 }
2207 
2208 // Pass Manager Boilerplate
2209 namespace {
2210 class IROutlinerLegacyPass : public ModulePass {
2211 public:
2212   static char ID;
2213   IROutlinerLegacyPass() : ModulePass(ID) {
2214     initializeIROutlinerLegacyPassPass(*PassRegistry::getPassRegistry());
2215   }
2216 
2217   void getAnalysisUsage(AnalysisUsage &AU) const override {
2218     AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
2219     AU.addRequired<TargetTransformInfoWrapperPass>();
2220     AU.addRequired<IRSimilarityIdentifierWrapperPass>();
2221   }
2222 
2223   bool runOnModule(Module &M) override;
2224 };
2225 } // namespace
2226 
2227 bool IROutlinerLegacyPass::runOnModule(Module &M) {
2228   if (skipModule(M))
2229     return false;
2230 
2231   std::unique_ptr<OptimizationRemarkEmitter> ORE;
2232   auto GORE = [&ORE](Function &F) -> OptimizationRemarkEmitter & {
2233     ORE.reset(new OptimizationRemarkEmitter(&F));
2234     return *ORE.get();
2235   };
2236 
2237   auto GTTI = [this](Function &F) -> TargetTransformInfo & {
2238     return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
2239   };
2240 
2241   auto GIRSI = [this](Module &) -> IRSimilarityIdentifier & {
2242     return this->getAnalysis<IRSimilarityIdentifierWrapperPass>().getIRSI();
2243   };
2244 
2245   return IROutliner(GTTI, GIRSI, GORE).run(M);
2246 }
2247 
2248 PreservedAnalyses IROutlinerPass::run(Module &M, ModuleAnalysisManager &AM) {
2249   auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
2250 
2251   std::function<TargetTransformInfo &(Function &)> GTTI =
2252       [&FAM](Function &F) -> TargetTransformInfo & {
2253     return FAM.getResult<TargetIRAnalysis>(F);
2254   };
2255 
2256   std::function<IRSimilarityIdentifier &(Module &)> GIRSI =
2257       [&AM](Module &M) -> IRSimilarityIdentifier & {
2258     return AM.getResult<IRSimilarityAnalysis>(M);
2259   };
2260 
2261   std::unique_ptr<OptimizationRemarkEmitter> ORE;
2262   std::function<OptimizationRemarkEmitter &(Function &)> GORE =
2263       [&ORE](Function &F) -> OptimizationRemarkEmitter & {
2264     ORE.reset(new OptimizationRemarkEmitter(&F));
2265     return *ORE.get();
2266   };
2267 
2268   if (IROutliner(GTTI, GIRSI, GORE).run(M))
2269     return PreservedAnalyses::none();
2270   return PreservedAnalyses::all();
2271 }
2272 
2273 char IROutlinerLegacyPass::ID = 0;
2274 INITIALIZE_PASS_BEGIN(IROutlinerLegacyPass, "iroutliner", "IR Outliner", false,
2275                       false)
2276 INITIALIZE_PASS_DEPENDENCY(IRSimilarityIdentifierWrapperPass)
2277 INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
2278 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
2279 INITIALIZE_PASS_END(IROutlinerLegacyPass, "iroutliner", "IR Outliner", false,
2280                     false)
2281 
2282 ModulePass *llvm::createIROutlinerPass() { return new IROutlinerLegacyPass(); }
2283