10b57cec5SDimitry Andric //===- PartialInlining.cpp - Inline parts of functions --------------------===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric // This pass performs partial inlining, typically by inlining an if statement 100b57cec5SDimitry Andric // that surrounds the body of the function. 110b57cec5SDimitry Andric // 120b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 130b57cec5SDimitry Andric 140b57cec5SDimitry Andric #include "llvm/Transforms/IPO/PartialInlining.h" 150b57cec5SDimitry Andric #include "llvm/ADT/DenseMap.h" 160b57cec5SDimitry Andric #include "llvm/ADT/DenseSet.h" 1706c3fb27SDimitry Andric #include "llvm/ADT/DepthFirstIterator.h" 180b57cec5SDimitry Andric #include "llvm/ADT/STLExtras.h" 190b57cec5SDimitry Andric #include "llvm/ADT/SmallVector.h" 200b57cec5SDimitry Andric #include "llvm/ADT/Statistic.h" 210b57cec5SDimitry Andric #include "llvm/Analysis/BlockFrequencyInfo.h" 220b57cec5SDimitry Andric #include "llvm/Analysis/BranchProbabilityInfo.h" 230b57cec5SDimitry Andric #include "llvm/Analysis/InlineCost.h" 240b57cec5SDimitry Andric #include "llvm/Analysis/LoopInfo.h" 250b57cec5SDimitry Andric #include "llvm/Analysis/OptimizationRemarkEmitter.h" 260b57cec5SDimitry Andric #include "llvm/Analysis/ProfileSummaryInfo.h" 270b57cec5SDimitry Andric #include "llvm/Analysis/TargetLibraryInfo.h" 280b57cec5SDimitry Andric #include "llvm/Analysis/TargetTransformInfo.h" 290b57cec5SDimitry Andric #include "llvm/IR/Attributes.h" 300b57cec5SDimitry Andric #include "llvm/IR/BasicBlock.h" 310b57cec5SDimitry Andric #include "llvm/IR/CFG.h" 320b57cec5SDimitry Andric #include "llvm/IR/DebugLoc.h" 330b57cec5SDimitry Andric #include "llvm/IR/DiagnosticInfo.h" 340b57cec5SDimitry Andric #include "llvm/IR/Dominators.h" 350b57cec5SDimitry Andric #include "llvm/IR/Function.h" 360b57cec5SDimitry Andric #include "llvm/IR/InstrTypes.h" 370b57cec5SDimitry Andric #include "llvm/IR/Instruction.h" 380b57cec5SDimitry Andric #include "llvm/IR/Instructions.h" 390b57cec5SDimitry Andric #include "llvm/IR/IntrinsicInst.h" 400b57cec5SDimitry Andric #include "llvm/IR/Intrinsics.h" 410b57cec5SDimitry Andric #include "llvm/IR/Module.h" 4281ad6265SDimitry Andric #include "llvm/IR/Operator.h" 43bdd1243dSDimitry Andric #include "llvm/IR/ProfDataUtils.h" 440b57cec5SDimitry Andric #include "llvm/IR/User.h" 450b57cec5SDimitry Andric #include "llvm/Support/BlockFrequency.h" 460b57cec5SDimitry Andric #include "llvm/Support/BranchProbability.h" 470b57cec5SDimitry Andric #include "llvm/Support/Casting.h" 480b57cec5SDimitry Andric #include "llvm/Support/CommandLine.h" 490b57cec5SDimitry Andric #include "llvm/Support/ErrorHandling.h" 500b57cec5SDimitry Andric #include "llvm/Transforms/IPO.h" 510b57cec5SDimitry Andric #include "llvm/Transforms/Utils/Cloning.h" 520b57cec5SDimitry Andric #include "llvm/Transforms/Utils/CodeExtractor.h" 530b57cec5SDimitry Andric #include "llvm/Transforms/Utils/ValueMapper.h" 540b57cec5SDimitry Andric #include <algorithm> 550b57cec5SDimitry Andric #include <cassert> 560b57cec5SDimitry Andric #include <cstdint> 570b57cec5SDimitry Andric #include <memory> 580b57cec5SDimitry Andric #include <tuple> 590b57cec5SDimitry Andric #include <vector> 600b57cec5SDimitry Andric 610b57cec5SDimitry Andric using namespace llvm; 620b57cec5SDimitry Andric 630b57cec5SDimitry Andric #define DEBUG_TYPE "partial-inlining" 640b57cec5SDimitry Andric 650b57cec5SDimitry Andric STATISTIC(NumPartialInlined, 660b57cec5SDimitry Andric "Number of callsites functions partially inlined into."); 670b57cec5SDimitry Andric STATISTIC(NumColdOutlinePartialInlined, "Number of times functions with " 680b57cec5SDimitry Andric "cold outlined regions were partially " 690b57cec5SDimitry Andric "inlined into its caller(s)."); 700b57cec5SDimitry Andric STATISTIC(NumColdRegionsFound, 710b57cec5SDimitry Andric "Number of cold single entry/exit regions found."); 720b57cec5SDimitry Andric STATISTIC(NumColdRegionsOutlined, 730b57cec5SDimitry Andric "Number of cold single entry/exit regions outlined."); 740b57cec5SDimitry Andric 750b57cec5SDimitry Andric // Command line option to disable partial-inlining. The default is false: 760b57cec5SDimitry Andric static cl::opt<bool> 770b57cec5SDimitry Andric DisablePartialInlining("disable-partial-inlining", cl::init(false), 780b57cec5SDimitry Andric cl::Hidden, cl::desc("Disable partial inlining")); 790b57cec5SDimitry Andric // Command line option to disable multi-region partial-inlining. The default is 800b57cec5SDimitry Andric // false: 810b57cec5SDimitry Andric static cl::opt<bool> DisableMultiRegionPartialInline( 820b57cec5SDimitry Andric "disable-mr-partial-inlining", cl::init(false), cl::Hidden, 830b57cec5SDimitry Andric cl::desc("Disable multi-region partial inlining")); 840b57cec5SDimitry Andric 850b57cec5SDimitry Andric // Command line option to force outlining in regions with live exit variables. 860b57cec5SDimitry Andric // The default is false: 870b57cec5SDimitry Andric static cl::opt<bool> 880b57cec5SDimitry Andric ForceLiveExit("pi-force-live-exit-outline", cl::init(false), cl::Hidden, 890b57cec5SDimitry Andric cl::desc("Force outline regions with live exits")); 900b57cec5SDimitry Andric 910b57cec5SDimitry Andric // Command line option to enable marking outline functions with Cold Calling 920b57cec5SDimitry Andric // Convention. The default is false: 930b57cec5SDimitry Andric static cl::opt<bool> 940b57cec5SDimitry Andric MarkOutlinedColdCC("pi-mark-coldcc", cl::init(false), cl::Hidden, 950b57cec5SDimitry Andric cl::desc("Mark outline function calls with ColdCC")); 960b57cec5SDimitry Andric 970b57cec5SDimitry Andric // This is an option used by testing: 980b57cec5SDimitry Andric static cl::opt<bool> SkipCostAnalysis("skip-partial-inlining-cost-analysis", 9981ad6265SDimitry Andric 1000b57cec5SDimitry Andric cl::ReallyHidden, 1010b57cec5SDimitry Andric cl::desc("Skip Cost Analysis")); 1020b57cec5SDimitry Andric // Used to determine if a cold region is worth outlining based on 1030b57cec5SDimitry Andric // its inlining cost compared to the original function. Default is set at 10%. 1040b57cec5SDimitry Andric // ie. if the cold region reduces the inlining cost of the original function by 1050b57cec5SDimitry Andric // at least 10%. 1060b57cec5SDimitry Andric static cl::opt<float> MinRegionSizeRatio( 1070b57cec5SDimitry Andric "min-region-size-ratio", cl::init(0.1), cl::Hidden, 1080b57cec5SDimitry Andric cl::desc("Minimum ratio comparing relative sizes of each " 1090b57cec5SDimitry Andric "outline candidate and original function")); 1100b57cec5SDimitry Andric // Used to tune the minimum number of execution counts needed in the predecessor 1110b57cec5SDimitry Andric // block to the cold edge. ie. confidence interval. 1120b57cec5SDimitry Andric static cl::opt<unsigned> 1130b57cec5SDimitry Andric MinBlockCounterExecution("min-block-execution", cl::init(100), cl::Hidden, 1140b57cec5SDimitry Andric cl::desc("Minimum block executions to consider " 1150b57cec5SDimitry Andric "its BranchProbabilityInfo valid")); 1160b57cec5SDimitry Andric // Used to determine when an edge is considered cold. Default is set to 10%. ie. 1170b57cec5SDimitry Andric // if the branch probability is 10% or less, then it is deemed as 'cold'. 1180b57cec5SDimitry Andric static cl::opt<float> ColdBranchRatio( 1190b57cec5SDimitry Andric "cold-branch-ratio", cl::init(0.1), cl::Hidden, 1200b57cec5SDimitry Andric cl::desc("Minimum BranchProbability to consider a region cold.")); 1210b57cec5SDimitry Andric 1220b57cec5SDimitry Andric static cl::opt<unsigned> MaxNumInlineBlocks( 1230b57cec5SDimitry Andric "max-num-inline-blocks", cl::init(5), cl::Hidden, 1240b57cec5SDimitry Andric cl::desc("Max number of blocks to be partially inlined")); 1250b57cec5SDimitry Andric 1260b57cec5SDimitry Andric // Command line option to set the maximum number of partial inlining allowed 1270b57cec5SDimitry Andric // for the module. The default value of -1 means no limit. 1280b57cec5SDimitry Andric static cl::opt<int> MaxNumPartialInlining( 12981ad6265SDimitry Andric "max-partial-inlining", cl::init(-1), cl::Hidden, 1300b57cec5SDimitry Andric cl::desc("Max number of partial inlining. The default is unlimited")); 1310b57cec5SDimitry Andric 1320b57cec5SDimitry Andric // Used only when PGO or user annotated branch data is absent. It is 1330b57cec5SDimitry Andric // the least value that is used to weigh the outline region. If BFI 1340b57cec5SDimitry Andric // produces larger value, the BFI value will be used. 1350b57cec5SDimitry Andric static cl::opt<int> 1360b57cec5SDimitry Andric OutlineRegionFreqPercent("outline-region-freq-percent", cl::init(75), 13781ad6265SDimitry Andric cl::Hidden, 1380b57cec5SDimitry Andric cl::desc("Relative frequency of outline region to " 1390b57cec5SDimitry Andric "the entry block")); 1400b57cec5SDimitry Andric 1410b57cec5SDimitry Andric static cl::opt<unsigned> ExtraOutliningPenalty( 1420b57cec5SDimitry Andric "partial-inlining-extra-penalty", cl::init(0), cl::Hidden, 1430b57cec5SDimitry Andric cl::desc("A debug option to add additional penalty to the computed one.")); 1440b57cec5SDimitry Andric 1450b57cec5SDimitry Andric namespace { 1460b57cec5SDimitry Andric 1470b57cec5SDimitry Andric struct FunctionOutliningInfo { 1480b57cec5SDimitry Andric FunctionOutliningInfo() = default; 1490b57cec5SDimitry Andric 1500b57cec5SDimitry Andric // Returns the number of blocks to be inlined including all blocks 1510b57cec5SDimitry Andric // in Entries and one return block. 152e8d8bef9SDimitry Andric unsigned getNumInlinedBlocks() const { return Entries.size() + 1; } 1530b57cec5SDimitry Andric 1540b57cec5SDimitry Andric // A set of blocks including the function entry that guard 1550b57cec5SDimitry Andric // the region to be outlined. 1560b57cec5SDimitry Andric SmallVector<BasicBlock *, 4> Entries; 1570b57cec5SDimitry Andric 1580b57cec5SDimitry Andric // The return block that is not included in the outlined region. 1590b57cec5SDimitry Andric BasicBlock *ReturnBlock = nullptr; 1600b57cec5SDimitry Andric 1610b57cec5SDimitry Andric // The dominating block of the region to be outlined. 1620b57cec5SDimitry Andric BasicBlock *NonReturnBlock = nullptr; 1630b57cec5SDimitry Andric 1645f757f3fSDimitry Andric // The set of blocks in Entries that are predecessors to ReturnBlock 1650b57cec5SDimitry Andric SmallVector<BasicBlock *, 4> ReturnBlockPreds; 1660b57cec5SDimitry Andric }; 1670b57cec5SDimitry Andric 1680b57cec5SDimitry Andric struct FunctionOutliningMultiRegionInfo { 16981ad6265SDimitry Andric FunctionOutliningMultiRegionInfo() = default; 1700b57cec5SDimitry Andric 1710b57cec5SDimitry Andric // Container for outline regions 1720b57cec5SDimitry Andric struct OutlineRegionInfo { 1730b57cec5SDimitry Andric OutlineRegionInfo(ArrayRef<BasicBlock *> Region, 1740b57cec5SDimitry Andric BasicBlock *EntryBlock, BasicBlock *ExitBlock, 1750b57cec5SDimitry Andric BasicBlock *ReturnBlock) 1760b57cec5SDimitry Andric : Region(Region.begin(), Region.end()), EntryBlock(EntryBlock), 1770b57cec5SDimitry Andric ExitBlock(ExitBlock), ReturnBlock(ReturnBlock) {} 1780b57cec5SDimitry Andric SmallVector<BasicBlock *, 8> Region; 1790b57cec5SDimitry Andric BasicBlock *EntryBlock; 1800b57cec5SDimitry Andric BasicBlock *ExitBlock; 1810b57cec5SDimitry Andric BasicBlock *ReturnBlock; 1820b57cec5SDimitry Andric }; 1830b57cec5SDimitry Andric 1840b57cec5SDimitry Andric SmallVector<OutlineRegionInfo, 4> ORI; 1850b57cec5SDimitry Andric }; 1860b57cec5SDimitry Andric 1870b57cec5SDimitry Andric struct PartialInlinerImpl { 1880b57cec5SDimitry Andric 1890b57cec5SDimitry Andric PartialInlinerImpl( 1905ffd83dbSDimitry Andric function_ref<AssumptionCache &(Function &)> GetAC, 1910b57cec5SDimitry Andric function_ref<AssumptionCache *(Function &)> LookupAC, 1925ffd83dbSDimitry Andric function_ref<TargetTransformInfo &(Function &)> GTTI, 1935ffd83dbSDimitry Andric function_ref<const TargetLibraryInfo &(Function &)> GTLI, 1945ffd83dbSDimitry Andric ProfileSummaryInfo &ProfSI, 1955ffd83dbSDimitry Andric function_ref<BlockFrequencyInfo &(Function &)> GBFI = nullptr) 1960b57cec5SDimitry Andric : GetAssumptionCache(GetAC), LookupAssumptionCache(LookupAC), 1975ffd83dbSDimitry Andric GetTTI(GTTI), GetBFI(GBFI), GetTLI(GTLI), PSI(ProfSI) {} 1980b57cec5SDimitry Andric 1990b57cec5SDimitry Andric bool run(Module &M); 2000b57cec5SDimitry Andric // Main part of the transformation that calls helper functions to find 2010b57cec5SDimitry Andric // outlining candidates, clone & outline the function, and attempt to 2020b57cec5SDimitry Andric // partially inline the resulting function. Returns true if 2030b57cec5SDimitry Andric // inlining was successful, false otherwise. Also returns the outline 2040b57cec5SDimitry Andric // function (only if we partially inlined early returns) as there is a 2050b57cec5SDimitry Andric // possibility to further "peel" early return statements that were left in the 2060b57cec5SDimitry Andric // outline function due to code size. 207e8d8bef9SDimitry Andric std::pair<bool, Function *> unswitchFunction(Function &F); 2080b57cec5SDimitry Andric 2090b57cec5SDimitry Andric // This class speculatively clones the function to be partial inlined. 2100b57cec5SDimitry Andric // At the end of partial inlining, the remaining callsites to the cloned 2110b57cec5SDimitry Andric // function that are not partially inlined will be fixed up to reference 2120b57cec5SDimitry Andric // the original function, and the cloned function will be erased. 2130b57cec5SDimitry Andric struct FunctionCloner { 2140b57cec5SDimitry Andric // Two constructors, one for single region outlining, the other for 2150b57cec5SDimitry Andric // multi-region outlining. 2160b57cec5SDimitry Andric FunctionCloner(Function *F, FunctionOutliningInfo *OI, 2170b57cec5SDimitry Andric OptimizationRemarkEmitter &ORE, 218e8d8bef9SDimitry Andric function_ref<AssumptionCache *(Function &)> LookupAC, 219e8d8bef9SDimitry Andric function_ref<TargetTransformInfo &(Function &)> GetTTI); 2200b57cec5SDimitry Andric FunctionCloner(Function *F, FunctionOutliningMultiRegionInfo *OMRI, 2210b57cec5SDimitry Andric OptimizationRemarkEmitter &ORE, 222e8d8bef9SDimitry Andric function_ref<AssumptionCache *(Function &)> LookupAC, 223e8d8bef9SDimitry Andric function_ref<TargetTransformInfo &(Function &)> GetTTI); 224e8d8bef9SDimitry Andric 2250b57cec5SDimitry Andric ~FunctionCloner(); 2260b57cec5SDimitry Andric 2270b57cec5SDimitry Andric // Prepare for function outlining: making sure there is only 2280b57cec5SDimitry Andric // one incoming edge from the extracted/outlined region to 2290b57cec5SDimitry Andric // the return block. 230e8d8bef9SDimitry Andric void normalizeReturnBlock() const; 2310b57cec5SDimitry Andric 2320b57cec5SDimitry Andric // Do function outlining for cold regions. 2330b57cec5SDimitry Andric bool doMultiRegionFunctionOutlining(); 2340b57cec5SDimitry Andric // Do function outlining for region after early return block(s). 2350b57cec5SDimitry Andric // NOTE: For vararg functions that do the vararg handling in the outlined 2360b57cec5SDimitry Andric // function, we temporarily generate IR that does not properly 2370b57cec5SDimitry Andric // forward varargs to the outlined function. Calling InlineFunction 2380b57cec5SDimitry Andric // will update calls to the outlined functions to properly forward 2390b57cec5SDimitry Andric // the varargs. 2400b57cec5SDimitry Andric Function *doSingleRegionFunctionOutlining(); 2410b57cec5SDimitry Andric 2420b57cec5SDimitry Andric Function *OrigFunc = nullptr; 2430b57cec5SDimitry Andric Function *ClonedFunc = nullptr; 2440b57cec5SDimitry Andric 2450b57cec5SDimitry Andric typedef std::pair<Function *, BasicBlock *> FuncBodyCallerPair; 2460b57cec5SDimitry Andric // Keep track of Outlined Functions and the basic block they're called from. 2470b57cec5SDimitry Andric SmallVector<FuncBodyCallerPair, 4> OutlinedFunctions; 2480b57cec5SDimitry Andric 2490b57cec5SDimitry Andric // ClonedFunc is inlined in one of its callers after function 2500b57cec5SDimitry Andric // outlining. 2510b57cec5SDimitry Andric bool IsFunctionInlined = false; 2520b57cec5SDimitry Andric // The cost of the region to be outlined. 253fe6060f1SDimitry Andric InstructionCost OutlinedRegionCost = 0; 2540b57cec5SDimitry Andric // ClonedOI is specific to outlining non-early return blocks. 2550b57cec5SDimitry Andric std::unique_ptr<FunctionOutliningInfo> ClonedOI = nullptr; 2560b57cec5SDimitry Andric // ClonedOMRI is specific to outlining cold regions. 2570b57cec5SDimitry Andric std::unique_ptr<FunctionOutliningMultiRegionInfo> ClonedOMRI = nullptr; 2580b57cec5SDimitry Andric std::unique_ptr<BlockFrequencyInfo> ClonedFuncBFI = nullptr; 2590b57cec5SDimitry Andric OptimizationRemarkEmitter &ORE; 2600b57cec5SDimitry Andric function_ref<AssumptionCache *(Function &)> LookupAC; 261e8d8bef9SDimitry Andric function_ref<TargetTransformInfo &(Function &)> GetTTI; 2620b57cec5SDimitry Andric }; 2630b57cec5SDimitry Andric 2640b57cec5SDimitry Andric private: 2650b57cec5SDimitry Andric int NumPartialInlining = 0; 2665ffd83dbSDimitry Andric function_ref<AssumptionCache &(Function &)> GetAssumptionCache; 2670b57cec5SDimitry Andric function_ref<AssumptionCache *(Function &)> LookupAssumptionCache; 2685ffd83dbSDimitry Andric function_ref<TargetTransformInfo &(Function &)> GetTTI; 2695ffd83dbSDimitry Andric function_ref<BlockFrequencyInfo &(Function &)> GetBFI; 2705ffd83dbSDimitry Andric function_ref<const TargetLibraryInfo &(Function &)> GetTLI; 2715ffd83dbSDimitry Andric ProfileSummaryInfo &PSI; 2720b57cec5SDimitry Andric 2730b57cec5SDimitry Andric // Return the frequency of the OutlininingBB relative to F's entry point. 2740b57cec5SDimitry Andric // The result is no larger than 1 and is represented using BP. 2750b57cec5SDimitry Andric // (Note that the outlined region's 'head' block can only have incoming 2760b57cec5SDimitry Andric // edges from the guarding entry blocks). 277e8d8bef9SDimitry Andric BranchProbability 278e8d8bef9SDimitry Andric getOutliningCallBBRelativeFreq(FunctionCloner &Cloner) const; 2790b57cec5SDimitry Andric 2805ffd83dbSDimitry Andric // Return true if the callee of CB should be partially inlined with 2810b57cec5SDimitry Andric // profit. 2825ffd83dbSDimitry Andric bool shouldPartialInline(CallBase &CB, FunctionCloner &Cloner, 2830b57cec5SDimitry Andric BlockFrequency WeightedOutliningRcost, 284e8d8bef9SDimitry Andric OptimizationRemarkEmitter &ORE) const; 2850b57cec5SDimitry Andric 2860b57cec5SDimitry Andric // Try to inline DuplicateFunction (cloned from F with call to 2870b57cec5SDimitry Andric // the OutlinedFunction into its callers. Return true 2880b57cec5SDimitry Andric // if there is any successful inlining. 2890b57cec5SDimitry Andric bool tryPartialInline(FunctionCloner &Cloner); 2900b57cec5SDimitry Andric 2910b57cec5SDimitry Andric // Compute the mapping from use site of DuplicationFunction to the enclosing 2920b57cec5SDimitry Andric // BB's profile count. 293e8d8bef9SDimitry Andric void 294e8d8bef9SDimitry Andric computeCallsiteToProfCountMap(Function *DuplicateFunction, 295e8d8bef9SDimitry Andric DenseMap<User *, uint64_t> &SiteCountMap) const; 2960b57cec5SDimitry Andric 297e8d8bef9SDimitry Andric bool isLimitReached() const { 2980b57cec5SDimitry Andric return (MaxNumPartialInlining != -1 && 2990b57cec5SDimitry Andric NumPartialInlining >= MaxNumPartialInlining); 3000b57cec5SDimitry Andric } 3010b57cec5SDimitry Andric 3025ffd83dbSDimitry Andric static CallBase *getSupportedCallBase(User *U) { 3035ffd83dbSDimitry Andric if (isa<CallInst>(U) || isa<InvokeInst>(U)) 3045ffd83dbSDimitry Andric return cast<CallBase>(U); 3050b57cec5SDimitry Andric llvm_unreachable("All uses must be calls"); 3065ffd83dbSDimitry Andric return nullptr; 3070b57cec5SDimitry Andric } 3080b57cec5SDimitry Andric 309e8d8bef9SDimitry Andric static CallBase *getOneCallSiteTo(Function &F) { 310e8d8bef9SDimitry Andric User *User = *F.user_begin(); 3115ffd83dbSDimitry Andric return getSupportedCallBase(User); 3120b57cec5SDimitry Andric } 3130b57cec5SDimitry Andric 314e8d8bef9SDimitry Andric std::tuple<DebugLoc, BasicBlock *> getOneDebugLoc(Function &F) const { 3155ffd83dbSDimitry Andric CallBase *CB = getOneCallSiteTo(F); 3165ffd83dbSDimitry Andric DebugLoc DLoc = CB->getDebugLoc(); 3175ffd83dbSDimitry Andric BasicBlock *Block = CB->getParent(); 3180b57cec5SDimitry Andric return std::make_tuple(DLoc, Block); 3190b57cec5SDimitry Andric } 3200b57cec5SDimitry Andric 3210b57cec5SDimitry Andric // Returns the costs associated with function outlining: 3220b57cec5SDimitry Andric // - The first value is the non-weighted runtime cost for making the call 3230b57cec5SDimitry Andric // to the outlined function, including the addtional setup cost in the 3240b57cec5SDimitry Andric // outlined function itself; 3250b57cec5SDimitry Andric // - The second value is the estimated size of the new call sequence in 3260b57cec5SDimitry Andric // basic block Cloner.OutliningCallBB; 327fe6060f1SDimitry Andric std::tuple<InstructionCost, InstructionCost> 328fe6060f1SDimitry Andric computeOutliningCosts(FunctionCloner &Cloner) const; 3290b57cec5SDimitry Andric 3300b57cec5SDimitry Andric // Compute the 'InlineCost' of block BB. InlineCost is a proxy used to 3310b57cec5SDimitry Andric // approximate both the size and runtime cost (Note that in the current 3320b57cec5SDimitry Andric // inline cost analysis, there is no clear distinction there either). 333fe6060f1SDimitry Andric static InstructionCost computeBBInlineCost(BasicBlock *BB, 334fe6060f1SDimitry Andric TargetTransformInfo *TTI); 3350b57cec5SDimitry Andric 336e8d8bef9SDimitry Andric std::unique_ptr<FunctionOutliningInfo> 337e8d8bef9SDimitry Andric computeOutliningInfo(Function &F) const; 338e8d8bef9SDimitry Andric 3390b57cec5SDimitry Andric std::unique_ptr<FunctionOutliningMultiRegionInfo> 340e8d8bef9SDimitry Andric computeOutliningColdRegionsInfo(Function &F, 341e8d8bef9SDimitry Andric OptimizationRemarkEmitter &ORE) const; 3420b57cec5SDimitry Andric }; 3430b57cec5SDimitry Andric 3440b57cec5SDimitry Andric } // end anonymous namespace 3450b57cec5SDimitry Andric 3460b57cec5SDimitry Andric std::unique_ptr<FunctionOutliningMultiRegionInfo> 347e8d8bef9SDimitry Andric PartialInlinerImpl::computeOutliningColdRegionsInfo( 348e8d8bef9SDimitry Andric Function &F, OptimizationRemarkEmitter &ORE) const { 349e8d8bef9SDimitry Andric BasicBlock *EntryBlock = &F.front(); 3500b57cec5SDimitry Andric 351e8d8bef9SDimitry Andric DominatorTree DT(F); 3520b57cec5SDimitry Andric LoopInfo LI(DT); 353e8d8bef9SDimitry Andric BranchProbabilityInfo BPI(F, LI); 3540b57cec5SDimitry Andric std::unique_ptr<BlockFrequencyInfo> ScopedBFI; 3550b57cec5SDimitry Andric BlockFrequencyInfo *BFI; 3560b57cec5SDimitry Andric if (!GetBFI) { 357e8d8bef9SDimitry Andric ScopedBFI.reset(new BlockFrequencyInfo(F, BPI, LI)); 3580b57cec5SDimitry Andric BFI = ScopedBFI.get(); 3590b57cec5SDimitry Andric } else 360e8d8bef9SDimitry Andric BFI = &(GetBFI(F)); 3610b57cec5SDimitry Andric 3620b57cec5SDimitry Andric // Return if we don't have profiling information. 3635ffd83dbSDimitry Andric if (!PSI.hasInstrumentationProfile()) 3640b57cec5SDimitry Andric return std::unique_ptr<FunctionOutliningMultiRegionInfo>(); 3650b57cec5SDimitry Andric 3660b57cec5SDimitry Andric std::unique_ptr<FunctionOutliningMultiRegionInfo> OutliningInfo = 3678bcb0991SDimitry Andric std::make_unique<FunctionOutliningMultiRegionInfo>(); 3680b57cec5SDimitry Andric 3690b57cec5SDimitry Andric auto IsSingleExit = 3700b57cec5SDimitry Andric [&ORE](SmallVectorImpl<BasicBlock *> &BlockList) -> BasicBlock * { 3710b57cec5SDimitry Andric BasicBlock *ExitBlock = nullptr; 3720b57cec5SDimitry Andric for (auto *Block : BlockList) { 373fe6060f1SDimitry Andric for (BasicBlock *Succ : successors(Block)) { 374fe6060f1SDimitry Andric if (!is_contained(BlockList, Succ)) { 3750b57cec5SDimitry Andric if (ExitBlock) { 3760b57cec5SDimitry Andric ORE.emit([&]() { 3770b57cec5SDimitry Andric return OptimizationRemarkMissed(DEBUG_TYPE, "MultiExitRegion", 378fe6060f1SDimitry Andric &Succ->front()) 3790b57cec5SDimitry Andric << "Region dominated by " 3800b57cec5SDimitry Andric << ore::NV("Block", BlockList.front()->getName()) 3810b57cec5SDimitry Andric << " has more than one region exit edge."; 3820b57cec5SDimitry Andric }); 3830b57cec5SDimitry Andric return nullptr; 384e8d8bef9SDimitry Andric } 385e8d8bef9SDimitry Andric 3860b57cec5SDimitry Andric ExitBlock = Block; 3870b57cec5SDimitry Andric } 3880b57cec5SDimitry Andric } 3890b57cec5SDimitry Andric } 3900b57cec5SDimitry Andric return ExitBlock; 3910b57cec5SDimitry Andric }; 3920b57cec5SDimitry Andric 3930b57cec5SDimitry Andric auto BBProfileCount = [BFI](BasicBlock *BB) { 39481ad6265SDimitry Andric return BFI->getBlockProfileCount(BB).value_or(0); 3950b57cec5SDimitry Andric }; 3960b57cec5SDimitry Andric 3970b57cec5SDimitry Andric // Use the same computeBBInlineCost function to compute the cost savings of 3980b57cec5SDimitry Andric // the outlining the candidate region. 399e8d8bef9SDimitry Andric TargetTransformInfo *FTTI = &GetTTI(F); 400fe6060f1SDimitry Andric InstructionCost OverallFunctionCost = 0; 401e8d8bef9SDimitry Andric for (auto &BB : F) 402e8d8bef9SDimitry Andric OverallFunctionCost += computeBBInlineCost(&BB, FTTI); 4030b57cec5SDimitry Andric 404e8d8bef9SDimitry Andric LLVM_DEBUG(dbgs() << "OverallFunctionCost = " << OverallFunctionCost 405e8d8bef9SDimitry Andric << "\n";); 406e8d8bef9SDimitry Andric 407fe6060f1SDimitry Andric InstructionCost MinOutlineRegionCost = OverallFunctionCost.map( 408fe6060f1SDimitry Andric [&](auto Cost) { return Cost * MinRegionSizeRatio; }); 409fe6060f1SDimitry Andric 4100b57cec5SDimitry Andric BranchProbability MinBranchProbability( 4110b57cec5SDimitry Andric static_cast<int>(ColdBranchRatio * MinBlockCounterExecution), 4120b57cec5SDimitry Andric MinBlockCounterExecution); 4130b57cec5SDimitry Andric bool ColdCandidateFound = false; 4140b57cec5SDimitry Andric BasicBlock *CurrEntry = EntryBlock; 4150b57cec5SDimitry Andric std::vector<BasicBlock *> DFS; 4160b57cec5SDimitry Andric DenseMap<BasicBlock *, bool> VisitedMap; 4170b57cec5SDimitry Andric DFS.push_back(CurrEntry); 4180b57cec5SDimitry Andric VisitedMap[CurrEntry] = true; 419e8d8bef9SDimitry Andric 4200b57cec5SDimitry Andric // Use Depth First Search on the basic blocks to find CFG edges that are 4210b57cec5SDimitry Andric // considered cold. 4220b57cec5SDimitry Andric // Cold regions considered must also have its inline cost compared to the 4230b57cec5SDimitry Andric // overall inline cost of the original function. The region is outlined only 4240b57cec5SDimitry Andric // if it reduced the inline cost of the function by 'MinOutlineRegionCost' or 4250b57cec5SDimitry Andric // more. 4260b57cec5SDimitry Andric while (!DFS.empty()) { 427e8d8bef9SDimitry Andric auto *ThisBB = DFS.back(); 4280b57cec5SDimitry Andric DFS.pop_back(); 4290b57cec5SDimitry Andric // Only consider regions with predecessor blocks that are considered 4300b57cec5SDimitry Andric // not-cold (default: part of the top 99.99% of all block counters) 4310b57cec5SDimitry Andric // AND greater than our minimum block execution count (default: 100). 432e8d8bef9SDimitry Andric if (PSI.isColdBlock(ThisBB, BFI) || 433e8d8bef9SDimitry Andric BBProfileCount(ThisBB) < MinBlockCounterExecution) 4340b57cec5SDimitry Andric continue; 435e8d8bef9SDimitry Andric for (auto SI = succ_begin(ThisBB); SI != succ_end(ThisBB); ++SI) { 4360b57cec5SDimitry Andric if (VisitedMap[*SI]) 4370b57cec5SDimitry Andric continue; 4380b57cec5SDimitry Andric VisitedMap[*SI] = true; 4390b57cec5SDimitry Andric DFS.push_back(*SI); 4400b57cec5SDimitry Andric // If branch isn't cold, we skip to the next one. 441e8d8bef9SDimitry Andric BranchProbability SuccProb = BPI.getEdgeProbability(ThisBB, *SI); 4420b57cec5SDimitry Andric if (SuccProb > MinBranchProbability) 4430b57cec5SDimitry Andric continue; 444e8d8bef9SDimitry Andric 445e8d8bef9SDimitry Andric LLVM_DEBUG(dbgs() << "Found cold edge: " << ThisBB->getName() << "->" 446e8d8bef9SDimitry Andric << SI->getName() 447e8d8bef9SDimitry Andric << "\nBranch Probability = " << SuccProb << "\n";); 448e8d8bef9SDimitry Andric 4490b57cec5SDimitry Andric SmallVector<BasicBlock *, 8> DominateVector; 4500b57cec5SDimitry Andric DT.getDescendants(*SI, DominateVector); 451e8d8bef9SDimitry Andric assert(!DominateVector.empty() && 452e8d8bef9SDimitry Andric "SI should be reachable and have at least itself as descendant"); 453e8d8bef9SDimitry Andric 4540b57cec5SDimitry Andric // We can only outline single entry regions (for now). 455e8d8bef9SDimitry Andric if (!DominateVector.front()->hasNPredecessors(1)) { 456e8d8bef9SDimitry Andric LLVM_DEBUG(dbgs() << "ABORT: Block " << SI->getName() 457e8d8bef9SDimitry Andric << " doesn't have a single predecessor in the " 458e8d8bef9SDimitry Andric "dominator tree\n";); 4590b57cec5SDimitry Andric continue; 460e8d8bef9SDimitry Andric } 461e8d8bef9SDimitry Andric 4620b57cec5SDimitry Andric BasicBlock *ExitBlock = nullptr; 4630b57cec5SDimitry Andric // We can only outline single exit regions (for now). 464e8d8bef9SDimitry Andric if (!(ExitBlock = IsSingleExit(DominateVector))) { 465e8d8bef9SDimitry Andric LLVM_DEBUG(dbgs() << "ABORT: Block " << SI->getName() 466e8d8bef9SDimitry Andric << " doesn't have a unique successor\n";); 4670b57cec5SDimitry Andric continue; 468e8d8bef9SDimitry Andric } 469e8d8bef9SDimitry Andric 470fe6060f1SDimitry Andric InstructionCost OutlineRegionCost = 0; 4710b57cec5SDimitry Andric for (auto *BB : DominateVector) 472e8d8bef9SDimitry Andric OutlineRegionCost += computeBBInlineCost(BB, &GetTTI(*BB->getParent())); 4730b57cec5SDimitry Andric 474e8d8bef9SDimitry Andric LLVM_DEBUG(dbgs() << "OutlineRegionCost = " << OutlineRegionCost 475e8d8bef9SDimitry Andric << "\n";); 4760b57cec5SDimitry Andric 477e8d8bef9SDimitry Andric if (!SkipCostAnalysis && OutlineRegionCost < MinOutlineRegionCost) { 4780b57cec5SDimitry Andric ORE.emit([&]() { 4790b57cec5SDimitry Andric return OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly", 4800b57cec5SDimitry Andric &SI->front()) 481e8d8bef9SDimitry Andric << ore::NV("Callee", &F) 482e8d8bef9SDimitry Andric << " inline cost-savings smaller than " 4830b57cec5SDimitry Andric << ore::NV("Cost", MinOutlineRegionCost); 4840b57cec5SDimitry Andric }); 485e8d8bef9SDimitry Andric 486e8d8bef9SDimitry Andric LLVM_DEBUG(dbgs() << "ABORT: Outline region cost is smaller than " 487e8d8bef9SDimitry Andric << MinOutlineRegionCost << "\n";); 4880b57cec5SDimitry Andric continue; 4890b57cec5SDimitry Andric } 490e8d8bef9SDimitry Andric 4910b57cec5SDimitry Andric // For now, ignore blocks that belong to a SISE region that is a 4920b57cec5SDimitry Andric // candidate for outlining. In the future, we may want to look 4930b57cec5SDimitry Andric // at inner regions because the outer region may have live-exit 4940b57cec5SDimitry Andric // variables. 4950b57cec5SDimitry Andric for (auto *BB : DominateVector) 4960b57cec5SDimitry Andric VisitedMap[BB] = true; 497e8d8bef9SDimitry Andric 4980b57cec5SDimitry Andric // ReturnBlock here means the block after the outline call 4990b57cec5SDimitry Andric BasicBlock *ReturnBlock = ExitBlock->getSingleSuccessor(); 5000b57cec5SDimitry Andric FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegInfo( 5010b57cec5SDimitry Andric DominateVector, DominateVector.front(), ExitBlock, ReturnBlock); 5020b57cec5SDimitry Andric OutliningInfo->ORI.push_back(RegInfo); 503e8d8bef9SDimitry Andric LLVM_DEBUG(dbgs() << "Found Cold Candidate starting at block: " 504e8d8bef9SDimitry Andric << DominateVector.front()->getName() << "\n";); 5050b57cec5SDimitry Andric ColdCandidateFound = true; 5060b57cec5SDimitry Andric NumColdRegionsFound++; 5070b57cec5SDimitry Andric } 5080b57cec5SDimitry Andric } 509e8d8bef9SDimitry Andric 5100b57cec5SDimitry Andric if (ColdCandidateFound) 5110b57cec5SDimitry Andric return OutliningInfo; 512e8d8bef9SDimitry Andric 5130b57cec5SDimitry Andric return std::unique_ptr<FunctionOutliningMultiRegionInfo>(); 5140b57cec5SDimitry Andric } 5150b57cec5SDimitry Andric 5160b57cec5SDimitry Andric std::unique_ptr<FunctionOutliningInfo> 517e8d8bef9SDimitry Andric PartialInlinerImpl::computeOutliningInfo(Function &F) const { 518e8d8bef9SDimitry Andric BasicBlock *EntryBlock = &F.front(); 5190b57cec5SDimitry Andric BranchInst *BR = dyn_cast<BranchInst>(EntryBlock->getTerminator()); 5200b57cec5SDimitry Andric if (!BR || BR->isUnconditional()) 5210b57cec5SDimitry Andric return std::unique_ptr<FunctionOutliningInfo>(); 5220b57cec5SDimitry Andric 5230b57cec5SDimitry Andric // Returns true if Succ is BB's successor 5240b57cec5SDimitry Andric auto IsSuccessor = [](BasicBlock *Succ, BasicBlock *BB) { 5250b57cec5SDimitry Andric return is_contained(successors(BB), Succ); 5260b57cec5SDimitry Andric }; 5270b57cec5SDimitry Andric 5280b57cec5SDimitry Andric auto IsReturnBlock = [](BasicBlock *BB) { 5290b57cec5SDimitry Andric Instruction *TI = BB->getTerminator(); 5300b57cec5SDimitry Andric return isa<ReturnInst>(TI); 5310b57cec5SDimitry Andric }; 5320b57cec5SDimitry Andric 5330b57cec5SDimitry Andric auto GetReturnBlock = [&](BasicBlock *Succ1, BasicBlock *Succ2) { 5340b57cec5SDimitry Andric if (IsReturnBlock(Succ1)) 5350b57cec5SDimitry Andric return std::make_tuple(Succ1, Succ2); 5360b57cec5SDimitry Andric if (IsReturnBlock(Succ2)) 5370b57cec5SDimitry Andric return std::make_tuple(Succ2, Succ1); 5380b57cec5SDimitry Andric 5390b57cec5SDimitry Andric return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr); 5400b57cec5SDimitry Andric }; 5410b57cec5SDimitry Andric 5420b57cec5SDimitry Andric // Detect a triangular shape: 5430b57cec5SDimitry Andric auto GetCommonSucc = [&](BasicBlock *Succ1, BasicBlock *Succ2) { 5440b57cec5SDimitry Andric if (IsSuccessor(Succ1, Succ2)) 5450b57cec5SDimitry Andric return std::make_tuple(Succ1, Succ2); 5460b57cec5SDimitry Andric if (IsSuccessor(Succ2, Succ1)) 5470b57cec5SDimitry Andric return std::make_tuple(Succ2, Succ1); 5480b57cec5SDimitry Andric 5490b57cec5SDimitry Andric return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr); 5500b57cec5SDimitry Andric }; 5510b57cec5SDimitry Andric 5520b57cec5SDimitry Andric std::unique_ptr<FunctionOutliningInfo> OutliningInfo = 5538bcb0991SDimitry Andric std::make_unique<FunctionOutliningInfo>(); 5540b57cec5SDimitry Andric 5550b57cec5SDimitry Andric BasicBlock *CurrEntry = EntryBlock; 5560b57cec5SDimitry Andric bool CandidateFound = false; 5570b57cec5SDimitry Andric do { 5580b57cec5SDimitry Andric // The number of blocks to be inlined has already reached 5590b57cec5SDimitry Andric // the limit. When MaxNumInlineBlocks is set to 0 or 1, this 5600b57cec5SDimitry Andric // disables partial inlining for the function. 561e8d8bef9SDimitry Andric if (OutliningInfo->getNumInlinedBlocks() >= MaxNumInlineBlocks) 5620b57cec5SDimitry Andric break; 5630b57cec5SDimitry Andric 5640b57cec5SDimitry Andric if (succ_size(CurrEntry) != 2) 5650b57cec5SDimitry Andric break; 5660b57cec5SDimitry Andric 5670b57cec5SDimitry Andric BasicBlock *Succ1 = *succ_begin(CurrEntry); 5680b57cec5SDimitry Andric BasicBlock *Succ2 = *(succ_begin(CurrEntry) + 1); 5690b57cec5SDimitry Andric 5700b57cec5SDimitry Andric BasicBlock *ReturnBlock, *NonReturnBlock; 5710b57cec5SDimitry Andric std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2); 5720b57cec5SDimitry Andric 5730b57cec5SDimitry Andric if (ReturnBlock) { 5740b57cec5SDimitry Andric OutliningInfo->Entries.push_back(CurrEntry); 5750b57cec5SDimitry Andric OutliningInfo->ReturnBlock = ReturnBlock; 5760b57cec5SDimitry Andric OutliningInfo->NonReturnBlock = NonReturnBlock; 5770b57cec5SDimitry Andric CandidateFound = true; 5780b57cec5SDimitry Andric break; 5790b57cec5SDimitry Andric } 5800b57cec5SDimitry Andric 581e8d8bef9SDimitry Andric BasicBlock *CommSucc, *OtherSucc; 5820b57cec5SDimitry Andric std::tie(CommSucc, OtherSucc) = GetCommonSucc(Succ1, Succ2); 5830b57cec5SDimitry Andric 5840b57cec5SDimitry Andric if (!CommSucc) 5850b57cec5SDimitry Andric break; 5860b57cec5SDimitry Andric 5870b57cec5SDimitry Andric OutliningInfo->Entries.push_back(CurrEntry); 5880b57cec5SDimitry Andric CurrEntry = OtherSucc; 5890b57cec5SDimitry Andric } while (true); 5900b57cec5SDimitry Andric 5910b57cec5SDimitry Andric if (!CandidateFound) 5920b57cec5SDimitry Andric return std::unique_ptr<FunctionOutliningInfo>(); 5930b57cec5SDimitry Andric 5944824e7fdSDimitry Andric // There should not be any successors (not in the entry set) other than 5950b57cec5SDimitry Andric // {ReturnBlock, NonReturnBlock} 596e8d8bef9SDimitry Andric assert(OutliningInfo->Entries[0] == &F.front() && 5970b57cec5SDimitry Andric "Function Entry must be the first in Entries vector"); 5980b57cec5SDimitry Andric DenseSet<BasicBlock *> Entries; 5990b57cec5SDimitry Andric for (BasicBlock *E : OutliningInfo->Entries) 6000b57cec5SDimitry Andric Entries.insert(E); 6010b57cec5SDimitry Andric 6020b57cec5SDimitry Andric // Returns true of BB has Predecessor which is not 6030b57cec5SDimitry Andric // in Entries set. 6040b57cec5SDimitry Andric auto HasNonEntryPred = [Entries](BasicBlock *BB) { 605e8d8bef9SDimitry Andric for (auto *Pred : predecessors(BB)) { 6060b57cec5SDimitry Andric if (!Entries.count(Pred)) 6070b57cec5SDimitry Andric return true; 6080b57cec5SDimitry Andric } 6090b57cec5SDimitry Andric return false; 6100b57cec5SDimitry Andric }; 6110b57cec5SDimitry Andric auto CheckAndNormalizeCandidate = 6120b57cec5SDimitry Andric [Entries, HasNonEntryPred](FunctionOutliningInfo *OutliningInfo) { 6130b57cec5SDimitry Andric for (BasicBlock *E : OutliningInfo->Entries) { 614e8d8bef9SDimitry Andric for (auto *Succ : successors(E)) { 6150b57cec5SDimitry Andric if (Entries.count(Succ)) 6160b57cec5SDimitry Andric continue; 6170b57cec5SDimitry Andric if (Succ == OutliningInfo->ReturnBlock) 6180b57cec5SDimitry Andric OutliningInfo->ReturnBlockPreds.push_back(E); 6190b57cec5SDimitry Andric else if (Succ != OutliningInfo->NonReturnBlock) 6200b57cec5SDimitry Andric return false; 6210b57cec5SDimitry Andric } 6220b57cec5SDimitry Andric // There should not be any outside incoming edges either: 6230b57cec5SDimitry Andric if (HasNonEntryPred(E)) 6240b57cec5SDimitry Andric return false; 6250b57cec5SDimitry Andric } 6260b57cec5SDimitry Andric return true; 6270b57cec5SDimitry Andric }; 6280b57cec5SDimitry Andric 6290b57cec5SDimitry Andric if (!CheckAndNormalizeCandidate(OutliningInfo.get())) 6300b57cec5SDimitry Andric return std::unique_ptr<FunctionOutliningInfo>(); 6310b57cec5SDimitry Andric 6320b57cec5SDimitry Andric // Now further growing the candidate's inlining region by 6330b57cec5SDimitry Andric // peeling off dominating blocks from the outlining region: 634e8d8bef9SDimitry Andric while (OutliningInfo->getNumInlinedBlocks() < MaxNumInlineBlocks) { 6350b57cec5SDimitry Andric BasicBlock *Cand = OutliningInfo->NonReturnBlock; 6360b57cec5SDimitry Andric if (succ_size(Cand) != 2) 6370b57cec5SDimitry Andric break; 6380b57cec5SDimitry Andric 6390b57cec5SDimitry Andric if (HasNonEntryPred(Cand)) 6400b57cec5SDimitry Andric break; 6410b57cec5SDimitry Andric 6420b57cec5SDimitry Andric BasicBlock *Succ1 = *succ_begin(Cand); 6430b57cec5SDimitry Andric BasicBlock *Succ2 = *(succ_begin(Cand) + 1); 6440b57cec5SDimitry Andric 6450b57cec5SDimitry Andric BasicBlock *ReturnBlock, *NonReturnBlock; 6460b57cec5SDimitry Andric std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2); 6470b57cec5SDimitry Andric if (!ReturnBlock || ReturnBlock != OutliningInfo->ReturnBlock) 6480b57cec5SDimitry Andric break; 6490b57cec5SDimitry Andric 6500b57cec5SDimitry Andric if (NonReturnBlock->getSinglePredecessor() != Cand) 6510b57cec5SDimitry Andric break; 6520b57cec5SDimitry Andric 6530b57cec5SDimitry Andric // Now grow and update OutlininigInfo: 6540b57cec5SDimitry Andric OutliningInfo->Entries.push_back(Cand); 6550b57cec5SDimitry Andric OutliningInfo->NonReturnBlock = NonReturnBlock; 6560b57cec5SDimitry Andric OutliningInfo->ReturnBlockPreds.push_back(Cand); 6570b57cec5SDimitry Andric Entries.insert(Cand); 6580b57cec5SDimitry Andric } 6590b57cec5SDimitry Andric 6600b57cec5SDimitry Andric return OutliningInfo; 6610b57cec5SDimitry Andric } 6620b57cec5SDimitry Andric 663480093f4SDimitry Andric // Check if there is PGO data or user annotated branch data: 664e8d8bef9SDimitry Andric static bool hasProfileData(const Function &F, const FunctionOutliningInfo &OI) { 665e8d8bef9SDimitry Andric if (F.hasProfileData()) 6660b57cec5SDimitry Andric return true; 6670b57cec5SDimitry Andric // Now check if any of the entry block has MD_prof data: 668e8d8bef9SDimitry Andric for (auto *E : OI.Entries) { 6690b57cec5SDimitry Andric BranchInst *BR = dyn_cast<BranchInst>(E->getTerminator()); 6700b57cec5SDimitry Andric if (!BR || BR->isUnconditional()) 6710b57cec5SDimitry Andric continue; 672bdd1243dSDimitry Andric if (hasBranchWeightMD(*BR)) 6730b57cec5SDimitry Andric return true; 6740b57cec5SDimitry Andric } 6750b57cec5SDimitry Andric return false; 6760b57cec5SDimitry Andric } 6770b57cec5SDimitry Andric 678e8d8bef9SDimitry Andric BranchProbability PartialInlinerImpl::getOutliningCallBBRelativeFreq( 679e8d8bef9SDimitry Andric FunctionCloner &Cloner) const { 6800b57cec5SDimitry Andric BasicBlock *OutliningCallBB = Cloner.OutlinedFunctions.back().second; 6810b57cec5SDimitry Andric auto EntryFreq = 6820b57cec5SDimitry Andric Cloner.ClonedFuncBFI->getBlockFreq(&Cloner.ClonedFunc->getEntryBlock()); 6830b57cec5SDimitry Andric auto OutliningCallFreq = 6840b57cec5SDimitry Andric Cloner.ClonedFuncBFI->getBlockFreq(OutliningCallBB); 6850b57cec5SDimitry Andric // FIXME Hackery needed because ClonedFuncBFI is based on the function BEFORE 6860b57cec5SDimitry Andric // we outlined any regions, so we may encounter situations where the 6870b57cec5SDimitry Andric // OutliningCallFreq is *slightly* bigger than the EntryFreq. 688e8d8bef9SDimitry Andric if (OutliningCallFreq.getFrequency() > EntryFreq.getFrequency()) 6890b57cec5SDimitry Andric OutliningCallFreq = EntryFreq; 690e8d8bef9SDimitry Andric 6910b57cec5SDimitry Andric auto OutlineRegionRelFreq = BranchProbability::getBranchProbability( 6920b57cec5SDimitry Andric OutliningCallFreq.getFrequency(), EntryFreq.getFrequency()); 6930b57cec5SDimitry Andric 69481ad6265SDimitry Andric if (hasProfileData(*Cloner.OrigFunc, *Cloner.ClonedOI)) 6950b57cec5SDimitry Andric return OutlineRegionRelFreq; 6960b57cec5SDimitry Andric 6970b57cec5SDimitry Andric // When profile data is not available, we need to be conservative in 6980b57cec5SDimitry Andric // estimating the overall savings. Static branch prediction can usually 6990b57cec5SDimitry Andric // guess the branch direction right (taken/non-taken), but the guessed 7000b57cec5SDimitry Andric // branch probability is usually not biased enough. In case when the 7010b57cec5SDimitry Andric // outlined region is predicted to be likely, its probability needs 7020b57cec5SDimitry Andric // to be made higher (more biased) to not under-estimate the cost of 7030b57cec5SDimitry Andric // function outlining. On the other hand, if the outlined region 7040b57cec5SDimitry Andric // is predicted to be less likely, the predicted probablity is usually 7050b57cec5SDimitry Andric // higher than the actual. For instance, the actual probability of the 7060b57cec5SDimitry Andric // less likely target is only 5%, but the guessed probablity can be 707bdd1243dSDimitry Andric // 40%. In the latter case, there is no need for further adjustment. 7080b57cec5SDimitry Andric // FIXME: add an option for this. 7090b57cec5SDimitry Andric if (OutlineRegionRelFreq < BranchProbability(45, 100)) 7100b57cec5SDimitry Andric return OutlineRegionRelFreq; 7110b57cec5SDimitry Andric 7120b57cec5SDimitry Andric OutlineRegionRelFreq = std::max( 7130b57cec5SDimitry Andric OutlineRegionRelFreq, BranchProbability(OutlineRegionFreqPercent, 100)); 7140b57cec5SDimitry Andric 7150b57cec5SDimitry Andric return OutlineRegionRelFreq; 7160b57cec5SDimitry Andric } 7170b57cec5SDimitry Andric 7180b57cec5SDimitry Andric bool PartialInlinerImpl::shouldPartialInline( 7195ffd83dbSDimitry Andric CallBase &CB, FunctionCloner &Cloner, BlockFrequency WeightedOutliningRcost, 720e8d8bef9SDimitry Andric OptimizationRemarkEmitter &ORE) const { 7210b57cec5SDimitry Andric using namespace ore; 7220b57cec5SDimitry Andric 7235ffd83dbSDimitry Andric Function *Callee = CB.getCalledFunction(); 7240b57cec5SDimitry Andric assert(Callee == Cloner.ClonedFunc); 7250b57cec5SDimitry Andric 7260b57cec5SDimitry Andric if (SkipCostAnalysis) 7275ffd83dbSDimitry Andric return isInlineViable(*Callee).isSuccess(); 7280b57cec5SDimitry Andric 7295ffd83dbSDimitry Andric Function *Caller = CB.getCaller(); 7305ffd83dbSDimitry Andric auto &CalleeTTI = GetTTI(*Callee); 7310b57cec5SDimitry Andric bool RemarksEnabled = 7320b57cec5SDimitry Andric Callee->getContext().getDiagHandlerPtr()->isMissedOptRemarkEnabled( 7330b57cec5SDimitry Andric DEBUG_TYPE); 7345ffd83dbSDimitry Andric InlineCost IC = 7355ffd83dbSDimitry Andric getInlineCost(CB, getInlineParams(), CalleeTTI, GetAssumptionCache, 7365ffd83dbSDimitry Andric GetTLI, GetBFI, &PSI, RemarksEnabled ? &ORE : nullptr); 7370b57cec5SDimitry Andric 7380b57cec5SDimitry Andric if (IC.isAlways()) { 7390b57cec5SDimitry Andric ORE.emit([&]() { 7405ffd83dbSDimitry Andric return OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", &CB) 7410b57cec5SDimitry Andric << NV("Callee", Cloner.OrigFunc) 7420b57cec5SDimitry Andric << " should always be fully inlined, not partially"; 7430b57cec5SDimitry Andric }); 7440b57cec5SDimitry Andric return false; 7450b57cec5SDimitry Andric } 7460b57cec5SDimitry Andric 7470b57cec5SDimitry Andric if (IC.isNever()) { 7480b57cec5SDimitry Andric ORE.emit([&]() { 7495ffd83dbSDimitry Andric return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", &CB) 7500b57cec5SDimitry Andric << NV("Callee", Cloner.OrigFunc) << " not partially inlined into " 7510b57cec5SDimitry Andric << NV("Caller", Caller) 7520b57cec5SDimitry Andric << " because it should never be inlined (cost=never)"; 7530b57cec5SDimitry Andric }); 7540b57cec5SDimitry Andric return false; 7550b57cec5SDimitry Andric } 7560b57cec5SDimitry Andric 7570b57cec5SDimitry Andric if (!IC) { 7580b57cec5SDimitry Andric ORE.emit([&]() { 7595ffd83dbSDimitry Andric return OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly", &CB) 7600b57cec5SDimitry Andric << NV("Callee", Cloner.OrigFunc) << " not partially inlined into " 7610b57cec5SDimitry Andric << NV("Caller", Caller) << " because too costly to inline (cost=" 7620b57cec5SDimitry Andric << NV("Cost", IC.getCost()) << ", threshold=" 7630b57cec5SDimitry Andric << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")"; 7640b57cec5SDimitry Andric }); 7650b57cec5SDimitry Andric return false; 7660b57cec5SDimitry Andric } 767*0fca6ea1SDimitry Andric const DataLayout &DL = Caller->getDataLayout(); 7680b57cec5SDimitry Andric 7690b57cec5SDimitry Andric // The savings of eliminating the call: 7705f757f3fSDimitry Andric int NonWeightedSavings = getCallsiteCost(CalleeTTI, CB, DL); 7710b57cec5SDimitry Andric BlockFrequency NormWeightedSavings(NonWeightedSavings); 7720b57cec5SDimitry Andric 7730b57cec5SDimitry Andric // Weighted saving is smaller than weighted cost, return false 7740b57cec5SDimitry Andric if (NormWeightedSavings < WeightedOutliningRcost) { 7750b57cec5SDimitry Andric ORE.emit([&]() { 7760b57cec5SDimitry Andric return OptimizationRemarkAnalysis(DEBUG_TYPE, "OutliningCallcostTooHigh", 7775ffd83dbSDimitry Andric &CB) 7780b57cec5SDimitry Andric << NV("Callee", Cloner.OrigFunc) << " not partially inlined into " 7790b57cec5SDimitry Andric << NV("Caller", Caller) << " runtime overhead (overhead=" 7800b57cec5SDimitry Andric << NV("Overhead", (unsigned)WeightedOutliningRcost.getFrequency()) 7810b57cec5SDimitry Andric << ", savings=" 7820b57cec5SDimitry Andric << NV("Savings", (unsigned)NormWeightedSavings.getFrequency()) 7830b57cec5SDimitry Andric << ")" 7840b57cec5SDimitry Andric << " of making the outlined call is too high"; 7850b57cec5SDimitry Andric }); 7860b57cec5SDimitry Andric 7870b57cec5SDimitry Andric return false; 7880b57cec5SDimitry Andric } 7890b57cec5SDimitry Andric 7900b57cec5SDimitry Andric ORE.emit([&]() { 7915ffd83dbSDimitry Andric return OptimizationRemarkAnalysis(DEBUG_TYPE, "CanBePartiallyInlined", &CB) 7920b57cec5SDimitry Andric << NV("Callee", Cloner.OrigFunc) << " can be partially inlined into " 7930b57cec5SDimitry Andric << NV("Caller", Caller) << " with cost=" << NV("Cost", IC.getCost()) 7940b57cec5SDimitry Andric << " (threshold=" 7950b57cec5SDimitry Andric << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")"; 7960b57cec5SDimitry Andric }); 7970b57cec5SDimitry Andric return true; 7980b57cec5SDimitry Andric } 7990b57cec5SDimitry Andric 8000b57cec5SDimitry Andric // TODO: Ideally we should share Inliner's InlineCost Analysis code. 8010b57cec5SDimitry Andric // For now use a simplified version. The returned 'InlineCost' will be used 8020b57cec5SDimitry Andric // to esimate the size cost as well as runtime cost of the BB. 803fe6060f1SDimitry Andric InstructionCost 804fe6060f1SDimitry Andric PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB, 805e8d8bef9SDimitry Andric TargetTransformInfo *TTI) { 806fe6060f1SDimitry Andric InstructionCost InlineCost = 0; 807*0fca6ea1SDimitry Andric const DataLayout &DL = BB->getDataLayout(); 808bdd1243dSDimitry Andric int InstrCost = InlineConstants::getInstrCost(); 8090b57cec5SDimitry Andric for (Instruction &I : BB->instructionsWithoutDebug()) { 8100b57cec5SDimitry Andric // Skip free instructions. 8110b57cec5SDimitry Andric switch (I.getOpcode()) { 8120b57cec5SDimitry Andric case Instruction::BitCast: 8130b57cec5SDimitry Andric case Instruction::PtrToInt: 8140b57cec5SDimitry Andric case Instruction::IntToPtr: 8150b57cec5SDimitry Andric case Instruction::Alloca: 8160b57cec5SDimitry Andric case Instruction::PHI: 8170b57cec5SDimitry Andric continue; 8180b57cec5SDimitry Andric case Instruction::GetElementPtr: 8190b57cec5SDimitry Andric if (cast<GetElementPtrInst>(&I)->hasAllZeroIndices()) 8200b57cec5SDimitry Andric continue; 8210b57cec5SDimitry Andric break; 8220b57cec5SDimitry Andric default: 8230b57cec5SDimitry Andric break; 8240b57cec5SDimitry Andric } 8250b57cec5SDimitry Andric 8260b57cec5SDimitry Andric if (I.isLifetimeStartOrEnd()) 8270b57cec5SDimitry Andric continue; 8280b57cec5SDimitry Andric 829e8d8bef9SDimitry Andric if (auto *II = dyn_cast<IntrinsicInst>(&I)) { 830e8d8bef9SDimitry Andric Intrinsic::ID IID = II->getIntrinsicID(); 831e8d8bef9SDimitry Andric SmallVector<Type *, 4> Tys; 832e8d8bef9SDimitry Andric FastMathFlags FMF; 833e8d8bef9SDimitry Andric for (Value *Val : II->args()) 834e8d8bef9SDimitry Andric Tys.push_back(Val->getType()); 835e8d8bef9SDimitry Andric 836e8d8bef9SDimitry Andric if (auto *FPMO = dyn_cast<FPMathOperator>(II)) 837e8d8bef9SDimitry Andric FMF = FPMO->getFastMathFlags(); 838e8d8bef9SDimitry Andric 839e8d8bef9SDimitry Andric IntrinsicCostAttributes ICA(IID, II->getType(), Tys, FMF); 840e8d8bef9SDimitry Andric InlineCost += TTI->getIntrinsicInstrCost(ICA, TTI::TCK_SizeAndLatency); 841e8d8bef9SDimitry Andric continue; 842e8d8bef9SDimitry Andric } 843e8d8bef9SDimitry Andric 8440b57cec5SDimitry Andric if (CallInst *CI = dyn_cast<CallInst>(&I)) { 8455f757f3fSDimitry Andric InlineCost += getCallsiteCost(*TTI, *CI, DL); 8460b57cec5SDimitry Andric continue; 8470b57cec5SDimitry Andric } 8480b57cec5SDimitry Andric 8490b57cec5SDimitry Andric if (InvokeInst *II = dyn_cast<InvokeInst>(&I)) { 8505f757f3fSDimitry Andric InlineCost += getCallsiteCost(*TTI, *II, DL); 8510b57cec5SDimitry Andric continue; 8520b57cec5SDimitry Andric } 8530b57cec5SDimitry Andric 8540b57cec5SDimitry Andric if (SwitchInst *SI = dyn_cast<SwitchInst>(&I)) { 855bdd1243dSDimitry Andric InlineCost += (SI->getNumCases() + 1) * InstrCost; 8560b57cec5SDimitry Andric continue; 8570b57cec5SDimitry Andric } 858bdd1243dSDimitry Andric InlineCost += InstrCost; 8590b57cec5SDimitry Andric } 860fe6060f1SDimitry Andric 8610b57cec5SDimitry Andric return InlineCost; 8620b57cec5SDimitry Andric } 8630b57cec5SDimitry Andric 864fe6060f1SDimitry Andric std::tuple<InstructionCost, InstructionCost> 865e8d8bef9SDimitry Andric PartialInlinerImpl::computeOutliningCosts(FunctionCloner &Cloner) const { 866fe6060f1SDimitry Andric InstructionCost OutliningFuncCallCost = 0, OutlinedFunctionCost = 0; 8670b57cec5SDimitry Andric for (auto FuncBBPair : Cloner.OutlinedFunctions) { 8680b57cec5SDimitry Andric Function *OutlinedFunc = FuncBBPair.first; 8690b57cec5SDimitry Andric BasicBlock* OutliningCallBB = FuncBBPair.second; 8700b57cec5SDimitry Andric // Now compute the cost of the call sequence to the outlined function 8710b57cec5SDimitry Andric // 'OutlinedFunction' in BB 'OutliningCallBB': 872e8d8bef9SDimitry Andric auto *OutlinedFuncTTI = &GetTTI(*OutlinedFunc); 873e8d8bef9SDimitry Andric OutliningFuncCallCost += 874e8d8bef9SDimitry Andric computeBBInlineCost(OutliningCallBB, OutlinedFuncTTI); 8750b57cec5SDimitry Andric 8760b57cec5SDimitry Andric // Now compute the cost of the extracted/outlined function itself: 8770b57cec5SDimitry Andric for (BasicBlock &BB : *OutlinedFunc) 878e8d8bef9SDimitry Andric OutlinedFunctionCost += computeBBInlineCost(&BB, OutlinedFuncTTI); 8790b57cec5SDimitry Andric } 8800b57cec5SDimitry Andric assert(OutlinedFunctionCost >= Cloner.OutlinedRegionCost && 8810b57cec5SDimitry Andric "Outlined function cost should be no less than the outlined region"); 8820b57cec5SDimitry Andric 8830b57cec5SDimitry Andric // The code extractor introduces a new root and exit stub blocks with 8840b57cec5SDimitry Andric // additional unconditional branches. Those branches will be eliminated 8850b57cec5SDimitry Andric // later with bb layout. The cost should be adjusted accordingly: 8860b57cec5SDimitry Andric OutlinedFunctionCost -= 887bdd1243dSDimitry Andric 2 * InlineConstants::getInstrCost() * Cloner.OutlinedFunctions.size(); 8880b57cec5SDimitry Andric 889fe6060f1SDimitry Andric InstructionCost OutliningRuntimeOverhead = 8900b57cec5SDimitry Andric OutliningFuncCallCost + 8910b57cec5SDimitry Andric (OutlinedFunctionCost - Cloner.OutlinedRegionCost) + 892fe6060f1SDimitry Andric ExtraOutliningPenalty.getValue(); 8930b57cec5SDimitry Andric 8940b57cec5SDimitry Andric return std::make_tuple(OutliningFuncCallCost, OutliningRuntimeOverhead); 8950b57cec5SDimitry Andric } 8960b57cec5SDimitry Andric 8970b57cec5SDimitry Andric // Create the callsite to profile count map which is 8980b57cec5SDimitry Andric // used to update the original function's entry count, 8990b57cec5SDimitry Andric // after the function is partially inlined into the callsite. 9000b57cec5SDimitry Andric void PartialInlinerImpl::computeCallsiteToProfCountMap( 9010b57cec5SDimitry Andric Function *DuplicateFunction, 902e8d8bef9SDimitry Andric DenseMap<User *, uint64_t> &CallSiteToProfCountMap) const { 9030b57cec5SDimitry Andric std::vector<User *> Users(DuplicateFunction->user_begin(), 9040b57cec5SDimitry Andric DuplicateFunction->user_end()); 9050b57cec5SDimitry Andric Function *CurrentCaller = nullptr; 9060b57cec5SDimitry Andric std::unique_ptr<BlockFrequencyInfo> TempBFI; 9070b57cec5SDimitry Andric BlockFrequencyInfo *CurrentCallerBFI = nullptr; 9080b57cec5SDimitry Andric 9090b57cec5SDimitry Andric auto ComputeCurrBFI = [&,this](Function *Caller) { 9100b57cec5SDimitry Andric // For the old pass manager: 9110b57cec5SDimitry Andric if (!GetBFI) { 9120b57cec5SDimitry Andric DominatorTree DT(*Caller); 9130b57cec5SDimitry Andric LoopInfo LI(DT); 9140b57cec5SDimitry Andric BranchProbabilityInfo BPI(*Caller, LI); 9150b57cec5SDimitry Andric TempBFI.reset(new BlockFrequencyInfo(*Caller, BPI, LI)); 9160b57cec5SDimitry Andric CurrentCallerBFI = TempBFI.get(); 9170b57cec5SDimitry Andric } else { 9180b57cec5SDimitry Andric // New pass manager: 9195ffd83dbSDimitry Andric CurrentCallerBFI = &(GetBFI(*Caller)); 9200b57cec5SDimitry Andric } 9210b57cec5SDimitry Andric }; 9220b57cec5SDimitry Andric 9230b57cec5SDimitry Andric for (User *User : Users) { 92404eeddc0SDimitry Andric // Don't bother with BlockAddress used by CallBr for asm goto. 92504eeddc0SDimitry Andric if (isa<BlockAddress>(User)) 92604eeddc0SDimitry Andric continue; 9275ffd83dbSDimitry Andric CallBase *CB = getSupportedCallBase(User); 9285ffd83dbSDimitry Andric Function *Caller = CB->getCaller(); 9290b57cec5SDimitry Andric if (CurrentCaller != Caller) { 9300b57cec5SDimitry Andric CurrentCaller = Caller; 9310b57cec5SDimitry Andric ComputeCurrBFI(Caller); 9320b57cec5SDimitry Andric } else { 9330b57cec5SDimitry Andric assert(CurrentCallerBFI && "CallerBFI is not set"); 9340b57cec5SDimitry Andric } 9355ffd83dbSDimitry Andric BasicBlock *CallBB = CB->getParent(); 9360b57cec5SDimitry Andric auto Count = CurrentCallerBFI->getBlockProfileCount(CallBB); 9370b57cec5SDimitry Andric if (Count) 9380b57cec5SDimitry Andric CallSiteToProfCountMap[User] = *Count; 9390b57cec5SDimitry Andric else 9400b57cec5SDimitry Andric CallSiteToProfCountMap[User] = 0; 9410b57cec5SDimitry Andric } 9420b57cec5SDimitry Andric } 9430b57cec5SDimitry Andric 9440b57cec5SDimitry Andric PartialInlinerImpl::FunctionCloner::FunctionCloner( 9450b57cec5SDimitry Andric Function *F, FunctionOutliningInfo *OI, OptimizationRemarkEmitter &ORE, 946e8d8bef9SDimitry Andric function_ref<AssumptionCache *(Function &)> LookupAC, 947e8d8bef9SDimitry Andric function_ref<TargetTransformInfo &(Function &)> GetTTI) 948e8d8bef9SDimitry Andric : OrigFunc(F), ORE(ORE), LookupAC(LookupAC), GetTTI(GetTTI) { 9498bcb0991SDimitry Andric ClonedOI = std::make_unique<FunctionOutliningInfo>(); 9500b57cec5SDimitry Andric 9510b57cec5SDimitry Andric // Clone the function, so that we can hack away on it. 9520b57cec5SDimitry Andric ValueToValueMapTy VMap; 9530b57cec5SDimitry Andric ClonedFunc = CloneFunction(F, VMap); 9540b57cec5SDimitry Andric 9550b57cec5SDimitry Andric ClonedOI->ReturnBlock = cast<BasicBlock>(VMap[OI->ReturnBlock]); 9560b57cec5SDimitry Andric ClonedOI->NonReturnBlock = cast<BasicBlock>(VMap[OI->NonReturnBlock]); 957e8d8bef9SDimitry Andric for (BasicBlock *BB : OI->Entries) 9580b57cec5SDimitry Andric ClonedOI->Entries.push_back(cast<BasicBlock>(VMap[BB])); 959e8d8bef9SDimitry Andric 9600b57cec5SDimitry Andric for (BasicBlock *E : OI->ReturnBlockPreds) { 9610b57cec5SDimitry Andric BasicBlock *NewE = cast<BasicBlock>(VMap[E]); 9620b57cec5SDimitry Andric ClonedOI->ReturnBlockPreds.push_back(NewE); 9630b57cec5SDimitry Andric } 9640b57cec5SDimitry Andric // Go ahead and update all uses to the duplicate, so that we can just 9650b57cec5SDimitry Andric // use the inliner functionality when we're done hacking. 9660b57cec5SDimitry Andric F->replaceAllUsesWith(ClonedFunc); 9670b57cec5SDimitry Andric } 9680b57cec5SDimitry Andric 9690b57cec5SDimitry Andric PartialInlinerImpl::FunctionCloner::FunctionCloner( 9700b57cec5SDimitry Andric Function *F, FunctionOutliningMultiRegionInfo *OI, 9710b57cec5SDimitry Andric OptimizationRemarkEmitter &ORE, 972e8d8bef9SDimitry Andric function_ref<AssumptionCache *(Function &)> LookupAC, 973e8d8bef9SDimitry Andric function_ref<TargetTransformInfo &(Function &)> GetTTI) 974e8d8bef9SDimitry Andric : OrigFunc(F), ORE(ORE), LookupAC(LookupAC), GetTTI(GetTTI) { 9758bcb0991SDimitry Andric ClonedOMRI = std::make_unique<FunctionOutliningMultiRegionInfo>(); 9760b57cec5SDimitry Andric 9770b57cec5SDimitry Andric // Clone the function, so that we can hack away on it. 9780b57cec5SDimitry Andric ValueToValueMapTy VMap; 9790b57cec5SDimitry Andric ClonedFunc = CloneFunction(F, VMap); 9800b57cec5SDimitry Andric 9810b57cec5SDimitry Andric // Go through all Outline Candidate Regions and update all BasicBlock 9820b57cec5SDimitry Andric // information. 98306c3fb27SDimitry Andric for (const FunctionOutliningMultiRegionInfo::OutlineRegionInfo &RegionInfo : 9840b57cec5SDimitry Andric OI->ORI) { 9850b57cec5SDimitry Andric SmallVector<BasicBlock *, 8> Region; 986e8d8bef9SDimitry Andric for (BasicBlock *BB : RegionInfo.Region) 9870b57cec5SDimitry Andric Region.push_back(cast<BasicBlock>(VMap[BB])); 988e8d8bef9SDimitry Andric 9890b57cec5SDimitry Andric BasicBlock *NewEntryBlock = cast<BasicBlock>(VMap[RegionInfo.EntryBlock]); 9900b57cec5SDimitry Andric BasicBlock *NewExitBlock = cast<BasicBlock>(VMap[RegionInfo.ExitBlock]); 9910b57cec5SDimitry Andric BasicBlock *NewReturnBlock = nullptr; 9920b57cec5SDimitry Andric if (RegionInfo.ReturnBlock) 9930b57cec5SDimitry Andric NewReturnBlock = cast<BasicBlock>(VMap[RegionInfo.ReturnBlock]); 9940b57cec5SDimitry Andric FunctionOutliningMultiRegionInfo::OutlineRegionInfo MappedRegionInfo( 9950b57cec5SDimitry Andric Region, NewEntryBlock, NewExitBlock, NewReturnBlock); 9960b57cec5SDimitry Andric ClonedOMRI->ORI.push_back(MappedRegionInfo); 9970b57cec5SDimitry Andric } 9980b57cec5SDimitry Andric // Go ahead and update all uses to the duplicate, so that we can just 9990b57cec5SDimitry Andric // use the inliner functionality when we're done hacking. 10000b57cec5SDimitry Andric F->replaceAllUsesWith(ClonedFunc); 10010b57cec5SDimitry Andric } 10020b57cec5SDimitry Andric 1003e8d8bef9SDimitry Andric void PartialInlinerImpl::FunctionCloner::normalizeReturnBlock() const { 1004e8d8bef9SDimitry Andric auto GetFirstPHI = [](BasicBlock *BB) { 10050b57cec5SDimitry Andric BasicBlock::iterator I = BB->begin(); 10060b57cec5SDimitry Andric PHINode *FirstPhi = nullptr; 10070b57cec5SDimitry Andric while (I != BB->end()) { 10080b57cec5SDimitry Andric PHINode *Phi = dyn_cast<PHINode>(I); 10090b57cec5SDimitry Andric if (!Phi) 10100b57cec5SDimitry Andric break; 10110b57cec5SDimitry Andric if (!FirstPhi) { 10120b57cec5SDimitry Andric FirstPhi = Phi; 10130b57cec5SDimitry Andric break; 10140b57cec5SDimitry Andric } 10150b57cec5SDimitry Andric } 10160b57cec5SDimitry Andric return FirstPhi; 10170b57cec5SDimitry Andric }; 10180b57cec5SDimitry Andric 10190b57cec5SDimitry Andric // Shouldn't need to normalize PHIs if we're not outlining non-early return 10200b57cec5SDimitry Andric // blocks. 10210b57cec5SDimitry Andric if (!ClonedOI) 10220b57cec5SDimitry Andric return; 10230b57cec5SDimitry Andric 10240b57cec5SDimitry Andric // Special hackery is needed with PHI nodes that have inputs from more than 10250b57cec5SDimitry Andric // one extracted block. For simplicity, just split the PHIs into a two-level 10260b57cec5SDimitry Andric // sequence of PHIs, some of which will go in the extracted region, and some 10270b57cec5SDimitry Andric // of which will go outside. 10280b57cec5SDimitry Andric BasicBlock *PreReturn = ClonedOI->ReturnBlock; 10290b57cec5SDimitry Andric // only split block when necessary: 1030e8d8bef9SDimitry Andric PHINode *FirstPhi = GetFirstPHI(PreReturn); 10310b57cec5SDimitry Andric unsigned NumPredsFromEntries = ClonedOI->ReturnBlockPreds.size(); 10320b57cec5SDimitry Andric 10330b57cec5SDimitry Andric if (!FirstPhi || FirstPhi->getNumIncomingValues() <= NumPredsFromEntries + 1) 10340b57cec5SDimitry Andric return; 10350b57cec5SDimitry Andric 10360b57cec5SDimitry Andric auto IsTrivialPhi = [](PHINode *PN) -> Value * { 1037bdd1243dSDimitry Andric if (llvm::all_equal(PN->incoming_values())) 1038bdd1243dSDimitry Andric return PN->getIncomingValue(0); 10390b57cec5SDimitry Andric return nullptr; 10400b57cec5SDimitry Andric }; 10410b57cec5SDimitry Andric 10420b57cec5SDimitry Andric ClonedOI->ReturnBlock = ClonedOI->ReturnBlock->splitBasicBlock( 10430b57cec5SDimitry Andric ClonedOI->ReturnBlock->getFirstNonPHI()->getIterator()); 10440b57cec5SDimitry Andric BasicBlock::iterator I = PreReturn->begin(); 10455f757f3fSDimitry Andric BasicBlock::iterator Ins = ClonedOI->ReturnBlock->begin(); 10460b57cec5SDimitry Andric SmallVector<Instruction *, 4> DeadPhis; 10470b57cec5SDimitry Andric while (I != PreReturn->end()) { 10480b57cec5SDimitry Andric PHINode *OldPhi = dyn_cast<PHINode>(I); 10490b57cec5SDimitry Andric if (!OldPhi) 10500b57cec5SDimitry Andric break; 10510b57cec5SDimitry Andric 10520b57cec5SDimitry Andric PHINode *RetPhi = 10535f757f3fSDimitry Andric PHINode::Create(OldPhi->getType(), NumPredsFromEntries + 1, ""); 10545f757f3fSDimitry Andric RetPhi->insertBefore(Ins); 10550b57cec5SDimitry Andric OldPhi->replaceAllUsesWith(RetPhi); 10565f757f3fSDimitry Andric Ins = ClonedOI->ReturnBlock->getFirstNonPHIIt(); 10570b57cec5SDimitry Andric 10580b57cec5SDimitry Andric RetPhi->addIncoming(&*I, PreReturn); 10590b57cec5SDimitry Andric for (BasicBlock *E : ClonedOI->ReturnBlockPreds) { 10600b57cec5SDimitry Andric RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(E), E); 10610b57cec5SDimitry Andric OldPhi->removeIncomingValue(E); 10620b57cec5SDimitry Andric } 10630b57cec5SDimitry Andric 10640b57cec5SDimitry Andric // After incoming values splitting, the old phi may become trivial. 10650b57cec5SDimitry Andric // Keeping the trivial phi can introduce definition inside the outline 10660b57cec5SDimitry Andric // region which is live-out, causing necessary overhead (load, store 10670b57cec5SDimitry Andric // arg passing etc). 10680b57cec5SDimitry Andric if (auto *OldPhiVal = IsTrivialPhi(OldPhi)) { 10690b57cec5SDimitry Andric OldPhi->replaceAllUsesWith(OldPhiVal); 10700b57cec5SDimitry Andric DeadPhis.push_back(OldPhi); 10710b57cec5SDimitry Andric } 10720b57cec5SDimitry Andric ++I; 10730b57cec5SDimitry Andric } 10740b57cec5SDimitry Andric for (auto *DP : DeadPhis) 10750b57cec5SDimitry Andric DP->eraseFromParent(); 10760b57cec5SDimitry Andric 1077e8d8bef9SDimitry Andric for (auto *E : ClonedOI->ReturnBlockPreds) 10780b57cec5SDimitry Andric E->getTerminator()->replaceUsesOfWith(PreReturn, ClonedOI->ReturnBlock); 10790b57cec5SDimitry Andric } 10800b57cec5SDimitry Andric 10810b57cec5SDimitry Andric bool PartialInlinerImpl::FunctionCloner::doMultiRegionFunctionOutlining() { 10820b57cec5SDimitry Andric 1083fe6060f1SDimitry Andric auto ComputeRegionCost = 1084fe6060f1SDimitry Andric [&](SmallVectorImpl<BasicBlock *> &Region) -> InstructionCost { 1085fe6060f1SDimitry Andric InstructionCost Cost = 0; 10860b57cec5SDimitry Andric for (BasicBlock* BB : Region) 1087e8d8bef9SDimitry Andric Cost += computeBBInlineCost(BB, &GetTTI(*BB->getParent())); 10880b57cec5SDimitry Andric return Cost; 10890b57cec5SDimitry Andric }; 10900b57cec5SDimitry Andric 10910b57cec5SDimitry Andric assert(ClonedOMRI && "Expecting OutlineInfo for multi region outline"); 10920b57cec5SDimitry Andric 10930b57cec5SDimitry Andric if (ClonedOMRI->ORI.empty()) 10940b57cec5SDimitry Andric return false; 10950b57cec5SDimitry Andric 10960b57cec5SDimitry Andric // The CodeExtractor needs a dominator tree. 10970b57cec5SDimitry Andric DominatorTree DT; 10980b57cec5SDimitry Andric DT.recalculate(*ClonedFunc); 10990b57cec5SDimitry Andric 11000b57cec5SDimitry Andric // Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo. 11010b57cec5SDimitry Andric LoopInfo LI(DT); 11020b57cec5SDimitry Andric BranchProbabilityInfo BPI(*ClonedFunc, LI); 11030b57cec5SDimitry Andric ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI)); 11040b57cec5SDimitry Andric 11058bcb0991SDimitry Andric // Cache and recycle the CodeExtractor analysis to avoid O(n^2) compile-time. 11068bcb0991SDimitry Andric CodeExtractorAnalysisCache CEAC(*ClonedFunc); 11078bcb0991SDimitry Andric 11080b57cec5SDimitry Andric SetVector<Value *> Inputs, Outputs, Sinks; 11090b57cec5SDimitry Andric for (FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegionInfo : 11100b57cec5SDimitry Andric ClonedOMRI->ORI) { 1111fe6060f1SDimitry Andric InstructionCost CurrentOutlinedRegionCost = 1112fe6060f1SDimitry Andric ComputeRegionCost(RegionInfo.Region); 11130b57cec5SDimitry Andric 11140b57cec5SDimitry Andric CodeExtractor CE(RegionInfo.Region, &DT, /*AggregateArgs*/ false, 11150b57cec5SDimitry Andric ClonedFuncBFI.get(), &BPI, 11160b57cec5SDimitry Andric LookupAC(*RegionInfo.EntryBlock->getParent()), 11170b57cec5SDimitry Andric /* AllowVarargs */ false); 11180b57cec5SDimitry Andric 11190b57cec5SDimitry Andric CE.findInputsOutputs(Inputs, Outputs, Sinks); 11200b57cec5SDimitry Andric 1121e8d8bef9SDimitry Andric LLVM_DEBUG({ 11220b57cec5SDimitry Andric dbgs() << "inputs: " << Inputs.size() << "\n"; 11230b57cec5SDimitry Andric dbgs() << "outputs: " << Outputs.size() << "\n"; 11240b57cec5SDimitry Andric for (Value *value : Inputs) 11250b57cec5SDimitry Andric dbgs() << "value used in func: " << *value << "\n"; 11260b57cec5SDimitry Andric for (Value *output : Outputs) 11270b57cec5SDimitry Andric dbgs() << "instr used in func: " << *output << "\n"; 1128e8d8bef9SDimitry Andric }); 1129e8d8bef9SDimitry Andric 11300b57cec5SDimitry Andric // Do not extract regions that have live exit variables. 11310b57cec5SDimitry Andric if (Outputs.size() > 0 && !ForceLiveExit) 11320b57cec5SDimitry Andric continue; 11330b57cec5SDimitry Andric 1134e8d8bef9SDimitry Andric if (Function *OutlinedFunc = CE.extractCodeRegion(CEAC)) { 1135e8d8bef9SDimitry Andric CallBase *OCS = PartialInlinerImpl::getOneCallSiteTo(*OutlinedFunc); 11365ffd83dbSDimitry Andric BasicBlock *OutliningCallBB = OCS->getParent(); 11370b57cec5SDimitry Andric assert(OutliningCallBB->getParent() == ClonedFunc); 11380b57cec5SDimitry Andric OutlinedFunctions.push_back(std::make_pair(OutlinedFunc,OutliningCallBB)); 11390b57cec5SDimitry Andric NumColdRegionsOutlined++; 11400b57cec5SDimitry Andric OutlinedRegionCost += CurrentOutlinedRegionCost; 11410b57cec5SDimitry Andric 11420b57cec5SDimitry Andric if (MarkOutlinedColdCC) { 11430b57cec5SDimitry Andric OutlinedFunc->setCallingConv(CallingConv::Cold); 11445ffd83dbSDimitry Andric OCS->setCallingConv(CallingConv::Cold); 11450b57cec5SDimitry Andric } 11460b57cec5SDimitry Andric } else 11470b57cec5SDimitry Andric ORE.emit([&]() { 11480b57cec5SDimitry Andric return OptimizationRemarkMissed(DEBUG_TYPE, "ExtractFailed", 11490b57cec5SDimitry Andric &RegionInfo.Region.front()->front()) 11500b57cec5SDimitry Andric << "Failed to extract region at block " 11510b57cec5SDimitry Andric << ore::NV("Block", RegionInfo.Region.front()); 11520b57cec5SDimitry Andric }); 11530b57cec5SDimitry Andric } 11540b57cec5SDimitry Andric 11550b57cec5SDimitry Andric return !OutlinedFunctions.empty(); 11560b57cec5SDimitry Andric } 11570b57cec5SDimitry Andric 11580b57cec5SDimitry Andric Function * 11590b57cec5SDimitry Andric PartialInlinerImpl::FunctionCloner::doSingleRegionFunctionOutlining() { 11600b57cec5SDimitry Andric // Returns true if the block is to be partial inlined into the caller 11610b57cec5SDimitry Andric // (i.e. not to be extracted to the out of line function) 11620b57cec5SDimitry Andric auto ToBeInlined = [&, this](BasicBlock *BB) { 11630b57cec5SDimitry Andric return BB == ClonedOI->ReturnBlock || 1164e8d8bef9SDimitry Andric llvm::is_contained(ClonedOI->Entries, BB); 11650b57cec5SDimitry Andric }; 11660b57cec5SDimitry Andric 11670b57cec5SDimitry Andric assert(ClonedOI && "Expecting OutlineInfo for single region outline"); 11680b57cec5SDimitry Andric // The CodeExtractor needs a dominator tree. 11690b57cec5SDimitry Andric DominatorTree DT; 11700b57cec5SDimitry Andric DT.recalculate(*ClonedFunc); 11710b57cec5SDimitry Andric 11720b57cec5SDimitry Andric // Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo. 11730b57cec5SDimitry Andric LoopInfo LI(DT); 11740b57cec5SDimitry Andric BranchProbabilityInfo BPI(*ClonedFunc, LI); 11750b57cec5SDimitry Andric ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI)); 11760b57cec5SDimitry Andric 11770b57cec5SDimitry Andric // Gather up the blocks that we're going to extract. 11780b57cec5SDimitry Andric std::vector<BasicBlock *> ToExtract; 1179e8d8bef9SDimitry Andric auto *ClonedFuncTTI = &GetTTI(*ClonedFunc); 11800b57cec5SDimitry Andric ToExtract.push_back(ClonedOI->NonReturnBlock); 1181e8d8bef9SDimitry Andric OutlinedRegionCost += PartialInlinerImpl::computeBBInlineCost( 1182e8d8bef9SDimitry Andric ClonedOI->NonReturnBlock, ClonedFuncTTI); 118306c3fb27SDimitry Andric for (BasicBlock *BB : depth_first(&ClonedFunc->getEntryBlock())) 118406c3fb27SDimitry Andric if (!ToBeInlined(BB) && BB != ClonedOI->NonReturnBlock) { 118506c3fb27SDimitry Andric ToExtract.push_back(BB); 11860b57cec5SDimitry Andric // FIXME: the code extractor may hoist/sink more code 11870b57cec5SDimitry Andric // into the outlined function which may make the outlining 11880b57cec5SDimitry Andric // overhead (the difference of the outlined function cost 11890b57cec5SDimitry Andric // and OutliningRegionCost) look larger. 119006c3fb27SDimitry Andric OutlinedRegionCost += computeBBInlineCost(BB, ClonedFuncTTI); 11910b57cec5SDimitry Andric } 11920b57cec5SDimitry Andric 11930b57cec5SDimitry Andric // Extract the body of the if. 11948bcb0991SDimitry Andric CodeExtractorAnalysisCache CEAC(*ClonedFunc); 11950b57cec5SDimitry Andric Function *OutlinedFunc = 11960b57cec5SDimitry Andric CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false, 11970b57cec5SDimitry Andric ClonedFuncBFI.get(), &BPI, LookupAC(*ClonedFunc), 11980b57cec5SDimitry Andric /* AllowVarargs */ true) 11998bcb0991SDimitry Andric .extractCodeRegion(CEAC); 12000b57cec5SDimitry Andric 12010b57cec5SDimitry Andric if (OutlinedFunc) { 12020b57cec5SDimitry Andric BasicBlock *OutliningCallBB = 1203e8d8bef9SDimitry Andric PartialInlinerImpl::getOneCallSiteTo(*OutlinedFunc)->getParent(); 12040b57cec5SDimitry Andric assert(OutliningCallBB->getParent() == ClonedFunc); 12050b57cec5SDimitry Andric OutlinedFunctions.push_back(std::make_pair(OutlinedFunc, OutliningCallBB)); 12060b57cec5SDimitry Andric } else 12070b57cec5SDimitry Andric ORE.emit([&]() { 12080b57cec5SDimitry Andric return OptimizationRemarkMissed(DEBUG_TYPE, "ExtractFailed", 12090b57cec5SDimitry Andric &ToExtract.front()->front()) 12100b57cec5SDimitry Andric << "Failed to extract region at block " 12110b57cec5SDimitry Andric << ore::NV("Block", ToExtract.front()); 12120b57cec5SDimitry Andric }); 12130b57cec5SDimitry Andric 12140b57cec5SDimitry Andric return OutlinedFunc; 12150b57cec5SDimitry Andric } 12160b57cec5SDimitry Andric 12170b57cec5SDimitry Andric PartialInlinerImpl::FunctionCloner::~FunctionCloner() { 12180b57cec5SDimitry Andric // Ditch the duplicate, since we're done with it, and rewrite all remaining 12190b57cec5SDimitry Andric // users (function pointers, etc.) back to the original function. 12200b57cec5SDimitry Andric ClonedFunc->replaceAllUsesWith(OrigFunc); 12210b57cec5SDimitry Andric ClonedFunc->eraseFromParent(); 12220b57cec5SDimitry Andric if (!IsFunctionInlined) { 12230b57cec5SDimitry Andric // Remove each function that was speculatively created if there is no 12240b57cec5SDimitry Andric // reference. 12250b57cec5SDimitry Andric for (auto FuncBBPair : OutlinedFunctions) { 12260b57cec5SDimitry Andric Function *Func = FuncBBPair.first; 12270b57cec5SDimitry Andric Func->eraseFromParent(); 12280b57cec5SDimitry Andric } 12290b57cec5SDimitry Andric } 12300b57cec5SDimitry Andric } 12310b57cec5SDimitry Andric 1232e8d8bef9SDimitry Andric std::pair<bool, Function *> PartialInlinerImpl::unswitchFunction(Function &F) { 1233e8d8bef9SDimitry Andric if (F.hasAddressTaken()) 12340b57cec5SDimitry Andric return {false, nullptr}; 12350b57cec5SDimitry Andric 12360b57cec5SDimitry Andric // Let inliner handle it 1237e8d8bef9SDimitry Andric if (F.hasFnAttribute(Attribute::AlwaysInline)) 12380b57cec5SDimitry Andric return {false, nullptr}; 12390b57cec5SDimitry Andric 1240e8d8bef9SDimitry Andric if (F.hasFnAttribute(Attribute::NoInline)) 12410b57cec5SDimitry Andric return {false, nullptr}; 12420b57cec5SDimitry Andric 1243e8d8bef9SDimitry Andric if (PSI.isFunctionEntryCold(&F)) 12440b57cec5SDimitry Andric return {false, nullptr}; 12450b57cec5SDimitry Andric 1246e8d8bef9SDimitry Andric if (F.users().empty()) 12470b57cec5SDimitry Andric return {false, nullptr}; 12480b57cec5SDimitry Andric 1249e8d8bef9SDimitry Andric OptimizationRemarkEmitter ORE(&F); 12500b57cec5SDimitry Andric 12510b57cec5SDimitry Andric // Only try to outline cold regions if we have a profile summary, which 12520b57cec5SDimitry Andric // implies we have profiling information. 1253e8d8bef9SDimitry Andric if (PSI.hasProfileSummary() && F.hasProfileData() && 12540b57cec5SDimitry Andric !DisableMultiRegionPartialInline) { 12550b57cec5SDimitry Andric std::unique_ptr<FunctionOutliningMultiRegionInfo> OMRI = 12560b57cec5SDimitry Andric computeOutliningColdRegionsInfo(F, ORE); 12570b57cec5SDimitry Andric if (OMRI) { 1258e8d8bef9SDimitry Andric FunctionCloner Cloner(&F, OMRI.get(), ORE, LookupAssumptionCache, GetTTI); 12590b57cec5SDimitry Andric 1260e8d8bef9SDimitry Andric LLVM_DEBUG({ 12615ffd83dbSDimitry Andric dbgs() << "HotCountThreshold = " << PSI.getHotCountThreshold() << "\n"; 12625ffd83dbSDimitry Andric dbgs() << "ColdCountThreshold = " << PSI.getColdCountThreshold() 12630b57cec5SDimitry Andric << "\n"; 1264e8d8bef9SDimitry Andric }); 1265e8d8bef9SDimitry Andric 12660b57cec5SDimitry Andric bool DidOutline = Cloner.doMultiRegionFunctionOutlining(); 12670b57cec5SDimitry Andric 12680b57cec5SDimitry Andric if (DidOutline) { 1269e8d8bef9SDimitry Andric LLVM_DEBUG({ 12700b57cec5SDimitry Andric dbgs() << ">>>>>> Outlined (Cloned) Function >>>>>>\n"; 12710b57cec5SDimitry Andric Cloner.ClonedFunc->print(dbgs()); 12720b57cec5SDimitry Andric dbgs() << "<<<<<< Outlined (Cloned) Function <<<<<<\n"; 1273e8d8bef9SDimitry Andric }); 12740b57cec5SDimitry Andric 12750b57cec5SDimitry Andric if (tryPartialInline(Cloner)) 12760b57cec5SDimitry Andric return {true, nullptr}; 12770b57cec5SDimitry Andric } 12780b57cec5SDimitry Andric } 12790b57cec5SDimitry Andric } 12800b57cec5SDimitry Andric 12810b57cec5SDimitry Andric // Fall-thru to regular partial inlining if we: 12820b57cec5SDimitry Andric // i) can't find any cold regions to outline, or 12830b57cec5SDimitry Andric // ii) can't inline the outlined function anywhere. 12840b57cec5SDimitry Andric std::unique_ptr<FunctionOutliningInfo> OI = computeOutliningInfo(F); 12850b57cec5SDimitry Andric if (!OI) 12860b57cec5SDimitry Andric return {false, nullptr}; 12870b57cec5SDimitry Andric 1288e8d8bef9SDimitry Andric FunctionCloner Cloner(&F, OI.get(), ORE, LookupAssumptionCache, GetTTI); 1289e8d8bef9SDimitry Andric Cloner.normalizeReturnBlock(); 12900b57cec5SDimitry Andric 12910b57cec5SDimitry Andric Function *OutlinedFunction = Cloner.doSingleRegionFunctionOutlining(); 12920b57cec5SDimitry Andric 12930b57cec5SDimitry Andric if (!OutlinedFunction) 12940b57cec5SDimitry Andric return {false, nullptr}; 12950b57cec5SDimitry Andric 1296e8d8bef9SDimitry Andric if (tryPartialInline(Cloner)) 12970b57cec5SDimitry Andric return {true, OutlinedFunction}; 12980b57cec5SDimitry Andric 12990b57cec5SDimitry Andric return {false, nullptr}; 13000b57cec5SDimitry Andric } 13010b57cec5SDimitry Andric 13020b57cec5SDimitry Andric bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) { 13030b57cec5SDimitry Andric if (Cloner.OutlinedFunctions.empty()) 13040b57cec5SDimitry Andric return false; 13050b57cec5SDimitry Andric 1306fe6060f1SDimitry Andric auto OutliningCosts = computeOutliningCosts(Cloner); 1307fe6060f1SDimitry Andric 1308bdd1243dSDimitry Andric InstructionCost SizeCost = std::get<0>(OutliningCosts); 1309bdd1243dSDimitry Andric InstructionCost NonWeightedRcost = std::get<1>(OutliningCosts); 1310bdd1243dSDimitry Andric 1311bdd1243dSDimitry Andric assert(SizeCost.isValid() && NonWeightedRcost.isValid() && 1312bdd1243dSDimitry Andric "Expected valid costs"); 13130b57cec5SDimitry Andric 13140b57cec5SDimitry Andric // Only calculate RelativeToEntryFreq when we are doing single region 13150b57cec5SDimitry Andric // outlining. 13160b57cec5SDimitry Andric BranchProbability RelativeToEntryFreq; 1317e8d8bef9SDimitry Andric if (Cloner.ClonedOI) 13180b57cec5SDimitry Andric RelativeToEntryFreq = getOutliningCallBBRelativeFreq(Cloner); 1319e8d8bef9SDimitry Andric else 13200b57cec5SDimitry Andric // RelativeToEntryFreq doesn't make sense when we have more than one 13210b57cec5SDimitry Andric // outlined call because each call will have a different relative frequency 13220b57cec5SDimitry Andric // to the entry block. We can consider using the average, but the 13230b57cec5SDimitry Andric // usefulness of that information is questionable. For now, assume we never 13240b57cec5SDimitry Andric // execute the calls to outlined functions. 13250b57cec5SDimitry Andric RelativeToEntryFreq = BranchProbability(0, 1); 13260b57cec5SDimitry Andric 1327bdd1243dSDimitry Andric BlockFrequency WeightedRcost = 1328bdd1243dSDimitry Andric BlockFrequency(*NonWeightedRcost.getValue()) * RelativeToEntryFreq; 13290b57cec5SDimitry Andric 13300b57cec5SDimitry Andric // The call sequence(s) to the outlined function(s) are larger than the sum of 13310b57cec5SDimitry Andric // the original outlined region size(s), it does not increase the chances of 13320b57cec5SDimitry Andric // inlining the function with outlining (The inliner uses the size increase to 13330b57cec5SDimitry Andric // model the cost of inlining a callee). 13340b57cec5SDimitry Andric if (!SkipCostAnalysis && Cloner.OutlinedRegionCost < SizeCost) { 13350b57cec5SDimitry Andric OptimizationRemarkEmitter OrigFuncORE(Cloner.OrigFunc); 13360b57cec5SDimitry Andric DebugLoc DLoc; 13370b57cec5SDimitry Andric BasicBlock *Block; 1338e8d8bef9SDimitry Andric std::tie(DLoc, Block) = getOneDebugLoc(*Cloner.ClonedFunc); 13390b57cec5SDimitry Andric OrigFuncORE.emit([&]() { 13400b57cec5SDimitry Andric return OptimizationRemarkAnalysis(DEBUG_TYPE, "OutlineRegionTooSmall", 13410b57cec5SDimitry Andric DLoc, Block) 13420b57cec5SDimitry Andric << ore::NV("Function", Cloner.OrigFunc) 13430b57cec5SDimitry Andric << " not partially inlined into callers (Original Size = " 13440b57cec5SDimitry Andric << ore::NV("OutlinedRegionOriginalSize", Cloner.OutlinedRegionCost) 13450b57cec5SDimitry Andric << ", Size of call sequence to outlined function = " 13460b57cec5SDimitry Andric << ore::NV("NewSize", SizeCost) << ")"; 13470b57cec5SDimitry Andric }); 13480b57cec5SDimitry Andric return false; 13490b57cec5SDimitry Andric } 13500b57cec5SDimitry Andric 13518bcb0991SDimitry Andric assert(Cloner.OrigFunc->users().empty() && 13520b57cec5SDimitry Andric "F's users should all be replaced!"); 13530b57cec5SDimitry Andric 13540b57cec5SDimitry Andric std::vector<User *> Users(Cloner.ClonedFunc->user_begin(), 13550b57cec5SDimitry Andric Cloner.ClonedFunc->user_end()); 13560b57cec5SDimitry Andric 13570b57cec5SDimitry Andric DenseMap<User *, uint64_t> CallSiteToProfCountMap; 13580b57cec5SDimitry Andric auto CalleeEntryCount = Cloner.OrigFunc->getEntryCount(); 13590b57cec5SDimitry Andric if (CalleeEntryCount) 13600b57cec5SDimitry Andric computeCallsiteToProfCountMap(Cloner.ClonedFunc, CallSiteToProfCountMap); 13610b57cec5SDimitry Andric 13620b57cec5SDimitry Andric uint64_t CalleeEntryCountV = 1363349cc55cSDimitry Andric (CalleeEntryCount ? CalleeEntryCount->getCount() : 0); 13640b57cec5SDimitry Andric 13650b57cec5SDimitry Andric bool AnyInline = false; 13660b57cec5SDimitry Andric for (User *User : Users) { 136704eeddc0SDimitry Andric // Don't bother with BlockAddress used by CallBr for asm goto. 136804eeddc0SDimitry Andric if (isa<BlockAddress>(User)) 136904eeddc0SDimitry Andric continue; 137004eeddc0SDimitry Andric 13715ffd83dbSDimitry Andric CallBase *CB = getSupportedCallBase(User); 13720b57cec5SDimitry Andric 1373e8d8bef9SDimitry Andric if (isLimitReached()) 13740b57cec5SDimitry Andric continue; 13750b57cec5SDimitry Andric 13765ffd83dbSDimitry Andric OptimizationRemarkEmitter CallerORE(CB->getCaller()); 13775ffd83dbSDimitry Andric if (!shouldPartialInline(*CB, Cloner, WeightedRcost, CallerORE)) 13780b57cec5SDimitry Andric continue; 13790b57cec5SDimitry Andric 13800b57cec5SDimitry Andric // Construct remark before doing the inlining, as after successful inlining 13810b57cec5SDimitry Andric // the callsite is removed. 13825ffd83dbSDimitry Andric OptimizationRemark OR(DEBUG_TYPE, "PartiallyInlined", CB); 13830b57cec5SDimitry Andric OR << ore::NV("Callee", Cloner.OrigFunc) << " partially inlined into " 13845ffd83dbSDimitry Andric << ore::NV("Caller", CB->getCaller()); 13850b57cec5SDimitry Andric 138606c3fb27SDimitry Andric InlineFunctionInfo IFI(GetAssumptionCache, &PSI); 13870b57cec5SDimitry Andric // We can only forward varargs when we outlined a single region, else we 13880b57cec5SDimitry Andric // bail on vararg functions. 1389bdd1243dSDimitry Andric if (!InlineFunction(*CB, IFI, /*MergeAttributes=*/false, nullptr, true, 13900b57cec5SDimitry Andric (Cloner.ClonedOI ? Cloner.OutlinedFunctions.back().first 13915ffd83dbSDimitry Andric : nullptr)) 13925ffd83dbSDimitry Andric .isSuccess()) 13930b57cec5SDimitry Andric continue; 13940b57cec5SDimitry Andric 13950b57cec5SDimitry Andric CallerORE.emit(OR); 13960b57cec5SDimitry Andric 13970b57cec5SDimitry Andric // Now update the entry count: 13980b57cec5SDimitry Andric if (CalleeEntryCountV && CallSiteToProfCountMap.count(User)) { 13990b57cec5SDimitry Andric uint64_t CallSiteCount = CallSiteToProfCountMap[User]; 14000b57cec5SDimitry Andric CalleeEntryCountV -= std::min(CalleeEntryCountV, CallSiteCount); 14010b57cec5SDimitry Andric } 14020b57cec5SDimitry Andric 14030b57cec5SDimitry Andric AnyInline = true; 14040b57cec5SDimitry Andric NumPartialInlining++; 14050b57cec5SDimitry Andric // Update the stats 14060b57cec5SDimitry Andric if (Cloner.ClonedOI) 14070b57cec5SDimitry Andric NumPartialInlined++; 14080b57cec5SDimitry Andric else 14090b57cec5SDimitry Andric NumColdOutlinePartialInlined++; 14100b57cec5SDimitry Andric } 14110b57cec5SDimitry Andric 14120b57cec5SDimitry Andric if (AnyInline) { 14130b57cec5SDimitry Andric Cloner.IsFunctionInlined = true; 14140b57cec5SDimitry Andric if (CalleeEntryCount) 1415349cc55cSDimitry Andric Cloner.OrigFunc->setEntryCount(Function::ProfileCount( 1416349cc55cSDimitry Andric CalleeEntryCountV, CalleeEntryCount->getType())); 14170b57cec5SDimitry Andric OptimizationRemarkEmitter OrigFuncORE(Cloner.OrigFunc); 14180b57cec5SDimitry Andric OrigFuncORE.emit([&]() { 14190b57cec5SDimitry Andric return OptimizationRemark(DEBUG_TYPE, "PartiallyInlined", Cloner.OrigFunc) 14200b57cec5SDimitry Andric << "Partially inlined into at least one caller"; 14210b57cec5SDimitry Andric }); 14220b57cec5SDimitry Andric } 14230b57cec5SDimitry Andric 14240b57cec5SDimitry Andric return AnyInline; 14250b57cec5SDimitry Andric } 14260b57cec5SDimitry Andric 14270b57cec5SDimitry Andric bool PartialInlinerImpl::run(Module &M) { 14280b57cec5SDimitry Andric if (DisablePartialInlining) 14290b57cec5SDimitry Andric return false; 14300b57cec5SDimitry Andric 14310b57cec5SDimitry Andric std::vector<Function *> Worklist; 14320b57cec5SDimitry Andric Worklist.reserve(M.size()); 14330b57cec5SDimitry Andric for (Function &F : M) 14340b57cec5SDimitry Andric if (!F.use_empty() && !F.isDeclaration()) 14350b57cec5SDimitry Andric Worklist.push_back(&F); 14360b57cec5SDimitry Andric 14370b57cec5SDimitry Andric bool Changed = false; 14380b57cec5SDimitry Andric while (!Worklist.empty()) { 14390b57cec5SDimitry Andric Function *CurrFunc = Worklist.back(); 14400b57cec5SDimitry Andric Worklist.pop_back(); 14410b57cec5SDimitry Andric 14420b57cec5SDimitry Andric if (CurrFunc->use_empty()) 14430b57cec5SDimitry Andric continue; 14440b57cec5SDimitry Andric 1445e8d8bef9SDimitry Andric std::pair<bool, Function *> Result = unswitchFunction(*CurrFunc); 14460b57cec5SDimitry Andric if (Result.second) 14470b57cec5SDimitry Andric Worklist.push_back(Result.second); 14480b57cec5SDimitry Andric Changed |= Result.first; 14490b57cec5SDimitry Andric } 14500b57cec5SDimitry Andric 14510b57cec5SDimitry Andric return Changed; 14520b57cec5SDimitry Andric } 14530b57cec5SDimitry Andric 14540b57cec5SDimitry Andric PreservedAnalyses PartialInlinerPass::run(Module &M, 14550b57cec5SDimitry Andric ModuleAnalysisManager &AM) { 14560b57cec5SDimitry Andric auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 14570b57cec5SDimitry Andric 14585ffd83dbSDimitry Andric auto GetAssumptionCache = [&FAM](Function &F) -> AssumptionCache & { 14590b57cec5SDimitry Andric return FAM.getResult<AssumptionAnalysis>(F); 14600b57cec5SDimitry Andric }; 14610b57cec5SDimitry Andric 14620b57cec5SDimitry Andric auto LookupAssumptionCache = [&FAM](Function &F) -> AssumptionCache * { 14630b57cec5SDimitry Andric return FAM.getCachedResult<AssumptionAnalysis>(F); 14640b57cec5SDimitry Andric }; 14650b57cec5SDimitry Andric 14665ffd83dbSDimitry Andric auto GetBFI = [&FAM](Function &F) -> BlockFrequencyInfo & { 14670b57cec5SDimitry Andric return FAM.getResult<BlockFrequencyAnalysis>(F); 14680b57cec5SDimitry Andric }; 14690b57cec5SDimitry Andric 14705ffd83dbSDimitry Andric auto GetTTI = [&FAM](Function &F) -> TargetTransformInfo & { 14710b57cec5SDimitry Andric return FAM.getResult<TargetIRAnalysis>(F); 14720b57cec5SDimitry Andric }; 14730b57cec5SDimitry Andric 14745ffd83dbSDimitry Andric auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & { 14755ffd83dbSDimitry Andric return FAM.getResult<TargetLibraryAnalysis>(F); 14765ffd83dbSDimitry Andric }; 14770b57cec5SDimitry Andric 14785ffd83dbSDimitry Andric ProfileSummaryInfo &PSI = AM.getResult<ProfileSummaryAnalysis>(M); 14795ffd83dbSDimitry Andric 14805ffd83dbSDimitry Andric if (PartialInlinerImpl(GetAssumptionCache, LookupAssumptionCache, GetTTI, 14815ffd83dbSDimitry Andric GetTLI, PSI, GetBFI) 14820b57cec5SDimitry Andric .run(M)) 14830b57cec5SDimitry Andric return PreservedAnalyses::none(); 14840b57cec5SDimitry Andric return PreservedAnalyses::all(); 14850b57cec5SDimitry Andric } 1486