10b57cec5SDimitry Andric //===- InlineCost.cpp - Cost analysis for inliner -------------------------===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric // This file implements inline cost analysis. 100b57cec5SDimitry Andric // 110b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 120b57cec5SDimitry Andric 130b57cec5SDimitry Andric #include "llvm/Analysis/InlineCost.h" 140b57cec5SDimitry Andric #include "llvm/ADT/STLExtras.h" 150b57cec5SDimitry Andric #include "llvm/ADT/SetVector.h" 160b57cec5SDimitry Andric #include "llvm/ADT/SmallPtrSet.h" 170b57cec5SDimitry Andric #include "llvm/ADT/SmallVector.h" 180b57cec5SDimitry Andric #include "llvm/ADT/Statistic.h" 190b57cec5SDimitry Andric #include "llvm/Analysis/AssumptionCache.h" 200b57cec5SDimitry Andric #include "llvm/Analysis/BlockFrequencyInfo.h" 210b57cec5SDimitry Andric #include "llvm/Analysis/CodeMetrics.h" 220b57cec5SDimitry Andric #include "llvm/Analysis/ConstantFolding.h" 230b57cec5SDimitry Andric #include "llvm/Analysis/InstructionSimplify.h" 240b57cec5SDimitry Andric #include "llvm/Analysis/LoopInfo.h" 25bdd1243dSDimitry Andric #include "llvm/Analysis/MemoryBuiltins.h" 2681ad6265SDimitry Andric #include "llvm/Analysis/OptimizationRemarkEmitter.h" 270b57cec5SDimitry Andric #include "llvm/Analysis/ProfileSummaryInfo.h" 285ffd83dbSDimitry Andric #include "llvm/Analysis/TargetLibraryInfo.h" 290b57cec5SDimitry Andric #include "llvm/Analysis/TargetTransformInfo.h" 300b57cec5SDimitry Andric #include "llvm/Analysis/ValueTracking.h" 310b57cec5SDimitry Andric #include "llvm/Config/llvm-config.h" 325ffd83dbSDimitry Andric #include "llvm/IR/AssemblyAnnotationWriter.h" 330b57cec5SDimitry Andric #include "llvm/IR/CallingConv.h" 340b57cec5SDimitry Andric #include "llvm/IR/DataLayout.h" 350b57cec5SDimitry Andric #include "llvm/IR/Dominators.h" 360b57cec5SDimitry Andric #include "llvm/IR/GetElementPtrTypeIterator.h" 370b57cec5SDimitry Andric #include "llvm/IR/GlobalAlias.h" 380b57cec5SDimitry Andric #include "llvm/IR/InstVisitor.h" 390b57cec5SDimitry Andric #include "llvm/IR/IntrinsicInst.h" 400b57cec5SDimitry Andric #include "llvm/IR/Operator.h" 410b57cec5SDimitry Andric #include "llvm/IR/PatternMatch.h" 42480093f4SDimitry Andric #include "llvm/Support/CommandLine.h" 430b57cec5SDimitry Andric #include "llvm/Support/Debug.h" 445ffd83dbSDimitry Andric #include "llvm/Support/FormattedStream.h" 450b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h" 46bdd1243dSDimitry Andric #include <climits> 4781ad6265SDimitry Andric #include <limits> 48bdd1243dSDimitry Andric #include <optional> 490b57cec5SDimitry Andric 500b57cec5SDimitry Andric using namespace llvm; 510b57cec5SDimitry Andric 520b57cec5SDimitry Andric #define DEBUG_TYPE "inline-cost" 530b57cec5SDimitry Andric 540b57cec5SDimitry Andric STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed"); 550b57cec5SDimitry Andric 565ffd83dbSDimitry Andric static cl::opt<int> 575ffd83dbSDimitry Andric DefaultThreshold("inlinedefault-threshold", cl::Hidden, cl::init(225), 585ffd83dbSDimitry Andric cl::desc("Default amount of inlining to perform")); 595ffd83dbSDimitry Andric 6081ad6265SDimitry Andric // We introduce this option since there is a minor compile-time win by avoiding 6181ad6265SDimitry Andric // addition of TTI attributes (target-features in particular) to inline 6281ad6265SDimitry Andric // candidates when they are guaranteed to be the same as top level methods in 6381ad6265SDimitry Andric // some use cases. If we avoid adding the attribute, we need an option to avoid 6481ad6265SDimitry Andric // checking these attributes. 6581ad6265SDimitry Andric static cl::opt<bool> IgnoreTTIInlineCompatible( 6681ad6265SDimitry Andric "ignore-tti-inline-compatible", cl::Hidden, cl::init(false), 6781ad6265SDimitry Andric cl::desc("Ignore TTI attributes compatibility check between callee/caller " 6881ad6265SDimitry Andric "during inline cost calculation")); 6981ad6265SDimitry Andric 705ffd83dbSDimitry Andric static cl::opt<bool> PrintInstructionComments( 715ffd83dbSDimitry Andric "print-instruction-comments", cl::Hidden, cl::init(false), 725ffd83dbSDimitry Andric cl::desc("Prints comments for instruction based on inline cost analysis")); 735ffd83dbSDimitry Andric 740b57cec5SDimitry Andric static cl::opt<int> InlineThreshold( 7581ad6265SDimitry Andric "inline-threshold", cl::Hidden, cl::init(225), 760b57cec5SDimitry Andric cl::desc("Control the amount of inlining to perform (default = 225)")); 770b57cec5SDimitry Andric 780b57cec5SDimitry Andric static cl::opt<int> HintThreshold( 7981ad6265SDimitry Andric "inlinehint-threshold", cl::Hidden, cl::init(325), 800b57cec5SDimitry Andric cl::desc("Threshold for inlining functions with inline hint")); 810b57cec5SDimitry Andric 820b57cec5SDimitry Andric static cl::opt<int> 830b57cec5SDimitry Andric ColdCallSiteThreshold("inline-cold-callsite-threshold", cl::Hidden, 8481ad6265SDimitry Andric cl::init(45), 850b57cec5SDimitry Andric cl::desc("Threshold for inlining cold callsites")); 860b57cec5SDimitry Andric 87e8d8bef9SDimitry Andric static cl::opt<bool> InlineEnableCostBenefitAnalysis( 88e8d8bef9SDimitry Andric "inline-enable-cost-benefit-analysis", cl::Hidden, cl::init(false), 89e8d8bef9SDimitry Andric cl::desc("Enable the cost-benefit analysis for the inliner")); 90e8d8bef9SDimitry Andric 915f757f3fSDimitry Andric // InlineSavingsMultiplier overrides per TTI multipliers iff it is 925f757f3fSDimitry Andric // specified explicitly in command line options. This option is exposed 935f757f3fSDimitry Andric // for tuning and testing. 94e8d8bef9SDimitry Andric static cl::opt<int> InlineSavingsMultiplier( 9581ad6265SDimitry Andric "inline-savings-multiplier", cl::Hidden, cl::init(8), 96e8d8bef9SDimitry Andric cl::desc("Multiplier to multiply cycle savings by during inlining")); 97e8d8bef9SDimitry Andric 985f757f3fSDimitry Andric // InlineSavingsProfitableMultiplier overrides per TTI multipliers iff it is 995f757f3fSDimitry Andric // specified explicitly in command line options. This option is exposed 1005f757f3fSDimitry Andric // for tuning and testing. 1015f757f3fSDimitry Andric static cl::opt<int> InlineSavingsProfitableMultiplier( 1025f757f3fSDimitry Andric "inline-savings-profitable-multiplier", cl::Hidden, cl::init(4), 1035f757f3fSDimitry Andric cl::desc("A multiplier on top of cycle savings to decide whether the " 1045f757f3fSDimitry Andric "savings won't justify the cost")); 1055f757f3fSDimitry Andric 106e8d8bef9SDimitry Andric static cl::opt<int> 107e8d8bef9SDimitry Andric InlineSizeAllowance("inline-size-allowance", cl::Hidden, cl::init(100), 108e8d8bef9SDimitry Andric cl::desc("The maximum size of a callee that get's " 109e8d8bef9SDimitry Andric "inlined without sufficient cycle savings")); 110e8d8bef9SDimitry Andric 1110b57cec5SDimitry Andric // We introduce this threshold to help performance of instrumentation based 1120b57cec5SDimitry Andric // PGO before we actually hook up inliner with analysis passes such as BPI and 1130b57cec5SDimitry Andric // BFI. 1140b57cec5SDimitry Andric static cl::opt<int> ColdThreshold( 11581ad6265SDimitry Andric "inlinecold-threshold", cl::Hidden, cl::init(45), 1160b57cec5SDimitry Andric cl::desc("Threshold for inlining functions with cold attribute")); 1170b57cec5SDimitry Andric 1180b57cec5SDimitry Andric static cl::opt<int> 1190b57cec5SDimitry Andric HotCallSiteThreshold("hot-callsite-threshold", cl::Hidden, cl::init(3000), 1200b57cec5SDimitry Andric cl::desc("Threshold for hot callsites ")); 1210b57cec5SDimitry Andric 1220b57cec5SDimitry Andric static cl::opt<int> LocallyHotCallSiteThreshold( 12381ad6265SDimitry Andric "locally-hot-callsite-threshold", cl::Hidden, cl::init(525), 1240b57cec5SDimitry Andric cl::desc("Threshold for locally hot callsites ")); 1250b57cec5SDimitry Andric 1260b57cec5SDimitry Andric static cl::opt<int> ColdCallSiteRelFreq( 12781ad6265SDimitry Andric "cold-callsite-rel-freq", cl::Hidden, cl::init(2), 1280b57cec5SDimitry Andric cl::desc("Maximum block frequency, expressed as a percentage of caller's " 1290b57cec5SDimitry Andric "entry frequency, for a callsite to be cold in the absence of " 1300b57cec5SDimitry Andric "profile information.")); 1310b57cec5SDimitry Andric 1325f757f3fSDimitry Andric static cl::opt<uint64_t> HotCallSiteRelFreq( 13381ad6265SDimitry Andric "hot-callsite-rel-freq", cl::Hidden, cl::init(60), 1340b57cec5SDimitry Andric cl::desc("Minimum block frequency, expressed as a multiple of caller's " 1350b57cec5SDimitry Andric "entry frequency, for a callsite to be hot in the absence of " 1360b57cec5SDimitry Andric "profile information.")); 1370b57cec5SDimitry Andric 138bdd1243dSDimitry Andric static cl::opt<int> 139bdd1243dSDimitry Andric InstrCost("inline-instr-cost", cl::Hidden, cl::init(5), 140bdd1243dSDimitry Andric cl::desc("Cost of a single instruction when inlining")); 141bdd1243dSDimitry Andric 142bdd1243dSDimitry Andric static cl::opt<int> 143bdd1243dSDimitry Andric MemAccessCost("inline-memaccess-cost", cl::Hidden, cl::init(0), 144bdd1243dSDimitry Andric cl::desc("Cost of load/store instruction when inlining")); 145bdd1243dSDimitry Andric 146fe6060f1SDimitry Andric static cl::opt<int> CallPenalty( 147fe6060f1SDimitry Andric "inline-call-penalty", cl::Hidden, cl::init(25), 148fe6060f1SDimitry Andric cl::desc("Call penalty that is applied per callsite when inlining")); 149fe6060f1SDimitry Andric 15081ad6265SDimitry Andric static cl::opt<size_t> 15181ad6265SDimitry Andric StackSizeThreshold("inline-max-stacksize", cl::Hidden, 15281ad6265SDimitry Andric cl::init(std::numeric_limits<size_t>::max()), 15381ad6265SDimitry Andric cl::desc("Do not inline functions with a stack size " 15481ad6265SDimitry Andric "that exceeds the specified limit")); 15581ad6265SDimitry Andric 15606c3fb27SDimitry Andric static cl::opt<size_t> RecurStackSizeThreshold( 15706c3fb27SDimitry Andric "recursive-inline-max-stacksize", cl::Hidden, 158753f127fSDimitry Andric cl::init(InlineConstants::TotalAllocaSizeRecursiveCaller), 159753f127fSDimitry Andric cl::desc("Do not inline recursive functions with a stack " 160753f127fSDimitry Andric "size that exceeds the specified limit")); 161753f127fSDimitry Andric 1620b57cec5SDimitry Andric static cl::opt<bool> OptComputeFullInlineCost( 16381ad6265SDimitry Andric "inline-cost-full", cl::Hidden, 1640b57cec5SDimitry Andric cl::desc("Compute the full inline cost of a call site even when the cost " 1650b57cec5SDimitry Andric "exceeds the threshold.")); 1660b57cec5SDimitry Andric 1675ffd83dbSDimitry Andric static cl::opt<bool> InlineCallerSupersetNoBuiltin( 1685ffd83dbSDimitry Andric "inline-caller-superset-nobuiltin", cl::Hidden, cl::init(true), 1695ffd83dbSDimitry Andric cl::desc("Allow inlining when caller has a superset of callee's nobuiltin " 1705ffd83dbSDimitry Andric "attributes.")); 1715ffd83dbSDimitry Andric 1725ffd83dbSDimitry Andric static cl::opt<bool> DisableGEPConstOperand( 1735ffd83dbSDimitry Andric "disable-gep-const-evaluation", cl::Hidden, cl::init(false), 1745ffd83dbSDimitry Andric cl::desc("Disables evaluation of GetElementPtr with constant operands")); 1755ffd83dbSDimitry Andric 176fb03ea46SDimitry Andric namespace llvm { 177bdd1243dSDimitry Andric std::optional<int> getStringFnAttrAsInt(const Attribute &Attr) { 178bdd1243dSDimitry Andric if (Attr.isValid()) { 179bdd1243dSDimitry Andric int AttrValue = 0; 180bdd1243dSDimitry Andric if (!Attr.getValueAsString().getAsInteger(10, AttrValue)) 181349cc55cSDimitry Andric return AttrValue; 182349cc55cSDimitry Andric } 183bdd1243dSDimitry Andric return std::nullopt; 184bdd1243dSDimitry Andric } 185bdd1243dSDimitry Andric 186bdd1243dSDimitry Andric std::optional<int> getStringFnAttrAsInt(CallBase &CB, StringRef AttrKind) { 187bdd1243dSDimitry Andric return getStringFnAttrAsInt(CB.getFnAttr(AttrKind)); 188bdd1243dSDimitry Andric } 189bdd1243dSDimitry Andric 190bdd1243dSDimitry Andric std::optional<int> getStringFnAttrAsInt(Function *F, StringRef AttrKind) { 191bdd1243dSDimitry Andric return getStringFnAttrAsInt(F->getFnAttribute(AttrKind)); 192bdd1243dSDimitry Andric } 193bdd1243dSDimitry Andric 194bdd1243dSDimitry Andric namespace InlineConstants { 195bdd1243dSDimitry Andric int getInstrCost() { return InstrCost; } 196bdd1243dSDimitry Andric 197bdd1243dSDimitry Andric } // namespace InlineConstants 198bdd1243dSDimitry Andric 199fb03ea46SDimitry Andric } // namespace llvm 200fb03ea46SDimitry Andric 201fb03ea46SDimitry Andric namespace { 202fb03ea46SDimitry Andric class InlineCostCallAnalyzer; 203349cc55cSDimitry Andric 2045ffd83dbSDimitry Andric // This struct is used to store information about inline cost of a 2055ffd83dbSDimitry Andric // particular instruction 2065ffd83dbSDimitry Andric struct InstructionCostDetail { 2075ffd83dbSDimitry Andric int CostBefore = 0; 2085ffd83dbSDimitry Andric int CostAfter = 0; 2095ffd83dbSDimitry Andric int ThresholdBefore = 0; 2105ffd83dbSDimitry Andric int ThresholdAfter = 0; 2115ffd83dbSDimitry Andric 2125ffd83dbSDimitry Andric int getThresholdDelta() const { return ThresholdAfter - ThresholdBefore; } 2135ffd83dbSDimitry Andric 2145ffd83dbSDimitry Andric int getCostDelta() const { return CostAfter - CostBefore; } 2155ffd83dbSDimitry Andric 2165ffd83dbSDimitry Andric bool hasThresholdChanged() const { return ThresholdAfter != ThresholdBefore; } 2175ffd83dbSDimitry Andric }; 2185ffd83dbSDimitry Andric 2195ffd83dbSDimitry Andric class InlineCostAnnotationWriter : public AssemblyAnnotationWriter { 2205ffd83dbSDimitry Andric private: 2215ffd83dbSDimitry Andric InlineCostCallAnalyzer *const ICCA; 2225ffd83dbSDimitry Andric 2235ffd83dbSDimitry Andric public: 2245ffd83dbSDimitry Andric InlineCostAnnotationWriter(InlineCostCallAnalyzer *ICCA) : ICCA(ICCA) {} 225972a253aSDimitry Andric void emitInstructionAnnot(const Instruction *I, 2265ffd83dbSDimitry Andric formatted_raw_ostream &OS) override; 2275ffd83dbSDimitry Andric }; 2285ffd83dbSDimitry Andric 2295ffd83dbSDimitry Andric /// Carry out call site analysis, in order to evaluate inlinability. 2305ffd83dbSDimitry Andric /// NOTE: the type is currently used as implementation detail of functions such 2315ffd83dbSDimitry Andric /// as llvm::getInlineCost. Note the function_ref constructor parameters - the 2325ffd83dbSDimitry Andric /// expectation is that they come from the outer scope, from the wrapper 2335ffd83dbSDimitry Andric /// functions. If we want to support constructing CallAnalyzer objects where 2345ffd83dbSDimitry Andric /// lambdas are provided inline at construction, or where the object needs to 2355ffd83dbSDimitry Andric /// otherwise survive past the scope of the provided functions, we need to 2365ffd83dbSDimitry Andric /// revisit the argument types. 2370b57cec5SDimitry Andric class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { 2380b57cec5SDimitry Andric typedef InstVisitor<CallAnalyzer, bool> Base; 2390b57cec5SDimitry Andric friend class InstVisitor<CallAnalyzer, bool>; 2400b57cec5SDimitry Andric 241480093f4SDimitry Andric protected: 24281ad6265SDimitry Andric virtual ~CallAnalyzer() = default; 2430b57cec5SDimitry Andric /// The TargetTransformInfo available for this compilation. 2440b57cec5SDimitry Andric const TargetTransformInfo &TTI; 2450b57cec5SDimitry Andric 2460b57cec5SDimitry Andric /// Getter for the cache of @llvm.assume intrinsics. 2475ffd83dbSDimitry Andric function_ref<AssumptionCache &(Function &)> GetAssumptionCache; 2480b57cec5SDimitry Andric 2490b57cec5SDimitry Andric /// Getter for BlockFrequencyInfo 2505ffd83dbSDimitry Andric function_ref<BlockFrequencyInfo &(Function &)> GetBFI; 2510b57cec5SDimitry Andric 2520b57cec5SDimitry Andric /// Profile summary information. 2530b57cec5SDimitry Andric ProfileSummaryInfo *PSI; 2540b57cec5SDimitry Andric 2550b57cec5SDimitry Andric /// The called function. 2560b57cec5SDimitry Andric Function &F; 2570b57cec5SDimitry Andric 2580b57cec5SDimitry Andric // Cache the DataLayout since we use it a lot. 2590b57cec5SDimitry Andric const DataLayout &DL; 2600b57cec5SDimitry Andric 2610b57cec5SDimitry Andric /// The OptimizationRemarkEmitter available for this compilation. 2620b57cec5SDimitry Andric OptimizationRemarkEmitter *ORE; 2630b57cec5SDimitry Andric 2640b57cec5SDimitry Andric /// The candidate callsite being analyzed. Please do not use this to do 2650b57cec5SDimitry Andric /// analysis in the caller function; we want the inline cost query to be 2660b57cec5SDimitry Andric /// easily cacheable. Instead, use the cover function paramHasAttr. 2670b57cec5SDimitry Andric CallBase &CandidateCall; 2680b57cec5SDimitry Andric 269480093f4SDimitry Andric /// Extension points for handling callsite features. 270e8d8bef9SDimitry Andric // Called before a basic block was analyzed. 271e8d8bef9SDimitry Andric virtual void onBlockStart(const BasicBlock *BB) {} 272e8d8bef9SDimitry Andric 273480093f4SDimitry Andric /// Called after a basic block was analyzed. 274480093f4SDimitry Andric virtual void onBlockAnalyzed(const BasicBlock *BB) {} 2750b57cec5SDimitry Andric 2765ffd83dbSDimitry Andric /// Called before an instruction was analyzed 2775ffd83dbSDimitry Andric virtual void onInstructionAnalysisStart(const Instruction *I) {} 2785ffd83dbSDimitry Andric 2795ffd83dbSDimitry Andric /// Called after an instruction was analyzed 2805ffd83dbSDimitry Andric virtual void onInstructionAnalysisFinish(const Instruction *I) {} 2815ffd83dbSDimitry Andric 282480093f4SDimitry Andric /// Called at the end of the analysis of the callsite. Return the outcome of 283480093f4SDimitry Andric /// the analysis, i.e. 'InlineResult(true)' if the inlining may happen, or 284480093f4SDimitry Andric /// the reason it can't. 2855ffd83dbSDimitry Andric virtual InlineResult finalizeAnalysis() { return InlineResult::success(); } 286480093f4SDimitry Andric /// Called when we're about to start processing a basic block, and every time 287480093f4SDimitry Andric /// we are done processing an instruction. Return true if there is no point in 288480093f4SDimitry Andric /// continuing the analysis (e.g. we've determined already the call site is 289480093f4SDimitry Andric /// too expensive to inline) 290480093f4SDimitry Andric virtual bool shouldStop() { return false; } 2910b57cec5SDimitry Andric 292480093f4SDimitry Andric /// Called before the analysis of the callee body starts (with callsite 293480093f4SDimitry Andric /// contexts propagated). It checks callsite-specific information. Return a 294480093f4SDimitry Andric /// reason analysis can't continue if that's the case, or 'true' if it may 295480093f4SDimitry Andric /// continue. 2965ffd83dbSDimitry Andric virtual InlineResult onAnalysisStart() { return InlineResult::success(); } 297480093f4SDimitry Andric /// Called if the analysis engine decides SROA cannot be done for the given 298480093f4SDimitry Andric /// alloca. 299480093f4SDimitry Andric virtual void onDisableSROA(AllocaInst *Arg) {} 300480093f4SDimitry Andric 301480093f4SDimitry Andric /// Called the analysis engine determines load elimination won't happen. 302480093f4SDimitry Andric virtual void onDisableLoadElimination() {} 303480093f4SDimitry Andric 304349cc55cSDimitry Andric /// Called when we visit a CallBase, before the analysis starts. Return false 305349cc55cSDimitry Andric /// to stop further processing of the instruction. 306349cc55cSDimitry Andric virtual bool onCallBaseVisitStart(CallBase &Call) { return true; } 307349cc55cSDimitry Andric 308480093f4SDimitry Andric /// Called to account for a call. 309480093f4SDimitry Andric virtual void onCallPenalty() {} 310480093f4SDimitry Andric 311bdd1243dSDimitry Andric /// Called to account for a load or store. 312bdd1243dSDimitry Andric virtual void onMemAccess(){}; 313bdd1243dSDimitry Andric 314480093f4SDimitry Andric /// Called to account for the expectation the inlining would result in a load 315480093f4SDimitry Andric /// elimination. 316480093f4SDimitry Andric virtual void onLoadEliminationOpportunity() {} 317480093f4SDimitry Andric 318480093f4SDimitry Andric /// Called to account for the cost of argument setup for the Call in the 319480093f4SDimitry Andric /// callee's body (not the callsite currently under analysis). 320480093f4SDimitry Andric virtual void onCallArgumentSetup(const CallBase &Call) {} 321480093f4SDimitry Andric 322480093f4SDimitry Andric /// Called to account for a load relative intrinsic. 323480093f4SDimitry Andric virtual void onLoadRelativeIntrinsic() {} 324480093f4SDimitry Andric 325480093f4SDimitry Andric /// Called to account for a lowered call. 326480093f4SDimitry Andric virtual void onLoweredCall(Function *F, CallBase &Call, bool IsIndirectCall) { 327480093f4SDimitry Andric } 328480093f4SDimitry Andric 329480093f4SDimitry Andric /// Account for a jump table of given size. Return false to stop further 330480093f4SDimitry Andric /// processing the switch instruction 331480093f4SDimitry Andric virtual bool onJumpTable(unsigned JumpTableSize) { return true; } 332480093f4SDimitry Andric 333480093f4SDimitry Andric /// Account for a case cluster of given size. Return false to stop further 334480093f4SDimitry Andric /// processing of the instruction. 335480093f4SDimitry Andric virtual bool onCaseCluster(unsigned NumCaseCluster) { return true; } 336480093f4SDimitry Andric 337480093f4SDimitry Andric /// Called at the end of processing a switch instruction, with the given 338480093f4SDimitry Andric /// number of case clusters. 339*0fca6ea1SDimitry Andric virtual void onFinalizeSwitch(unsigned JumpTableSize, unsigned NumCaseCluster, 340*0fca6ea1SDimitry Andric bool DefaultDestUndefined) {} 341480093f4SDimitry Andric 342480093f4SDimitry Andric /// Called to account for any other instruction not specifically accounted 343480093f4SDimitry Andric /// for. 3445ffd83dbSDimitry Andric virtual void onMissedSimplification() {} 345480093f4SDimitry Andric 346480093f4SDimitry Andric /// Start accounting potential benefits due to SROA for the given alloca. 347480093f4SDimitry Andric virtual void onInitializeSROAArg(AllocaInst *Arg) {} 348480093f4SDimitry Andric 349480093f4SDimitry Andric /// Account SROA savings for the AllocaInst value. 350480093f4SDimitry Andric virtual void onAggregateSROAUse(AllocaInst *V) {} 351480093f4SDimitry Andric 352480093f4SDimitry Andric bool handleSROA(Value *V, bool DoNotDisable) { 353480093f4SDimitry Andric // Check for SROA candidates in comparisons. 354480093f4SDimitry Andric if (auto *SROAArg = getSROAArgForValueOrNull(V)) { 355480093f4SDimitry Andric if (DoNotDisable) { 356480093f4SDimitry Andric onAggregateSROAUse(SROAArg); 357480093f4SDimitry Andric return true; 358480093f4SDimitry Andric } 359480093f4SDimitry Andric disableSROAForArg(SROAArg); 360480093f4SDimitry Andric } 361480093f4SDimitry Andric return false; 362480093f4SDimitry Andric } 3630b57cec5SDimitry Andric 3640b57cec5SDimitry Andric bool IsCallerRecursive = false; 3650b57cec5SDimitry Andric bool IsRecursiveCall = false; 3660b57cec5SDimitry Andric bool ExposesReturnsTwice = false; 3670b57cec5SDimitry Andric bool HasDynamicAlloca = false; 3680b57cec5SDimitry Andric bool ContainsNoDuplicateCall = false; 3690b57cec5SDimitry Andric bool HasReturn = false; 3700b57cec5SDimitry Andric bool HasIndirectBr = false; 3710b57cec5SDimitry Andric bool HasUninlineableIntrinsic = false; 3720b57cec5SDimitry Andric bool InitsVargArgs = false; 3730b57cec5SDimitry Andric 3740b57cec5SDimitry Andric /// Number of bytes allocated statically by the callee. 3750b57cec5SDimitry Andric uint64_t AllocatedSize = 0; 3760b57cec5SDimitry Andric unsigned NumInstructions = 0; 3770b57cec5SDimitry Andric unsigned NumVectorInstructions = 0; 3780b57cec5SDimitry Andric 3790b57cec5SDimitry Andric /// While we walk the potentially-inlined instructions, we build up and 3800b57cec5SDimitry Andric /// maintain a mapping of simplified values specific to this callsite. The 3810b57cec5SDimitry Andric /// idea is to propagate any special information we have about arguments to 3820b57cec5SDimitry Andric /// this call through the inlinable section of the function, and account for 3830b57cec5SDimitry Andric /// likely simplifications post-inlining. The most important aspect we track 3840b57cec5SDimitry Andric /// is CFG altering simplifications -- when we prove a basic block dead, that 3850b57cec5SDimitry Andric /// can cause dramatic shifts in the cost of inlining a function. 3860b57cec5SDimitry Andric DenseMap<Value *, Constant *> SimplifiedValues; 3870b57cec5SDimitry Andric 3880b57cec5SDimitry Andric /// Keep track of the values which map back (through function arguments) to 3890b57cec5SDimitry Andric /// allocas on the caller stack which could be simplified through SROA. 390480093f4SDimitry Andric DenseMap<Value *, AllocaInst *> SROAArgValues; 3910b57cec5SDimitry Andric 392480093f4SDimitry Andric /// Keep track of Allocas for which we believe we may get SROA optimization. 3935ffd83dbSDimitry Andric DenseSet<AllocaInst *> EnabledSROAAllocas; 3940b57cec5SDimitry Andric 3950b57cec5SDimitry Andric /// Keep track of values which map to a pointer base and constant offset. 3960b57cec5SDimitry Andric DenseMap<Value *, std::pair<Value *, APInt>> ConstantOffsetPtrs; 3970b57cec5SDimitry Andric 3980b57cec5SDimitry Andric /// Keep track of dead blocks due to the constant arguments. 39981ad6265SDimitry Andric SmallPtrSet<BasicBlock *, 16> DeadBlocks; 4000b57cec5SDimitry Andric 4010b57cec5SDimitry Andric /// The mapping of the blocks to their known unique successors due to the 4020b57cec5SDimitry Andric /// constant arguments. 4030b57cec5SDimitry Andric DenseMap<BasicBlock *, BasicBlock *> KnownSuccessors; 4040b57cec5SDimitry Andric 4050b57cec5SDimitry Andric /// Model the elimination of repeated loads that is expected to happen 4060b57cec5SDimitry Andric /// whenever we simplify away the stores that would otherwise cause them to be 4070b57cec5SDimitry Andric /// loads. 40804eeddc0SDimitry Andric bool EnableLoadElimination = true; 409349cc55cSDimitry Andric 410349cc55cSDimitry Andric /// Whether we allow inlining for recursive call. 41104eeddc0SDimitry Andric bool AllowRecursiveCall = false; 412349cc55cSDimitry Andric 4130b57cec5SDimitry Andric SmallPtrSet<Value *, 16> LoadAddrSet; 414480093f4SDimitry Andric 415480093f4SDimitry Andric AllocaInst *getSROAArgForValueOrNull(Value *V) const { 416480093f4SDimitry Andric auto It = SROAArgValues.find(V); 4175ffd83dbSDimitry Andric if (It == SROAArgValues.end() || EnabledSROAAllocas.count(It->second) == 0) 418480093f4SDimitry Andric return nullptr; 419480093f4SDimitry Andric return It->second; 420480093f4SDimitry Andric } 4210b57cec5SDimitry Andric 4220b57cec5SDimitry Andric // Custom simplification helper routines. 4230b57cec5SDimitry Andric bool isAllocaDerivedArg(Value *V); 424480093f4SDimitry Andric void disableSROAForArg(AllocaInst *SROAArg); 4250b57cec5SDimitry Andric void disableSROA(Value *V); 4260b57cec5SDimitry Andric void findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB); 4270b57cec5SDimitry Andric void disableLoadElimination(); 4280b57cec5SDimitry Andric bool isGEPFree(GetElementPtrInst &GEP); 4290b57cec5SDimitry Andric bool canFoldInboundsGEP(GetElementPtrInst &I); 4300b57cec5SDimitry Andric bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset); 4310b57cec5SDimitry Andric bool simplifyCallSite(Function *F, CallBase &Call); 43281ad6265SDimitry Andric bool simplifyInstruction(Instruction &I); 433349cc55cSDimitry Andric bool simplifyIntrinsicCallIsConstant(CallBase &CB); 434bdd1243dSDimitry Andric bool simplifyIntrinsicCallObjectSize(CallBase &CB); 4350b57cec5SDimitry Andric ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V); 4360b57cec5SDimitry Andric 4370b57cec5SDimitry Andric /// Return true if the given argument to the function being considered for 4380b57cec5SDimitry Andric /// inlining has the given attribute set either at the call site or the 4390b57cec5SDimitry Andric /// function declaration. Primarily used to inspect call site specific 4400b57cec5SDimitry Andric /// attributes since these can be more precise than the ones on the callee 4410b57cec5SDimitry Andric /// itself. 4420b57cec5SDimitry Andric bool paramHasAttr(Argument *A, Attribute::AttrKind Attr); 4430b57cec5SDimitry Andric 4440b57cec5SDimitry Andric /// Return true if the given value is known non null within the callee if 4450b57cec5SDimitry Andric /// inlined through this particular callsite. 4460b57cec5SDimitry Andric bool isKnownNonNullInCallee(Value *V); 4470b57cec5SDimitry Andric 4480b57cec5SDimitry Andric /// Return true if size growth is allowed when inlining the callee at \p Call. 4490b57cec5SDimitry Andric bool allowSizeGrowth(CallBase &Call); 4500b57cec5SDimitry Andric 4510b57cec5SDimitry Andric // Custom analysis routines. 4520b57cec5SDimitry Andric InlineResult analyzeBlock(BasicBlock *BB, 4530b57cec5SDimitry Andric SmallPtrSetImpl<const Value *> &EphValues); 4540b57cec5SDimitry Andric 4550b57cec5SDimitry Andric // Disable several entry points to the visitor so we don't accidentally use 4560b57cec5SDimitry Andric // them by declaring but not defining them here. 4570b57cec5SDimitry Andric void visit(Module *); 4580b57cec5SDimitry Andric void visit(Module &); 4590b57cec5SDimitry Andric void visit(Function *); 4600b57cec5SDimitry Andric void visit(Function &); 4610b57cec5SDimitry Andric void visit(BasicBlock *); 4620b57cec5SDimitry Andric void visit(BasicBlock &); 4630b57cec5SDimitry Andric 4640b57cec5SDimitry Andric // Provide base case for our instruction visit. 4650b57cec5SDimitry Andric bool visitInstruction(Instruction &I); 4660b57cec5SDimitry Andric 4670b57cec5SDimitry Andric // Our visit overrides. 4680b57cec5SDimitry Andric bool visitAlloca(AllocaInst &I); 4690b57cec5SDimitry Andric bool visitPHI(PHINode &I); 4700b57cec5SDimitry Andric bool visitGetElementPtr(GetElementPtrInst &I); 4710b57cec5SDimitry Andric bool visitBitCast(BitCastInst &I); 4720b57cec5SDimitry Andric bool visitPtrToInt(PtrToIntInst &I); 4730b57cec5SDimitry Andric bool visitIntToPtr(IntToPtrInst &I); 4740b57cec5SDimitry Andric bool visitCastInst(CastInst &I); 4750b57cec5SDimitry Andric bool visitCmpInst(CmpInst &I); 4760b57cec5SDimitry Andric bool visitSub(BinaryOperator &I); 4770b57cec5SDimitry Andric bool visitBinaryOperator(BinaryOperator &I); 4780b57cec5SDimitry Andric bool visitFNeg(UnaryOperator &I); 4790b57cec5SDimitry Andric bool visitLoad(LoadInst &I); 4800b57cec5SDimitry Andric bool visitStore(StoreInst &I); 4810b57cec5SDimitry Andric bool visitExtractValue(ExtractValueInst &I); 4820b57cec5SDimitry Andric bool visitInsertValue(InsertValueInst &I); 4830b57cec5SDimitry Andric bool visitCallBase(CallBase &Call); 4840b57cec5SDimitry Andric bool visitReturnInst(ReturnInst &RI); 4850b57cec5SDimitry Andric bool visitBranchInst(BranchInst &BI); 4860b57cec5SDimitry Andric bool visitSelectInst(SelectInst &SI); 4870b57cec5SDimitry Andric bool visitSwitchInst(SwitchInst &SI); 4880b57cec5SDimitry Andric bool visitIndirectBrInst(IndirectBrInst &IBI); 4890b57cec5SDimitry Andric bool visitResumeInst(ResumeInst &RI); 4900b57cec5SDimitry Andric bool visitCleanupReturnInst(CleanupReturnInst &RI); 4910b57cec5SDimitry Andric bool visitCatchReturnInst(CatchReturnInst &RI); 4920b57cec5SDimitry Andric bool visitUnreachableInst(UnreachableInst &I); 4930b57cec5SDimitry Andric 4940b57cec5SDimitry Andric public: 495fe6060f1SDimitry Andric CallAnalyzer(Function &Callee, CallBase &Call, const TargetTransformInfo &TTI, 4965ffd83dbSDimitry Andric function_ref<AssumptionCache &(Function &)> GetAssumptionCache, 4975ffd83dbSDimitry Andric function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr, 4985ffd83dbSDimitry Andric ProfileSummaryInfo *PSI = nullptr, 4995ffd83dbSDimitry Andric OptimizationRemarkEmitter *ORE = nullptr) 5000b57cec5SDimitry Andric : TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI), 501*0fca6ea1SDimitry Andric PSI(PSI), F(Callee), DL(F.getDataLayout()), ORE(ORE), 50204eeddc0SDimitry Andric CandidateCall(Call) {} 5030b57cec5SDimitry Andric 504480093f4SDimitry Andric InlineResult analyze(); 5050b57cec5SDimitry Andric 506bdd1243dSDimitry Andric std::optional<Constant *> getSimplifiedValue(Instruction *I) { 50706c3fb27SDimitry Andric if (SimplifiedValues.contains(I)) 5085ffd83dbSDimitry Andric return SimplifiedValues[I]; 509bdd1243dSDimitry Andric return std::nullopt; 5105ffd83dbSDimitry Andric } 5115ffd83dbSDimitry Andric 5120b57cec5SDimitry Andric // Keep a bunch of stats about the cost savings found so we can print them 5130b57cec5SDimitry Andric // out when debugging. 5140b57cec5SDimitry Andric unsigned NumConstantArgs = 0; 5150b57cec5SDimitry Andric unsigned NumConstantOffsetPtrArgs = 0; 5160b57cec5SDimitry Andric unsigned NumAllocaArgs = 0; 5170b57cec5SDimitry Andric unsigned NumConstantPtrCmps = 0; 5180b57cec5SDimitry Andric unsigned NumConstantPtrDiffs = 0; 5190b57cec5SDimitry Andric unsigned NumInstructionsSimplified = 0; 5200b57cec5SDimitry Andric 5210b57cec5SDimitry Andric void dump(); 5220b57cec5SDimitry Andric }; 5230b57cec5SDimitry Andric 524fe6060f1SDimitry Andric // Considering forming a binary search, we should find the number of nodes 525fe6060f1SDimitry Andric // which is same as the number of comparisons when lowered. For a given 526fe6060f1SDimitry Andric // number of clusters, n, we can define a recursive function, f(n), to find 527fe6060f1SDimitry Andric // the number of nodes in the tree. The recursion is : 528fe6060f1SDimitry Andric // f(n) = 1 + f(n/2) + f (n - n/2), when n > 3, 529fe6060f1SDimitry Andric // and f(n) = n, when n <= 3. 530fe6060f1SDimitry Andric // This will lead a binary tree where the leaf should be either f(2) or f(3) 531fe6060f1SDimitry Andric // when n > 3. So, the number of comparisons from leaves should be n, while 532fe6060f1SDimitry Andric // the number of non-leaf should be : 533fe6060f1SDimitry Andric // 2^(log2(n) - 1) - 1 534fe6060f1SDimitry Andric // = 2^log2(n) * 2^-1 - 1 535fe6060f1SDimitry Andric // = n / 2 - 1. 536fe6060f1SDimitry Andric // Considering comparisons from leaf and non-leaf nodes, we can estimate the 537fe6060f1SDimitry Andric // number of comparisons in a simple closed form : 538fe6060f1SDimitry Andric // n + n / 2 - 1 = n * 3 / 2 - 1 539fe6060f1SDimitry Andric int64_t getExpectedNumberOfCompare(int NumCaseCluster) { 540fe6060f1SDimitry Andric return 3 * static_cast<int64_t>(NumCaseCluster) / 2 - 1; 541fe6060f1SDimitry Andric } 542fe6060f1SDimitry Andric 543480093f4SDimitry Andric /// FIXME: if it is necessary to derive from InlineCostCallAnalyzer, note 544480093f4SDimitry Andric /// the FIXME in onLoweredCall, when instantiating an InlineCostCallAnalyzer 545480093f4SDimitry Andric class InlineCostCallAnalyzer final : public CallAnalyzer { 546480093f4SDimitry Andric const bool ComputeFullInlineCost; 547480093f4SDimitry Andric int LoadEliminationCost = 0; 548480093f4SDimitry Andric /// Bonus to be applied when percentage of vector instructions in callee is 549480093f4SDimitry Andric /// high (see more details in updateThreshold). 550480093f4SDimitry Andric int VectorBonus = 0; 551480093f4SDimitry Andric /// Bonus to be applied when the callee has only one reachable basic block. 552480093f4SDimitry Andric int SingleBBBonus = 0; 553480093f4SDimitry Andric 554480093f4SDimitry Andric /// Tunable parameters that control the analysis. 555480093f4SDimitry Andric const InlineParams &Params; 556480093f4SDimitry Andric 5575ffd83dbSDimitry Andric // This DenseMap stores the delta change in cost and threshold after 5585ffd83dbSDimitry Andric // accounting for the given instruction. The map is filled only with the 5595ffd83dbSDimitry Andric // flag PrintInstructionComments on. 5605ffd83dbSDimitry Andric DenseMap<const Instruction *, InstructionCostDetail> InstructionCostDetailMap; 5615ffd83dbSDimitry Andric 562480093f4SDimitry Andric /// Upper bound for the inlining cost. Bonuses are being applied to account 563480093f4SDimitry Andric /// for speculative "expected profit" of the inlining decision. 564480093f4SDimitry Andric int Threshold = 0; 565480093f4SDimitry Andric 566bdd1243dSDimitry Andric /// The amount of StaticBonus applied. 567bdd1243dSDimitry Andric int StaticBonusApplied = 0; 568bdd1243dSDimitry Andric 569480093f4SDimitry Andric /// Attempt to evaluate indirect calls to boost its inline cost. 570480093f4SDimitry Andric const bool BoostIndirectCalls; 571480093f4SDimitry Andric 5725ffd83dbSDimitry Andric /// Ignore the threshold when finalizing analysis. 5735ffd83dbSDimitry Andric const bool IgnoreThreshold; 5745ffd83dbSDimitry Andric 575e8d8bef9SDimitry Andric // True if the cost-benefit-analysis-based inliner is enabled. 576e8d8bef9SDimitry Andric const bool CostBenefitAnalysisEnabled; 577e8d8bef9SDimitry Andric 578480093f4SDimitry Andric /// Inlining cost measured in abstract units, accounts for all the 579480093f4SDimitry Andric /// instructions expected to be executed for a given function invocation. 580480093f4SDimitry Andric /// Instructions that are statically proven to be dead based on call-site 581480093f4SDimitry Andric /// arguments are not counted here. 582480093f4SDimitry Andric int Cost = 0; 583480093f4SDimitry Andric 584e8d8bef9SDimitry Andric // The cumulative cost at the beginning of the basic block being analyzed. At 585e8d8bef9SDimitry Andric // the end of analyzing each basic block, "Cost - CostAtBBStart" represents 586e8d8bef9SDimitry Andric // the size of that basic block. 587e8d8bef9SDimitry Andric int CostAtBBStart = 0; 588e8d8bef9SDimitry Andric 589e8d8bef9SDimitry Andric // The static size of live but cold basic blocks. This is "static" in the 590e8d8bef9SDimitry Andric // sense that it's not weighted by profile counts at all. 591e8d8bef9SDimitry Andric int ColdSize = 0; 592e8d8bef9SDimitry Andric 593349cc55cSDimitry Andric // Whether inlining is decided by cost-threshold analysis. 594349cc55cSDimitry Andric bool DecidedByCostThreshold = false; 595349cc55cSDimitry Andric 596fe6060f1SDimitry Andric // Whether inlining is decided by cost-benefit analysis. 597fe6060f1SDimitry Andric bool DecidedByCostBenefit = false; 598fe6060f1SDimitry Andric 599fe6060f1SDimitry Andric // The cost-benefit pair computed by cost-benefit analysis. 600bdd1243dSDimitry Andric std::optional<CostBenefitPair> CostBenefit; 601fe6060f1SDimitry Andric 602480093f4SDimitry Andric bool SingleBB = true; 603480093f4SDimitry Andric 604480093f4SDimitry Andric unsigned SROACostSavings = 0; 605480093f4SDimitry Andric unsigned SROACostSavingsLost = 0; 606480093f4SDimitry Andric 607480093f4SDimitry Andric /// The mapping of caller Alloca values to their accumulated cost savings. If 608480093f4SDimitry Andric /// we have to disable SROA for one of the allocas, this tells us how much 609480093f4SDimitry Andric /// cost must be added. 610480093f4SDimitry Andric DenseMap<AllocaInst *, int> SROAArgCosts; 611480093f4SDimitry Andric 612480093f4SDimitry Andric /// Return true if \p Call is a cold callsite. 613480093f4SDimitry Andric bool isColdCallSite(CallBase &Call, BlockFrequencyInfo *CallerBFI); 614480093f4SDimitry Andric 615480093f4SDimitry Andric /// Update Threshold based on callsite properties such as callee 616480093f4SDimitry Andric /// attributes and callee hotness for PGO builds. The Callee is explicitly 617480093f4SDimitry Andric /// passed to support analyzing indirect calls whose target is inferred by 618480093f4SDimitry Andric /// analysis. 619480093f4SDimitry Andric void updateThreshold(CallBase &Call, Function &Callee); 620480093f4SDimitry Andric /// Return a higher threshold if \p Call is a hot callsite. 621bdd1243dSDimitry Andric std::optional<int> getHotCallSiteThreshold(CallBase &Call, 622480093f4SDimitry Andric BlockFrequencyInfo *CallerBFI); 623480093f4SDimitry Andric 624480093f4SDimitry Andric /// Handle a capped 'int' increment for Cost. 625bdd1243dSDimitry Andric void addCost(int64_t Inc) { 6265f757f3fSDimitry Andric Inc = std::clamp<int64_t>(Inc, INT_MIN, INT_MAX); 6275f757f3fSDimitry Andric Cost = std::clamp<int64_t>(Inc + Cost, INT_MIN, INT_MAX); 628480093f4SDimitry Andric } 629480093f4SDimitry Andric 630480093f4SDimitry Andric void onDisableSROA(AllocaInst *Arg) override { 631480093f4SDimitry Andric auto CostIt = SROAArgCosts.find(Arg); 632480093f4SDimitry Andric if (CostIt == SROAArgCosts.end()) 633480093f4SDimitry Andric return; 634480093f4SDimitry Andric addCost(CostIt->second); 635480093f4SDimitry Andric SROACostSavings -= CostIt->second; 636480093f4SDimitry Andric SROACostSavingsLost += CostIt->second; 637480093f4SDimitry Andric SROAArgCosts.erase(CostIt); 638480093f4SDimitry Andric } 639480093f4SDimitry Andric 640480093f4SDimitry Andric void onDisableLoadElimination() override { 641480093f4SDimitry Andric addCost(LoadEliminationCost); 642480093f4SDimitry Andric LoadEliminationCost = 0; 643480093f4SDimitry Andric } 644349cc55cSDimitry Andric 645349cc55cSDimitry Andric bool onCallBaseVisitStart(CallBase &Call) override { 646bdd1243dSDimitry Andric if (std::optional<int> AttrCallThresholdBonus = 647349cc55cSDimitry Andric getStringFnAttrAsInt(Call, "call-threshold-bonus")) 648349cc55cSDimitry Andric Threshold += *AttrCallThresholdBonus; 649349cc55cSDimitry Andric 650bdd1243dSDimitry Andric if (std::optional<int> AttrCallCost = 651349cc55cSDimitry Andric getStringFnAttrAsInt(Call, "call-inline-cost")) { 652349cc55cSDimitry Andric addCost(*AttrCallCost); 653349cc55cSDimitry Andric // Prevent further processing of the call since we want to override its 654349cc55cSDimitry Andric // inline cost, not just add to it. 655349cc55cSDimitry Andric return false; 656349cc55cSDimitry Andric } 657349cc55cSDimitry Andric return true; 658349cc55cSDimitry Andric } 659349cc55cSDimitry Andric 660fe6060f1SDimitry Andric void onCallPenalty() override { addCost(CallPenalty); } 661bdd1243dSDimitry Andric 662bdd1243dSDimitry Andric void onMemAccess() override { addCost(MemAccessCost); } 663bdd1243dSDimitry Andric 664480093f4SDimitry Andric void onCallArgumentSetup(const CallBase &Call) override { 665480093f4SDimitry Andric // Pay the price of the argument setup. We account for the average 1 666480093f4SDimitry Andric // instruction per call argument setup here. 667bdd1243dSDimitry Andric addCost(Call.arg_size() * InstrCost); 668480093f4SDimitry Andric } 669480093f4SDimitry Andric void onLoadRelativeIntrinsic() override { 670480093f4SDimitry Andric // This is normally lowered to 4 LLVM instructions. 671bdd1243dSDimitry Andric addCost(3 * InstrCost); 672480093f4SDimitry Andric } 673480093f4SDimitry Andric void onLoweredCall(Function *F, CallBase &Call, 674480093f4SDimitry Andric bool IsIndirectCall) override { 675480093f4SDimitry Andric // We account for the average 1 instruction per call argument setup here. 676bdd1243dSDimitry Andric addCost(Call.arg_size() * InstrCost); 677480093f4SDimitry Andric 678480093f4SDimitry Andric // If we have a constant that we are calling as a function, we can peer 679480093f4SDimitry Andric // through it and see the function target. This happens not infrequently 680480093f4SDimitry Andric // during devirtualization and so we want to give it a hefty bonus for 681480093f4SDimitry Andric // inlining, but cap that bonus in the event that inlining wouldn't pan out. 682480093f4SDimitry Andric // Pretend to inline the function, with a custom threshold. 683480093f4SDimitry Andric if (IsIndirectCall && BoostIndirectCalls) { 684480093f4SDimitry Andric auto IndirectCallParams = Params; 685480093f4SDimitry Andric IndirectCallParams.DefaultThreshold = 686480093f4SDimitry Andric InlineConstants::IndirectCallThreshold; 687480093f4SDimitry Andric /// FIXME: if InlineCostCallAnalyzer is derived from, this may need 688480093f4SDimitry Andric /// to instantiate the derived class. 6895ffd83dbSDimitry Andric InlineCostCallAnalyzer CA(*F, Call, IndirectCallParams, TTI, 6905ffd83dbSDimitry Andric GetAssumptionCache, GetBFI, PSI, ORE, false); 6915ffd83dbSDimitry Andric if (CA.analyze().isSuccess()) { 692480093f4SDimitry Andric // We were able to inline the indirect call! Subtract the cost from the 693480093f4SDimitry Andric // threshold to get the bonus we want to apply, but don't go below zero. 694480093f4SDimitry Andric Cost -= std::max(0, CA.getThreshold() - CA.getCost()); 695480093f4SDimitry Andric } 696480093f4SDimitry Andric } else 697480093f4SDimitry Andric // Otherwise simply add the cost for merely making the call. 6985f757f3fSDimitry Andric addCost(TTI.getInlineCallPenalty(CandidateCall.getCaller(), Call, 6995f757f3fSDimitry Andric CallPenalty)); 700480093f4SDimitry Andric } 701480093f4SDimitry Andric 702*0fca6ea1SDimitry Andric void onFinalizeSwitch(unsigned JumpTableSize, unsigned NumCaseCluster, 703*0fca6ea1SDimitry Andric bool DefaultDestUndefined) override { 704480093f4SDimitry Andric // If suitable for a jump table, consider the cost for the table size and 705480093f4SDimitry Andric // branch to destination. 706480093f4SDimitry Andric // Maximum valid cost increased in this function. 707480093f4SDimitry Andric if (JumpTableSize) { 708*0fca6ea1SDimitry Andric // Suppose a default branch includes one compare and one conditional 709*0fca6ea1SDimitry Andric // branch if it's reachable. 710*0fca6ea1SDimitry Andric if (!DefaultDestUndefined) 711*0fca6ea1SDimitry Andric addCost(2 * InstrCost); 712*0fca6ea1SDimitry Andric // Suppose a jump table requires one load and one jump instruction. 713fe6060f1SDimitry Andric int64_t JTCost = 714*0fca6ea1SDimitry Andric static_cast<int64_t>(JumpTableSize) * InstrCost + 2 * InstrCost; 715bdd1243dSDimitry Andric addCost(JTCost); 716480093f4SDimitry Andric return; 717480093f4SDimitry Andric } 718fe6060f1SDimitry Andric 719480093f4SDimitry Andric if (NumCaseCluster <= 3) { 720480093f4SDimitry Andric // Suppose a comparison includes one compare and one conditional branch. 721*0fca6ea1SDimitry Andric // We can reduce a set of instructions if the default branch is 722*0fca6ea1SDimitry Andric // undefined. 723*0fca6ea1SDimitry Andric addCost((NumCaseCluster - DefaultDestUndefined) * 2 * InstrCost); 724480093f4SDimitry Andric return; 725480093f4SDimitry Andric } 726480093f4SDimitry Andric 727fe6060f1SDimitry Andric int64_t ExpectedNumberOfCompare = 728fe6060f1SDimitry Andric getExpectedNumberOfCompare(NumCaseCluster); 729bdd1243dSDimitry Andric int64_t SwitchCost = ExpectedNumberOfCompare * 2 * InstrCost; 730480093f4SDimitry Andric 731bdd1243dSDimitry Andric addCost(SwitchCost); 732480093f4SDimitry Andric } 733bdd1243dSDimitry Andric void onMissedSimplification() override { addCost(InstrCost); } 734480093f4SDimitry Andric 735480093f4SDimitry Andric void onInitializeSROAArg(AllocaInst *Arg) override { 736480093f4SDimitry Andric assert(Arg != nullptr && 737480093f4SDimitry Andric "Should not initialize SROA costs for null value."); 73806c3fb27SDimitry Andric auto SROAArgCost = TTI.getCallerAllocaCost(&CandidateCall, Arg); 73906c3fb27SDimitry Andric SROACostSavings += SROAArgCost; 74006c3fb27SDimitry Andric SROAArgCosts[Arg] = SROAArgCost; 741480093f4SDimitry Andric } 742480093f4SDimitry Andric 743480093f4SDimitry Andric void onAggregateSROAUse(AllocaInst *SROAArg) override { 744480093f4SDimitry Andric auto CostIt = SROAArgCosts.find(SROAArg); 745480093f4SDimitry Andric assert(CostIt != SROAArgCosts.end() && 746480093f4SDimitry Andric "expected this argument to have a cost"); 747bdd1243dSDimitry Andric CostIt->second += InstrCost; 748bdd1243dSDimitry Andric SROACostSavings += InstrCost; 749480093f4SDimitry Andric } 750480093f4SDimitry Andric 751e8d8bef9SDimitry Andric void onBlockStart(const BasicBlock *BB) override { CostAtBBStart = Cost; } 752e8d8bef9SDimitry Andric 753480093f4SDimitry Andric void onBlockAnalyzed(const BasicBlock *BB) override { 754e8d8bef9SDimitry Andric if (CostBenefitAnalysisEnabled) { 755e8d8bef9SDimitry Andric // Keep track of the static size of live but cold basic blocks. For now, 756e8d8bef9SDimitry Andric // we define a cold basic block to be one that's never executed. 757e8d8bef9SDimitry Andric assert(GetBFI && "GetBFI must be available"); 758e8d8bef9SDimitry Andric BlockFrequencyInfo *BFI = &(GetBFI(F)); 759e8d8bef9SDimitry Andric assert(BFI && "BFI must be available"); 760e8d8bef9SDimitry Andric auto ProfileCount = BFI->getBlockProfileCount(BB); 761bdd1243dSDimitry Andric if (*ProfileCount == 0) 762e8d8bef9SDimitry Andric ColdSize += Cost - CostAtBBStart; 763e8d8bef9SDimitry Andric } 764e8d8bef9SDimitry Andric 765480093f4SDimitry Andric auto *TI = BB->getTerminator(); 766480093f4SDimitry Andric // If we had any successors at this point, than post-inlining is likely to 767480093f4SDimitry Andric // have them as well. Note that we assume any basic blocks which existed 768480093f4SDimitry Andric // due to branches or switches which folded above will also fold after 769480093f4SDimitry Andric // inlining. 770480093f4SDimitry Andric if (SingleBB && TI->getNumSuccessors() > 1) { 771480093f4SDimitry Andric // Take off the bonus we applied to the threshold. 772480093f4SDimitry Andric Threshold -= SingleBBBonus; 773480093f4SDimitry Andric SingleBB = false; 774480093f4SDimitry Andric } 775480093f4SDimitry Andric } 7765ffd83dbSDimitry Andric 7775ffd83dbSDimitry Andric void onInstructionAnalysisStart(const Instruction *I) override { 7785ffd83dbSDimitry Andric // This function is called to store the initial cost of inlining before 7795ffd83dbSDimitry Andric // the given instruction was assessed. 7805ffd83dbSDimitry Andric if (!PrintInstructionComments) 7815ffd83dbSDimitry Andric return; 7825ffd83dbSDimitry Andric InstructionCostDetailMap[I].CostBefore = Cost; 7835ffd83dbSDimitry Andric InstructionCostDetailMap[I].ThresholdBefore = Threshold; 7845ffd83dbSDimitry Andric } 7855ffd83dbSDimitry Andric 7865ffd83dbSDimitry Andric void onInstructionAnalysisFinish(const Instruction *I) override { 7875ffd83dbSDimitry Andric // This function is called to find new values of cost and threshold after 7885ffd83dbSDimitry Andric // the instruction has been assessed. 7895ffd83dbSDimitry Andric if (!PrintInstructionComments) 7905ffd83dbSDimitry Andric return; 7915ffd83dbSDimitry Andric InstructionCostDetailMap[I].CostAfter = Cost; 7925ffd83dbSDimitry Andric InstructionCostDetailMap[I].ThresholdAfter = Threshold; 7935ffd83dbSDimitry Andric } 7945ffd83dbSDimitry Andric 795e8d8bef9SDimitry Andric bool isCostBenefitAnalysisEnabled() { 796e8d8bef9SDimitry Andric if (!PSI || !PSI->hasProfileSummary()) 797e8d8bef9SDimitry Andric return false; 798e8d8bef9SDimitry Andric 799e8d8bef9SDimitry Andric if (!GetBFI) 800e8d8bef9SDimitry Andric return false; 801e8d8bef9SDimitry Andric 802fe6060f1SDimitry Andric if (InlineEnableCostBenefitAnalysis.getNumOccurrences()) { 803fe6060f1SDimitry Andric // Honor the explicit request from the user. 804fe6060f1SDimitry Andric if (!InlineEnableCostBenefitAnalysis) 805fe6060f1SDimitry Andric return false; 806fe6060f1SDimitry Andric } else { 807fe6060f1SDimitry Andric // Otherwise, require instrumentation profile. 808*0fca6ea1SDimitry Andric if (!PSI->hasInstrumentationProfile()) 809fe6060f1SDimitry Andric return false; 810fe6060f1SDimitry Andric } 811fe6060f1SDimitry Andric 812e8d8bef9SDimitry Andric auto *Caller = CandidateCall.getParent()->getParent(); 813e8d8bef9SDimitry Andric if (!Caller->getEntryCount()) 814e8d8bef9SDimitry Andric return false; 815e8d8bef9SDimitry Andric 816e8d8bef9SDimitry Andric BlockFrequencyInfo *CallerBFI = &(GetBFI(*Caller)); 817e8d8bef9SDimitry Andric if (!CallerBFI) 818e8d8bef9SDimitry Andric return false; 819e8d8bef9SDimitry Andric 820e8d8bef9SDimitry Andric // For now, limit to hot call site. 821e8d8bef9SDimitry Andric if (!PSI->isHotCallSite(CandidateCall, CallerBFI)) 822e8d8bef9SDimitry Andric return false; 823e8d8bef9SDimitry Andric 824fe6060f1SDimitry Andric // Make sure we have a nonzero entry count. 825fe6060f1SDimitry Andric auto EntryCount = F.getEntryCount(); 826349cc55cSDimitry Andric if (!EntryCount || !EntryCount->getCount()) 827e8d8bef9SDimitry Andric return false; 828e8d8bef9SDimitry Andric 829e8d8bef9SDimitry Andric BlockFrequencyInfo *CalleeBFI = &(GetBFI(F)); 830e8d8bef9SDimitry Andric if (!CalleeBFI) 831e8d8bef9SDimitry Andric return false; 832e8d8bef9SDimitry Andric 833e8d8bef9SDimitry Andric return true; 834e8d8bef9SDimitry Andric } 835e8d8bef9SDimitry Andric 8365f757f3fSDimitry Andric // A helper function to choose between command line override and default. 8375f757f3fSDimitry Andric unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const { 8385f757f3fSDimitry Andric if (InlineSavingsMultiplier.getNumOccurrences()) 8395f757f3fSDimitry Andric return InlineSavingsMultiplier; 8405f757f3fSDimitry Andric return TTI.getInliningCostBenefitAnalysisSavingsMultiplier(); 8415f757f3fSDimitry Andric } 8425f757f3fSDimitry Andric 8435f757f3fSDimitry Andric // A helper function to choose between command line override and default. 8445f757f3fSDimitry Andric unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const { 8455f757f3fSDimitry Andric if (InlineSavingsProfitableMultiplier.getNumOccurrences()) 8465f757f3fSDimitry Andric return InlineSavingsProfitableMultiplier; 8475f757f3fSDimitry Andric return TTI.getInliningCostBenefitAnalysisProfitableMultiplier(); 8485f757f3fSDimitry Andric } 8495f757f3fSDimitry Andric 8505f757f3fSDimitry Andric void OverrideCycleSavingsAndSizeForTesting(APInt &CycleSavings, int &Size) { 8515f757f3fSDimitry Andric if (std::optional<int> AttrCycleSavings = getStringFnAttrAsInt( 8525f757f3fSDimitry Andric CandidateCall, "inline-cycle-savings-for-test")) { 8535f757f3fSDimitry Andric CycleSavings = *AttrCycleSavings; 8545f757f3fSDimitry Andric } 8555f757f3fSDimitry Andric 8565f757f3fSDimitry Andric if (std::optional<int> AttrRuntimeCost = getStringFnAttrAsInt( 8575f757f3fSDimitry Andric CandidateCall, "inline-runtime-cost-for-test")) { 8585f757f3fSDimitry Andric Size = *AttrRuntimeCost; 8595f757f3fSDimitry Andric } 8605f757f3fSDimitry Andric } 8615f757f3fSDimitry Andric 862e8d8bef9SDimitry Andric // Determine whether we should inline the given call site, taking into account 863bdd1243dSDimitry Andric // both the size cost and the cycle savings. Return std::nullopt if we don't 8645f757f3fSDimitry Andric // have sufficient profiling information to determine. 865bdd1243dSDimitry Andric std::optional<bool> costBenefitAnalysis() { 866e8d8bef9SDimitry Andric if (!CostBenefitAnalysisEnabled) 867bdd1243dSDimitry Andric return std::nullopt; 868e8d8bef9SDimitry Andric 869e8d8bef9SDimitry Andric // buildInlinerPipeline in the pass builder sets HotCallSiteThreshold to 0 870e8d8bef9SDimitry Andric // for the prelink phase of the AutoFDO + ThinLTO build. Honor the logic by 871e8d8bef9SDimitry Andric // falling back to the cost-based metric. 872e8d8bef9SDimitry Andric // TODO: Improve this hacky condition. 873e8d8bef9SDimitry Andric if (Threshold == 0) 874bdd1243dSDimitry Andric return std::nullopt; 875e8d8bef9SDimitry Andric 876e8d8bef9SDimitry Andric assert(GetBFI); 877e8d8bef9SDimitry Andric BlockFrequencyInfo *CalleeBFI = &(GetBFI(F)); 878e8d8bef9SDimitry Andric assert(CalleeBFI); 879e8d8bef9SDimitry Andric 880bdd1243dSDimitry Andric // The cycle savings expressed as the sum of InstrCost 881e8d8bef9SDimitry Andric // multiplied by the estimated dynamic count of each instruction we can 882e8d8bef9SDimitry Andric // avoid. Savings come from the call site cost, such as argument setup and 883e8d8bef9SDimitry Andric // the call instruction, as well as the instructions that are folded. 884e8d8bef9SDimitry Andric // 885e8d8bef9SDimitry Andric // We use 128-bit APInt here to avoid potential overflow. This variable 886e8d8bef9SDimitry Andric // should stay well below 10^^24 (or 2^^80) in practice. This "worst" case 887e8d8bef9SDimitry Andric // assumes that we can avoid or fold a billion instructions, each with a 888e8d8bef9SDimitry Andric // profile count of 10^^15 -- roughly the number of cycles for a 24-hour 889e8d8bef9SDimitry Andric // period on a 4GHz machine. 890e8d8bef9SDimitry Andric APInt CycleSavings(128, 0); 891e8d8bef9SDimitry Andric 892e8d8bef9SDimitry Andric for (auto &BB : F) { 893e8d8bef9SDimitry Andric APInt CurrentSavings(128, 0); 894e8d8bef9SDimitry Andric for (auto &I : BB) { 895e8d8bef9SDimitry Andric if (BranchInst *BI = dyn_cast<BranchInst>(&I)) { 896e8d8bef9SDimitry Andric // Count a conditional branch as savings if it becomes unconditional. 897e8d8bef9SDimitry Andric if (BI->isConditional() && 898349cc55cSDimitry Andric isa_and_nonnull<ConstantInt>( 899e8d8bef9SDimitry Andric SimplifiedValues.lookup(BI->getCondition()))) { 900bdd1243dSDimitry Andric CurrentSavings += InstrCost; 901e8d8bef9SDimitry Andric } 9025f757f3fSDimitry Andric } else if (SwitchInst *SI = dyn_cast<SwitchInst>(&I)) { 9035f757f3fSDimitry Andric if (isa_and_present<ConstantInt>(SimplifiedValues.lookup(SI->getCondition()))) 9045f757f3fSDimitry Andric CurrentSavings += InstrCost; 905e8d8bef9SDimitry Andric } else if (Value *V = dyn_cast<Value>(&I)) { 906e8d8bef9SDimitry Andric // Count an instruction as savings if we can fold it. 907e8d8bef9SDimitry Andric if (SimplifiedValues.count(V)) { 908bdd1243dSDimitry Andric CurrentSavings += InstrCost; 909e8d8bef9SDimitry Andric } 910e8d8bef9SDimitry Andric } 911e8d8bef9SDimitry Andric } 912e8d8bef9SDimitry Andric 913e8d8bef9SDimitry Andric auto ProfileCount = CalleeBFI->getBlockProfileCount(&BB); 914bdd1243dSDimitry Andric CurrentSavings *= *ProfileCount; 915e8d8bef9SDimitry Andric CycleSavings += CurrentSavings; 916e8d8bef9SDimitry Andric } 917e8d8bef9SDimitry Andric 918e8d8bef9SDimitry Andric // Compute the cycle savings per call. 919e8d8bef9SDimitry Andric auto EntryProfileCount = F.getEntryCount(); 92081ad6265SDimitry Andric assert(EntryProfileCount && EntryProfileCount->getCount()); 921349cc55cSDimitry Andric auto EntryCount = EntryProfileCount->getCount(); 922e8d8bef9SDimitry Andric CycleSavings += EntryCount / 2; 923e8d8bef9SDimitry Andric CycleSavings = CycleSavings.udiv(EntryCount); 924e8d8bef9SDimitry Andric 925e8d8bef9SDimitry Andric // Compute the total savings for the call site. 926e8d8bef9SDimitry Andric auto *CallerBB = CandidateCall.getParent(); 927e8d8bef9SDimitry Andric BlockFrequencyInfo *CallerBFI = &(GetBFI(*(CallerBB->getParent()))); 9285f757f3fSDimitry Andric CycleSavings += getCallsiteCost(TTI, this->CandidateCall, DL); 92981ad6265SDimitry Andric CycleSavings *= *CallerBFI->getBlockProfileCount(CallerBB); 930e8d8bef9SDimitry Andric 9315f757f3fSDimitry Andric // Remove the cost of the cold basic blocks to model the runtime cost more 9325f757f3fSDimitry Andric // accurately. Both machine block placement and function splitting could 9335f757f3fSDimitry Andric // place cold blocks further from hot blocks. 934e8d8bef9SDimitry Andric int Size = Cost - ColdSize; 935e8d8bef9SDimitry Andric 936e8d8bef9SDimitry Andric // Allow tiny callees to be inlined regardless of whether they meet the 937e8d8bef9SDimitry Andric // savings threshold. 938e8d8bef9SDimitry Andric Size = Size > InlineSizeAllowance ? Size - InlineSizeAllowance : 1; 939e8d8bef9SDimitry Andric 9405f757f3fSDimitry Andric OverrideCycleSavingsAndSizeForTesting(CycleSavings, Size); 941fe6060f1SDimitry Andric CostBenefit.emplace(APInt(128, Size), CycleSavings); 942fe6060f1SDimitry Andric 9435f757f3fSDimitry Andric // Let R be the ratio of CycleSavings to Size. We accept the inlining 9445f757f3fSDimitry Andric // opportunity if R is really high and reject if R is really low. If R is 9455f757f3fSDimitry Andric // somewhere in the middle, we fall back to the cost-based analysis. 946e8d8bef9SDimitry Andric // 9475f757f3fSDimitry Andric // Specifically, let R = CycleSavings / Size, we accept the inlining 9485f757f3fSDimitry Andric // opportunity if: 949e8d8bef9SDimitry Andric // 9505f757f3fSDimitry Andric // PSI->getOrCompHotCountThreshold() 9515f757f3fSDimitry Andric // R > ------------------------------------------------- 9525f757f3fSDimitry Andric // getInliningCostBenefitAnalysisSavingsMultiplier() 9535f757f3fSDimitry Andric // 9545f757f3fSDimitry Andric // and reject the inlining opportunity if: 9555f757f3fSDimitry Andric // 9565f757f3fSDimitry Andric // PSI->getOrCompHotCountThreshold() 9575f757f3fSDimitry Andric // R <= ---------------------------------------------------- 9585f757f3fSDimitry Andric // getInliningCostBenefitAnalysisProfitableMultiplier() 9595f757f3fSDimitry Andric // 9605f757f3fSDimitry Andric // Otherwise, we fall back to the cost-based analysis. 9615f757f3fSDimitry Andric // 9625f757f3fSDimitry Andric // Implementation-wise, use multiplication (CycleSavings * Multiplier, 9635f757f3fSDimitry Andric // HotCountThreshold * Size) rather than division to avoid precision loss. 9645f757f3fSDimitry Andric APInt Threshold(128, PSI->getOrCompHotCountThreshold()); 9655f757f3fSDimitry Andric Threshold *= Size; 9665f757f3fSDimitry Andric 9675f757f3fSDimitry Andric APInt UpperBoundCycleSavings = CycleSavings; 9685f757f3fSDimitry Andric UpperBoundCycleSavings *= getInliningCostBenefitAnalysisSavingsMultiplier(); 9695f757f3fSDimitry Andric if (UpperBoundCycleSavings.uge(Threshold)) 9705f757f3fSDimitry Andric return true; 9715f757f3fSDimitry Andric 9725f757f3fSDimitry Andric APInt LowerBoundCycleSavings = CycleSavings; 9735f757f3fSDimitry Andric LowerBoundCycleSavings *= 9745f757f3fSDimitry Andric getInliningCostBenefitAnalysisProfitableMultiplier(); 9755f757f3fSDimitry Andric if (LowerBoundCycleSavings.ult(Threshold)) 9765f757f3fSDimitry Andric return false; 9775f757f3fSDimitry Andric 9785f757f3fSDimitry Andric // Otherwise, fall back to the cost-based analysis. 9795f757f3fSDimitry Andric return std::nullopt; 980e8d8bef9SDimitry Andric } 981e8d8bef9SDimitry Andric 982480093f4SDimitry Andric InlineResult finalizeAnalysis() override { 983480093f4SDimitry Andric // Loops generally act a lot like calls in that they act like barriers to 984480093f4SDimitry Andric // movement, require a certain amount of setup, etc. So when optimising for 985480093f4SDimitry Andric // size, we penalise any call sites that perform loops. We do this after all 986480093f4SDimitry Andric // other costs here, so will likely only be dealing with relatively small 987480093f4SDimitry Andric // functions (and hence DT and LI will hopefully be cheap). 988480093f4SDimitry Andric auto *Caller = CandidateCall.getFunction(); 989480093f4SDimitry Andric if (Caller->hasMinSize()) { 990480093f4SDimitry Andric DominatorTree DT(F); 991480093f4SDimitry Andric LoopInfo LI(DT); 992480093f4SDimitry Andric int NumLoops = 0; 993480093f4SDimitry Andric for (Loop *L : LI) { 994480093f4SDimitry Andric // Ignore loops that will not be executed 995480093f4SDimitry Andric if (DeadBlocks.count(L->getHeader())) 996480093f4SDimitry Andric continue; 997480093f4SDimitry Andric NumLoops++; 998480093f4SDimitry Andric } 999fe6060f1SDimitry Andric addCost(NumLoops * InlineConstants::LoopPenalty); 1000480093f4SDimitry Andric } 1001480093f4SDimitry Andric 1002480093f4SDimitry Andric // We applied the maximum possible vector bonus at the beginning. Now, 1003480093f4SDimitry Andric // subtract the excess bonus, if any, from the Threshold before 1004480093f4SDimitry Andric // comparing against Cost. 1005480093f4SDimitry Andric if (NumVectorInstructions <= NumInstructions / 10) 1006480093f4SDimitry Andric Threshold -= VectorBonus; 1007480093f4SDimitry Andric else if (NumVectorInstructions <= NumInstructions / 2) 1008480093f4SDimitry Andric Threshold -= VectorBonus / 2; 1009480093f4SDimitry Andric 1010bdd1243dSDimitry Andric if (std::optional<int> AttrCost = 1011349cc55cSDimitry Andric getStringFnAttrAsInt(CandidateCall, "function-inline-cost")) 1012349cc55cSDimitry Andric Cost = *AttrCost; 1013349cc55cSDimitry Andric 1014bdd1243dSDimitry Andric if (std::optional<int> AttrCostMult = getStringFnAttrAsInt( 1015fb03ea46SDimitry Andric CandidateCall, 1016fb03ea46SDimitry Andric InlineConstants::FunctionInlineCostMultiplierAttributeName)) 1017fb03ea46SDimitry Andric Cost *= *AttrCostMult; 1018fb03ea46SDimitry Andric 1019bdd1243dSDimitry Andric if (std::optional<int> AttrThreshold = 1020349cc55cSDimitry Andric getStringFnAttrAsInt(CandidateCall, "function-inline-threshold")) 1021349cc55cSDimitry Andric Threshold = *AttrThreshold; 1022349cc55cSDimitry Andric 1023e8d8bef9SDimitry Andric if (auto Result = costBenefitAnalysis()) { 1024fe6060f1SDimitry Andric DecidedByCostBenefit = true; 102581ad6265SDimitry Andric if (*Result) 1026e8d8bef9SDimitry Andric return InlineResult::success(); 1027e8d8bef9SDimitry Andric else 1028e8d8bef9SDimitry Andric return InlineResult::failure("Cost over threshold."); 1029e8d8bef9SDimitry Andric } 1030e8d8bef9SDimitry Andric 1031349cc55cSDimitry Andric if (IgnoreThreshold) 10325ffd83dbSDimitry Andric return InlineResult::success(); 1033349cc55cSDimitry Andric 1034349cc55cSDimitry Andric DecidedByCostThreshold = true; 1035349cc55cSDimitry Andric return Cost < std::max(1, Threshold) 1036349cc55cSDimitry Andric ? InlineResult::success() 1037349cc55cSDimitry Andric : InlineResult::failure("Cost over threshold."); 1038480093f4SDimitry Andric } 1039349cc55cSDimitry Andric 1040480093f4SDimitry Andric bool shouldStop() override { 1041349cc55cSDimitry Andric if (IgnoreThreshold || ComputeFullInlineCost) 1042349cc55cSDimitry Andric return false; 1043480093f4SDimitry Andric // Bail out the moment we cross the threshold. This means we'll under-count 1044480093f4SDimitry Andric // the cost, but only when undercounting doesn't matter. 1045349cc55cSDimitry Andric if (Cost < Threshold) 1046349cc55cSDimitry Andric return false; 1047349cc55cSDimitry Andric DecidedByCostThreshold = true; 1048349cc55cSDimitry Andric return true; 1049480093f4SDimitry Andric } 1050480093f4SDimitry Andric 1051480093f4SDimitry Andric void onLoadEliminationOpportunity() override { 1052bdd1243dSDimitry Andric LoadEliminationCost += InstrCost; 1053480093f4SDimitry Andric } 1054480093f4SDimitry Andric 1055480093f4SDimitry Andric InlineResult onAnalysisStart() override { 1056480093f4SDimitry Andric // Perform some tweaks to the cost and threshold based on the direct 1057480093f4SDimitry Andric // callsite information. 1058480093f4SDimitry Andric 1059480093f4SDimitry Andric // We want to more aggressively inline vector-dense kernels, so up the 1060480093f4SDimitry Andric // threshold, and we'll lower it if the % of vector instructions gets too 1061480093f4SDimitry Andric // low. Note that these bonuses are some what arbitrary and evolved over 1062480093f4SDimitry Andric // time by accident as much as because they are principled bonuses. 1063480093f4SDimitry Andric // 1064480093f4SDimitry Andric // FIXME: It would be nice to remove all such bonuses. At least it would be 1065480093f4SDimitry Andric // nice to base the bonus values on something more scientific. 1066480093f4SDimitry Andric assert(NumInstructions == 0); 1067480093f4SDimitry Andric assert(NumVectorInstructions == 0); 1068480093f4SDimitry Andric 1069480093f4SDimitry Andric // Update the threshold based on callsite properties 1070480093f4SDimitry Andric updateThreshold(CandidateCall, F); 1071480093f4SDimitry Andric 1072480093f4SDimitry Andric // While Threshold depends on commandline options that can take negative 1073480093f4SDimitry Andric // values, we want to enforce the invariant that the computed threshold and 1074480093f4SDimitry Andric // bonuses are non-negative. 1075480093f4SDimitry Andric assert(Threshold >= 0); 1076480093f4SDimitry Andric assert(SingleBBBonus >= 0); 1077480093f4SDimitry Andric assert(VectorBonus >= 0); 1078480093f4SDimitry Andric 1079480093f4SDimitry Andric // Speculatively apply all possible bonuses to Threshold. If cost exceeds 1080480093f4SDimitry Andric // this Threshold any time, and cost cannot decrease, we can stop processing 1081480093f4SDimitry Andric // the rest of the function body. 1082480093f4SDimitry Andric Threshold += (SingleBBBonus + VectorBonus); 1083480093f4SDimitry Andric 1084480093f4SDimitry Andric // Give out bonuses for the callsite, as the instructions setting them up 1085480093f4SDimitry Andric // will be gone after inlining. 10865f757f3fSDimitry Andric addCost(-getCallsiteCost(TTI, this->CandidateCall, DL)); 1087480093f4SDimitry Andric 1088480093f4SDimitry Andric // If this function uses the coldcc calling convention, prefer not to inline 1089480093f4SDimitry Andric // it. 1090480093f4SDimitry Andric if (F.getCallingConv() == CallingConv::Cold) 1091480093f4SDimitry Andric Cost += InlineConstants::ColdccPenalty; 1092480093f4SDimitry Andric 109381ad6265SDimitry Andric LLVM_DEBUG(dbgs() << " Initial cost: " << Cost << "\n"); 109481ad6265SDimitry Andric 1095480093f4SDimitry Andric // Check if we're done. This can happen due to bonuses and penalties. 1096480093f4SDimitry Andric if (Cost >= Threshold && !ComputeFullInlineCost) 10975ffd83dbSDimitry Andric return InlineResult::failure("high cost"); 1098480093f4SDimitry Andric 10995ffd83dbSDimitry Andric return InlineResult::success(); 1100480093f4SDimitry Andric } 1101480093f4SDimitry Andric 1102480093f4SDimitry Andric public: 1103480093f4SDimitry Andric InlineCostCallAnalyzer( 11045ffd83dbSDimitry Andric Function &Callee, CallBase &Call, const InlineParams &Params, 1105480093f4SDimitry Andric const TargetTransformInfo &TTI, 11065ffd83dbSDimitry Andric function_ref<AssumptionCache &(Function &)> GetAssumptionCache, 11075ffd83dbSDimitry Andric function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr, 11085ffd83dbSDimitry Andric ProfileSummaryInfo *PSI = nullptr, 11095ffd83dbSDimitry Andric OptimizationRemarkEmitter *ORE = nullptr, bool BoostIndirect = true, 11105ffd83dbSDimitry Andric bool IgnoreThreshold = false) 11115ffd83dbSDimitry Andric : CallAnalyzer(Callee, Call, TTI, GetAssumptionCache, GetBFI, PSI, ORE), 1112480093f4SDimitry Andric ComputeFullInlineCost(OptComputeFullInlineCost || 1113e8d8bef9SDimitry Andric Params.ComputeFullInlineCost || ORE || 1114e8d8bef9SDimitry Andric isCostBenefitAnalysisEnabled()), 1115480093f4SDimitry Andric Params(Params), Threshold(Params.DefaultThreshold), 11165ffd83dbSDimitry Andric BoostIndirectCalls(BoostIndirect), IgnoreThreshold(IgnoreThreshold), 1117e8d8bef9SDimitry Andric CostBenefitAnalysisEnabled(isCostBenefitAnalysisEnabled()), 1118349cc55cSDimitry Andric Writer(this) { 111981ad6265SDimitry Andric AllowRecursiveCall = *Params.AllowRecursiveCall; 1120349cc55cSDimitry Andric } 11215ffd83dbSDimitry Andric 11225ffd83dbSDimitry Andric /// Annotation Writer for instruction details 11235ffd83dbSDimitry Andric InlineCostAnnotationWriter Writer; 11245ffd83dbSDimitry Andric 1125480093f4SDimitry Andric void dump(); 1126480093f4SDimitry Andric 11275ffd83dbSDimitry Andric // Prints the same analysis as dump(), but its definition is not dependent 11285ffd83dbSDimitry Andric // on the build. 1129349cc55cSDimitry Andric void print(raw_ostream &OS); 11305ffd83dbSDimitry Andric 1131bdd1243dSDimitry Andric std::optional<InstructionCostDetail> getCostDetails(const Instruction *I) { 113206c3fb27SDimitry Andric if (InstructionCostDetailMap.contains(I)) 11335ffd83dbSDimitry Andric return InstructionCostDetailMap[I]; 1134bdd1243dSDimitry Andric return std::nullopt; 11355ffd83dbSDimitry Andric } 11365ffd83dbSDimitry Andric 113781ad6265SDimitry Andric virtual ~InlineCostCallAnalyzer() = default; 1138fe6060f1SDimitry Andric int getThreshold() const { return Threshold; } 1139fe6060f1SDimitry Andric int getCost() const { return Cost; } 1140bdd1243dSDimitry Andric int getStaticBonusApplied() const { return StaticBonusApplied; } 1141bdd1243dSDimitry Andric std::optional<CostBenefitPair> getCostBenefitPair() { return CostBenefit; } 1142fe6060f1SDimitry Andric bool wasDecidedByCostBenefit() const { return DecidedByCostBenefit; } 1143349cc55cSDimitry Andric bool wasDecidedByCostThreshold() const { return DecidedByCostThreshold; } 1144480093f4SDimitry Andric }; 1145fe6060f1SDimitry Andric 1146bdd1243dSDimitry Andric // Return true if CB is the sole call to local function Callee. 1147bdd1243dSDimitry Andric static bool isSoleCallToLocalFunction(const CallBase &CB, 1148bdd1243dSDimitry Andric const Function &Callee) { 1149bdd1243dSDimitry Andric return Callee.hasLocalLinkage() && Callee.hasOneLiveUse() && 1150bdd1243dSDimitry Andric &Callee == CB.getCalledFunction(); 1151bdd1243dSDimitry Andric } 1152bdd1243dSDimitry Andric 1153fe6060f1SDimitry Andric class InlineCostFeaturesAnalyzer final : public CallAnalyzer { 1154fe6060f1SDimitry Andric private: 1155fe6060f1SDimitry Andric InlineCostFeatures Cost = {}; 1156fe6060f1SDimitry Andric 1157fe6060f1SDimitry Andric // FIXME: These constants are taken from the heuristic-based cost visitor. 1158fe6060f1SDimitry Andric // These should be removed entirely in a later revision to avoid reliance on 1159fe6060f1SDimitry Andric // heuristics in the ML inliner. 1160*0fca6ea1SDimitry Andric static constexpr int JTCostMultiplier = 2; 1161fe6060f1SDimitry Andric static constexpr int CaseClusterCostMultiplier = 2; 1162*0fca6ea1SDimitry Andric static constexpr int SwitchDefaultDestCostMultiplier = 2; 1163fe6060f1SDimitry Andric static constexpr int SwitchCostMultiplier = 2; 1164fe6060f1SDimitry Andric 1165fe6060f1SDimitry Andric // FIXME: These are taken from the heuristic-based cost visitor: we should 1166fe6060f1SDimitry Andric // eventually abstract these to the CallAnalyzer to avoid duplication. 1167fe6060f1SDimitry Andric unsigned SROACostSavingOpportunities = 0; 1168fe6060f1SDimitry Andric int VectorBonus = 0; 1169fe6060f1SDimitry Andric int SingleBBBonus = 0; 1170fe6060f1SDimitry Andric int Threshold = 5; 1171fe6060f1SDimitry Andric 1172fe6060f1SDimitry Andric DenseMap<AllocaInst *, unsigned> SROACosts; 1173fe6060f1SDimitry Andric 1174fe6060f1SDimitry Andric void increment(InlineCostFeatureIndex Feature, int64_t Delta = 1) { 1175fe6060f1SDimitry Andric Cost[static_cast<size_t>(Feature)] += Delta; 1176fe6060f1SDimitry Andric } 1177fe6060f1SDimitry Andric 1178fe6060f1SDimitry Andric void set(InlineCostFeatureIndex Feature, int64_t Value) { 1179fe6060f1SDimitry Andric Cost[static_cast<size_t>(Feature)] = Value; 1180fe6060f1SDimitry Andric } 1181fe6060f1SDimitry Andric 1182fe6060f1SDimitry Andric void onDisableSROA(AllocaInst *Arg) override { 1183fe6060f1SDimitry Andric auto CostIt = SROACosts.find(Arg); 1184fe6060f1SDimitry Andric if (CostIt == SROACosts.end()) 1185fe6060f1SDimitry Andric return; 1186fe6060f1SDimitry Andric 118706c3fb27SDimitry Andric increment(InlineCostFeatureIndex::sroa_losses, CostIt->second); 1188fe6060f1SDimitry Andric SROACostSavingOpportunities -= CostIt->second; 1189fe6060f1SDimitry Andric SROACosts.erase(CostIt); 1190fe6060f1SDimitry Andric } 1191fe6060f1SDimitry Andric 1192fe6060f1SDimitry Andric void onDisableLoadElimination() override { 119306c3fb27SDimitry Andric set(InlineCostFeatureIndex::load_elimination, 1); 1194fe6060f1SDimitry Andric } 1195fe6060f1SDimitry Andric 1196fe6060f1SDimitry Andric void onCallPenalty() override { 119706c3fb27SDimitry Andric increment(InlineCostFeatureIndex::call_penalty, CallPenalty); 1198fe6060f1SDimitry Andric } 1199fe6060f1SDimitry Andric 1200fe6060f1SDimitry Andric void onCallArgumentSetup(const CallBase &Call) override { 120106c3fb27SDimitry Andric increment(InlineCostFeatureIndex::call_argument_setup, 1202bdd1243dSDimitry Andric Call.arg_size() * InstrCost); 1203fe6060f1SDimitry Andric } 1204fe6060f1SDimitry Andric 1205fe6060f1SDimitry Andric void onLoadRelativeIntrinsic() override { 120606c3fb27SDimitry Andric increment(InlineCostFeatureIndex::load_relative_intrinsic, 3 * InstrCost); 1207fe6060f1SDimitry Andric } 1208fe6060f1SDimitry Andric 1209fe6060f1SDimitry Andric void onLoweredCall(Function *F, CallBase &Call, 1210fe6060f1SDimitry Andric bool IsIndirectCall) override { 121106c3fb27SDimitry Andric increment(InlineCostFeatureIndex::lowered_call_arg_setup, 1212bdd1243dSDimitry Andric Call.arg_size() * InstrCost); 1213fe6060f1SDimitry Andric 1214fe6060f1SDimitry Andric if (IsIndirectCall) { 1215fe6060f1SDimitry Andric InlineParams IndirectCallParams = {/* DefaultThreshold*/ 0, 1216fe6060f1SDimitry Andric /*HintThreshold*/ {}, 1217fe6060f1SDimitry Andric /*ColdThreshold*/ {}, 1218fe6060f1SDimitry Andric /*OptSizeThreshold*/ {}, 1219fe6060f1SDimitry Andric /*OptMinSizeThreshold*/ {}, 1220fe6060f1SDimitry Andric /*HotCallSiteThreshold*/ {}, 1221fe6060f1SDimitry Andric /*LocallyHotCallSiteThreshold*/ {}, 1222fe6060f1SDimitry Andric /*ColdCallSiteThreshold*/ {}, 1223fe6060f1SDimitry Andric /*ComputeFullInlineCost*/ true, 1224fe6060f1SDimitry Andric /*EnableDeferral*/ true}; 1225fe6060f1SDimitry Andric IndirectCallParams.DefaultThreshold = 1226fe6060f1SDimitry Andric InlineConstants::IndirectCallThreshold; 1227fe6060f1SDimitry Andric 1228fe6060f1SDimitry Andric InlineCostCallAnalyzer CA(*F, Call, IndirectCallParams, TTI, 1229fe6060f1SDimitry Andric GetAssumptionCache, GetBFI, PSI, ORE, false, 1230fe6060f1SDimitry Andric true); 1231fe6060f1SDimitry Andric if (CA.analyze().isSuccess()) { 123206c3fb27SDimitry Andric increment(InlineCostFeatureIndex::nested_inline_cost_estimate, 1233fe6060f1SDimitry Andric CA.getCost()); 123406c3fb27SDimitry Andric increment(InlineCostFeatureIndex::nested_inlines, 1); 1235fe6060f1SDimitry Andric } 1236fe6060f1SDimitry Andric } else { 1237fe6060f1SDimitry Andric onCallPenalty(); 1238fe6060f1SDimitry Andric } 1239fe6060f1SDimitry Andric } 1240fe6060f1SDimitry Andric 1241*0fca6ea1SDimitry Andric void onFinalizeSwitch(unsigned JumpTableSize, unsigned NumCaseCluster, 1242*0fca6ea1SDimitry Andric bool DefaultDestUndefined) override { 1243fe6060f1SDimitry Andric if (JumpTableSize) { 1244*0fca6ea1SDimitry Andric if (!DefaultDestUndefined) 1245*0fca6ea1SDimitry Andric increment(InlineCostFeatureIndex::switch_default_dest_penalty, 1246*0fca6ea1SDimitry Andric SwitchDefaultDestCostMultiplier * InstrCost); 1247bdd1243dSDimitry Andric int64_t JTCost = static_cast<int64_t>(JumpTableSize) * InstrCost + 1248bdd1243dSDimitry Andric JTCostMultiplier * InstrCost; 124906c3fb27SDimitry Andric increment(InlineCostFeatureIndex::jump_table_penalty, JTCost); 1250fe6060f1SDimitry Andric return; 1251fe6060f1SDimitry Andric } 1252fe6060f1SDimitry Andric 1253fe6060f1SDimitry Andric if (NumCaseCluster <= 3) { 125406c3fb27SDimitry Andric increment(InlineCostFeatureIndex::case_cluster_penalty, 1255*0fca6ea1SDimitry Andric (NumCaseCluster - DefaultDestUndefined) * 1256*0fca6ea1SDimitry Andric CaseClusterCostMultiplier * InstrCost); 1257fe6060f1SDimitry Andric return; 1258fe6060f1SDimitry Andric } 1259fe6060f1SDimitry Andric 1260fe6060f1SDimitry Andric int64_t ExpectedNumberOfCompare = 1261fe6060f1SDimitry Andric getExpectedNumberOfCompare(NumCaseCluster); 1262fe6060f1SDimitry Andric 1263bdd1243dSDimitry Andric int64_t SwitchCost = 1264bdd1243dSDimitry Andric ExpectedNumberOfCompare * SwitchCostMultiplier * InstrCost; 126506c3fb27SDimitry Andric increment(InlineCostFeatureIndex::switch_penalty, SwitchCost); 1266fe6060f1SDimitry Andric } 1267fe6060f1SDimitry Andric 1268fe6060f1SDimitry Andric void onMissedSimplification() override { 126906c3fb27SDimitry Andric increment(InlineCostFeatureIndex::unsimplified_common_instructions, 1270bdd1243dSDimitry Andric InstrCost); 1271fe6060f1SDimitry Andric } 1272fe6060f1SDimitry Andric 127306c3fb27SDimitry Andric void onInitializeSROAArg(AllocaInst *Arg) override { 127406c3fb27SDimitry Andric auto SROAArgCost = TTI.getCallerAllocaCost(&CandidateCall, Arg); 127506c3fb27SDimitry Andric SROACosts[Arg] = SROAArgCost; 127606c3fb27SDimitry Andric SROACostSavingOpportunities += SROAArgCost; 127706c3fb27SDimitry Andric } 127806c3fb27SDimitry Andric 1279fe6060f1SDimitry Andric void onAggregateSROAUse(AllocaInst *Arg) override { 1280bdd1243dSDimitry Andric SROACosts.find(Arg)->second += InstrCost; 1281bdd1243dSDimitry Andric SROACostSavingOpportunities += InstrCost; 1282fe6060f1SDimitry Andric } 1283fe6060f1SDimitry Andric 1284fe6060f1SDimitry Andric void onBlockAnalyzed(const BasicBlock *BB) override { 1285fe6060f1SDimitry Andric if (BB->getTerminator()->getNumSuccessors() > 1) 128606c3fb27SDimitry Andric set(InlineCostFeatureIndex::is_multiple_blocks, 1); 1287fe6060f1SDimitry Andric Threshold -= SingleBBBonus; 1288fe6060f1SDimitry Andric } 1289fe6060f1SDimitry Andric 1290fe6060f1SDimitry Andric InlineResult finalizeAnalysis() override { 1291fe6060f1SDimitry Andric auto *Caller = CandidateCall.getFunction(); 1292fe6060f1SDimitry Andric if (Caller->hasMinSize()) { 1293fe6060f1SDimitry Andric DominatorTree DT(F); 1294fe6060f1SDimitry Andric LoopInfo LI(DT); 1295fe6060f1SDimitry Andric for (Loop *L : LI) { 1296fe6060f1SDimitry Andric // Ignore loops that will not be executed 1297fe6060f1SDimitry Andric if (DeadBlocks.count(L->getHeader())) 1298fe6060f1SDimitry Andric continue; 129906c3fb27SDimitry Andric increment(InlineCostFeatureIndex::num_loops, 1300fe6060f1SDimitry Andric InlineConstants::LoopPenalty); 1301fe6060f1SDimitry Andric } 1302fe6060f1SDimitry Andric } 130306c3fb27SDimitry Andric set(InlineCostFeatureIndex::dead_blocks, DeadBlocks.size()); 130406c3fb27SDimitry Andric set(InlineCostFeatureIndex::simplified_instructions, 1305fe6060f1SDimitry Andric NumInstructionsSimplified); 130606c3fb27SDimitry Andric set(InlineCostFeatureIndex::constant_args, NumConstantArgs); 130706c3fb27SDimitry Andric set(InlineCostFeatureIndex::constant_offset_ptr_args, 1308fe6060f1SDimitry Andric NumConstantOffsetPtrArgs); 130906c3fb27SDimitry Andric set(InlineCostFeatureIndex::sroa_savings, SROACostSavingOpportunities); 1310fe6060f1SDimitry Andric 1311fe6060f1SDimitry Andric if (NumVectorInstructions <= NumInstructions / 10) 1312fe6060f1SDimitry Andric Threshold -= VectorBonus; 1313fe6060f1SDimitry Andric else if (NumVectorInstructions <= NumInstructions / 2) 1314fe6060f1SDimitry Andric Threshold -= VectorBonus / 2; 1315fe6060f1SDimitry Andric 131606c3fb27SDimitry Andric set(InlineCostFeatureIndex::threshold, Threshold); 1317fe6060f1SDimitry Andric 1318fe6060f1SDimitry Andric return InlineResult::success(); 1319fe6060f1SDimitry Andric } 1320fe6060f1SDimitry Andric 1321fe6060f1SDimitry Andric bool shouldStop() override { return false; } 1322fe6060f1SDimitry Andric 1323fe6060f1SDimitry Andric void onLoadEliminationOpportunity() override { 132406c3fb27SDimitry Andric increment(InlineCostFeatureIndex::load_elimination, 1); 1325fe6060f1SDimitry Andric } 1326fe6060f1SDimitry Andric 1327fe6060f1SDimitry Andric InlineResult onAnalysisStart() override { 132806c3fb27SDimitry Andric increment(InlineCostFeatureIndex::callsite_cost, 13295f757f3fSDimitry Andric -1 * getCallsiteCost(TTI, this->CandidateCall, DL)); 1330fe6060f1SDimitry Andric 133106c3fb27SDimitry Andric set(InlineCostFeatureIndex::cold_cc_penalty, 1332fe6060f1SDimitry Andric (F.getCallingConv() == CallingConv::Cold)); 1333fe6060f1SDimitry Andric 133406c3fb27SDimitry Andric set(InlineCostFeatureIndex::last_call_to_static_bonus, 1335bdd1243dSDimitry Andric isSoleCallToLocalFunction(CandidateCall, F)); 133681ad6265SDimitry Andric 1337fe6060f1SDimitry Andric // FIXME: we shouldn't repeat this logic in both the Features and Cost 1338fe6060f1SDimitry Andric // analyzer - instead, we should abstract it to a common method in the 1339fe6060f1SDimitry Andric // CallAnalyzer 1340fe6060f1SDimitry Andric int SingleBBBonusPercent = 50; 1341fe6060f1SDimitry Andric int VectorBonusPercent = TTI.getInlinerVectorBonusPercent(); 1342fe6060f1SDimitry Andric Threshold += TTI.adjustInliningThreshold(&CandidateCall); 1343fe6060f1SDimitry Andric Threshold *= TTI.getInliningThresholdMultiplier(); 1344fe6060f1SDimitry Andric SingleBBBonus = Threshold * SingleBBBonusPercent / 100; 1345fe6060f1SDimitry Andric VectorBonus = Threshold * VectorBonusPercent / 100; 1346fe6060f1SDimitry Andric Threshold += (SingleBBBonus + VectorBonus); 1347fe6060f1SDimitry Andric 1348fe6060f1SDimitry Andric return InlineResult::success(); 1349fe6060f1SDimitry Andric } 1350fe6060f1SDimitry Andric 1351fe6060f1SDimitry Andric public: 1352fe6060f1SDimitry Andric InlineCostFeaturesAnalyzer( 1353fe6060f1SDimitry Andric const TargetTransformInfo &TTI, 1354fe6060f1SDimitry Andric function_ref<AssumptionCache &(Function &)> &GetAssumptionCache, 1355fe6060f1SDimitry Andric function_ref<BlockFrequencyInfo &(Function &)> GetBFI, 1356fe6060f1SDimitry Andric ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE, Function &Callee, 1357fe6060f1SDimitry Andric CallBase &Call) 1358fe6060f1SDimitry Andric : CallAnalyzer(Callee, Call, TTI, GetAssumptionCache, GetBFI, PSI) {} 1359fe6060f1SDimitry Andric 1360fe6060f1SDimitry Andric const InlineCostFeatures &features() const { return Cost; } 1361fe6060f1SDimitry Andric }; 1362fe6060f1SDimitry Andric 13630b57cec5SDimitry Andric } // namespace 13640b57cec5SDimitry Andric 13650b57cec5SDimitry Andric /// Test whether the given value is an Alloca-derived function argument. 13660b57cec5SDimitry Andric bool CallAnalyzer::isAllocaDerivedArg(Value *V) { 13670b57cec5SDimitry Andric return SROAArgValues.count(V); 13680b57cec5SDimitry Andric } 13690b57cec5SDimitry Andric 1370480093f4SDimitry Andric void CallAnalyzer::disableSROAForArg(AllocaInst *SROAArg) { 1371480093f4SDimitry Andric onDisableSROA(SROAArg); 13725ffd83dbSDimitry Andric EnabledSROAAllocas.erase(SROAArg); 13730b57cec5SDimitry Andric disableLoadElimination(); 13740b57cec5SDimitry Andric } 13755ffd83dbSDimitry Andric 1376fe6060f1SDimitry Andric void InlineCostAnnotationWriter::emitInstructionAnnot( 1377fe6060f1SDimitry Andric const Instruction *I, formatted_raw_ostream &OS) { 13785ffd83dbSDimitry Andric // The cost of inlining of the given instruction is printed always. 13795ffd83dbSDimitry Andric // The threshold delta is printed only when it is non-zero. It happens 13805ffd83dbSDimitry Andric // when we decided to give a bonus at a particular instruction. 1381bdd1243dSDimitry Andric std::optional<InstructionCostDetail> Record = ICCA->getCostDetails(I); 13825ffd83dbSDimitry Andric if (!Record) 13835ffd83dbSDimitry Andric OS << "; No analysis for the instruction"; 13845ffd83dbSDimitry Andric else { 13855ffd83dbSDimitry Andric OS << "; cost before = " << Record->CostBefore 13865ffd83dbSDimitry Andric << ", cost after = " << Record->CostAfter 13875ffd83dbSDimitry Andric << ", threshold before = " << Record->ThresholdBefore 13885ffd83dbSDimitry Andric << ", threshold after = " << Record->ThresholdAfter << ", "; 13895ffd83dbSDimitry Andric OS << "cost delta = " << Record->getCostDelta(); 13905ffd83dbSDimitry Andric if (Record->hasThresholdChanged()) 13915ffd83dbSDimitry Andric OS << ", threshold delta = " << Record->getThresholdDelta(); 13925ffd83dbSDimitry Andric } 13935ffd83dbSDimitry Andric auto C = ICCA->getSimplifiedValue(const_cast<Instruction *>(I)); 13945ffd83dbSDimitry Andric if (C) { 13955ffd83dbSDimitry Andric OS << ", simplified to "; 139681ad6265SDimitry Andric (*C)->print(OS, true); 13975ffd83dbSDimitry Andric } 13985ffd83dbSDimitry Andric OS << "\n"; 13995ffd83dbSDimitry Andric } 14005ffd83dbSDimitry Andric 14010b57cec5SDimitry Andric /// If 'V' maps to a SROA candidate, disable SROA for it. 14020b57cec5SDimitry Andric void CallAnalyzer::disableSROA(Value *V) { 1403480093f4SDimitry Andric if (auto *SROAArg = getSROAArgForValueOrNull(V)) { 1404480093f4SDimitry Andric disableSROAForArg(SROAArg); 14050b57cec5SDimitry Andric } 14060b57cec5SDimitry Andric } 14070b57cec5SDimitry Andric 14080b57cec5SDimitry Andric void CallAnalyzer::disableLoadElimination() { 14090b57cec5SDimitry Andric if (EnableLoadElimination) { 1410480093f4SDimitry Andric onDisableLoadElimination(); 14110b57cec5SDimitry Andric EnableLoadElimination = false; 14120b57cec5SDimitry Andric } 14130b57cec5SDimitry Andric } 14140b57cec5SDimitry Andric 14150b57cec5SDimitry Andric /// Accumulate a constant GEP offset into an APInt if possible. 14160b57cec5SDimitry Andric /// 14170b57cec5SDimitry Andric /// Returns false if unable to compute the offset for any reason. Respects any 14180b57cec5SDimitry Andric /// simplified values known during the analysis of this callsite. 14190b57cec5SDimitry Andric bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) { 14200b57cec5SDimitry Andric unsigned IntPtrWidth = DL.getIndexTypeSizeInBits(GEP.getType()); 14210b57cec5SDimitry Andric assert(IntPtrWidth == Offset.getBitWidth()); 14220b57cec5SDimitry Andric 14230b57cec5SDimitry Andric for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP); 14240b57cec5SDimitry Andric GTI != GTE; ++GTI) { 14250b57cec5SDimitry Andric ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand()); 14260b57cec5SDimitry Andric if (!OpC) 14270b57cec5SDimitry Andric if (Constant *SimpleOp = SimplifiedValues.lookup(GTI.getOperand())) 14280b57cec5SDimitry Andric OpC = dyn_cast<ConstantInt>(SimpleOp); 14290b57cec5SDimitry Andric if (!OpC) 14300b57cec5SDimitry Andric return false; 14310b57cec5SDimitry Andric if (OpC->isZero()) 14320b57cec5SDimitry Andric continue; 14330b57cec5SDimitry Andric 14340b57cec5SDimitry Andric // Handle a struct index, which adds its field offset to the pointer. 14350b57cec5SDimitry Andric if (StructType *STy = GTI.getStructTypeOrNull()) { 14360b57cec5SDimitry Andric unsigned ElementIdx = OpC->getZExtValue(); 14370b57cec5SDimitry Andric const StructLayout *SL = DL.getStructLayout(STy); 14380b57cec5SDimitry Andric Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx)); 14390b57cec5SDimitry Andric continue; 14400b57cec5SDimitry Andric } 14410b57cec5SDimitry Andric 14421db9f3b2SDimitry Andric APInt TypeSize(IntPtrWidth, GTI.getSequentialElementStride(DL)); 14430b57cec5SDimitry Andric Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize; 14440b57cec5SDimitry Andric } 14450b57cec5SDimitry Andric return true; 14460b57cec5SDimitry Andric } 14470b57cec5SDimitry Andric 14480b57cec5SDimitry Andric /// Use TTI to check whether a GEP is free. 14490b57cec5SDimitry Andric /// 14500b57cec5SDimitry Andric /// Respects any simplified values known during the analysis of this callsite. 14510b57cec5SDimitry Andric bool CallAnalyzer::isGEPFree(GetElementPtrInst &GEP) { 14520b57cec5SDimitry Andric SmallVector<Value *, 4> Operands; 14530b57cec5SDimitry Andric Operands.push_back(GEP.getOperand(0)); 1454e8d8bef9SDimitry Andric for (const Use &Op : GEP.indices()) 1455e8d8bef9SDimitry Andric if (Constant *SimpleOp = SimplifiedValues.lookup(Op)) 14560b57cec5SDimitry Andric Operands.push_back(SimpleOp); 14570b57cec5SDimitry Andric else 1458e8d8bef9SDimitry Andric Operands.push_back(Op); 1459bdd1243dSDimitry Andric return TTI.getInstructionCost(&GEP, Operands, 1460fe6060f1SDimitry Andric TargetTransformInfo::TCK_SizeAndLatency) == 1461fe6060f1SDimitry Andric TargetTransformInfo::TCC_Free; 14620b57cec5SDimitry Andric } 14630b57cec5SDimitry Andric 14640b57cec5SDimitry Andric bool CallAnalyzer::visitAlloca(AllocaInst &I) { 1465fe6060f1SDimitry Andric disableSROA(I.getOperand(0)); 1466fe6060f1SDimitry Andric 14670b57cec5SDimitry Andric // Check whether inlining will turn a dynamic alloca into a static 14680b57cec5SDimitry Andric // alloca and handle that case. 14690b57cec5SDimitry Andric if (I.isArrayAllocation()) { 14700b57cec5SDimitry Andric Constant *Size = SimplifiedValues.lookup(I.getArraySize()); 14710b57cec5SDimitry Andric if (auto *AllocSize = dyn_cast_or_null<ConstantInt>(Size)) { 14725ffd83dbSDimitry Andric // Sometimes a dynamic alloca could be converted into a static alloca 14735ffd83dbSDimitry Andric // after this constant prop, and become a huge static alloca on an 14745ffd83dbSDimitry Andric // unconditional CFG path. Avoid inlining if this is going to happen above 14755ffd83dbSDimitry Andric // a threshold. 14765ffd83dbSDimitry Andric // FIXME: If the threshold is removed or lowered too much, we could end up 14775ffd83dbSDimitry Andric // being too pessimistic and prevent inlining non-problematic code. This 14785ffd83dbSDimitry Andric // could result in unintended perf regressions. A better overall strategy 14795ffd83dbSDimitry Andric // is needed to track stack usage during inlining. 14800b57cec5SDimitry Andric Type *Ty = I.getAllocatedType(); 14810b57cec5SDimitry Andric AllocatedSize = SaturatingMultiplyAdd( 1482fe6060f1SDimitry Andric AllocSize->getLimitedValue(), 1483bdd1243dSDimitry Andric DL.getTypeAllocSize(Ty).getKnownMinValue(), AllocatedSize); 1484fe6060f1SDimitry Andric if (AllocatedSize > InlineConstants::MaxSimplifiedDynamicAllocaToInline) 14855ffd83dbSDimitry Andric HasDynamicAlloca = true; 14865ffd83dbSDimitry Andric return false; 14875ffd83dbSDimitry Andric } 14880b57cec5SDimitry Andric } 14890b57cec5SDimitry Andric 14900b57cec5SDimitry Andric // Accumulate the allocated size. 14910b57cec5SDimitry Andric if (I.isStaticAlloca()) { 14920b57cec5SDimitry Andric Type *Ty = I.getAllocatedType(); 1493bdd1243dSDimitry Andric AllocatedSize = SaturatingAdd(DL.getTypeAllocSize(Ty).getKnownMinValue(), 1494bdd1243dSDimitry Andric AllocatedSize); 14950b57cec5SDimitry Andric } 14960b57cec5SDimitry Andric 14970b57cec5SDimitry Andric // FIXME: This is overly conservative. Dynamic allocas are inefficient for 14980b57cec5SDimitry Andric // a variety of reasons, and so we would like to not inline them into 14990b57cec5SDimitry Andric // functions which don't currently have a dynamic alloca. This simply 15000b57cec5SDimitry Andric // disables inlining altogether in the presence of a dynamic alloca. 1501fe6060f1SDimitry Andric if (!I.isStaticAlloca()) 15020b57cec5SDimitry Andric HasDynamicAlloca = true; 1503fe6060f1SDimitry Andric 15040b57cec5SDimitry Andric return false; 15050b57cec5SDimitry Andric } 15060b57cec5SDimitry Andric 15070b57cec5SDimitry Andric bool CallAnalyzer::visitPHI(PHINode &I) { 15080b57cec5SDimitry Andric // FIXME: We need to propagate SROA *disabling* through phi nodes, even 15090b57cec5SDimitry Andric // though we don't want to propagate it's bonuses. The idea is to disable 15100b57cec5SDimitry Andric // SROA if it *might* be used in an inappropriate manner. 15110b57cec5SDimitry Andric 15120b57cec5SDimitry Andric // Phi nodes are always zero-cost. 15130b57cec5SDimitry Andric // FIXME: Pointer sizes may differ between different address spaces, so do we 15140b57cec5SDimitry Andric // need to use correct address space in the call to getPointerSizeInBits here? 15150b57cec5SDimitry Andric // Or could we skip the getPointerSizeInBits call completely? As far as I can 15160b57cec5SDimitry Andric // see the ZeroOffset is used as a dummy value, so we can probably use any 15170b57cec5SDimitry Andric // bit width for the ZeroOffset? 1518349cc55cSDimitry Andric APInt ZeroOffset = APInt::getZero(DL.getPointerSizeInBits(0)); 15190b57cec5SDimitry Andric bool CheckSROA = I.getType()->isPointerTy(); 15200b57cec5SDimitry Andric 15210b57cec5SDimitry Andric // Track the constant or pointer with constant offset we've seen so far. 15220b57cec5SDimitry Andric Constant *FirstC = nullptr; 15230b57cec5SDimitry Andric std::pair<Value *, APInt> FirstBaseAndOffset = {nullptr, ZeroOffset}; 15240b57cec5SDimitry Andric Value *FirstV = nullptr; 15250b57cec5SDimitry Andric 15260b57cec5SDimitry Andric for (unsigned i = 0, e = I.getNumIncomingValues(); i != e; ++i) { 15270b57cec5SDimitry Andric BasicBlock *Pred = I.getIncomingBlock(i); 15280b57cec5SDimitry Andric // If the incoming block is dead, skip the incoming block. 15290b57cec5SDimitry Andric if (DeadBlocks.count(Pred)) 15300b57cec5SDimitry Andric continue; 15310b57cec5SDimitry Andric // If the parent block of phi is not the known successor of the incoming 15320b57cec5SDimitry Andric // block, skip the incoming block. 15330b57cec5SDimitry Andric BasicBlock *KnownSuccessor = KnownSuccessors[Pred]; 15340b57cec5SDimitry Andric if (KnownSuccessor && KnownSuccessor != I.getParent()) 15350b57cec5SDimitry Andric continue; 15360b57cec5SDimitry Andric 15370b57cec5SDimitry Andric Value *V = I.getIncomingValue(i); 15380b57cec5SDimitry Andric // If the incoming value is this phi itself, skip the incoming value. 15390b57cec5SDimitry Andric if (&I == V) 15400b57cec5SDimitry Andric continue; 15410b57cec5SDimitry Andric 15420b57cec5SDimitry Andric Constant *C = dyn_cast<Constant>(V); 15430b57cec5SDimitry Andric if (!C) 15440b57cec5SDimitry Andric C = SimplifiedValues.lookup(V); 15450b57cec5SDimitry Andric 15460b57cec5SDimitry Andric std::pair<Value *, APInt> BaseAndOffset = {nullptr, ZeroOffset}; 15470b57cec5SDimitry Andric if (!C && CheckSROA) 15480b57cec5SDimitry Andric BaseAndOffset = ConstantOffsetPtrs.lookup(V); 15490b57cec5SDimitry Andric 15500b57cec5SDimitry Andric if (!C && !BaseAndOffset.first) 15510b57cec5SDimitry Andric // The incoming value is neither a constant nor a pointer with constant 15520b57cec5SDimitry Andric // offset, exit early. 15530b57cec5SDimitry Andric return true; 15540b57cec5SDimitry Andric 15550b57cec5SDimitry Andric if (FirstC) { 15560b57cec5SDimitry Andric if (FirstC == C) 15570b57cec5SDimitry Andric // If we've seen a constant incoming value before and it is the same 15580b57cec5SDimitry Andric // constant we see this time, continue checking the next incoming value. 15590b57cec5SDimitry Andric continue; 15600b57cec5SDimitry Andric // Otherwise early exit because we either see a different constant or saw 15610b57cec5SDimitry Andric // a constant before but we have a pointer with constant offset this time. 15620b57cec5SDimitry Andric return true; 15630b57cec5SDimitry Andric } 15640b57cec5SDimitry Andric 15650b57cec5SDimitry Andric if (FirstV) { 15660b57cec5SDimitry Andric // The same logic as above, but check pointer with constant offset here. 15670b57cec5SDimitry Andric if (FirstBaseAndOffset == BaseAndOffset) 15680b57cec5SDimitry Andric continue; 15690b57cec5SDimitry Andric return true; 15700b57cec5SDimitry Andric } 15710b57cec5SDimitry Andric 15720b57cec5SDimitry Andric if (C) { 15730b57cec5SDimitry Andric // This is the 1st time we've seen a constant, record it. 15740b57cec5SDimitry Andric FirstC = C; 15750b57cec5SDimitry Andric continue; 15760b57cec5SDimitry Andric } 15770b57cec5SDimitry Andric 15780b57cec5SDimitry Andric // The remaining case is that this is the 1st time we've seen a pointer with 15790b57cec5SDimitry Andric // constant offset, record it. 15800b57cec5SDimitry Andric FirstV = V; 15810b57cec5SDimitry Andric FirstBaseAndOffset = BaseAndOffset; 15820b57cec5SDimitry Andric } 15830b57cec5SDimitry Andric 15840b57cec5SDimitry Andric // Check if we can map phi to a constant. 15850b57cec5SDimitry Andric if (FirstC) { 15860b57cec5SDimitry Andric SimplifiedValues[&I] = FirstC; 15870b57cec5SDimitry Andric return true; 15880b57cec5SDimitry Andric } 15890b57cec5SDimitry Andric 15900b57cec5SDimitry Andric // Check if we can map phi to a pointer with constant offset. 15910b57cec5SDimitry Andric if (FirstBaseAndOffset.first) { 15920b57cec5SDimitry Andric ConstantOffsetPtrs[&I] = FirstBaseAndOffset; 15930b57cec5SDimitry Andric 1594480093f4SDimitry Andric if (auto *SROAArg = getSROAArgForValueOrNull(FirstV)) 15950b57cec5SDimitry Andric SROAArgValues[&I] = SROAArg; 15960b57cec5SDimitry Andric } 15970b57cec5SDimitry Andric 15980b57cec5SDimitry Andric return true; 15990b57cec5SDimitry Andric } 16000b57cec5SDimitry Andric 16010b57cec5SDimitry Andric /// Check we can fold GEPs of constant-offset call site argument pointers. 16020b57cec5SDimitry Andric /// This requires target data and inbounds GEPs. 16030b57cec5SDimitry Andric /// 16040b57cec5SDimitry Andric /// \return true if the specified GEP can be folded. 16050b57cec5SDimitry Andric bool CallAnalyzer::canFoldInboundsGEP(GetElementPtrInst &I) { 16060b57cec5SDimitry Andric // Check if we have a base + offset for the pointer. 16070b57cec5SDimitry Andric std::pair<Value *, APInt> BaseAndOffset = 16080b57cec5SDimitry Andric ConstantOffsetPtrs.lookup(I.getPointerOperand()); 16090b57cec5SDimitry Andric if (!BaseAndOffset.first) 16100b57cec5SDimitry Andric return false; 16110b57cec5SDimitry Andric 16120b57cec5SDimitry Andric // Check if the offset of this GEP is constant, and if so accumulate it 16130b57cec5SDimitry Andric // into Offset. 16140b57cec5SDimitry Andric if (!accumulateGEPOffset(cast<GEPOperator>(I), BaseAndOffset.second)) 16150b57cec5SDimitry Andric return false; 16160b57cec5SDimitry Andric 16170b57cec5SDimitry Andric // Add the result as a new mapping to Base + Offset. 16180b57cec5SDimitry Andric ConstantOffsetPtrs[&I] = BaseAndOffset; 16190b57cec5SDimitry Andric 16200b57cec5SDimitry Andric return true; 16210b57cec5SDimitry Andric } 16220b57cec5SDimitry Andric 16230b57cec5SDimitry Andric bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) { 1624480093f4SDimitry Andric auto *SROAArg = getSROAArgForValueOrNull(I.getPointerOperand()); 16250b57cec5SDimitry Andric 16260b57cec5SDimitry Andric // Lambda to check whether a GEP's indices are all constant. 16270b57cec5SDimitry Andric auto IsGEPOffsetConstant = [&](GetElementPtrInst &GEP) { 1628e8d8bef9SDimitry Andric for (const Use &Op : GEP.indices()) 1629e8d8bef9SDimitry Andric if (!isa<Constant>(Op) && !SimplifiedValues.lookup(Op)) 16300b57cec5SDimitry Andric return false; 16310b57cec5SDimitry Andric return true; 16320b57cec5SDimitry Andric }; 16330b57cec5SDimitry Andric 16345ffd83dbSDimitry Andric if (!DisableGEPConstOperand) 163581ad6265SDimitry Andric if (simplifyInstruction(I)) 16365ffd83dbSDimitry Andric return true; 16375ffd83dbSDimitry Andric 16380b57cec5SDimitry Andric if ((I.isInBounds() && canFoldInboundsGEP(I)) || IsGEPOffsetConstant(I)) { 1639480093f4SDimitry Andric if (SROAArg) 16400b57cec5SDimitry Andric SROAArgValues[&I] = SROAArg; 16410b57cec5SDimitry Andric 16420b57cec5SDimitry Andric // Constant GEPs are modeled as free. 16430b57cec5SDimitry Andric return true; 16440b57cec5SDimitry Andric } 16450b57cec5SDimitry Andric 16460b57cec5SDimitry Andric // Variable GEPs will require math and will disable SROA. 1647480093f4SDimitry Andric if (SROAArg) 1648480093f4SDimitry Andric disableSROAForArg(SROAArg); 16490b57cec5SDimitry Andric return isGEPFree(I); 16500b57cec5SDimitry Andric } 16510b57cec5SDimitry Andric 16520b57cec5SDimitry Andric /// Simplify \p I if its operands are constants and update SimplifiedValues. 165381ad6265SDimitry Andric bool CallAnalyzer::simplifyInstruction(Instruction &I) { 165481ad6265SDimitry Andric SmallVector<Constant *> COps; 16550b57cec5SDimitry Andric for (Value *Op : I.operands()) { 16560b57cec5SDimitry Andric Constant *COp = dyn_cast<Constant>(Op); 16570b57cec5SDimitry Andric if (!COp) 16580b57cec5SDimitry Andric COp = SimplifiedValues.lookup(Op); 16590b57cec5SDimitry Andric if (!COp) 16600b57cec5SDimitry Andric return false; 16610b57cec5SDimitry Andric COps.push_back(COp); 16620b57cec5SDimitry Andric } 166381ad6265SDimitry Andric auto *C = ConstantFoldInstOperands(&I, COps, DL); 16640b57cec5SDimitry Andric if (!C) 16650b57cec5SDimitry Andric return false; 16660b57cec5SDimitry Andric SimplifiedValues[&I] = C; 16670b57cec5SDimitry Andric return true; 16680b57cec5SDimitry Andric } 16690b57cec5SDimitry Andric 1670349cc55cSDimitry Andric /// Try to simplify a call to llvm.is.constant. 1671349cc55cSDimitry Andric /// 1672349cc55cSDimitry Andric /// Duplicate the argument checking from CallAnalyzer::simplifyCallSite since 1673349cc55cSDimitry Andric /// we expect calls of this specific intrinsic to be infrequent. 1674349cc55cSDimitry Andric /// 1675349cc55cSDimitry Andric /// FIXME: Given that we know CB's parent (F) caller 1676349cc55cSDimitry Andric /// (CandidateCall->getParent()->getParent()), we might be able to determine 1677349cc55cSDimitry Andric /// whether inlining F into F's caller would change how the call to 1678349cc55cSDimitry Andric /// llvm.is.constant would evaluate. 1679349cc55cSDimitry Andric bool CallAnalyzer::simplifyIntrinsicCallIsConstant(CallBase &CB) { 1680349cc55cSDimitry Andric Value *Arg = CB.getArgOperand(0); 1681349cc55cSDimitry Andric auto *C = dyn_cast<Constant>(Arg); 1682349cc55cSDimitry Andric 1683349cc55cSDimitry Andric if (!C) 1684349cc55cSDimitry Andric C = dyn_cast_or_null<Constant>(SimplifiedValues.lookup(Arg)); 1685349cc55cSDimitry Andric 1686349cc55cSDimitry Andric Type *RT = CB.getFunctionType()->getReturnType(); 1687349cc55cSDimitry Andric SimplifiedValues[&CB] = ConstantInt::get(RT, C ? 1 : 0); 1688349cc55cSDimitry Andric return true; 1689349cc55cSDimitry Andric } 1690349cc55cSDimitry Andric 1691bdd1243dSDimitry Andric bool CallAnalyzer::simplifyIntrinsicCallObjectSize(CallBase &CB) { 1692bdd1243dSDimitry Andric // As per the langref, "The fourth argument to llvm.objectsize determines if 1693bdd1243dSDimitry Andric // the value should be evaluated at runtime." 1694bdd1243dSDimitry Andric if (cast<ConstantInt>(CB.getArgOperand(3))->isOne()) 1695bdd1243dSDimitry Andric return false; 1696bdd1243dSDimitry Andric 1697bdd1243dSDimitry Andric Value *V = lowerObjectSizeCall(&cast<IntrinsicInst>(CB), DL, nullptr, 1698bdd1243dSDimitry Andric /*MustSucceed=*/true); 1699bdd1243dSDimitry Andric Constant *C = dyn_cast_or_null<Constant>(V); 1700bdd1243dSDimitry Andric if (C) 1701bdd1243dSDimitry Andric SimplifiedValues[&CB] = C; 1702bdd1243dSDimitry Andric return C; 1703bdd1243dSDimitry Andric } 1704bdd1243dSDimitry Andric 17050b57cec5SDimitry Andric bool CallAnalyzer::visitBitCast(BitCastInst &I) { 17060b57cec5SDimitry Andric // Propagate constants through bitcasts. 170781ad6265SDimitry Andric if (simplifyInstruction(I)) 17080b57cec5SDimitry Andric return true; 17090b57cec5SDimitry Andric 17100b57cec5SDimitry Andric // Track base/offsets through casts 17110b57cec5SDimitry Andric std::pair<Value *, APInt> BaseAndOffset = 17120b57cec5SDimitry Andric ConstantOffsetPtrs.lookup(I.getOperand(0)); 17130b57cec5SDimitry Andric // Casts don't change the offset, just wrap it up. 17140b57cec5SDimitry Andric if (BaseAndOffset.first) 17150b57cec5SDimitry Andric ConstantOffsetPtrs[&I] = BaseAndOffset; 17160b57cec5SDimitry Andric 17170b57cec5SDimitry Andric // Also look for SROA candidates here. 1718480093f4SDimitry Andric if (auto *SROAArg = getSROAArgForValueOrNull(I.getOperand(0))) 17190b57cec5SDimitry Andric SROAArgValues[&I] = SROAArg; 17200b57cec5SDimitry Andric 17210b57cec5SDimitry Andric // Bitcasts are always zero cost. 17220b57cec5SDimitry Andric return true; 17230b57cec5SDimitry Andric } 17240b57cec5SDimitry Andric 17250b57cec5SDimitry Andric bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) { 17260b57cec5SDimitry Andric // Propagate constants through ptrtoint. 172781ad6265SDimitry Andric if (simplifyInstruction(I)) 17280b57cec5SDimitry Andric return true; 17290b57cec5SDimitry Andric 17300b57cec5SDimitry Andric // Track base/offset pairs when converted to a plain integer provided the 17310b57cec5SDimitry Andric // integer is large enough to represent the pointer. 17320b57cec5SDimitry Andric unsigned IntegerSize = I.getType()->getScalarSizeInBits(); 17330b57cec5SDimitry Andric unsigned AS = I.getOperand(0)->getType()->getPointerAddressSpace(); 1734e8d8bef9SDimitry Andric if (IntegerSize == DL.getPointerSizeInBits(AS)) { 17350b57cec5SDimitry Andric std::pair<Value *, APInt> BaseAndOffset = 17360b57cec5SDimitry Andric ConstantOffsetPtrs.lookup(I.getOperand(0)); 17370b57cec5SDimitry Andric if (BaseAndOffset.first) 17380b57cec5SDimitry Andric ConstantOffsetPtrs[&I] = BaseAndOffset; 17390b57cec5SDimitry Andric } 17400b57cec5SDimitry Andric 17410b57cec5SDimitry Andric // This is really weird. Technically, ptrtoint will disable SROA. However, 17420b57cec5SDimitry Andric // unless that ptrtoint is *used* somewhere in the live basic blocks after 17430b57cec5SDimitry Andric // inlining, it will be nuked, and SROA should proceed. All of the uses which 17440b57cec5SDimitry Andric // would block SROA would also block SROA if applied directly to a pointer, 17450b57cec5SDimitry Andric // and so we can just add the integer in here. The only places where SROA is 17460b57cec5SDimitry Andric // preserved either cannot fire on an integer, or won't in-and-of themselves 17470b57cec5SDimitry Andric // disable SROA (ext) w/o some later use that we would see and disable. 1748480093f4SDimitry Andric if (auto *SROAArg = getSROAArgForValueOrNull(I.getOperand(0))) 17490b57cec5SDimitry Andric SROAArgValues[&I] = SROAArg; 17500b57cec5SDimitry Andric 1751bdd1243dSDimitry Andric return TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency) == 1752fe6060f1SDimitry Andric TargetTransformInfo::TCC_Free; 17530b57cec5SDimitry Andric } 17540b57cec5SDimitry Andric 17550b57cec5SDimitry Andric bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) { 17560b57cec5SDimitry Andric // Propagate constants through ptrtoint. 175781ad6265SDimitry Andric if (simplifyInstruction(I)) 17580b57cec5SDimitry Andric return true; 17590b57cec5SDimitry Andric 17600b57cec5SDimitry Andric // Track base/offset pairs when round-tripped through a pointer without 17610b57cec5SDimitry Andric // modifications provided the integer is not too large. 17620b57cec5SDimitry Andric Value *Op = I.getOperand(0); 17630b57cec5SDimitry Andric unsigned IntegerSize = Op->getType()->getScalarSizeInBits(); 17640b57cec5SDimitry Andric if (IntegerSize <= DL.getPointerTypeSizeInBits(I.getType())) { 17650b57cec5SDimitry Andric std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Op); 17660b57cec5SDimitry Andric if (BaseAndOffset.first) 17670b57cec5SDimitry Andric ConstantOffsetPtrs[&I] = BaseAndOffset; 17680b57cec5SDimitry Andric } 17690b57cec5SDimitry Andric 17700b57cec5SDimitry Andric // "Propagate" SROA here in the same manner as we do for ptrtoint above. 1771480093f4SDimitry Andric if (auto *SROAArg = getSROAArgForValueOrNull(Op)) 17720b57cec5SDimitry Andric SROAArgValues[&I] = SROAArg; 17730b57cec5SDimitry Andric 1774bdd1243dSDimitry Andric return TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency) == 1775fe6060f1SDimitry Andric TargetTransformInfo::TCC_Free; 17760b57cec5SDimitry Andric } 17770b57cec5SDimitry Andric 17780b57cec5SDimitry Andric bool CallAnalyzer::visitCastInst(CastInst &I) { 17790b57cec5SDimitry Andric // Propagate constants through casts. 178081ad6265SDimitry Andric if (simplifyInstruction(I)) 17810b57cec5SDimitry Andric return true; 17820b57cec5SDimitry Andric 17835ffd83dbSDimitry Andric // Disable SROA in the face of arbitrary casts we don't explicitly list 17845ffd83dbSDimitry Andric // elsewhere. 17850b57cec5SDimitry Andric disableSROA(I.getOperand(0)); 17860b57cec5SDimitry Andric 17870b57cec5SDimitry Andric // If this is a floating-point cast, and the target says this operation 17880b57cec5SDimitry Andric // is expensive, this may eventually become a library call. Treat the cost 17890b57cec5SDimitry Andric // as such. 17900b57cec5SDimitry Andric switch (I.getOpcode()) { 17910b57cec5SDimitry Andric case Instruction::FPTrunc: 17920b57cec5SDimitry Andric case Instruction::FPExt: 17930b57cec5SDimitry Andric case Instruction::UIToFP: 17940b57cec5SDimitry Andric case Instruction::SIToFP: 17950b57cec5SDimitry Andric case Instruction::FPToUI: 17960b57cec5SDimitry Andric case Instruction::FPToSI: 17970b57cec5SDimitry Andric if (TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive) 1798480093f4SDimitry Andric onCallPenalty(); 17990b57cec5SDimitry Andric break; 18000b57cec5SDimitry Andric default: 18010b57cec5SDimitry Andric break; 18020b57cec5SDimitry Andric } 18030b57cec5SDimitry Andric 1804bdd1243dSDimitry Andric return TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency) == 1805fe6060f1SDimitry Andric TargetTransformInfo::TCC_Free; 18060b57cec5SDimitry Andric } 18070b57cec5SDimitry Andric 18080b57cec5SDimitry Andric bool CallAnalyzer::paramHasAttr(Argument *A, Attribute::AttrKind Attr) { 18090b57cec5SDimitry Andric return CandidateCall.paramHasAttr(A->getArgNo(), Attr); 18100b57cec5SDimitry Andric } 18110b57cec5SDimitry Andric 18120b57cec5SDimitry Andric bool CallAnalyzer::isKnownNonNullInCallee(Value *V) { 18130b57cec5SDimitry Andric // Does the *call site* have the NonNull attribute set on an argument? We 18140b57cec5SDimitry Andric // use the attribute on the call site to memoize any analysis done in the 18150b57cec5SDimitry Andric // caller. This will also trip if the callee function has a non-null 18160b57cec5SDimitry Andric // parameter attribute, but that's a less interesting case because hopefully 18170b57cec5SDimitry Andric // the callee would already have been simplified based on that. 18180b57cec5SDimitry Andric if (Argument *A = dyn_cast<Argument>(V)) 18190b57cec5SDimitry Andric if (paramHasAttr(A, Attribute::NonNull)) 18200b57cec5SDimitry Andric return true; 18210b57cec5SDimitry Andric 18220b57cec5SDimitry Andric // Is this an alloca in the caller? This is distinct from the attribute case 18230b57cec5SDimitry Andric // above because attributes aren't updated within the inliner itself and we 18240b57cec5SDimitry Andric // always want to catch the alloca derived case. 18250b57cec5SDimitry Andric if (isAllocaDerivedArg(V)) 18260b57cec5SDimitry Andric // We can actually predict the result of comparisons between an 18270b57cec5SDimitry Andric // alloca-derived value and null. Note that this fires regardless of 18280b57cec5SDimitry Andric // SROA firing. 18290b57cec5SDimitry Andric return true; 18300b57cec5SDimitry Andric 18310b57cec5SDimitry Andric return false; 18320b57cec5SDimitry Andric } 18330b57cec5SDimitry Andric 18340b57cec5SDimitry Andric bool CallAnalyzer::allowSizeGrowth(CallBase &Call) { 18350b57cec5SDimitry Andric // If the normal destination of the invoke or the parent block of the call 18360b57cec5SDimitry Andric // site is unreachable-terminated, there is little point in inlining this 18370b57cec5SDimitry Andric // unless there is literally zero cost. 18380b57cec5SDimitry Andric // FIXME: Note that it is possible that an unreachable-terminated block has a 18390b57cec5SDimitry Andric // hot entry. For example, in below scenario inlining hot_call_X() may be 18400b57cec5SDimitry Andric // beneficial : 18410b57cec5SDimitry Andric // main() { 18420b57cec5SDimitry Andric // hot_call_1(); 18430b57cec5SDimitry Andric // ... 18440b57cec5SDimitry Andric // hot_call_N() 18450b57cec5SDimitry Andric // exit(0); 18460b57cec5SDimitry Andric // } 18470b57cec5SDimitry Andric // For now, we are not handling this corner case here as it is rare in real 18480b57cec5SDimitry Andric // code. In future, we should elaborate this based on BPI and BFI in more 18490b57cec5SDimitry Andric // general threshold adjusting heuristics in updateThreshold(). 18500b57cec5SDimitry Andric if (InvokeInst *II = dyn_cast<InvokeInst>(&Call)) { 18510b57cec5SDimitry Andric if (isa<UnreachableInst>(II->getNormalDest()->getTerminator())) 18520b57cec5SDimitry Andric return false; 18530b57cec5SDimitry Andric } else if (isa<UnreachableInst>(Call.getParent()->getTerminator())) 18540b57cec5SDimitry Andric return false; 18550b57cec5SDimitry Andric 18560b57cec5SDimitry Andric return true; 18570b57cec5SDimitry Andric } 18580b57cec5SDimitry Andric 1859480093f4SDimitry Andric bool InlineCostCallAnalyzer::isColdCallSite(CallBase &Call, 18600b57cec5SDimitry Andric BlockFrequencyInfo *CallerBFI) { 18610b57cec5SDimitry Andric // If global profile summary is available, then callsite's coldness is 18620b57cec5SDimitry Andric // determined based on that. 18630b57cec5SDimitry Andric if (PSI && PSI->hasProfileSummary()) 18645ffd83dbSDimitry Andric return PSI->isColdCallSite(Call, CallerBFI); 18650b57cec5SDimitry Andric 18660b57cec5SDimitry Andric // Otherwise we need BFI to be available. 18670b57cec5SDimitry Andric if (!CallerBFI) 18680b57cec5SDimitry Andric return false; 18690b57cec5SDimitry Andric 18700b57cec5SDimitry Andric // Determine if the callsite is cold relative to caller's entry. We could 18710b57cec5SDimitry Andric // potentially cache the computation of scaled entry frequency, but the added 18720b57cec5SDimitry Andric // complexity is not worth it unless this scaling shows up high in the 18730b57cec5SDimitry Andric // profiles. 18740b57cec5SDimitry Andric const BranchProbability ColdProb(ColdCallSiteRelFreq, 100); 18750b57cec5SDimitry Andric auto CallSiteBB = Call.getParent(); 18760b57cec5SDimitry Andric auto CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB); 18770b57cec5SDimitry Andric auto CallerEntryFreq = 18780b57cec5SDimitry Andric CallerBFI->getBlockFreq(&(Call.getCaller()->getEntryBlock())); 18790b57cec5SDimitry Andric return CallSiteFreq < CallerEntryFreq * ColdProb; 18800b57cec5SDimitry Andric } 18810b57cec5SDimitry Andric 1882bdd1243dSDimitry Andric std::optional<int> 1883480093f4SDimitry Andric InlineCostCallAnalyzer::getHotCallSiteThreshold(CallBase &Call, 18840b57cec5SDimitry Andric BlockFrequencyInfo *CallerBFI) { 18850b57cec5SDimitry Andric 18860b57cec5SDimitry Andric // If global profile summary is available, then callsite's hotness is 18870b57cec5SDimitry Andric // determined based on that. 18885ffd83dbSDimitry Andric if (PSI && PSI->hasProfileSummary() && PSI->isHotCallSite(Call, CallerBFI)) 18890b57cec5SDimitry Andric return Params.HotCallSiteThreshold; 18900b57cec5SDimitry Andric 18910b57cec5SDimitry Andric // Otherwise we need BFI to be available and to have a locally hot callsite 18920b57cec5SDimitry Andric // threshold. 18930b57cec5SDimitry Andric if (!CallerBFI || !Params.LocallyHotCallSiteThreshold) 1894bdd1243dSDimitry Andric return std::nullopt; 18950b57cec5SDimitry Andric 18960b57cec5SDimitry Andric // Determine if the callsite is hot relative to caller's entry. We could 18970b57cec5SDimitry Andric // potentially cache the computation of scaled entry frequency, but the added 18980b57cec5SDimitry Andric // complexity is not worth it unless this scaling shows up high in the 18990b57cec5SDimitry Andric // profiles. 19005f757f3fSDimitry Andric const BasicBlock *CallSiteBB = Call.getParent(); 19015f757f3fSDimitry Andric BlockFrequency CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB); 19025f757f3fSDimitry Andric BlockFrequency CallerEntryFreq = CallerBFI->getEntryFreq(); 19035f757f3fSDimitry Andric std::optional<BlockFrequency> Limit = CallerEntryFreq.mul(HotCallSiteRelFreq); 19045f757f3fSDimitry Andric if (Limit && CallSiteFreq >= *Limit) 19050b57cec5SDimitry Andric return Params.LocallyHotCallSiteThreshold; 19060b57cec5SDimitry Andric 19070b57cec5SDimitry Andric // Otherwise treat it normally. 1908bdd1243dSDimitry Andric return std::nullopt; 19090b57cec5SDimitry Andric } 19100b57cec5SDimitry Andric 1911480093f4SDimitry Andric void InlineCostCallAnalyzer::updateThreshold(CallBase &Call, Function &Callee) { 19120b57cec5SDimitry Andric // If no size growth is allowed for this inlining, set Threshold to 0. 19130b57cec5SDimitry Andric if (!allowSizeGrowth(Call)) { 19140b57cec5SDimitry Andric Threshold = 0; 19150b57cec5SDimitry Andric return; 19160b57cec5SDimitry Andric } 19170b57cec5SDimitry Andric 19180b57cec5SDimitry Andric Function *Caller = Call.getCaller(); 19190b57cec5SDimitry Andric 19200b57cec5SDimitry Andric // return min(A, B) if B is valid. 1921bdd1243dSDimitry Andric auto MinIfValid = [](int A, std::optional<int> B) { 1922bdd1243dSDimitry Andric return B ? std::min(A, *B) : A; 19230b57cec5SDimitry Andric }; 19240b57cec5SDimitry Andric 19250b57cec5SDimitry Andric // return max(A, B) if B is valid. 1926bdd1243dSDimitry Andric auto MaxIfValid = [](int A, std::optional<int> B) { 1927bdd1243dSDimitry Andric return B ? std::max(A, *B) : A; 19280b57cec5SDimitry Andric }; 19290b57cec5SDimitry Andric 19300b57cec5SDimitry Andric // Various bonus percentages. These are multiplied by Threshold to get the 19310b57cec5SDimitry Andric // bonus values. 19320b57cec5SDimitry Andric // SingleBBBonus: This bonus is applied if the callee has a single reachable 19330b57cec5SDimitry Andric // basic block at the given callsite context. This is speculatively applied 19340b57cec5SDimitry Andric // and withdrawn if more than one basic block is seen. 19350b57cec5SDimitry Andric // 19360b57cec5SDimitry Andric // LstCallToStaticBonus: This large bonus is applied to ensure the inlining 19370b57cec5SDimitry Andric // of the last call to a static function as inlining such functions is 19380b57cec5SDimitry Andric // guaranteed to reduce code size. 19390b57cec5SDimitry Andric // 19400b57cec5SDimitry Andric // These bonus percentages may be set to 0 based on properties of the caller 19410b57cec5SDimitry Andric // and the callsite. 19420b57cec5SDimitry Andric int SingleBBBonusPercent = 50; 19430b57cec5SDimitry Andric int VectorBonusPercent = TTI.getInlinerVectorBonusPercent(); 19440b57cec5SDimitry Andric int LastCallToStaticBonus = InlineConstants::LastCallToStaticBonus; 19450b57cec5SDimitry Andric 19460b57cec5SDimitry Andric // Lambda to set all the above bonus and bonus percentages to 0. 19470b57cec5SDimitry Andric auto DisallowAllBonuses = [&]() { 19480b57cec5SDimitry Andric SingleBBBonusPercent = 0; 19490b57cec5SDimitry Andric VectorBonusPercent = 0; 19500b57cec5SDimitry Andric LastCallToStaticBonus = 0; 19510b57cec5SDimitry Andric }; 19520b57cec5SDimitry Andric 19530b57cec5SDimitry Andric // Use the OptMinSizeThreshold or OptSizeThreshold knob if they are available 19540b57cec5SDimitry Andric // and reduce the threshold if the caller has the necessary attribute. 19550b57cec5SDimitry Andric if (Caller->hasMinSize()) { 19560b57cec5SDimitry Andric Threshold = MinIfValid(Threshold, Params.OptMinSizeThreshold); 19570b57cec5SDimitry Andric // For minsize, we want to disable the single BB bonus and the vector 19580b57cec5SDimitry Andric // bonuses, but not the last-call-to-static bonus. Inlining the last call to 19590b57cec5SDimitry Andric // a static function will, at the minimum, eliminate the parameter setup and 19600b57cec5SDimitry Andric // call/return instructions. 19610b57cec5SDimitry Andric SingleBBBonusPercent = 0; 19620b57cec5SDimitry Andric VectorBonusPercent = 0; 19630b57cec5SDimitry Andric } else if (Caller->hasOptSize()) 19640b57cec5SDimitry Andric Threshold = MinIfValid(Threshold, Params.OptSizeThreshold); 19650b57cec5SDimitry Andric 19660b57cec5SDimitry Andric // Adjust the threshold based on inlinehint attribute and profile based 19670b57cec5SDimitry Andric // hotness information if the caller does not have MinSize attribute. 19680b57cec5SDimitry Andric if (!Caller->hasMinSize()) { 19690b57cec5SDimitry Andric if (Callee.hasFnAttribute(Attribute::InlineHint)) 19700b57cec5SDimitry Andric Threshold = MaxIfValid(Threshold, Params.HintThreshold); 19710b57cec5SDimitry Andric 19720b57cec5SDimitry Andric // FIXME: After switching to the new passmanager, simplify the logic below 19730b57cec5SDimitry Andric // by checking only the callsite hotness/coldness as we will reliably 19740b57cec5SDimitry Andric // have local profile information. 19750b57cec5SDimitry Andric // 19760b57cec5SDimitry Andric // Callsite hotness and coldness can be determined if sample profile is 19770b57cec5SDimitry Andric // used (which adds hotness metadata to calls) or if caller's 19780b57cec5SDimitry Andric // BlockFrequencyInfo is available. 19795ffd83dbSDimitry Andric BlockFrequencyInfo *CallerBFI = GetBFI ? &(GetBFI(*Caller)) : nullptr; 19800b57cec5SDimitry Andric auto HotCallSiteThreshold = getHotCallSiteThreshold(Call, CallerBFI); 19810b57cec5SDimitry Andric if (!Caller->hasOptSize() && HotCallSiteThreshold) { 19820b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Hot callsite.\n"); 19830b57cec5SDimitry Andric // FIXME: This should update the threshold only if it exceeds the 19840b57cec5SDimitry Andric // current threshold, but AutoFDO + ThinLTO currently relies on this 19850b57cec5SDimitry Andric // behavior to prevent inlining of hot callsites during ThinLTO 19860b57cec5SDimitry Andric // compile phase. 198781ad6265SDimitry Andric Threshold = *HotCallSiteThreshold; 19880b57cec5SDimitry Andric } else if (isColdCallSite(Call, CallerBFI)) { 19890b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Cold callsite.\n"); 19900b57cec5SDimitry Andric // Do not apply bonuses for a cold callsite including the 19910b57cec5SDimitry Andric // LastCallToStatic bonus. While this bonus might result in code size 19920b57cec5SDimitry Andric // reduction, it can cause the size of a non-cold caller to increase 19930b57cec5SDimitry Andric // preventing it from being inlined. 19940b57cec5SDimitry Andric DisallowAllBonuses(); 19950b57cec5SDimitry Andric Threshold = MinIfValid(Threshold, Params.ColdCallSiteThreshold); 19960b57cec5SDimitry Andric } else if (PSI) { 19970b57cec5SDimitry Andric // Use callee's global profile information only if we have no way of 19980b57cec5SDimitry Andric // determining this via callsite information. 19990b57cec5SDimitry Andric if (PSI->isFunctionEntryHot(&Callee)) { 20000b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Hot callee.\n"); 20010b57cec5SDimitry Andric // If callsite hotness can not be determined, we may still know 20020b57cec5SDimitry Andric // that the callee is hot and treat it as a weaker hint for threshold 20030b57cec5SDimitry Andric // increase. 20040b57cec5SDimitry Andric Threshold = MaxIfValid(Threshold, Params.HintThreshold); 20050b57cec5SDimitry Andric } else if (PSI->isFunctionEntryCold(&Callee)) { 20060b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Cold callee.\n"); 20070b57cec5SDimitry Andric // Do not apply bonuses for a cold callee including the 20080b57cec5SDimitry Andric // LastCallToStatic bonus. While this bonus might result in code size 20090b57cec5SDimitry Andric // reduction, it can cause the size of a non-cold caller to increase 20100b57cec5SDimitry Andric // preventing it from being inlined. 20110b57cec5SDimitry Andric DisallowAllBonuses(); 20120b57cec5SDimitry Andric Threshold = MinIfValid(Threshold, Params.ColdThreshold); 20130b57cec5SDimitry Andric } 20140b57cec5SDimitry Andric } 20150b57cec5SDimitry Andric } 20160b57cec5SDimitry Andric 2017fe6060f1SDimitry Andric Threshold += TTI.adjustInliningThreshold(&Call); 2018fe6060f1SDimitry Andric 20190b57cec5SDimitry Andric // Finally, take the target-specific inlining threshold multiplier into 20200b57cec5SDimitry Andric // account. 20210b57cec5SDimitry Andric Threshold *= TTI.getInliningThresholdMultiplier(); 20220b57cec5SDimitry Andric 20230b57cec5SDimitry Andric SingleBBBonus = Threshold * SingleBBBonusPercent / 100; 20240b57cec5SDimitry Andric VectorBonus = Threshold * VectorBonusPercent / 100; 20250b57cec5SDimitry Andric 20260b57cec5SDimitry Andric // If there is only one call of the function, and it has internal linkage, 20270b57cec5SDimitry Andric // the cost of inlining it drops dramatically. It may seem odd to update 20280b57cec5SDimitry Andric // Cost in updateThreshold, but the bonus depends on the logic in this method. 2029bdd1243dSDimitry Andric if (isSoleCallToLocalFunction(Call, F)) { 20300b57cec5SDimitry Andric Cost -= LastCallToStaticBonus; 2031bdd1243dSDimitry Andric StaticBonusApplied = LastCallToStaticBonus; 2032bdd1243dSDimitry Andric } 20330b57cec5SDimitry Andric } 20340b57cec5SDimitry Andric 20350b57cec5SDimitry Andric bool CallAnalyzer::visitCmpInst(CmpInst &I) { 20360b57cec5SDimitry Andric Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); 20370b57cec5SDimitry Andric // First try to handle simplified comparisons. 203881ad6265SDimitry Andric if (simplifyInstruction(I)) 20390b57cec5SDimitry Andric return true; 20400b57cec5SDimitry Andric 20410b57cec5SDimitry Andric if (I.getOpcode() == Instruction::FCmp) 20420b57cec5SDimitry Andric return false; 20430b57cec5SDimitry Andric 20440b57cec5SDimitry Andric // Otherwise look for a comparison between constant offset pointers with 20450b57cec5SDimitry Andric // a common base. 20460b57cec5SDimitry Andric Value *LHSBase, *RHSBase; 20470b57cec5SDimitry Andric APInt LHSOffset, RHSOffset; 20480b57cec5SDimitry Andric std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS); 20490b57cec5SDimitry Andric if (LHSBase) { 20500b57cec5SDimitry Andric std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS); 20510b57cec5SDimitry Andric if (RHSBase && LHSBase == RHSBase) { 20520b57cec5SDimitry Andric // We have common bases, fold the icmp to a constant based on the 20530b57cec5SDimitry Andric // offsets. 2054*0fca6ea1SDimitry Andric SimplifiedValues[&I] = ConstantInt::getBool( 2055*0fca6ea1SDimitry Andric I.getType(), 2056*0fca6ea1SDimitry Andric ICmpInst::compare(LHSOffset, RHSOffset, I.getPredicate())); 20570b57cec5SDimitry Andric ++NumConstantPtrCmps; 20580b57cec5SDimitry Andric return true; 20590b57cec5SDimitry Andric } 20600b57cec5SDimitry Andric } 20610b57cec5SDimitry Andric 206206c3fb27SDimitry Andric auto isImplicitNullCheckCmp = [](const CmpInst &I) { 206306c3fb27SDimitry Andric for (auto *User : I.users()) 206406c3fb27SDimitry Andric if (auto *Instr = dyn_cast<Instruction>(User)) 206506c3fb27SDimitry Andric if (!Instr->getMetadata(LLVMContext::MD_make_implicit)) 206606c3fb27SDimitry Andric return false; 206706c3fb27SDimitry Andric return true; 206806c3fb27SDimitry Andric }; 206906c3fb27SDimitry Andric 20700b57cec5SDimitry Andric // If the comparison is an equality comparison with null, we can simplify it 20710b57cec5SDimitry Andric // if we know the value (argument) can't be null 207206c3fb27SDimitry Andric if (I.isEquality() && isa<ConstantPointerNull>(I.getOperand(1))) { 207306c3fb27SDimitry Andric if (isKnownNonNullInCallee(I.getOperand(0))) { 20740b57cec5SDimitry Andric bool IsNotEqual = I.getPredicate() == CmpInst::ICMP_NE; 20750b57cec5SDimitry Andric SimplifiedValues[&I] = IsNotEqual ? ConstantInt::getTrue(I.getType()) 20760b57cec5SDimitry Andric : ConstantInt::getFalse(I.getType()); 20770b57cec5SDimitry Andric return true; 20780b57cec5SDimitry Andric } 207906c3fb27SDimitry Andric // Implicit null checks act as unconditional branches and their comparisons 208006c3fb27SDimitry Andric // should be treated as simplified and free of cost. 208106c3fb27SDimitry Andric if (isImplicitNullCheckCmp(I)) 208206c3fb27SDimitry Andric return true; 208306c3fb27SDimitry Andric } 2084480093f4SDimitry Andric return handleSROA(I.getOperand(0), isa<ConstantPointerNull>(I.getOperand(1))); 20850b57cec5SDimitry Andric } 20860b57cec5SDimitry Andric 20870b57cec5SDimitry Andric bool CallAnalyzer::visitSub(BinaryOperator &I) { 20880b57cec5SDimitry Andric // Try to handle a special case: we can fold computing the difference of two 20890b57cec5SDimitry Andric // constant-related pointers. 20900b57cec5SDimitry Andric Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); 20910b57cec5SDimitry Andric Value *LHSBase, *RHSBase; 20920b57cec5SDimitry Andric APInt LHSOffset, RHSOffset; 20930b57cec5SDimitry Andric std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS); 20940b57cec5SDimitry Andric if (LHSBase) { 20950b57cec5SDimitry Andric std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS); 20960b57cec5SDimitry Andric if (RHSBase && LHSBase == RHSBase) { 20970b57cec5SDimitry Andric // We have common bases, fold the subtract to a constant based on the 20980b57cec5SDimitry Andric // offsets. 20990b57cec5SDimitry Andric Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset); 21000b57cec5SDimitry Andric Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset); 21010b57cec5SDimitry Andric if (Constant *C = ConstantExpr::getSub(CLHS, CRHS)) { 21020b57cec5SDimitry Andric SimplifiedValues[&I] = C; 21030b57cec5SDimitry Andric ++NumConstantPtrDiffs; 21040b57cec5SDimitry Andric return true; 21050b57cec5SDimitry Andric } 21060b57cec5SDimitry Andric } 21070b57cec5SDimitry Andric } 21080b57cec5SDimitry Andric 21090b57cec5SDimitry Andric // Otherwise, fall back to the generic logic for simplifying and handling 21100b57cec5SDimitry Andric // instructions. 21110b57cec5SDimitry Andric return Base::visitSub(I); 21120b57cec5SDimitry Andric } 21130b57cec5SDimitry Andric 21140b57cec5SDimitry Andric bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) { 21150b57cec5SDimitry Andric Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); 21160b57cec5SDimitry Andric Constant *CLHS = dyn_cast<Constant>(LHS); 21170b57cec5SDimitry Andric if (!CLHS) 21180b57cec5SDimitry Andric CLHS = SimplifiedValues.lookup(LHS); 21190b57cec5SDimitry Andric Constant *CRHS = dyn_cast<Constant>(RHS); 21200b57cec5SDimitry Andric if (!CRHS) 21210b57cec5SDimitry Andric CRHS = SimplifiedValues.lookup(RHS); 21220b57cec5SDimitry Andric 21230b57cec5SDimitry Andric Value *SimpleV = nullptr; 21240b57cec5SDimitry Andric if (auto FI = dyn_cast<FPMathOperator>(&I)) 212581ad6265SDimitry Andric SimpleV = simplifyBinOp(I.getOpcode(), CLHS ? CLHS : LHS, CRHS ? CRHS : RHS, 2126480093f4SDimitry Andric FI->getFastMathFlags(), DL); 21270b57cec5SDimitry Andric else 21280b57cec5SDimitry Andric SimpleV = 212981ad6265SDimitry Andric simplifyBinOp(I.getOpcode(), CLHS ? CLHS : LHS, CRHS ? CRHS : RHS, DL); 21300b57cec5SDimitry Andric 21310b57cec5SDimitry Andric if (Constant *C = dyn_cast_or_null<Constant>(SimpleV)) 21320b57cec5SDimitry Andric SimplifiedValues[&I] = C; 21330b57cec5SDimitry Andric 21340b57cec5SDimitry Andric if (SimpleV) 21350b57cec5SDimitry Andric return true; 21360b57cec5SDimitry Andric 21370b57cec5SDimitry Andric // Disable any SROA on arguments to arbitrary, unsimplified binary operators. 21380b57cec5SDimitry Andric disableSROA(LHS); 21390b57cec5SDimitry Andric disableSROA(RHS); 21400b57cec5SDimitry Andric 21410b57cec5SDimitry Andric // If the instruction is floating point, and the target says this operation 21420b57cec5SDimitry Andric // is expensive, this may eventually become a library call. Treat the cost 21430b57cec5SDimitry Andric // as such. Unless it's fneg which can be implemented with an xor. 21440b57cec5SDimitry Andric using namespace llvm::PatternMatch; 21450b57cec5SDimitry Andric if (I.getType()->isFloatingPointTy() && 21460b57cec5SDimitry Andric TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive && 21470b57cec5SDimitry Andric !match(&I, m_FNeg(m_Value()))) 2148480093f4SDimitry Andric onCallPenalty(); 21490b57cec5SDimitry Andric 21500b57cec5SDimitry Andric return false; 21510b57cec5SDimitry Andric } 21520b57cec5SDimitry Andric 21530b57cec5SDimitry Andric bool CallAnalyzer::visitFNeg(UnaryOperator &I) { 21540b57cec5SDimitry Andric Value *Op = I.getOperand(0); 21550b57cec5SDimitry Andric Constant *COp = dyn_cast<Constant>(Op); 21560b57cec5SDimitry Andric if (!COp) 21570b57cec5SDimitry Andric COp = SimplifiedValues.lookup(Op); 21580b57cec5SDimitry Andric 215981ad6265SDimitry Andric Value *SimpleV = simplifyFNegInst( 2160480093f4SDimitry Andric COp ? COp : Op, cast<FPMathOperator>(I).getFastMathFlags(), DL); 21610b57cec5SDimitry Andric 21620b57cec5SDimitry Andric if (Constant *C = dyn_cast_or_null<Constant>(SimpleV)) 21630b57cec5SDimitry Andric SimplifiedValues[&I] = C; 21640b57cec5SDimitry Andric 21650b57cec5SDimitry Andric if (SimpleV) 21660b57cec5SDimitry Andric return true; 21670b57cec5SDimitry Andric 21680b57cec5SDimitry Andric // Disable any SROA on arguments to arbitrary, unsimplified fneg. 21690b57cec5SDimitry Andric disableSROA(Op); 21700b57cec5SDimitry Andric 21710b57cec5SDimitry Andric return false; 21720b57cec5SDimitry Andric } 21730b57cec5SDimitry Andric 21740b57cec5SDimitry Andric bool CallAnalyzer::visitLoad(LoadInst &I) { 2175480093f4SDimitry Andric if (handleSROA(I.getPointerOperand(), I.isSimple())) 21760b57cec5SDimitry Andric return true; 21770b57cec5SDimitry Andric 21780b57cec5SDimitry Andric // If the data is already loaded from this address and hasn't been clobbered 21790b57cec5SDimitry Andric // by any stores or calls, this load is likely to be redundant and can be 21800b57cec5SDimitry Andric // eliminated. 21810b57cec5SDimitry Andric if (EnableLoadElimination && 21820b57cec5SDimitry Andric !LoadAddrSet.insert(I.getPointerOperand()).second && I.isUnordered()) { 2183480093f4SDimitry Andric onLoadEliminationOpportunity(); 21840b57cec5SDimitry Andric return true; 21850b57cec5SDimitry Andric } 21860b57cec5SDimitry Andric 2187bdd1243dSDimitry Andric onMemAccess(); 21880b57cec5SDimitry Andric return false; 21890b57cec5SDimitry Andric } 21900b57cec5SDimitry Andric 21910b57cec5SDimitry Andric bool CallAnalyzer::visitStore(StoreInst &I) { 2192480093f4SDimitry Andric if (handleSROA(I.getPointerOperand(), I.isSimple())) 21930b57cec5SDimitry Andric return true; 21940b57cec5SDimitry Andric 21950b57cec5SDimitry Andric // The store can potentially clobber loads and prevent repeated loads from 21960b57cec5SDimitry Andric // being eliminated. 21970b57cec5SDimitry Andric // FIXME: 21980b57cec5SDimitry Andric // 1. We can probably keep an initial set of eliminatable loads substracted 21990b57cec5SDimitry Andric // from the cost even when we finally see a store. We just need to disable 22000b57cec5SDimitry Andric // *further* accumulation of elimination savings. 22010b57cec5SDimitry Andric // 2. We should probably at some point thread MemorySSA for the callee into 22020b57cec5SDimitry Andric // this and then use that to actually compute *really* precise savings. 22030b57cec5SDimitry Andric disableLoadElimination(); 2204bdd1243dSDimitry Andric 2205bdd1243dSDimitry Andric onMemAccess(); 22060b57cec5SDimitry Andric return false; 22070b57cec5SDimitry Andric } 22080b57cec5SDimitry Andric 22090b57cec5SDimitry Andric bool CallAnalyzer::visitExtractValue(ExtractValueInst &I) { 22100b57cec5SDimitry Andric // Constant folding for extract value is trivial. 221181ad6265SDimitry Andric if (simplifyInstruction(I)) 22120b57cec5SDimitry Andric return true; 22130b57cec5SDimitry Andric 2214fe6060f1SDimitry Andric // SROA can't look through these, but they may be free. 2215fe6060f1SDimitry Andric return Base::visitExtractValue(I); 22160b57cec5SDimitry Andric } 22170b57cec5SDimitry Andric 22180b57cec5SDimitry Andric bool CallAnalyzer::visitInsertValue(InsertValueInst &I) { 22190b57cec5SDimitry Andric // Constant folding for insert value is trivial. 222081ad6265SDimitry Andric if (simplifyInstruction(I)) 22210b57cec5SDimitry Andric return true; 22220b57cec5SDimitry Andric 2223fe6060f1SDimitry Andric // SROA can't look through these, but they may be free. 2224fe6060f1SDimitry Andric return Base::visitInsertValue(I); 22250b57cec5SDimitry Andric } 22260b57cec5SDimitry Andric 22270b57cec5SDimitry Andric /// Try to simplify a call site. 22280b57cec5SDimitry Andric /// 22290b57cec5SDimitry Andric /// Takes a concrete function and callsite and tries to actually simplify it by 22300b57cec5SDimitry Andric /// analyzing the arguments and call itself with instsimplify. Returns true if 22310b57cec5SDimitry Andric /// it has simplified the callsite to some other entity (a constant), making it 22320b57cec5SDimitry Andric /// free. 22330b57cec5SDimitry Andric bool CallAnalyzer::simplifyCallSite(Function *F, CallBase &Call) { 22340b57cec5SDimitry Andric // FIXME: Using the instsimplify logic directly for this is inefficient 22350b57cec5SDimitry Andric // because we have to continually rebuild the argument list even when no 22360b57cec5SDimitry Andric // simplifications can be performed. Until that is fixed with remapping 22370b57cec5SDimitry Andric // inside of instsimplify, directly constant fold calls here. 22380b57cec5SDimitry Andric if (!canConstantFoldCallTo(&Call, F)) 22390b57cec5SDimitry Andric return false; 22400b57cec5SDimitry Andric 22410b57cec5SDimitry Andric // Try to re-map the arguments to constants. 22420b57cec5SDimitry Andric SmallVector<Constant *, 4> ConstantArgs; 22430b57cec5SDimitry Andric ConstantArgs.reserve(Call.arg_size()); 22440b57cec5SDimitry Andric for (Value *I : Call.args()) { 22450b57cec5SDimitry Andric Constant *C = dyn_cast<Constant>(I); 22460b57cec5SDimitry Andric if (!C) 22470b57cec5SDimitry Andric C = dyn_cast_or_null<Constant>(SimplifiedValues.lookup(I)); 22480b57cec5SDimitry Andric if (!C) 22490b57cec5SDimitry Andric return false; // This argument doesn't map to a constant. 22500b57cec5SDimitry Andric 22510b57cec5SDimitry Andric ConstantArgs.push_back(C); 22520b57cec5SDimitry Andric } 22530b57cec5SDimitry Andric if (Constant *C = ConstantFoldCall(&Call, F, ConstantArgs)) { 22540b57cec5SDimitry Andric SimplifiedValues[&Call] = C; 22550b57cec5SDimitry Andric return true; 22560b57cec5SDimitry Andric } 22570b57cec5SDimitry Andric 22580b57cec5SDimitry Andric return false; 22590b57cec5SDimitry Andric } 22600b57cec5SDimitry Andric 22610b57cec5SDimitry Andric bool CallAnalyzer::visitCallBase(CallBase &Call) { 2262349cc55cSDimitry Andric if (!onCallBaseVisitStart(Call)) 2263349cc55cSDimitry Andric return true; 2264349cc55cSDimitry Andric 22650b57cec5SDimitry Andric if (Call.hasFnAttr(Attribute::ReturnsTwice) && 22660b57cec5SDimitry Andric !F.hasFnAttribute(Attribute::ReturnsTwice)) { 22670b57cec5SDimitry Andric // This aborts the entire analysis. 22680b57cec5SDimitry Andric ExposesReturnsTwice = true; 22690b57cec5SDimitry Andric return false; 22700b57cec5SDimitry Andric } 22710b57cec5SDimitry Andric if (isa<CallInst>(Call) && cast<CallInst>(Call).cannotDuplicate()) 22720b57cec5SDimitry Andric ContainsNoDuplicateCall = true; 22730b57cec5SDimitry Andric 227481ad6265SDimitry Andric Function *F = Call.getCalledFunction(); 2275480093f4SDimitry Andric bool IsIndirectCall = !F; 2276480093f4SDimitry Andric if (IsIndirectCall) { 2277480093f4SDimitry Andric // Check if this happens to be an indirect function call to a known function 2278480093f4SDimitry Andric // in this inline context. If not, we've done all we can. 227981ad6265SDimitry Andric Value *Callee = Call.getCalledOperand(); 2280480093f4SDimitry Andric F = dyn_cast_or_null<Function>(SimplifiedValues.lookup(Callee)); 228181ad6265SDimitry Andric if (!F || F->getFunctionType() != Call.getFunctionType()) { 2282480093f4SDimitry Andric onCallArgumentSetup(Call); 2283480093f4SDimitry Andric 2284480093f4SDimitry Andric if (!Call.onlyReadsMemory()) 2285480093f4SDimitry Andric disableLoadElimination(); 2286480093f4SDimitry Andric return Base::visitCallBase(Call); 2287480093f4SDimitry Andric } 2288480093f4SDimitry Andric } 2289480093f4SDimitry Andric 2290480093f4SDimitry Andric assert(F && "Expected a call to a known function"); 2291480093f4SDimitry Andric 22920b57cec5SDimitry Andric // When we have a concrete function, first try to simplify it directly. 22930b57cec5SDimitry Andric if (simplifyCallSite(F, Call)) 22940b57cec5SDimitry Andric return true; 22950b57cec5SDimitry Andric 22960b57cec5SDimitry Andric // Next check if it is an intrinsic we know about. 22970b57cec5SDimitry Andric // FIXME: Lift this into part of the InstVisitor. 22980b57cec5SDimitry Andric if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&Call)) { 22990b57cec5SDimitry Andric switch (II->getIntrinsicID()) { 23000b57cec5SDimitry Andric default: 23010b57cec5SDimitry Andric if (!Call.onlyReadsMemory() && !isAssumeLikeIntrinsic(II)) 23020b57cec5SDimitry Andric disableLoadElimination(); 23030b57cec5SDimitry Andric return Base::visitCallBase(Call); 23040b57cec5SDimitry Andric 23050b57cec5SDimitry Andric case Intrinsic::load_relative: 2306480093f4SDimitry Andric onLoadRelativeIntrinsic(); 23070b57cec5SDimitry Andric return false; 23080b57cec5SDimitry Andric 23090b57cec5SDimitry Andric case Intrinsic::memset: 23100b57cec5SDimitry Andric case Intrinsic::memcpy: 23110b57cec5SDimitry Andric case Intrinsic::memmove: 23120b57cec5SDimitry Andric disableLoadElimination(); 23130b57cec5SDimitry Andric // SROA can usually chew through these intrinsics, but they aren't free. 23140b57cec5SDimitry Andric return false; 23150b57cec5SDimitry Andric case Intrinsic::icall_branch_funnel: 23160b57cec5SDimitry Andric case Intrinsic::localescape: 23170b57cec5SDimitry Andric HasUninlineableIntrinsic = true; 23180b57cec5SDimitry Andric return false; 23190b57cec5SDimitry Andric case Intrinsic::vastart: 23200b57cec5SDimitry Andric InitsVargArgs = true; 23210b57cec5SDimitry Andric return false; 2322fe6060f1SDimitry Andric case Intrinsic::launder_invariant_group: 2323fe6060f1SDimitry Andric case Intrinsic::strip_invariant_group: 2324fe6060f1SDimitry Andric if (auto *SROAArg = getSROAArgForValueOrNull(II->getOperand(0))) 2325fe6060f1SDimitry Andric SROAArgValues[II] = SROAArg; 2326fe6060f1SDimitry Andric return true; 2327349cc55cSDimitry Andric case Intrinsic::is_constant: 2328349cc55cSDimitry Andric return simplifyIntrinsicCallIsConstant(Call); 2329bdd1243dSDimitry Andric case Intrinsic::objectsize: 2330bdd1243dSDimitry Andric return simplifyIntrinsicCallObjectSize(Call); 23310b57cec5SDimitry Andric } 23320b57cec5SDimitry Andric } 23330b57cec5SDimitry Andric 23340b57cec5SDimitry Andric if (F == Call.getFunction()) { 23350b57cec5SDimitry Andric // This flag will fully abort the analysis, so don't bother with anything 23360b57cec5SDimitry Andric // else. 23370b57cec5SDimitry Andric IsRecursiveCall = true; 2338349cc55cSDimitry Andric if (!AllowRecursiveCall) 23390b57cec5SDimitry Andric return false; 23400b57cec5SDimitry Andric } 23410b57cec5SDimitry Andric 23420b57cec5SDimitry Andric if (TTI.isLoweredToCall(F)) { 2343480093f4SDimitry Andric onLoweredCall(F, Call, IsIndirectCall); 23440b57cec5SDimitry Andric } 23450b57cec5SDimitry Andric 2346480093f4SDimitry Andric if (!(Call.onlyReadsMemory() || (IsIndirectCall && F->onlyReadsMemory()))) 23470b57cec5SDimitry Andric disableLoadElimination(); 23480b57cec5SDimitry Andric return Base::visitCallBase(Call); 23490b57cec5SDimitry Andric } 23500b57cec5SDimitry Andric 23510b57cec5SDimitry Andric bool CallAnalyzer::visitReturnInst(ReturnInst &RI) { 23520b57cec5SDimitry Andric // At least one return instruction will be free after inlining. 23530b57cec5SDimitry Andric bool Free = !HasReturn; 23540b57cec5SDimitry Andric HasReturn = true; 23550b57cec5SDimitry Andric return Free; 23560b57cec5SDimitry Andric } 23570b57cec5SDimitry Andric 23580b57cec5SDimitry Andric bool CallAnalyzer::visitBranchInst(BranchInst &BI) { 23590b57cec5SDimitry Andric // We model unconditional branches as essentially free -- they really 23600b57cec5SDimitry Andric // shouldn't exist at all, but handling them makes the behavior of the 23610b57cec5SDimitry Andric // inliner more regular and predictable. Interestingly, conditional branches 23620b57cec5SDimitry Andric // which will fold away are also free. 23630b57cec5SDimitry Andric return BI.isUnconditional() || isa<ConstantInt>(BI.getCondition()) || 236406c3fb27SDimitry Andric BI.getMetadata(LLVMContext::MD_make_implicit) || 2365349cc55cSDimitry Andric isa_and_nonnull<ConstantInt>( 23660b57cec5SDimitry Andric SimplifiedValues.lookup(BI.getCondition())); 23670b57cec5SDimitry Andric } 23680b57cec5SDimitry Andric 23690b57cec5SDimitry Andric bool CallAnalyzer::visitSelectInst(SelectInst &SI) { 23700b57cec5SDimitry Andric bool CheckSROA = SI.getType()->isPointerTy(); 23710b57cec5SDimitry Andric Value *TrueVal = SI.getTrueValue(); 23720b57cec5SDimitry Andric Value *FalseVal = SI.getFalseValue(); 23730b57cec5SDimitry Andric 23740b57cec5SDimitry Andric Constant *TrueC = dyn_cast<Constant>(TrueVal); 23750b57cec5SDimitry Andric if (!TrueC) 23760b57cec5SDimitry Andric TrueC = SimplifiedValues.lookup(TrueVal); 23770b57cec5SDimitry Andric Constant *FalseC = dyn_cast<Constant>(FalseVal); 23780b57cec5SDimitry Andric if (!FalseC) 23790b57cec5SDimitry Andric FalseC = SimplifiedValues.lookup(FalseVal); 23800b57cec5SDimitry Andric Constant *CondC = 23810b57cec5SDimitry Andric dyn_cast_or_null<Constant>(SimplifiedValues.lookup(SI.getCondition())); 23820b57cec5SDimitry Andric 23830b57cec5SDimitry Andric if (!CondC) { 23840b57cec5SDimitry Andric // Select C, X, X => X 23850b57cec5SDimitry Andric if (TrueC == FalseC && TrueC) { 23860b57cec5SDimitry Andric SimplifiedValues[&SI] = TrueC; 23870b57cec5SDimitry Andric return true; 23880b57cec5SDimitry Andric } 23890b57cec5SDimitry Andric 23900b57cec5SDimitry Andric if (!CheckSROA) 23910b57cec5SDimitry Andric return Base::visitSelectInst(SI); 23920b57cec5SDimitry Andric 23930b57cec5SDimitry Andric std::pair<Value *, APInt> TrueBaseAndOffset = 23940b57cec5SDimitry Andric ConstantOffsetPtrs.lookup(TrueVal); 23950b57cec5SDimitry Andric std::pair<Value *, APInt> FalseBaseAndOffset = 23960b57cec5SDimitry Andric ConstantOffsetPtrs.lookup(FalseVal); 23970b57cec5SDimitry Andric if (TrueBaseAndOffset == FalseBaseAndOffset && TrueBaseAndOffset.first) { 23980b57cec5SDimitry Andric ConstantOffsetPtrs[&SI] = TrueBaseAndOffset; 23990b57cec5SDimitry Andric 2400480093f4SDimitry Andric if (auto *SROAArg = getSROAArgForValueOrNull(TrueVal)) 24010b57cec5SDimitry Andric SROAArgValues[&SI] = SROAArg; 24020b57cec5SDimitry Andric return true; 24030b57cec5SDimitry Andric } 24040b57cec5SDimitry Andric 24050b57cec5SDimitry Andric return Base::visitSelectInst(SI); 24060b57cec5SDimitry Andric } 24070b57cec5SDimitry Andric 24080b57cec5SDimitry Andric // Select condition is a constant. 2409fe6060f1SDimitry Andric Value *SelectedV = CondC->isAllOnesValue() ? TrueVal 2410fe6060f1SDimitry Andric : (CondC->isNullValue()) ? FalseVal 2411fe6060f1SDimitry Andric : nullptr; 24120b57cec5SDimitry Andric if (!SelectedV) { 24130b57cec5SDimitry Andric // Condition is a vector constant that is not all 1s or all 0s. If all 241406c3fb27SDimitry Andric // operands are constants, ConstantFoldSelectInstruction() can handle the 241506c3fb27SDimitry Andric // cases such as select vectors. 24160b57cec5SDimitry Andric if (TrueC && FalseC) { 241706c3fb27SDimitry Andric if (auto *C = ConstantFoldSelectInstruction(CondC, TrueC, FalseC)) { 24180b57cec5SDimitry Andric SimplifiedValues[&SI] = C; 24190b57cec5SDimitry Andric return true; 24200b57cec5SDimitry Andric } 24210b57cec5SDimitry Andric } 24220b57cec5SDimitry Andric return Base::visitSelectInst(SI); 24230b57cec5SDimitry Andric } 24240b57cec5SDimitry Andric 24250b57cec5SDimitry Andric // Condition is either all 1s or all 0s. SI can be simplified. 24260b57cec5SDimitry Andric if (Constant *SelectedC = dyn_cast<Constant>(SelectedV)) { 24270b57cec5SDimitry Andric SimplifiedValues[&SI] = SelectedC; 24280b57cec5SDimitry Andric return true; 24290b57cec5SDimitry Andric } 24300b57cec5SDimitry Andric 24310b57cec5SDimitry Andric if (!CheckSROA) 24320b57cec5SDimitry Andric return true; 24330b57cec5SDimitry Andric 24340b57cec5SDimitry Andric std::pair<Value *, APInt> BaseAndOffset = 24350b57cec5SDimitry Andric ConstantOffsetPtrs.lookup(SelectedV); 24360b57cec5SDimitry Andric if (BaseAndOffset.first) { 24370b57cec5SDimitry Andric ConstantOffsetPtrs[&SI] = BaseAndOffset; 24380b57cec5SDimitry Andric 2439480093f4SDimitry Andric if (auto *SROAArg = getSROAArgForValueOrNull(SelectedV)) 24400b57cec5SDimitry Andric SROAArgValues[&SI] = SROAArg; 24410b57cec5SDimitry Andric } 24420b57cec5SDimitry Andric 24430b57cec5SDimitry Andric return true; 24440b57cec5SDimitry Andric } 24450b57cec5SDimitry Andric 24460b57cec5SDimitry Andric bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) { 24470b57cec5SDimitry Andric // We model unconditional switches as free, see the comments on handling 24480b57cec5SDimitry Andric // branches. 24490b57cec5SDimitry Andric if (isa<ConstantInt>(SI.getCondition())) 24500b57cec5SDimitry Andric return true; 24510b57cec5SDimitry Andric if (Value *V = SimplifiedValues.lookup(SI.getCondition())) 24520b57cec5SDimitry Andric if (isa<ConstantInt>(V)) 24530b57cec5SDimitry Andric return true; 24540b57cec5SDimitry Andric 24550b57cec5SDimitry Andric // Assume the most general case where the switch is lowered into 24560b57cec5SDimitry Andric // either a jump table, bit test, or a balanced binary tree consisting of 24570b57cec5SDimitry Andric // case clusters without merging adjacent clusters with the same 24580b57cec5SDimitry Andric // destination. We do not consider the switches that are lowered with a mix 24590b57cec5SDimitry Andric // of jump table/bit test/binary search tree. The cost of the switch is 24600b57cec5SDimitry Andric // proportional to the size of the tree or the size of jump table range. 24610b57cec5SDimitry Andric // 24620b57cec5SDimitry Andric // NB: We convert large switches which are just used to initialize large phi 2463fe6060f1SDimitry Andric // nodes to lookup tables instead in simplifycfg, so this shouldn't prevent 24640b57cec5SDimitry Andric // inlining those. It will prevent inlining in cases where the optimization 24650b57cec5SDimitry Andric // does not (yet) fire. 24660b57cec5SDimitry Andric 24670b57cec5SDimitry Andric unsigned JumpTableSize = 0; 24685ffd83dbSDimitry Andric BlockFrequencyInfo *BFI = GetBFI ? &(GetBFI(F)) : nullptr; 24690b57cec5SDimitry Andric unsigned NumCaseCluster = 2470480093f4SDimitry Andric TTI.getEstimatedNumberOfCaseClusters(SI, JumpTableSize, PSI, BFI); 24710b57cec5SDimitry Andric 2472*0fca6ea1SDimitry Andric onFinalizeSwitch(JumpTableSize, NumCaseCluster, SI.defaultDestUndefined()); 24730b57cec5SDimitry Andric return false; 24740b57cec5SDimitry Andric } 24750b57cec5SDimitry Andric 24760b57cec5SDimitry Andric bool CallAnalyzer::visitIndirectBrInst(IndirectBrInst &IBI) { 24770b57cec5SDimitry Andric // We never want to inline functions that contain an indirectbr. This is 24780b57cec5SDimitry Andric // incorrect because all the blockaddress's (in static global initializers 24790b57cec5SDimitry Andric // for example) would be referring to the original function, and this 24800b57cec5SDimitry Andric // indirect jump would jump from the inlined copy of the function into the 24810b57cec5SDimitry Andric // original function which is extremely undefined behavior. 24820b57cec5SDimitry Andric // FIXME: This logic isn't really right; we can safely inline functions with 24830b57cec5SDimitry Andric // indirectbr's as long as no other function or global references the 24840b57cec5SDimitry Andric // blockaddress of a block within the current function. 24850b57cec5SDimitry Andric HasIndirectBr = true; 24860b57cec5SDimitry Andric return false; 24870b57cec5SDimitry Andric } 24880b57cec5SDimitry Andric 24890b57cec5SDimitry Andric bool CallAnalyzer::visitResumeInst(ResumeInst &RI) { 24900b57cec5SDimitry Andric // FIXME: It's not clear that a single instruction is an accurate model for 24910b57cec5SDimitry Andric // the inline cost of a resume instruction. 24920b57cec5SDimitry Andric return false; 24930b57cec5SDimitry Andric } 24940b57cec5SDimitry Andric 24950b57cec5SDimitry Andric bool CallAnalyzer::visitCleanupReturnInst(CleanupReturnInst &CRI) { 24960b57cec5SDimitry Andric // FIXME: It's not clear that a single instruction is an accurate model for 24970b57cec5SDimitry Andric // the inline cost of a cleanupret instruction. 24980b57cec5SDimitry Andric return false; 24990b57cec5SDimitry Andric } 25000b57cec5SDimitry Andric 25010b57cec5SDimitry Andric bool CallAnalyzer::visitCatchReturnInst(CatchReturnInst &CRI) { 25020b57cec5SDimitry Andric // FIXME: It's not clear that a single instruction is an accurate model for 25030b57cec5SDimitry Andric // the inline cost of a catchret instruction. 25040b57cec5SDimitry Andric return false; 25050b57cec5SDimitry Andric } 25060b57cec5SDimitry Andric 25070b57cec5SDimitry Andric bool CallAnalyzer::visitUnreachableInst(UnreachableInst &I) { 25080b57cec5SDimitry Andric // FIXME: It might be reasonably to discount the cost of instructions leading 25090b57cec5SDimitry Andric // to unreachable as they have the lowest possible impact on both runtime and 25100b57cec5SDimitry Andric // code size. 25110b57cec5SDimitry Andric return true; // No actual code is needed for unreachable. 25120b57cec5SDimitry Andric } 25130b57cec5SDimitry Andric 25140b57cec5SDimitry Andric bool CallAnalyzer::visitInstruction(Instruction &I) { 25150b57cec5SDimitry Andric // Some instructions are free. All of the free intrinsics can also be 25160b57cec5SDimitry Andric // handled by SROA, etc. 2517bdd1243dSDimitry Andric if (TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency) == 2518fe6060f1SDimitry Andric TargetTransformInfo::TCC_Free) 25190b57cec5SDimitry Andric return true; 25200b57cec5SDimitry Andric 25210b57cec5SDimitry Andric // We found something we don't understand or can't handle. Mark any SROA-able 25220b57cec5SDimitry Andric // values in the operand list as no longer viable. 2523e8d8bef9SDimitry Andric for (const Use &Op : I.operands()) 2524e8d8bef9SDimitry Andric disableSROA(Op); 25250b57cec5SDimitry Andric 25260b57cec5SDimitry Andric return false; 25270b57cec5SDimitry Andric } 25280b57cec5SDimitry Andric 25290b57cec5SDimitry Andric /// Analyze a basic block for its contribution to the inline cost. 25300b57cec5SDimitry Andric /// 25310b57cec5SDimitry Andric /// This method walks the analyzer over every instruction in the given basic 25320b57cec5SDimitry Andric /// block and accounts for their cost during inlining at this callsite. It 25330b57cec5SDimitry Andric /// aborts early if the threshold has been exceeded or an impossible to inline 25340b57cec5SDimitry Andric /// construct has been detected. It returns false if inlining is no longer 25350b57cec5SDimitry Andric /// viable, and true if inlining remains viable. 25360b57cec5SDimitry Andric InlineResult 25370b57cec5SDimitry Andric CallAnalyzer::analyzeBlock(BasicBlock *BB, 25380b57cec5SDimitry Andric SmallPtrSetImpl<const Value *> &EphValues) { 2539e8d8bef9SDimitry Andric for (Instruction &I : *BB) { 25400b57cec5SDimitry Andric // FIXME: Currently, the number of instructions in a function regardless of 25410b57cec5SDimitry Andric // our ability to simplify them during inline to constants or dead code, 25420b57cec5SDimitry Andric // are actually used by the vector bonus heuristic. As long as that's true, 25430b57cec5SDimitry Andric // we have to special case debug intrinsics here to prevent differences in 25440b57cec5SDimitry Andric // inlining due to debug symbols. Eventually, the number of unsimplified 25450b57cec5SDimitry Andric // instructions shouldn't factor into the cost computation, but until then, 25460b57cec5SDimitry Andric // hack around it here. 2547349cc55cSDimitry Andric // Similarly, skip pseudo-probes. 2548349cc55cSDimitry Andric if (I.isDebugOrPseudoInst()) 2549e8d8bef9SDimitry Andric continue; 2550e8d8bef9SDimitry Andric 25510b57cec5SDimitry Andric // Skip ephemeral values. 2552e8d8bef9SDimitry Andric if (EphValues.count(&I)) 25530b57cec5SDimitry Andric continue; 25540b57cec5SDimitry Andric 25550b57cec5SDimitry Andric ++NumInstructions; 2556e8d8bef9SDimitry Andric if (isa<ExtractElementInst>(I) || I.getType()->isVectorTy()) 25570b57cec5SDimitry Andric ++NumVectorInstructions; 25580b57cec5SDimitry Andric 25590b57cec5SDimitry Andric // If the instruction simplified to a constant, there is no cost to this 25600b57cec5SDimitry Andric // instruction. Visit the instructions using our InstVisitor to account for 25610b57cec5SDimitry Andric // all of the per-instruction logic. The visit tree returns true if we 25620b57cec5SDimitry Andric // consumed the instruction in any way, and false if the instruction's base 25630b57cec5SDimitry Andric // cost should count against inlining. 2564e8d8bef9SDimitry Andric onInstructionAnalysisStart(&I); 25655ffd83dbSDimitry Andric 2566e8d8bef9SDimitry Andric if (Base::visit(&I)) 25670b57cec5SDimitry Andric ++NumInstructionsSimplified; 25680b57cec5SDimitry Andric else 25695ffd83dbSDimitry Andric onMissedSimplification(); 25700b57cec5SDimitry Andric 2571e8d8bef9SDimitry Andric onInstructionAnalysisFinish(&I); 25720b57cec5SDimitry Andric using namespace ore; 25730b57cec5SDimitry Andric // If the visit this instruction detected an uninlinable pattern, abort. 25745ffd83dbSDimitry Andric InlineResult IR = InlineResult::success(); 2575349cc55cSDimitry Andric if (IsRecursiveCall && !AllowRecursiveCall) 25765ffd83dbSDimitry Andric IR = InlineResult::failure("recursive"); 25770b57cec5SDimitry Andric else if (ExposesReturnsTwice) 25785ffd83dbSDimitry Andric IR = InlineResult::failure("exposes returns twice"); 25790b57cec5SDimitry Andric else if (HasDynamicAlloca) 25805ffd83dbSDimitry Andric IR = InlineResult::failure("dynamic alloca"); 25810b57cec5SDimitry Andric else if (HasIndirectBr) 25825ffd83dbSDimitry Andric IR = InlineResult::failure("indirect branch"); 25830b57cec5SDimitry Andric else if (HasUninlineableIntrinsic) 25845ffd83dbSDimitry Andric IR = InlineResult::failure("uninlinable intrinsic"); 25850b57cec5SDimitry Andric else if (InitsVargArgs) 25865ffd83dbSDimitry Andric IR = InlineResult::failure("varargs"); 25875ffd83dbSDimitry Andric if (!IR.isSuccess()) { 25880b57cec5SDimitry Andric if (ORE) 25890b57cec5SDimitry Andric ORE->emit([&]() { 25900b57cec5SDimitry Andric return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", 25910b57cec5SDimitry Andric &CandidateCall) 25920b57cec5SDimitry Andric << NV("Callee", &F) << " has uninlinable pattern (" 25935ffd83dbSDimitry Andric << NV("InlineResult", IR.getFailureReason()) 25940b57cec5SDimitry Andric << ") and cost is not fully computed"; 25950b57cec5SDimitry Andric }); 25960b57cec5SDimitry Andric return IR; 25970b57cec5SDimitry Andric } 25980b57cec5SDimitry Andric 25990b57cec5SDimitry Andric // If the caller is a recursive function then we don't want to inline 26000b57cec5SDimitry Andric // functions which allocate a lot of stack space because it would increase 26010b57cec5SDimitry Andric // the caller stack usage dramatically. 2602753f127fSDimitry Andric if (IsCallerRecursive && AllocatedSize > RecurStackSizeThreshold) { 26035ffd83dbSDimitry Andric auto IR = 26045ffd83dbSDimitry Andric InlineResult::failure("recursive and allocates too much stack space"); 26050b57cec5SDimitry Andric if (ORE) 26060b57cec5SDimitry Andric ORE->emit([&]() { 26070b57cec5SDimitry Andric return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", 26080b57cec5SDimitry Andric &CandidateCall) 26095ffd83dbSDimitry Andric << NV("Callee", &F) << " is " 26105ffd83dbSDimitry Andric << NV("InlineResult", IR.getFailureReason()) 26110b57cec5SDimitry Andric << ". Cost is not fully computed"; 26120b57cec5SDimitry Andric }); 26130b57cec5SDimitry Andric return IR; 26140b57cec5SDimitry Andric } 26150b57cec5SDimitry Andric 2616480093f4SDimitry Andric if (shouldStop()) 26175ffd83dbSDimitry Andric return InlineResult::failure( 26185ffd83dbSDimitry Andric "Call site analysis is not favorable to inlining."); 26190b57cec5SDimitry Andric } 26200b57cec5SDimitry Andric 26215ffd83dbSDimitry Andric return InlineResult::success(); 26220b57cec5SDimitry Andric } 26230b57cec5SDimitry Andric 26240b57cec5SDimitry Andric /// Compute the base pointer and cumulative constant offsets for V. 26250b57cec5SDimitry Andric /// 26260b57cec5SDimitry Andric /// This strips all constant offsets off of V, leaving it the base pointer, and 26270b57cec5SDimitry Andric /// accumulates the total constant offset applied in the returned constant. It 26280b57cec5SDimitry Andric /// returns 0 if V is not a pointer, and returns the constant '0' if there are 26290b57cec5SDimitry Andric /// no constant offsets applied. 26300b57cec5SDimitry Andric ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) { 26310b57cec5SDimitry Andric if (!V->getType()->isPointerTy()) 26320b57cec5SDimitry Andric return nullptr; 26330b57cec5SDimitry Andric 26340b57cec5SDimitry Andric unsigned AS = V->getType()->getPointerAddressSpace(); 26350b57cec5SDimitry Andric unsigned IntPtrWidth = DL.getIndexSizeInBits(AS); 2636349cc55cSDimitry Andric APInt Offset = APInt::getZero(IntPtrWidth); 26370b57cec5SDimitry Andric 26380b57cec5SDimitry Andric // Even though we don't look through PHI nodes, we could be called on an 26390b57cec5SDimitry Andric // instruction in an unreachable block, which may be on a cycle. 26400b57cec5SDimitry Andric SmallPtrSet<Value *, 4> Visited; 26410b57cec5SDimitry Andric Visited.insert(V); 26420b57cec5SDimitry Andric do { 26430b57cec5SDimitry Andric if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) { 26440b57cec5SDimitry Andric if (!GEP->isInBounds() || !accumulateGEPOffset(*GEP, Offset)) 26450b57cec5SDimitry Andric return nullptr; 26460b57cec5SDimitry Andric V = GEP->getPointerOperand(); 26470b57cec5SDimitry Andric } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) { 26480b57cec5SDimitry Andric if (GA->isInterposable()) 26490b57cec5SDimitry Andric break; 26500b57cec5SDimitry Andric V = GA->getAliasee(); 26510b57cec5SDimitry Andric } else { 26520b57cec5SDimitry Andric break; 26530b57cec5SDimitry Andric } 26540b57cec5SDimitry Andric assert(V->getType()->isPointerTy() && "Unexpected operand type!"); 26550b57cec5SDimitry Andric } while (Visited.insert(V).second); 26560b57cec5SDimitry Andric 2657480093f4SDimitry Andric Type *IdxPtrTy = DL.getIndexType(V->getType()); 2658480093f4SDimitry Andric return cast<ConstantInt>(ConstantInt::get(IdxPtrTy, Offset)); 26590b57cec5SDimitry Andric } 26600b57cec5SDimitry Andric 26610b57cec5SDimitry Andric /// Find dead blocks due to deleted CFG edges during inlining. 26620b57cec5SDimitry Andric /// 26630b57cec5SDimitry Andric /// If we know the successor of the current block, \p CurrBB, has to be \p 26640b57cec5SDimitry Andric /// NextBB, the other successors of \p CurrBB are dead if these successors have 26650b57cec5SDimitry Andric /// no live incoming CFG edges. If one block is found to be dead, we can 26660b57cec5SDimitry Andric /// continue growing the dead block list by checking the successors of the dead 26670b57cec5SDimitry Andric /// blocks to see if all their incoming edges are dead or not. 26680b57cec5SDimitry Andric void CallAnalyzer::findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB) { 26690b57cec5SDimitry Andric auto IsEdgeDead = [&](BasicBlock *Pred, BasicBlock *Succ) { 26700b57cec5SDimitry Andric // A CFG edge is dead if the predecessor is dead or the predecessor has a 26710b57cec5SDimitry Andric // known successor which is not the one under exam. 26720b57cec5SDimitry Andric return (DeadBlocks.count(Pred) || 26730b57cec5SDimitry Andric (KnownSuccessors[Pred] && KnownSuccessors[Pred] != Succ)); 26740b57cec5SDimitry Andric }; 26750b57cec5SDimitry Andric 26760b57cec5SDimitry Andric auto IsNewlyDead = [&](BasicBlock *BB) { 26770b57cec5SDimitry Andric // If all the edges to a block are dead, the block is also dead. 26780b57cec5SDimitry Andric return (!DeadBlocks.count(BB) && 26790b57cec5SDimitry Andric llvm::all_of(predecessors(BB), 26800b57cec5SDimitry Andric [&](BasicBlock *P) { return IsEdgeDead(P, BB); })); 26810b57cec5SDimitry Andric }; 26820b57cec5SDimitry Andric 26830b57cec5SDimitry Andric for (BasicBlock *Succ : successors(CurrBB)) { 26840b57cec5SDimitry Andric if (Succ == NextBB || !IsNewlyDead(Succ)) 26850b57cec5SDimitry Andric continue; 26860b57cec5SDimitry Andric SmallVector<BasicBlock *, 4> NewDead; 26870b57cec5SDimitry Andric NewDead.push_back(Succ); 26880b57cec5SDimitry Andric while (!NewDead.empty()) { 26890b57cec5SDimitry Andric BasicBlock *Dead = NewDead.pop_back_val(); 269081ad6265SDimitry Andric if (DeadBlocks.insert(Dead).second) 26910b57cec5SDimitry Andric // Continue growing the dead block lists. 26920b57cec5SDimitry Andric for (BasicBlock *S : successors(Dead)) 26930b57cec5SDimitry Andric if (IsNewlyDead(S)) 26940b57cec5SDimitry Andric NewDead.push_back(S); 26950b57cec5SDimitry Andric } 26960b57cec5SDimitry Andric } 26970b57cec5SDimitry Andric } 26980b57cec5SDimitry Andric 26990b57cec5SDimitry Andric /// Analyze a call site for potential inlining. 27000b57cec5SDimitry Andric /// 27010b57cec5SDimitry Andric /// Returns true if inlining this call is viable, and false if it is not 27020b57cec5SDimitry Andric /// viable. It computes the cost and adjusts the threshold based on numerous 27030b57cec5SDimitry Andric /// factors and heuristics. If this method returns false but the computed cost 27040b57cec5SDimitry Andric /// is below the computed threshold, then inlining was forcibly disabled by 27050b57cec5SDimitry Andric /// some artifact of the routine. 2706480093f4SDimitry Andric InlineResult CallAnalyzer::analyze() { 27070b57cec5SDimitry Andric ++NumCallsAnalyzed; 27080b57cec5SDimitry Andric 2709480093f4SDimitry Andric auto Result = onAnalysisStart(); 27105ffd83dbSDimitry Andric if (!Result.isSuccess()) 2711480093f4SDimitry Andric return Result; 27120b57cec5SDimitry Andric 27130b57cec5SDimitry Andric if (F.empty()) 27145ffd83dbSDimitry Andric return InlineResult::success(); 27150b57cec5SDimitry Andric 2716480093f4SDimitry Andric Function *Caller = CandidateCall.getFunction(); 27170b57cec5SDimitry Andric // Check if the caller function is recursive itself. 27180b57cec5SDimitry Andric for (User *U : Caller->users()) { 27190b57cec5SDimitry Andric CallBase *Call = dyn_cast<CallBase>(U); 27200b57cec5SDimitry Andric if (Call && Call->getFunction() == Caller) { 27210b57cec5SDimitry Andric IsCallerRecursive = true; 27220b57cec5SDimitry Andric break; 27230b57cec5SDimitry Andric } 27240b57cec5SDimitry Andric } 27250b57cec5SDimitry Andric 27260b57cec5SDimitry Andric // Populate our simplified values by mapping from function arguments to call 27270b57cec5SDimitry Andric // arguments with known important simplifications. 2728480093f4SDimitry Andric auto CAI = CandidateCall.arg_begin(); 2729e8d8bef9SDimitry Andric for (Argument &FAI : F.args()) { 2730480093f4SDimitry Andric assert(CAI != CandidateCall.arg_end()); 27310b57cec5SDimitry Andric if (Constant *C = dyn_cast<Constant>(CAI)) 2732e8d8bef9SDimitry Andric SimplifiedValues[&FAI] = C; 27330b57cec5SDimitry Andric 27340b57cec5SDimitry Andric Value *PtrArg = *CAI; 27350b57cec5SDimitry Andric if (ConstantInt *C = stripAndComputeInBoundsConstantOffsets(PtrArg)) { 2736e8d8bef9SDimitry Andric ConstantOffsetPtrs[&FAI] = std::make_pair(PtrArg, C->getValue()); 27370b57cec5SDimitry Andric 27380b57cec5SDimitry Andric // We can SROA any pointer arguments derived from alloca instructions. 2739480093f4SDimitry Andric if (auto *SROAArg = dyn_cast<AllocaInst>(PtrArg)) { 2740e8d8bef9SDimitry Andric SROAArgValues[&FAI] = SROAArg; 2741480093f4SDimitry Andric onInitializeSROAArg(SROAArg); 27425ffd83dbSDimitry Andric EnabledSROAAllocas.insert(SROAArg); 27430b57cec5SDimitry Andric } 27440b57cec5SDimitry Andric } 2745e8d8bef9SDimitry Andric ++CAI; 27460b57cec5SDimitry Andric } 27470b57cec5SDimitry Andric NumConstantArgs = SimplifiedValues.size(); 27480b57cec5SDimitry Andric NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size(); 27490b57cec5SDimitry Andric NumAllocaArgs = SROAArgValues.size(); 27500b57cec5SDimitry Andric 27510b57cec5SDimitry Andric // FIXME: If a caller has multiple calls to a callee, we end up recomputing 27520b57cec5SDimitry Andric // the ephemeral values multiple times (and they're completely determined by 27530b57cec5SDimitry Andric // the callee, so this is purely duplicate work). 27540b57cec5SDimitry Andric SmallPtrSet<const Value *, 32> EphValues; 27550b57cec5SDimitry Andric CodeMetrics::collectEphemeralValues(&F, &GetAssumptionCache(F), EphValues); 27560b57cec5SDimitry Andric 27570b57cec5SDimitry Andric // The worklist of live basic blocks in the callee *after* inlining. We avoid 27580b57cec5SDimitry Andric // adding basic blocks of the callee which can be proven to be dead for this 27590b57cec5SDimitry Andric // particular call site in order to get more accurate cost estimates. This 27600b57cec5SDimitry Andric // requires a somewhat heavyweight iteration pattern: we need to walk the 27610b57cec5SDimitry Andric // basic blocks in a breadth-first order as we insert live successors. To 27620b57cec5SDimitry Andric // accomplish this, prioritizing for small iterations because we exit after 27630b57cec5SDimitry Andric // crossing our threshold, we use a small-size optimized SetVector. 276406c3fb27SDimitry Andric typedef SmallSetVector<BasicBlock *, 16> BBSetVector; 27650b57cec5SDimitry Andric BBSetVector BBWorklist; 27660b57cec5SDimitry Andric BBWorklist.insert(&F.getEntryBlock()); 2767480093f4SDimitry Andric 27680b57cec5SDimitry Andric // Note that we *must not* cache the size, this loop grows the worklist. 27690b57cec5SDimitry Andric for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) { 2770480093f4SDimitry Andric if (shouldStop()) 27710b57cec5SDimitry Andric break; 27720b57cec5SDimitry Andric 27730b57cec5SDimitry Andric BasicBlock *BB = BBWorklist[Idx]; 27740b57cec5SDimitry Andric if (BB->empty()) 27750b57cec5SDimitry Andric continue; 27760b57cec5SDimitry Andric 2777e8d8bef9SDimitry Andric onBlockStart(BB); 2778e8d8bef9SDimitry Andric 27790b57cec5SDimitry Andric // Disallow inlining a blockaddress with uses other than strictly callbr. 27800b57cec5SDimitry Andric // A blockaddress only has defined behavior for an indirect branch in the 27810b57cec5SDimitry Andric // same function, and we do not currently support inlining indirect 27820b57cec5SDimitry Andric // branches. But, the inliner may not see an indirect branch that ends up 27830b57cec5SDimitry Andric // being dead code at a particular call site. If the blockaddress escapes 27840b57cec5SDimitry Andric // the function, e.g., via a global variable, inlining may lead to an 27850b57cec5SDimitry Andric // invalid cross-function reference. 27860b57cec5SDimitry Andric // FIXME: pr/39560: continue relaxing this overt restriction. 27870b57cec5SDimitry Andric if (BB->hasAddressTaken()) 27880b57cec5SDimitry Andric for (User *U : BlockAddress::get(&*BB)->users()) 27890b57cec5SDimitry Andric if (!isa<CallBrInst>(*U)) 27905ffd83dbSDimitry Andric return InlineResult::failure("blockaddress used outside of callbr"); 27910b57cec5SDimitry Andric 27920b57cec5SDimitry Andric // Analyze the cost of this block. If we blow through the threshold, this 27930b57cec5SDimitry Andric // returns false, and we can bail on out. 27940b57cec5SDimitry Andric InlineResult IR = analyzeBlock(BB, EphValues); 27955ffd83dbSDimitry Andric if (!IR.isSuccess()) 27960b57cec5SDimitry Andric return IR; 27970b57cec5SDimitry Andric 27980b57cec5SDimitry Andric Instruction *TI = BB->getTerminator(); 27990b57cec5SDimitry Andric 28000b57cec5SDimitry Andric // Add in the live successors by first checking whether we have terminator 28010b57cec5SDimitry Andric // that may be simplified based on the values simplified by this call. 28020b57cec5SDimitry Andric if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { 28030b57cec5SDimitry Andric if (BI->isConditional()) { 28040b57cec5SDimitry Andric Value *Cond = BI->getCondition(); 28050b57cec5SDimitry Andric if (ConstantInt *SimpleCond = 28060b57cec5SDimitry Andric dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) { 28070b57cec5SDimitry Andric BasicBlock *NextBB = BI->getSuccessor(SimpleCond->isZero() ? 1 : 0); 28080b57cec5SDimitry Andric BBWorklist.insert(NextBB); 28090b57cec5SDimitry Andric KnownSuccessors[BB] = NextBB; 28100b57cec5SDimitry Andric findDeadBlocks(BB, NextBB); 28110b57cec5SDimitry Andric continue; 28120b57cec5SDimitry Andric } 28130b57cec5SDimitry Andric } 28140b57cec5SDimitry Andric } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) { 28150b57cec5SDimitry Andric Value *Cond = SI->getCondition(); 28160b57cec5SDimitry Andric if (ConstantInt *SimpleCond = 28170b57cec5SDimitry Andric dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) { 28180b57cec5SDimitry Andric BasicBlock *NextBB = SI->findCaseValue(SimpleCond)->getCaseSuccessor(); 28190b57cec5SDimitry Andric BBWorklist.insert(NextBB); 28200b57cec5SDimitry Andric KnownSuccessors[BB] = NextBB; 28210b57cec5SDimitry Andric findDeadBlocks(BB, NextBB); 28220b57cec5SDimitry Andric continue; 28230b57cec5SDimitry Andric } 28240b57cec5SDimitry Andric } 28250b57cec5SDimitry Andric 28260b57cec5SDimitry Andric // If we're unable to select a particular successor, just count all of 28270b57cec5SDimitry Andric // them. 2828*0fca6ea1SDimitry Andric for (BasicBlock *Succ : successors(BB)) 2829*0fca6ea1SDimitry Andric BBWorklist.insert(Succ); 28300b57cec5SDimitry Andric 2831480093f4SDimitry Andric onBlockAnalyzed(BB); 28320b57cec5SDimitry Andric } 28330b57cec5SDimitry Andric 28340b57cec5SDimitry Andric // If this is a noduplicate call, we can still inline as long as 28350b57cec5SDimitry Andric // inlining this would cause the removal of the caller (so the instruction 28360b57cec5SDimitry Andric // is not actually duplicated, just moved). 2837bdd1243dSDimitry Andric if (!isSoleCallToLocalFunction(CandidateCall, F) && ContainsNoDuplicateCall) 28385ffd83dbSDimitry Andric return InlineResult::failure("noduplicate"); 28390b57cec5SDimitry Andric 284081ad6265SDimitry Andric // If the callee's stack size exceeds the user-specified threshold, 284181ad6265SDimitry Andric // do not let it be inlined. 2842bdd1243dSDimitry Andric // The command line option overrides a limit set in the function attributes. 2843bdd1243dSDimitry Andric size_t FinalStackSizeThreshold = StackSizeThreshold; 2844bdd1243dSDimitry Andric if (!StackSizeThreshold.getNumOccurrences()) 2845bdd1243dSDimitry Andric if (std::optional<int> AttrMaxStackSize = getStringFnAttrAsInt( 2846bdd1243dSDimitry Andric Caller, InlineConstants::MaxInlineStackSizeAttributeName)) 2847bdd1243dSDimitry Andric FinalStackSizeThreshold = *AttrMaxStackSize; 2848bdd1243dSDimitry Andric if (AllocatedSize > FinalStackSizeThreshold) 284981ad6265SDimitry Andric return InlineResult::failure("stacksize"); 285081ad6265SDimitry Andric 2851480093f4SDimitry Andric return finalizeAnalysis(); 28520b57cec5SDimitry Andric } 28530b57cec5SDimitry Andric 2854349cc55cSDimitry Andric void InlineCostCallAnalyzer::print(raw_ostream &OS) { 2855349cc55cSDimitry Andric #define DEBUG_PRINT_STAT(x) OS << " " #x ": " << x << "\n" 28565ffd83dbSDimitry Andric if (PrintInstructionComments) 2857349cc55cSDimitry Andric F.print(OS, &Writer); 28580b57cec5SDimitry Andric DEBUG_PRINT_STAT(NumConstantArgs); 28590b57cec5SDimitry Andric DEBUG_PRINT_STAT(NumConstantOffsetPtrArgs); 28600b57cec5SDimitry Andric DEBUG_PRINT_STAT(NumAllocaArgs); 28610b57cec5SDimitry Andric DEBUG_PRINT_STAT(NumConstantPtrCmps); 28620b57cec5SDimitry Andric DEBUG_PRINT_STAT(NumConstantPtrDiffs); 28630b57cec5SDimitry Andric DEBUG_PRINT_STAT(NumInstructionsSimplified); 28640b57cec5SDimitry Andric DEBUG_PRINT_STAT(NumInstructions); 28650b57cec5SDimitry Andric DEBUG_PRINT_STAT(SROACostSavings); 28660b57cec5SDimitry Andric DEBUG_PRINT_STAT(SROACostSavingsLost); 28670b57cec5SDimitry Andric DEBUG_PRINT_STAT(LoadEliminationCost); 28680b57cec5SDimitry Andric DEBUG_PRINT_STAT(ContainsNoDuplicateCall); 28690b57cec5SDimitry Andric DEBUG_PRINT_STAT(Cost); 28700b57cec5SDimitry Andric DEBUG_PRINT_STAT(Threshold); 28710b57cec5SDimitry Andric #undef DEBUG_PRINT_STAT 28720b57cec5SDimitry Andric } 28735ffd83dbSDimitry Andric 28745ffd83dbSDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 28755ffd83dbSDimitry Andric /// Dump stats about this call's analysis. 2876349cc55cSDimitry Andric LLVM_DUMP_METHOD void InlineCostCallAnalyzer::dump() { print(dbgs()); } 28770b57cec5SDimitry Andric #endif 28780b57cec5SDimitry Andric 28790b57cec5SDimitry Andric /// Test that there are no attribute conflicts between Caller and Callee 28800b57cec5SDimitry Andric /// that prevent inlining. 28815ffd83dbSDimitry Andric static bool functionsHaveCompatibleAttributes( 28824542f901SDimitry Andric Function *Caller, Function *Callee, TargetTransformInfo &TTI, 28835ffd83dbSDimitry Andric function_ref<const TargetLibraryInfo &(Function &)> &GetTLI) { 28845ffd83dbSDimitry Andric // Note that CalleeTLI must be a copy not a reference. The legacy pass manager 28855ffd83dbSDimitry Andric // caches the most recently created TLI in the TargetLibraryInfoWrapperPass 28865ffd83dbSDimitry Andric // object, and always returns the same object (which is overwritten on each 28875ffd83dbSDimitry Andric // GetTLI call). Therefore we copy the first result. 28885ffd83dbSDimitry Andric auto CalleeTLI = GetTLI(*Callee); 28894542f901SDimitry Andric return (IgnoreTTIInlineCompatible || 28904542f901SDimitry Andric TTI.areInlineCompatible(Caller, Callee)) && 28914542f901SDimitry Andric GetTLI(*Caller).areInlineCompatible(CalleeTLI, 28925ffd83dbSDimitry Andric InlineCallerSupersetNoBuiltin) && 28930b57cec5SDimitry Andric AttributeFuncs::areInlineCompatible(*Caller, *Callee); 28940b57cec5SDimitry Andric } 28950b57cec5SDimitry Andric 28965f757f3fSDimitry Andric int llvm::getCallsiteCost(const TargetTransformInfo &TTI, const CallBase &Call, 28975f757f3fSDimitry Andric const DataLayout &DL) { 2898bdd1243dSDimitry Andric int64_t Cost = 0; 28990b57cec5SDimitry Andric for (unsigned I = 0, E = Call.arg_size(); I != E; ++I) { 29000b57cec5SDimitry Andric if (Call.isByValArgument(I)) { 29010b57cec5SDimitry Andric // We approximate the number of loads and stores needed by dividing the 29020b57cec5SDimitry Andric // size of the byval type by the target's pointer size. 29030b57cec5SDimitry Andric PointerType *PTy = cast<PointerType>(Call.getArgOperand(I)->getType()); 2904fe6060f1SDimitry Andric unsigned TypeSize = DL.getTypeSizeInBits(Call.getParamByValType(I)); 29050b57cec5SDimitry Andric unsigned AS = PTy->getAddressSpace(); 29060b57cec5SDimitry Andric unsigned PointerSize = DL.getPointerSizeInBits(AS); 29070b57cec5SDimitry Andric // Ceiling division. 29080b57cec5SDimitry Andric unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize; 29090b57cec5SDimitry Andric 29100b57cec5SDimitry Andric // If it generates more than 8 stores it is likely to be expanded as an 29110b57cec5SDimitry Andric // inline memcpy so we take that as an upper bound. Otherwise we assume 29120b57cec5SDimitry Andric // one load and one store per word copied. 29130b57cec5SDimitry Andric // FIXME: The maxStoresPerMemcpy setting from the target should be used 29140b57cec5SDimitry Andric // here instead of a magic number of 8, but it's not available via 29150b57cec5SDimitry Andric // DataLayout. 29160b57cec5SDimitry Andric NumStores = std::min(NumStores, 8U); 29170b57cec5SDimitry Andric 2918bdd1243dSDimitry Andric Cost += 2 * NumStores * InstrCost; 29190b57cec5SDimitry Andric } else { 29200b57cec5SDimitry Andric // For non-byval arguments subtract off one instruction per call 29210b57cec5SDimitry Andric // argument. 2922bdd1243dSDimitry Andric Cost += InstrCost; 29230b57cec5SDimitry Andric } 29240b57cec5SDimitry Andric } 29250b57cec5SDimitry Andric // The call instruction also disappears after inlining. 2926bdd1243dSDimitry Andric Cost += InstrCost; 29275f757f3fSDimitry Andric Cost += TTI.getInlineCallPenalty(Call.getCaller(), Call, CallPenalty); 29285f757f3fSDimitry Andric 2929bdd1243dSDimitry Andric return std::min<int64_t>(Cost, INT_MAX); 29300b57cec5SDimitry Andric } 29310b57cec5SDimitry Andric 29320b57cec5SDimitry Andric InlineCost llvm::getInlineCost( 29330b57cec5SDimitry Andric CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI, 29345ffd83dbSDimitry Andric function_ref<AssumptionCache &(Function &)> GetAssumptionCache, 29355ffd83dbSDimitry Andric function_ref<const TargetLibraryInfo &(Function &)> GetTLI, 29365ffd83dbSDimitry Andric function_ref<BlockFrequencyInfo &(Function &)> GetBFI, 29370b57cec5SDimitry Andric ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) { 29380b57cec5SDimitry Andric return getInlineCost(Call, Call.getCalledFunction(), Params, CalleeTTI, 29395ffd83dbSDimitry Andric GetAssumptionCache, GetTLI, GetBFI, PSI, ORE); 29400b57cec5SDimitry Andric } 29410b57cec5SDimitry Andric 2942bdd1243dSDimitry Andric std::optional<int> llvm::getInliningCostEstimate( 29435ffd83dbSDimitry Andric CallBase &Call, TargetTransformInfo &CalleeTTI, 29445ffd83dbSDimitry Andric function_ref<AssumptionCache &(Function &)> GetAssumptionCache, 29455ffd83dbSDimitry Andric function_ref<BlockFrequencyInfo &(Function &)> GetBFI, 29460b57cec5SDimitry Andric ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) { 29475ffd83dbSDimitry Andric const InlineParams Params = {/* DefaultThreshold*/ 0, 29485ffd83dbSDimitry Andric /*HintThreshold*/ {}, 29495ffd83dbSDimitry Andric /*ColdThreshold*/ {}, 29505ffd83dbSDimitry Andric /*OptSizeThreshold*/ {}, 29515ffd83dbSDimitry Andric /*OptMinSizeThreshold*/ {}, 29525ffd83dbSDimitry Andric /*HotCallSiteThreshold*/ {}, 29535ffd83dbSDimitry Andric /*LocallyHotCallSiteThreshold*/ {}, 29545ffd83dbSDimitry Andric /*ColdCallSiteThreshold*/ {}, 29555ffd83dbSDimitry Andric /*ComputeFullInlineCost*/ true, 29565ffd83dbSDimitry Andric /*EnableDeferral*/ true}; 29575ffd83dbSDimitry Andric 29585ffd83dbSDimitry Andric InlineCostCallAnalyzer CA(*Call.getCalledFunction(), Call, Params, CalleeTTI, 29595ffd83dbSDimitry Andric GetAssumptionCache, GetBFI, PSI, ORE, true, 29605ffd83dbSDimitry Andric /*IgnoreThreshold*/ true); 29615ffd83dbSDimitry Andric auto R = CA.analyze(); 29625ffd83dbSDimitry Andric if (!R.isSuccess()) 2963bdd1243dSDimitry Andric return std::nullopt; 29645ffd83dbSDimitry Andric return CA.getCost(); 29655ffd83dbSDimitry Andric } 29665ffd83dbSDimitry Andric 2967bdd1243dSDimitry Andric std::optional<InlineCostFeatures> llvm::getInliningCostFeatures( 2968fe6060f1SDimitry Andric CallBase &Call, TargetTransformInfo &CalleeTTI, 2969fe6060f1SDimitry Andric function_ref<AssumptionCache &(Function &)> GetAssumptionCache, 2970fe6060f1SDimitry Andric function_ref<BlockFrequencyInfo &(Function &)> GetBFI, 2971fe6060f1SDimitry Andric ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) { 2972fe6060f1SDimitry Andric InlineCostFeaturesAnalyzer CFA(CalleeTTI, GetAssumptionCache, GetBFI, PSI, 2973fe6060f1SDimitry Andric ORE, *Call.getCalledFunction(), Call); 2974fe6060f1SDimitry Andric auto R = CFA.analyze(); 2975fe6060f1SDimitry Andric if (!R.isSuccess()) 2976bdd1243dSDimitry Andric return std::nullopt; 2977fe6060f1SDimitry Andric return CFA.features(); 2978fe6060f1SDimitry Andric } 2979fe6060f1SDimitry Andric 2980bdd1243dSDimitry Andric std::optional<InlineResult> llvm::getAttributeBasedInliningDecision( 29815ffd83dbSDimitry Andric CallBase &Call, Function *Callee, TargetTransformInfo &CalleeTTI, 29825ffd83dbSDimitry Andric function_ref<const TargetLibraryInfo &(Function &)> GetTLI) { 29830b57cec5SDimitry Andric 29840b57cec5SDimitry Andric // Cannot inline indirect calls. 29850b57cec5SDimitry Andric if (!Callee) 29865ffd83dbSDimitry Andric return InlineResult::failure("indirect call"); 29870b57cec5SDimitry Andric 2988e8d8bef9SDimitry Andric // When callee coroutine function is inlined into caller coroutine function 2989e8d8bef9SDimitry Andric // before coro-split pass, 2990e8d8bef9SDimitry Andric // coro-early pass can not handle this quiet well. 2991e8d8bef9SDimitry Andric // So we won't inline the coroutine function if it have not been unsplited 2992e8d8bef9SDimitry Andric if (Callee->isPresplitCoroutine()) 2993e8d8bef9SDimitry Andric return InlineResult::failure("unsplited coroutine call"); 2994e8d8bef9SDimitry Andric 29950b57cec5SDimitry Andric // Never inline calls with byval arguments that does not have the alloca 29960b57cec5SDimitry Andric // address space. Since byval arguments can be replaced with a copy to an 29970b57cec5SDimitry Andric // alloca, the inlined code would need to be adjusted to handle that the 29980b57cec5SDimitry Andric // argument is in the alloca address space (so it is a little bit complicated 29990b57cec5SDimitry Andric // to solve). 3000*0fca6ea1SDimitry Andric unsigned AllocaAS = Callee->getDataLayout().getAllocaAddrSpace(); 30010b57cec5SDimitry Andric for (unsigned I = 0, E = Call.arg_size(); I != E; ++I) 30020b57cec5SDimitry Andric if (Call.isByValArgument(I)) { 30030b57cec5SDimitry Andric PointerType *PTy = cast<PointerType>(Call.getArgOperand(I)->getType()); 30040b57cec5SDimitry Andric if (PTy->getAddressSpace() != AllocaAS) 30055ffd83dbSDimitry Andric return InlineResult::failure("byval arguments without alloca" 30060b57cec5SDimitry Andric " address space"); 30070b57cec5SDimitry Andric } 30080b57cec5SDimitry Andric 30090b57cec5SDimitry Andric // Calls to functions with always-inline attributes should be inlined 30100b57cec5SDimitry Andric // whenever possible. 30110b57cec5SDimitry Andric if (Call.hasFnAttr(Attribute::AlwaysInline)) { 301281ad6265SDimitry Andric if (Call.getAttributes().hasFnAttr(Attribute::NoInline)) 301381ad6265SDimitry Andric return InlineResult::failure("noinline call site attribute"); 301481ad6265SDimitry Andric 30150b57cec5SDimitry Andric auto IsViable = isInlineViable(*Callee); 30165ffd83dbSDimitry Andric if (IsViable.isSuccess()) 30175ffd83dbSDimitry Andric return InlineResult::success(); 30185ffd83dbSDimitry Andric return InlineResult::failure(IsViable.getFailureReason()); 30190b57cec5SDimitry Andric } 30200b57cec5SDimitry Andric 30210b57cec5SDimitry Andric // Never inline functions with conflicting attributes (unless callee has 30220b57cec5SDimitry Andric // always-inline attribute). 30234542f901SDimitry Andric Function *Caller = Call.getCaller(); 30244542f901SDimitry Andric if (!functionsHaveCompatibleAttributes(Caller, Callee, CalleeTTI, GetTLI)) 30255ffd83dbSDimitry Andric return InlineResult::failure("conflicting attributes"); 30260b57cec5SDimitry Andric 30270b57cec5SDimitry Andric // Don't inline this call if the caller has the optnone attribute. 30280b57cec5SDimitry Andric if (Caller->hasOptNone()) 30295ffd83dbSDimitry Andric return InlineResult::failure("optnone attribute"); 30300b57cec5SDimitry Andric 30310b57cec5SDimitry Andric // Don't inline a function that treats null pointer as valid into a caller 30320b57cec5SDimitry Andric // that does not have this attribute. 30330b57cec5SDimitry Andric if (!Caller->nullPointerIsDefined() && Callee->nullPointerIsDefined()) 30345ffd83dbSDimitry Andric return InlineResult::failure("nullptr definitions incompatible"); 30350b57cec5SDimitry Andric 30360b57cec5SDimitry Andric // Don't inline functions which can be interposed at link-time. 30370b57cec5SDimitry Andric if (Callee->isInterposable()) 30385ffd83dbSDimitry Andric return InlineResult::failure("interposable"); 30390b57cec5SDimitry Andric 30400b57cec5SDimitry Andric // Don't inline functions marked noinline. 30410b57cec5SDimitry Andric if (Callee->hasFnAttribute(Attribute::NoInline)) 30425ffd83dbSDimitry Andric return InlineResult::failure("noinline function attribute"); 30430b57cec5SDimitry Andric 30440b57cec5SDimitry Andric // Don't inline call sites marked noinline. 30450b57cec5SDimitry Andric if (Call.isNoInline()) 30465ffd83dbSDimitry Andric return InlineResult::failure("noinline call site attribute"); 30475ffd83dbSDimitry Andric 3048bdd1243dSDimitry Andric return std::nullopt; 30495ffd83dbSDimitry Andric } 30505ffd83dbSDimitry Andric 30515ffd83dbSDimitry Andric InlineCost llvm::getInlineCost( 30525ffd83dbSDimitry Andric CallBase &Call, Function *Callee, const InlineParams &Params, 30535ffd83dbSDimitry Andric TargetTransformInfo &CalleeTTI, 30545ffd83dbSDimitry Andric function_ref<AssumptionCache &(Function &)> GetAssumptionCache, 30555ffd83dbSDimitry Andric function_ref<const TargetLibraryInfo &(Function &)> GetTLI, 30565ffd83dbSDimitry Andric function_ref<BlockFrequencyInfo &(Function &)> GetBFI, 30575ffd83dbSDimitry Andric ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) { 30585ffd83dbSDimitry Andric 30595ffd83dbSDimitry Andric auto UserDecision = 30605ffd83dbSDimitry Andric llvm::getAttributeBasedInliningDecision(Call, Callee, CalleeTTI, GetTLI); 30615ffd83dbSDimitry Andric 306281ad6265SDimitry Andric if (UserDecision) { 30635ffd83dbSDimitry Andric if (UserDecision->isSuccess()) 30645ffd83dbSDimitry Andric return llvm::InlineCost::getAlways("always inline attribute"); 30655ffd83dbSDimitry Andric return llvm::InlineCost::getNever(UserDecision->getFailureReason()); 30665ffd83dbSDimitry Andric } 30670b57cec5SDimitry Andric 30680b57cec5SDimitry Andric LLVM_DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName() 30695ffd83dbSDimitry Andric << "... (caller:" << Call.getCaller()->getName() 30705ffd83dbSDimitry Andric << ")\n"); 30710b57cec5SDimitry Andric 30725ffd83dbSDimitry Andric InlineCostCallAnalyzer CA(*Callee, Call, Params, CalleeTTI, 30735ffd83dbSDimitry Andric GetAssumptionCache, GetBFI, PSI, ORE); 3074480093f4SDimitry Andric InlineResult ShouldInline = CA.analyze(); 30750b57cec5SDimitry Andric 30760b57cec5SDimitry Andric LLVM_DEBUG(CA.dump()); 30770b57cec5SDimitry Andric 3078fe6060f1SDimitry Andric // Always make cost benefit based decision explicit. 3079fe6060f1SDimitry Andric // We use always/never here since threshold is not meaningful, 3080fe6060f1SDimitry Andric // as it's not what drives cost-benefit analysis. 3081fe6060f1SDimitry Andric if (CA.wasDecidedByCostBenefit()) { 3082fe6060f1SDimitry Andric if (ShouldInline.isSuccess()) 3083fe6060f1SDimitry Andric return InlineCost::getAlways("benefit over cost", 3084fe6060f1SDimitry Andric CA.getCostBenefitPair()); 3085fe6060f1SDimitry Andric else 3086fe6060f1SDimitry Andric return InlineCost::getNever("cost over benefit", CA.getCostBenefitPair()); 3087fe6060f1SDimitry Andric } 3088fe6060f1SDimitry Andric 3089349cc55cSDimitry Andric if (CA.wasDecidedByCostThreshold()) 3090bdd1243dSDimitry Andric return InlineCost::get(CA.getCost(), CA.getThreshold(), 3091bdd1243dSDimitry Andric CA.getStaticBonusApplied()); 30920b57cec5SDimitry Andric 3093349cc55cSDimitry Andric // No details on how the decision was made, simply return always or never. 3094349cc55cSDimitry Andric return ShouldInline.isSuccess() 3095349cc55cSDimitry Andric ? InlineCost::getAlways("empty function") 3096349cc55cSDimitry Andric : InlineCost::getNever(ShouldInline.getFailureReason()); 30970b57cec5SDimitry Andric } 30980b57cec5SDimitry Andric 30990b57cec5SDimitry Andric InlineResult llvm::isInlineViable(Function &F) { 31000b57cec5SDimitry Andric bool ReturnsTwice = F.hasFnAttribute(Attribute::ReturnsTwice); 3101e8d8bef9SDimitry Andric for (BasicBlock &BB : F) { 31020b57cec5SDimitry Andric // Disallow inlining of functions which contain indirect branches. 3103e8d8bef9SDimitry Andric if (isa<IndirectBrInst>(BB.getTerminator())) 31045ffd83dbSDimitry Andric return InlineResult::failure("contains indirect branches"); 31050b57cec5SDimitry Andric 31060b57cec5SDimitry Andric // Disallow inlining of blockaddresses which are used by non-callbr 31070b57cec5SDimitry Andric // instructions. 3108e8d8bef9SDimitry Andric if (BB.hasAddressTaken()) 3109e8d8bef9SDimitry Andric for (User *U : BlockAddress::get(&BB)->users()) 31100b57cec5SDimitry Andric if (!isa<CallBrInst>(*U)) 31115ffd83dbSDimitry Andric return InlineResult::failure("blockaddress used outside of callbr"); 31120b57cec5SDimitry Andric 3113e8d8bef9SDimitry Andric for (auto &II : BB) { 31140b57cec5SDimitry Andric CallBase *Call = dyn_cast<CallBase>(&II); 31150b57cec5SDimitry Andric if (!Call) 31160b57cec5SDimitry Andric continue; 31170b57cec5SDimitry Andric 31180b57cec5SDimitry Andric // Disallow recursive calls. 3119e8d8bef9SDimitry Andric Function *Callee = Call->getCalledFunction(); 3120e8d8bef9SDimitry Andric if (&F == Callee) 31215ffd83dbSDimitry Andric return InlineResult::failure("recursive call"); 31220b57cec5SDimitry Andric 31230b57cec5SDimitry Andric // Disallow calls which expose returns-twice to a function not previously 31240b57cec5SDimitry Andric // attributed as such. 31250b57cec5SDimitry Andric if (!ReturnsTwice && isa<CallInst>(Call) && 31260b57cec5SDimitry Andric cast<CallInst>(Call)->canReturnTwice()) 31275ffd83dbSDimitry Andric return InlineResult::failure("exposes returns-twice attribute"); 31280b57cec5SDimitry Andric 3129e8d8bef9SDimitry Andric if (Callee) 3130e8d8bef9SDimitry Andric switch (Callee->getIntrinsicID()) { 31310b57cec5SDimitry Andric default: 31320b57cec5SDimitry Andric break; 3133480093f4SDimitry Andric case llvm::Intrinsic::icall_branch_funnel: 31340b57cec5SDimitry Andric // Disallow inlining of @llvm.icall.branch.funnel because current 31350b57cec5SDimitry Andric // backend can't separate call targets from call arguments. 31365ffd83dbSDimitry Andric return InlineResult::failure( 31375ffd83dbSDimitry Andric "disallowed inlining of @llvm.icall.branch.funnel"); 3138480093f4SDimitry Andric case llvm::Intrinsic::localescape: 31390b57cec5SDimitry Andric // Disallow inlining functions that call @llvm.localescape. Doing this 31400b57cec5SDimitry Andric // correctly would require major changes to the inliner. 31415ffd83dbSDimitry Andric return InlineResult::failure( 31425ffd83dbSDimitry Andric "disallowed inlining of @llvm.localescape"); 31430b57cec5SDimitry Andric case llvm::Intrinsic::vastart: 3144480093f4SDimitry Andric // Disallow inlining of functions that initialize VarArgs with 3145480093f4SDimitry Andric // va_start. 31465ffd83dbSDimitry Andric return InlineResult::failure( 31475ffd83dbSDimitry Andric "contains VarArgs initialized with va_start"); 31480b57cec5SDimitry Andric } 31490b57cec5SDimitry Andric } 31500b57cec5SDimitry Andric } 31510b57cec5SDimitry Andric 31525ffd83dbSDimitry Andric return InlineResult::success(); 31530b57cec5SDimitry Andric } 31540b57cec5SDimitry Andric 31550b57cec5SDimitry Andric // APIs to create InlineParams based on command line flags and/or other 31560b57cec5SDimitry Andric // parameters. 31570b57cec5SDimitry Andric 31580b57cec5SDimitry Andric InlineParams llvm::getInlineParams(int Threshold) { 31590b57cec5SDimitry Andric InlineParams Params; 31600b57cec5SDimitry Andric 31610b57cec5SDimitry Andric // This field is the threshold to use for a callee by default. This is 31620b57cec5SDimitry Andric // derived from one or more of: 31630b57cec5SDimitry Andric // * optimization or size-optimization levels, 31640b57cec5SDimitry Andric // * a value passed to createFunctionInliningPass function, or 31650b57cec5SDimitry Andric // * the -inline-threshold flag. 31660b57cec5SDimitry Andric // If the -inline-threshold flag is explicitly specified, that is used 31670b57cec5SDimitry Andric // irrespective of anything else. 31680b57cec5SDimitry Andric if (InlineThreshold.getNumOccurrences() > 0) 31690b57cec5SDimitry Andric Params.DefaultThreshold = InlineThreshold; 31700b57cec5SDimitry Andric else 31710b57cec5SDimitry Andric Params.DefaultThreshold = Threshold; 31720b57cec5SDimitry Andric 31730b57cec5SDimitry Andric // Set the HintThreshold knob from the -inlinehint-threshold. 31740b57cec5SDimitry Andric Params.HintThreshold = HintThreshold; 31750b57cec5SDimitry Andric 31760b57cec5SDimitry Andric // Set the HotCallSiteThreshold knob from the -hot-callsite-threshold. 31770b57cec5SDimitry Andric Params.HotCallSiteThreshold = HotCallSiteThreshold; 31780b57cec5SDimitry Andric 31790b57cec5SDimitry Andric // If the -locally-hot-callsite-threshold is explicitly specified, use it to 31800b57cec5SDimitry Andric // populate LocallyHotCallSiteThreshold. Later, we populate 31810b57cec5SDimitry Andric // Params.LocallyHotCallSiteThreshold from -locally-hot-callsite-threshold if 31820b57cec5SDimitry Andric // we know that optimization level is O3 (in the getInlineParams variant that 31830b57cec5SDimitry Andric // takes the opt and size levels). 31840b57cec5SDimitry Andric // FIXME: Remove this check (and make the assignment unconditional) after 31850b57cec5SDimitry Andric // addressing size regression issues at O2. 31860b57cec5SDimitry Andric if (LocallyHotCallSiteThreshold.getNumOccurrences() > 0) 31870b57cec5SDimitry Andric Params.LocallyHotCallSiteThreshold = LocallyHotCallSiteThreshold; 31880b57cec5SDimitry Andric 3189480093f4SDimitry Andric // Set the ColdCallSiteThreshold knob from the 3190480093f4SDimitry Andric // -inline-cold-callsite-threshold. 31910b57cec5SDimitry Andric Params.ColdCallSiteThreshold = ColdCallSiteThreshold; 31920b57cec5SDimitry Andric 31930b57cec5SDimitry Andric // Set the OptMinSizeThreshold and OptSizeThreshold params only if the 31940b57cec5SDimitry Andric // -inlinehint-threshold commandline option is not explicitly given. If that 31950b57cec5SDimitry Andric // option is present, then its value applies even for callees with size and 31960b57cec5SDimitry Andric // minsize attributes. 31970b57cec5SDimitry Andric // If the -inline-threshold is not specified, set the ColdThreshold from the 31980b57cec5SDimitry Andric // -inlinecold-threshold even if it is not explicitly passed. If 31990b57cec5SDimitry Andric // -inline-threshold is specified, then -inlinecold-threshold needs to be 32000b57cec5SDimitry Andric // explicitly specified to set the ColdThreshold knob 32010b57cec5SDimitry Andric if (InlineThreshold.getNumOccurrences() == 0) { 32020b57cec5SDimitry Andric Params.OptMinSizeThreshold = InlineConstants::OptMinSizeThreshold; 32030b57cec5SDimitry Andric Params.OptSizeThreshold = InlineConstants::OptSizeThreshold; 32040b57cec5SDimitry Andric Params.ColdThreshold = ColdThreshold; 32050b57cec5SDimitry Andric } else if (ColdThreshold.getNumOccurrences() > 0) { 32060b57cec5SDimitry Andric Params.ColdThreshold = ColdThreshold; 32070b57cec5SDimitry Andric } 32080b57cec5SDimitry Andric return Params; 32090b57cec5SDimitry Andric } 32100b57cec5SDimitry Andric 32110b57cec5SDimitry Andric InlineParams llvm::getInlineParams() { 32125ffd83dbSDimitry Andric return getInlineParams(DefaultThreshold); 32130b57cec5SDimitry Andric } 32140b57cec5SDimitry Andric 32150b57cec5SDimitry Andric // Compute the default threshold for inlining based on the opt level and the 32160b57cec5SDimitry Andric // size opt level. 32170b57cec5SDimitry Andric static int computeThresholdFromOptLevels(unsigned OptLevel, 32180b57cec5SDimitry Andric unsigned SizeOptLevel) { 32190b57cec5SDimitry Andric if (OptLevel > 2) 32200b57cec5SDimitry Andric return InlineConstants::OptAggressiveThreshold; 32210b57cec5SDimitry Andric if (SizeOptLevel == 1) // -Os 32220b57cec5SDimitry Andric return InlineConstants::OptSizeThreshold; 32230b57cec5SDimitry Andric if (SizeOptLevel == 2) // -Oz 32240b57cec5SDimitry Andric return InlineConstants::OptMinSizeThreshold; 32255ffd83dbSDimitry Andric return DefaultThreshold; 32260b57cec5SDimitry Andric } 32270b57cec5SDimitry Andric 32280b57cec5SDimitry Andric InlineParams llvm::getInlineParams(unsigned OptLevel, unsigned SizeOptLevel) { 32290b57cec5SDimitry Andric auto Params = 32300b57cec5SDimitry Andric getInlineParams(computeThresholdFromOptLevels(OptLevel, SizeOptLevel)); 32310b57cec5SDimitry Andric // At O3, use the value of -locally-hot-callsite-threshold option to populate 32320b57cec5SDimitry Andric // Params.LocallyHotCallSiteThreshold. Below O3, this flag has effect only 32330b57cec5SDimitry Andric // when it is specified explicitly. 32340b57cec5SDimitry Andric if (OptLevel > 2) 32350b57cec5SDimitry Andric Params.LocallyHotCallSiteThreshold = LocallyHotCallSiteThreshold; 32360b57cec5SDimitry Andric return Params; 32370b57cec5SDimitry Andric } 32385ffd83dbSDimitry Andric 32395ffd83dbSDimitry Andric PreservedAnalyses 32405ffd83dbSDimitry Andric InlineCostAnnotationPrinterPass::run(Function &F, 32415ffd83dbSDimitry Andric FunctionAnalysisManager &FAM) { 32425ffd83dbSDimitry Andric PrintInstructionComments = true; 3243fe6060f1SDimitry Andric std::function<AssumptionCache &(Function &)> GetAssumptionCache = 3244fe6060f1SDimitry Andric [&](Function &F) -> AssumptionCache & { 32455ffd83dbSDimitry Andric return FAM.getResult<AssumptionAnalysis>(F); 32465ffd83dbSDimitry Andric }; 32475ffd83dbSDimitry Andric Module *M = F.getParent(); 32485ffd83dbSDimitry Andric ProfileSummaryInfo PSI(*M); 32495ffd83dbSDimitry Andric DataLayout DL(M); 32505ffd83dbSDimitry Andric TargetTransformInfo TTI(DL); 32515ffd83dbSDimitry Andric // FIXME: Redesign the usage of InlineParams to expand the scope of this pass. 32525ffd83dbSDimitry Andric // In the current implementation, the type of InlineParams doesn't matter as 32535ffd83dbSDimitry Andric // the pass serves only for verification of inliner's decisions. 32545ffd83dbSDimitry Andric // We can add a flag which determines InlineParams for this run. Right now, 32555ffd83dbSDimitry Andric // the default InlineParams are used. 32565ffd83dbSDimitry Andric const InlineParams Params = llvm::getInlineParams(); 32575ffd83dbSDimitry Andric for (BasicBlock &BB : F) { 32585ffd83dbSDimitry Andric for (Instruction &I : BB) { 32595ffd83dbSDimitry Andric if (CallInst *CI = dyn_cast<CallInst>(&I)) { 32605ffd83dbSDimitry Andric Function *CalledFunction = CI->getCalledFunction(); 32615ffd83dbSDimitry Andric if (!CalledFunction || CalledFunction->isDeclaration()) 32625ffd83dbSDimitry Andric continue; 32635ffd83dbSDimitry Andric OptimizationRemarkEmitter ORE(CalledFunction); 32645ffd83dbSDimitry Andric InlineCostCallAnalyzer ICCA(*CalledFunction, *CI, Params, TTI, 32655ffd83dbSDimitry Andric GetAssumptionCache, nullptr, &PSI, &ORE); 32665ffd83dbSDimitry Andric ICCA.analyze(); 32675ffd83dbSDimitry Andric OS << " Analyzing call of " << CalledFunction->getName() 32685ffd83dbSDimitry Andric << "... (caller:" << CI->getCaller()->getName() << ")\n"; 3269349cc55cSDimitry Andric ICCA.print(OS); 3270349cc55cSDimitry Andric OS << "\n"; 32715ffd83dbSDimitry Andric } 32725ffd83dbSDimitry Andric } 32735ffd83dbSDimitry Andric } 32745ffd83dbSDimitry Andric return PreservedAnalyses::all(); 32755ffd83dbSDimitry Andric } 3276