19907746fSSjoerd Meijer //===- FunctionSpecialization.cpp - Function Specialization ---------------===// 29907746fSSjoerd Meijer // 39907746fSSjoerd Meijer // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 49907746fSSjoerd Meijer // See https://llvm.org/LICENSE.txt for license information. 59907746fSSjoerd Meijer // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 69907746fSSjoerd Meijer // 79907746fSSjoerd Meijer //===----------------------------------------------------------------------===// 89907746fSSjoerd Meijer 98136a017SAlexandros Lamprineas #include "llvm/Transforms/IPO/FunctionSpecialization.h" 109907746fSSjoerd Meijer #include "llvm/ADT/Statistic.h" 119907746fSSjoerd Meijer #include "llvm/Analysis/CodeMetrics.h" 124d13896dSAlexandros Lamprineas #include "llvm/Analysis/ConstantFolding.h" 139907746fSSjoerd Meijer #include "llvm/Analysis/InlineCost.h" 144d13896dSAlexandros Lamprineas #include "llvm/Analysis/InstructionSimplify.h" 159907746fSSjoerd Meijer #include "llvm/Analysis/TargetTransformInfo.h" 16a494ae43Sserge-sans-paille #include "llvm/Analysis/ValueLattice.h" 17a494ae43Sserge-sans-paille #include "llvm/Analysis/ValueLatticeUtils.h" 18de24d084SMomchil Velikov #include "llvm/Analysis/ValueTracking.h" 19a494ae43Sserge-sans-paille #include "llvm/IR/IntrinsicInst.h" 209907746fSSjoerd Meijer #include "llvm/Transforms/Scalar/SCCP.h" 219907746fSSjoerd Meijer #include "llvm/Transforms/Utils/Cloning.h" 2259630917Sserge-sans-paille #include "llvm/Transforms/Utils/SCCPSolver.h" 239907746fSSjoerd Meijer #include "llvm/Transforms/Utils/SizeOpts.h" 2486906304SChuanqi Xu #include <cmath> 259907746fSSjoerd Meijer 269907746fSSjoerd Meijer using namespace llvm; 279907746fSSjoerd Meijer 289907746fSSjoerd Meijer #define DEBUG_TYPE "function-specialization" 299907746fSSjoerd Meijer 3067fde2b9SAlexandros Lamprineas STATISTIC(NumSpecsCreated, "Number of specializations created"); 319907746fSSjoerd Meijer 329627bcdeSAlexandros Lamprineas static cl::opt<bool> ForceSpecialization( 339627bcdeSAlexandros Lamprineas "force-specialization", cl::init(false), cl::Hidden, cl::desc( 349627bcdeSAlexandros Lamprineas "Force function specialization for every call site with a constant " 359627bcdeSAlexandros Lamprineas "argument")); 369907746fSSjoerd Meijer 379627bcdeSAlexandros Lamprineas static cl::opt<unsigned> MaxClones( 389627bcdeSAlexandros Lamprineas "funcspec-max-clones", cl::init(3), cl::Hidden, cl::desc( 399627bcdeSAlexandros Lamprineas "The maximum number of clones allowed for a single function " 409627bcdeSAlexandros Lamprineas "specialization")); 419907746fSSjoerd Meijer 422fb51fbaSMats Petersson static cl::opt<unsigned> 432fb51fbaSMats Petersson MaxDiscoveryIterations("funcspec-max-discovery-iterations", cl::init(100), 442fb51fbaSMats Petersson cl::Hidden, 452fb51fbaSMats Petersson cl::desc("The maximum number of iterations allowed " 462fb51fbaSMats Petersson "when searching for transitive " 472fb51fbaSMats Petersson "phis")); 482fb51fbaSMats Petersson 49893d3a61SAlexandros Lamprineas static cl::opt<unsigned> MaxIncomingPhiValues( 502fb51fbaSMats Petersson "funcspec-max-incoming-phi-values", cl::init(8), cl::Hidden, 512fb51fbaSMats Petersson cl::desc("The maximum number of incoming values a PHI node can have to be " 52893d3a61SAlexandros Lamprineas "considered during the specialization bonus estimation")); 53893d3a61SAlexandros Lamprineas 54c2d19002SAlexandros Lamprineas static cl::opt<unsigned> MaxBlockPredecessors( 55c2d19002SAlexandros Lamprineas "funcspec-max-block-predecessors", cl::init(2), cl::Hidden, cl::desc( 56c2d19002SAlexandros Lamprineas "The maximum number of predecessors a basic block can have to be " 57c2d19002SAlexandros Lamprineas "considered during the estimation of dead code")); 58c2d19002SAlexandros Lamprineas 599627bcdeSAlexandros Lamprineas static cl::opt<unsigned> MinFunctionSize( 600d1a91e8SHari Limaye "funcspec-min-function-size", cl::init(500), cl::Hidden, 610d1a91e8SHari Limaye cl::desc("Don't specialize functions that have less than this number of " 629627bcdeSAlexandros Lamprineas "instructions")); 632556f581SChuanqi Xu 64386aa2abSAlexandros Lamprineas static cl::opt<unsigned> MaxCodeSizeGrowth( 65386aa2abSAlexandros Lamprineas "funcspec-max-codesize-growth", cl::init(3), cl::Hidden, cl::desc( 66386aa2abSAlexandros Lamprineas "Maximum codesize growth allowed per function")); 67386aa2abSAlexandros Lamprineas 68d1b376fdSAlexandros Lamprineas static cl::opt<unsigned> MinCodeSizeSavings( 6967efbd0bSRyan Mansfield "funcspec-min-codesize-savings", cl::init(20), cl::Hidden, 7067efbd0bSRyan Mansfield cl::desc("Reject specializations whose codesize savings are less than this " 71d1b376fdSAlexandros Lamprineas "much percent of the original function size")); 72d1b376fdSAlexandros Lamprineas 73d1b376fdSAlexandros Lamprineas static cl::opt<unsigned> MinLatencySavings( 742fb51fbaSMats Petersson "funcspec-min-latency-savings", cl::init(40), cl::Hidden, 752fb51fbaSMats Petersson cl::desc("Reject specializations whose latency savings are less than this " 76d1b376fdSAlexandros Lamprineas "much percent of the original function size")); 77d1b376fdSAlexandros Lamprineas 78d1b376fdSAlexandros Lamprineas static cl::opt<unsigned> MinInliningBonus( 7967efbd0bSRyan Mansfield "funcspec-min-inlining-bonus", cl::init(300), cl::Hidden, 8067efbd0bSRyan Mansfield cl::desc("Reject specializations whose inlining bonus is less than this " 81d1b376fdSAlexandros Lamprineas "much percent of the original function size")); 82d1b376fdSAlexandros Lamprineas 839627bcdeSAlexandros Lamprineas static cl::opt<bool> SpecializeOnAddress( 849627bcdeSAlexandros Lamprineas "funcspec-on-address", cl::init(false), cl::Hidden, cl::desc( 859627bcdeSAlexandros Lamprineas "Enable function specialization on the address of global values")); 8697cc678cSSjoerd Meijer 879627bcdeSAlexandros Lamprineas static cl::opt<bool> SpecializeLiteralConstant( 8806664fdcSHari Limaye "funcspec-for-literal-constant", cl::init(true), cl::Hidden, 8906664fdcSHari Limaye cl::desc( 909627bcdeSAlexandros Lamprineas "Enable specialization of functions that take a literal constant as an " 919627bcdeSAlexandros Lamprineas "argument")); 92801c2b9bSChuanqi Xu 9388e9b373SHari Limaye bool InstCostVisitor::canEliminateSuccessor(BasicBlock *BB, 9488e9b373SHari Limaye BasicBlock *Succ) const { 95c2d19002SAlexandros Lamprineas unsigned I = 0; 9688e9b373SHari Limaye return all_of(predecessors(Succ), [&I, BB, Succ, this](BasicBlock *Pred) { 97c2d19002SAlexandros Lamprineas return I++ < MaxBlockPredecessors && 9888e9b373SHari Limaye (Pred == BB || Pred == Succ || !isBlockExecutable(Pred)); 99c2d19002SAlexandros Lamprineas }); 100c2d19002SAlexandros Lamprineas } 101c2d19002SAlexandros Lamprineas 1025bfefff1SAlexandros Lamprineas // Estimates the codesize savings due to dead code after constant propagation. 1035bfefff1SAlexandros Lamprineas // \p WorkList represents the basic blocks of a specialization which will 1045bfefff1SAlexandros Lamprineas // eventually become dead once we replace instructions that are known to be 1055bfefff1SAlexandros Lamprineas // constants. The successors of such blocks are added to the list as long as 1065bfefff1SAlexandros Lamprineas // the \p Solver found they were executable prior to specialization, and only 107c2d19002SAlexandros Lamprineas // if all their predecessors are dead. 108c2d19002SAlexandros Lamprineas Cost InstCostVisitor::estimateBasicBlocks( 109c2d19002SAlexandros Lamprineas SmallVectorImpl<BasicBlock *> &WorkList) { 1105bfefff1SAlexandros Lamprineas Cost CodeSize = 0; 111c6931c25SHari Limaye // Accumulate the codesize savings of each basic block. 1124d13896dSAlexandros Lamprineas while (!WorkList.empty()) { 1134d13896dSAlexandros Lamprineas BasicBlock *BB = WorkList.pop_back_val(); 1144d13896dSAlexandros Lamprineas 115893d3a61SAlexandros Lamprineas // These blocks are considered dead as far as the InstCostVisitor 116893d3a61SAlexandros Lamprineas // is concerned. They haven't been proven dead yet by the Solver, 117893d3a61SAlexandros Lamprineas // but may become if we propagate the specialization arguments. 11888e9b373SHari Limaye assert(Solver.isBlockExecutable(BB) && "BB already found dead by IPSCCP!"); 119893d3a61SAlexandros Lamprineas if (!DeadBlocks.insert(BB).second) 120893d3a61SAlexandros Lamprineas continue; 121893d3a61SAlexandros Lamprineas 1224d13896dSAlexandros Lamprineas for (Instruction &I : *BB) { 1234d13896dSAlexandros Lamprineas // If it's a known constant we have already accounted for it. 1244d13896dSAlexandros Lamprineas if (KnownConstants.contains(&I)) 1254d13896dSAlexandros Lamprineas continue; 1264d13896dSAlexandros Lamprineas 1275bfefff1SAlexandros Lamprineas Cost C = TTI.getInstructionCost(&I, TargetTransformInfo::TCK_CodeSize); 1284d13896dSAlexandros Lamprineas 1295bfefff1SAlexandros Lamprineas LLVM_DEBUG(dbgs() << "FnSpecialization: CodeSize " << C 1305bfefff1SAlexandros Lamprineas << " for user " << I << "\n"); 1315bfefff1SAlexandros Lamprineas CodeSize += C; 1324d13896dSAlexandros Lamprineas } 1334d13896dSAlexandros Lamprineas 1344d13896dSAlexandros Lamprineas // Keep adding dead successors to the list as long as they are 135c2d19002SAlexandros Lamprineas // executable and only reachable from dead blocks. 1364d13896dSAlexandros Lamprineas for (BasicBlock *SuccBB : successors(BB)) 13788e9b373SHari Limaye if (isBlockExecutable(SuccBB) && canEliminateSuccessor(BB, SuccBB)) 1384d13896dSAlexandros Lamprineas WorkList.push_back(SuccBB); 1394d13896dSAlexandros Lamprineas } 1405bfefff1SAlexandros Lamprineas return CodeSize; 1414d13896dSAlexandros Lamprineas } 1424d13896dSAlexandros Lamprineas 14388e9b373SHari Limaye Constant *InstCostVisitor::findConstantFor(Value *V) const { 144bb6d60bfSAlexandros Lamprineas if (auto *C = dyn_cast<Constant>(V)) 145bb6d60bfSAlexandros Lamprineas return C; 14688e9b373SHari Limaye if (auto *C = Solver.getConstantOrNull(V)) 14788e9b373SHari Limaye return C; 1483c03a151SKazu Hirata return KnownConstants.lookup(V); 1494d13896dSAlexandros Lamprineas } 1504d13896dSAlexandros Lamprineas 151c6931c25SHari Limaye Cost InstCostVisitor::getCodeSizeSavingsFromPendingPHIs() { 152c6931c25SHari Limaye Cost CodeSize; 153893d3a61SAlexandros Lamprineas while (!PendingPHIs.empty()) { 154893d3a61SAlexandros Lamprineas Instruction *Phi = PendingPHIs.pop_back_val(); 155893d3a61SAlexandros Lamprineas // The pending PHIs could have been proven dead by now. 156893d3a61SAlexandros Lamprineas if (isBlockExecutable(Phi->getParent())) 157c6931c25SHari Limaye CodeSize += getCodeSizeSavingsForUser(Phi); 158893d3a61SAlexandros Lamprineas } 159c6931c25SHari Limaye return CodeSize; 160893d3a61SAlexandros Lamprineas } 161893d3a61SAlexandros Lamprineas 162c6931c25SHari Limaye /// Compute the codesize savings for replacing argument \p A with constant \p C. 163c6931c25SHari Limaye Cost InstCostVisitor::getCodeSizeSavingsForArg(Argument *A, Constant *C) { 164d1b376fdSAlexandros Lamprineas LLVM_DEBUG(dbgs() << "FnSpecialization: Analysing bonus for constant: " 165d1b376fdSAlexandros Lamprineas << C->getNameOrAsOperand() << "\n"); 166c6931c25SHari Limaye Cost CodeSize; 167d1b376fdSAlexandros Lamprineas for (auto *U : A->users()) 168d1b376fdSAlexandros Lamprineas if (auto *UI = dyn_cast<Instruction>(U)) 169d1b376fdSAlexandros Lamprineas if (isBlockExecutable(UI->getParent())) 170c6931c25SHari Limaye CodeSize += getCodeSizeSavingsForUser(UI, A, C); 171d1b376fdSAlexandros Lamprineas 172d1b376fdSAlexandros Lamprineas LLVM_DEBUG(dbgs() << "FnSpecialization: Accumulated bonus {CodeSize = " 173c6931c25SHari Limaye << CodeSize << "} for argument " << *A << "\n"); 174c6931c25SHari Limaye return CodeSize; 175d1b376fdSAlexandros Lamprineas } 176d1b376fdSAlexandros Lamprineas 177c6931c25SHari Limaye /// Compute the latency savings from replacing all arguments with constants for 178c6931c25SHari Limaye /// a specialization candidate. As this function computes the latency savings 179c6931c25SHari Limaye /// for all Instructions in KnownConstants at once, it should be called only 180c6931c25SHari Limaye /// after every instruction has been visited, i.e. after: 181c6931c25SHari Limaye /// 182c6931c25SHari Limaye /// * getCodeSizeSavingsForArg has been run for every constant argument of a 183c6931c25SHari Limaye /// specialization candidate 184c6931c25SHari Limaye /// 185c6931c25SHari Limaye /// * getCodeSizeSavingsFromPendingPHIs has been run 186c6931c25SHari Limaye /// 187c6931c25SHari Limaye /// to ensure that the latency savings are calculated for all Instructions we 188c6931c25SHari Limaye /// have visited and found to be constant. 189c6931c25SHari Limaye Cost InstCostVisitor::getLatencySavingsForKnownConstants() { 190c6931c25SHari Limaye auto &BFI = GetBFI(*F); 191c6931c25SHari Limaye Cost TotalLatency = 0; 192c6931c25SHari Limaye 193c6931c25SHari Limaye for (auto Pair : KnownConstants) { 194c6931c25SHari Limaye Instruction *I = dyn_cast<Instruction>(Pair.first); 195c6931c25SHari Limaye if (!I) 196c6931c25SHari Limaye continue; 197c6931c25SHari Limaye 198c6931c25SHari Limaye uint64_t Weight = BFI.getBlockFreq(I->getParent()).getFrequency() / 199c6931c25SHari Limaye BFI.getEntryFreq().getFrequency(); 200c6931c25SHari Limaye 201c6931c25SHari Limaye Cost Latency = 202c6931c25SHari Limaye Weight * TTI.getInstructionCost(I, TargetTransformInfo::TCK_Latency); 203c6931c25SHari Limaye 204c6931c25SHari Limaye LLVM_DEBUG(dbgs() << "FnSpecialization: {Latency = " << Latency 205c6931c25SHari Limaye << "} for instruction " << *I << "\n"); 206c6931c25SHari Limaye 207c6931c25SHari Limaye TotalLatency += Latency; 208c6931c25SHari Limaye } 209c6931c25SHari Limaye 210c6931c25SHari Limaye return TotalLatency; 211c6931c25SHari Limaye } 212c6931c25SHari Limaye 213c6931c25SHari Limaye Cost InstCostVisitor::getCodeSizeSavingsForUser(Instruction *User, Value *Use, 214c6931c25SHari Limaye Constant *C) { 215893d3a61SAlexandros Lamprineas // We have already propagated a constant for this user. 216893d3a61SAlexandros Lamprineas if (KnownConstants.contains(User)) 217c6931c25SHari Limaye return 0; 218893d3a61SAlexandros Lamprineas 2194d13896dSAlexandros Lamprineas // Cache the iterator before visiting. 220893d3a61SAlexandros Lamprineas LastVisited = Use ? KnownConstants.insert({Use, C}).first 221893d3a61SAlexandros Lamprineas : KnownConstants.end(); 2224d13896dSAlexandros Lamprineas 2235bfefff1SAlexandros Lamprineas Cost CodeSize = 0; 2245bfefff1SAlexandros Lamprineas if (auto *I = dyn_cast<SwitchInst>(User)) { 2255bfefff1SAlexandros Lamprineas CodeSize = estimateSwitchInst(*I); 2265bfefff1SAlexandros Lamprineas } else if (auto *I = dyn_cast<BranchInst>(User)) { 2275bfefff1SAlexandros Lamprineas CodeSize = estimateBranchInst(*I); 2285bfefff1SAlexandros Lamprineas } else { 2294d13896dSAlexandros Lamprineas C = visit(*User); 2304d13896dSAlexandros Lamprineas if (!C) 231c6931c25SHari Limaye return 0; 2325bfefff1SAlexandros Lamprineas } 2335bfefff1SAlexandros Lamprineas 234c2d19002SAlexandros Lamprineas // Even though it doesn't make sense to bind switch and branch instructions 235c2d19002SAlexandros Lamprineas // with a constant, unlike any other instruction type, it prevents estimating 236c2d19002SAlexandros Lamprineas // their bonus multiple times. 237c2d19002SAlexandros Lamprineas KnownConstants.insert({User, C}); 238c2d19002SAlexandros Lamprineas 2395bfefff1SAlexandros Lamprineas CodeSize += TTI.getInstructionCost(User, TargetTransformInfo::TCK_CodeSize); 2404d13896dSAlexandros Lamprineas 2415bfefff1SAlexandros Lamprineas LLVM_DEBUG(dbgs() << "FnSpecialization: {CodeSize = " << CodeSize 242c6931c25SHari Limaye << "} for user " << *User << "\n"); 2434d13896dSAlexandros Lamprineas 2444d13896dSAlexandros Lamprineas for (auto *U : User->users()) 2454d13896dSAlexandros Lamprineas if (auto *UI = dyn_cast<Instruction>(U)) 246893d3a61SAlexandros Lamprineas if (UI != User && isBlockExecutable(UI->getParent())) 247c6931c25SHari Limaye CodeSize += getCodeSizeSavingsForUser(UI, User, C); 2484d13896dSAlexandros Lamprineas 249c6931c25SHari Limaye return CodeSize; 2504d13896dSAlexandros Lamprineas } 2514d13896dSAlexandros Lamprineas 2524d13896dSAlexandros Lamprineas Cost InstCostVisitor::estimateSwitchInst(SwitchInst &I) { 253893d3a61SAlexandros Lamprineas assert(LastVisited != KnownConstants.end() && "Invalid iterator!"); 254893d3a61SAlexandros Lamprineas 2554d13896dSAlexandros Lamprineas if (I.getCondition() != LastVisited->first) 2564d13896dSAlexandros Lamprineas return 0; 2574d13896dSAlexandros Lamprineas 2582fc0d17eSVincent Lee auto *C = dyn_cast<ConstantInt>(LastVisited->second); 2592fc0d17eSVincent Lee if (!C) 2602fc0d17eSVincent Lee return 0; 2612fc0d17eSVincent Lee 2624d13896dSAlexandros Lamprineas BasicBlock *Succ = I.findCaseValue(C)->getCaseSuccessor(); 2634d13896dSAlexandros Lamprineas // Initialize the worklist with the dead basic blocks. These are the 2644d13896dSAlexandros Lamprineas // destination labels which are different from the one corresponding 2654d13896dSAlexandros Lamprineas // to \p C. They should be executable and have a unique predecessor. 2664d13896dSAlexandros Lamprineas SmallVector<BasicBlock *> WorkList; 2674d13896dSAlexandros Lamprineas for (const auto &Case : I.cases()) { 2684d13896dSAlexandros Lamprineas BasicBlock *BB = Case.getCaseSuccessor(); 269c2d19002SAlexandros Lamprineas if (BB != Succ && isBlockExecutable(BB) && 27088e9b373SHari Limaye canEliminateSuccessor(I.getParent(), BB)) 2714d13896dSAlexandros Lamprineas WorkList.push_back(BB); 2724d13896dSAlexandros Lamprineas } 2734d13896dSAlexandros Lamprineas 274c2d19002SAlexandros Lamprineas return estimateBasicBlocks(WorkList); 2754d13896dSAlexandros Lamprineas } 2764d13896dSAlexandros Lamprineas 2774d13896dSAlexandros Lamprineas Cost InstCostVisitor::estimateBranchInst(BranchInst &I) { 278893d3a61SAlexandros Lamprineas assert(LastVisited != KnownConstants.end() && "Invalid iterator!"); 279893d3a61SAlexandros Lamprineas 2804d13896dSAlexandros Lamprineas if (I.getCondition() != LastVisited->first) 2814d13896dSAlexandros Lamprineas return 0; 2824d13896dSAlexandros Lamprineas 2834d13896dSAlexandros Lamprineas BasicBlock *Succ = I.getSuccessor(LastVisited->second->isOneValue()); 2844d13896dSAlexandros Lamprineas // Initialize the worklist with the dead successor as long as 2854d13896dSAlexandros Lamprineas // it is executable and has a unique predecessor. 2864d13896dSAlexandros Lamprineas SmallVector<BasicBlock *> WorkList; 28788e9b373SHari Limaye if (isBlockExecutable(Succ) && canEliminateSuccessor(I.getParent(), Succ)) 2884d13896dSAlexandros Lamprineas WorkList.push_back(Succ); 2894d13896dSAlexandros Lamprineas 290c2d19002SAlexandros Lamprineas return estimateBasicBlocks(WorkList); 291893d3a61SAlexandros Lamprineas } 292893d3a61SAlexandros Lamprineas 2932fb51fbaSMats Petersson bool InstCostVisitor::discoverTransitivelyIncomingValues( 2942fb51fbaSMats Petersson Constant *Const, PHINode *Root, DenseSet<PHINode *> &TransitivePHIs) { 2952fb51fbaSMats Petersson 2962fb51fbaSMats Petersson SmallVector<PHINode *, 64> WorkList; 2972fb51fbaSMats Petersson WorkList.push_back(Root); 2982fb51fbaSMats Petersson unsigned Iter = 0; 2992fb51fbaSMats Petersson 3002fb51fbaSMats Petersson while (!WorkList.empty()) { 3012fb51fbaSMats Petersson PHINode *PN = WorkList.pop_back_val(); 3022fb51fbaSMats Petersson 3032fb51fbaSMats Petersson if (++Iter > MaxDiscoveryIterations || 3042fb51fbaSMats Petersson PN->getNumIncomingValues() > MaxIncomingPhiValues) 3052fb51fbaSMats Petersson return false; 3062fb51fbaSMats Petersson 3072fb51fbaSMats Petersson if (!TransitivePHIs.insert(PN).second) 3082fb51fbaSMats Petersson continue; 3092fb51fbaSMats Petersson 3102fb51fbaSMats Petersson for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) { 3112fb51fbaSMats Petersson Value *V = PN->getIncomingValue(I); 3122fb51fbaSMats Petersson 3132fb51fbaSMats Petersson // Disregard self-references and dead incoming values. 3142fb51fbaSMats Petersson if (auto *Inst = dyn_cast<Instruction>(V)) 31588e9b373SHari Limaye if (Inst == PN || !isBlockExecutable(PN->getIncomingBlock(I))) 3162fb51fbaSMats Petersson continue; 3172fb51fbaSMats Petersson 31888e9b373SHari Limaye if (Constant *C = findConstantFor(V)) { 3192fb51fbaSMats Petersson // Not all incoming values are the same constant. Bail immediately. 3202fb51fbaSMats Petersson if (C != Const) 3212fb51fbaSMats Petersson return false; 3222fb51fbaSMats Petersson continue; 3232fb51fbaSMats Petersson } 3242fb51fbaSMats Petersson 3252fb51fbaSMats Petersson if (auto *Phi = dyn_cast<PHINode>(V)) { 3262fb51fbaSMats Petersson WorkList.push_back(Phi); 3272fb51fbaSMats Petersson continue; 3282fb51fbaSMats Petersson } 3292fb51fbaSMats Petersson 3302fb51fbaSMats Petersson // We can't reason about anything else. 3312fb51fbaSMats Petersson return false; 3322fb51fbaSMats Petersson } 3332fb51fbaSMats Petersson } 3342fb51fbaSMats Petersson return true; 3352fb51fbaSMats Petersson } 3362fb51fbaSMats Petersson 337893d3a61SAlexandros Lamprineas Constant *InstCostVisitor::visitPHINode(PHINode &I) { 338893d3a61SAlexandros Lamprineas if (I.getNumIncomingValues() > MaxIncomingPhiValues) 339893d3a61SAlexandros Lamprineas return nullptr; 340893d3a61SAlexandros Lamprineas 341893d3a61SAlexandros Lamprineas bool Inserted = VisitedPHIs.insert(&I).second; 342893d3a61SAlexandros Lamprineas Constant *Const = nullptr; 3432fb51fbaSMats Petersson bool HaveSeenIncomingPHI = false; 344893d3a61SAlexandros Lamprineas 345893d3a61SAlexandros Lamprineas for (unsigned Idx = 0, E = I.getNumIncomingValues(); Idx != E; ++Idx) { 346893d3a61SAlexandros Lamprineas Value *V = I.getIncomingValue(Idx); 3472fb51fbaSMats Petersson 3482fb51fbaSMats Petersson // Disregard self-references and dead incoming values. 349893d3a61SAlexandros Lamprineas if (auto *Inst = dyn_cast<Instruction>(V)) 35088e9b373SHari Limaye if (Inst == &I || !isBlockExecutable(I.getIncomingBlock(Idx))) 351893d3a61SAlexandros Lamprineas continue; 3522fb51fbaSMats Petersson 35388e9b373SHari Limaye if (Constant *C = findConstantFor(V)) { 3542fb51fbaSMats Petersson if (!Const) 3552fb51fbaSMats Petersson Const = C; 3562fb51fbaSMats Petersson // Not all incoming values are the same constant. Bail immediately. 3572fb51fbaSMats Petersson if (C != Const) 3582fb51fbaSMats Petersson return nullptr; 3592fb51fbaSMats Petersson continue; 3602fb51fbaSMats Petersson } 3612fb51fbaSMats Petersson 3622fb51fbaSMats Petersson if (Inserted) { 3632fb51fbaSMats Petersson // First time we are seeing this phi. We will retry later, after 3642fb51fbaSMats Petersson // all the constant arguments have been propagated. Bail for now. 365893d3a61SAlexandros Lamprineas PendingPHIs.push_back(&I); 366893d3a61SAlexandros Lamprineas return nullptr; 367893d3a61SAlexandros Lamprineas } 3682fb51fbaSMats Petersson 3692fb51fbaSMats Petersson if (isa<PHINode>(V)) { 3702fb51fbaSMats Petersson // Perhaps it is a Transitive Phi. We will confirm later. 3712fb51fbaSMats Petersson HaveSeenIncomingPHI = true; 3722fb51fbaSMats Petersson continue; 3732fb51fbaSMats Petersson } 3742fb51fbaSMats Petersson 3752fb51fbaSMats Petersson // We can't reason about anything else. 376893d3a61SAlexandros Lamprineas return nullptr; 377893d3a61SAlexandros Lamprineas } 3782fb51fbaSMats Petersson 3792fb51fbaSMats Petersson if (!Const) 3802fb51fbaSMats Petersson return nullptr; 3812fb51fbaSMats Petersson 3822fb51fbaSMats Petersson if (!HaveSeenIncomingPHI) 3832fb51fbaSMats Petersson return Const; 3842fb51fbaSMats Petersson 3852fb51fbaSMats Petersson DenseSet<PHINode *> TransitivePHIs; 3862fb51fbaSMats Petersson if (!discoverTransitivelyIncomingValues(Const, &I, TransitivePHIs)) 3872fb51fbaSMats Petersson return nullptr; 3882fb51fbaSMats Petersson 389893d3a61SAlexandros Lamprineas return Const; 3904d13896dSAlexandros Lamprineas } 3914d13896dSAlexandros Lamprineas 3925400257dSAlexandros Lamprineas Constant *InstCostVisitor::visitFreezeInst(FreezeInst &I) { 393893d3a61SAlexandros Lamprineas assert(LastVisited != KnownConstants.end() && "Invalid iterator!"); 394893d3a61SAlexandros Lamprineas 3955400257dSAlexandros Lamprineas if (isGuaranteedNotToBeUndefOrPoison(LastVisited->second)) 3965400257dSAlexandros Lamprineas return LastVisited->second; 3975400257dSAlexandros Lamprineas return nullptr; 3985400257dSAlexandros Lamprineas } 3995400257dSAlexandros Lamprineas 4005400257dSAlexandros Lamprineas Constant *InstCostVisitor::visitCallBase(CallBase &I) { 401daa9af17SHari Limaye assert(LastVisited != KnownConstants.end() && "Invalid iterator!"); 402daa9af17SHari Limaye 403daa9af17SHari Limaye // Look through calls to ssa_copy intrinsics. 404daa9af17SHari Limaye if (auto *II = dyn_cast<IntrinsicInst>(&I); 405daa9af17SHari Limaye II && II->getIntrinsicID() == Intrinsic::ssa_copy) { 406daa9af17SHari Limaye return LastVisited->second; 407daa9af17SHari Limaye } 408daa9af17SHari Limaye 4095400257dSAlexandros Lamprineas Function *F = I.getCalledFunction(); 4105400257dSAlexandros Lamprineas if (!F || !canConstantFoldCallTo(&I, F)) 4115400257dSAlexandros Lamprineas return nullptr; 4125400257dSAlexandros Lamprineas 4135400257dSAlexandros Lamprineas SmallVector<Constant *, 8> Operands; 4145400257dSAlexandros Lamprineas Operands.reserve(I.getNumOperands()); 4155400257dSAlexandros Lamprineas 4165400257dSAlexandros Lamprineas for (unsigned Idx = 0, E = I.getNumOperands() - 1; Idx != E; ++Idx) { 4175400257dSAlexandros Lamprineas Value *V = I.getOperand(Idx); 418*52bffdf9SDavid Green if (isa<MetadataAsValue>(V)) 419*52bffdf9SDavid Green return nullptr; 42088e9b373SHari Limaye Constant *C = findConstantFor(V); 4215400257dSAlexandros Lamprineas if (!C) 4225400257dSAlexandros Lamprineas return nullptr; 4235400257dSAlexandros Lamprineas Operands.push_back(C); 4245400257dSAlexandros Lamprineas } 4255400257dSAlexandros Lamprineas 4265400257dSAlexandros Lamprineas auto Ops = ArrayRef(Operands.begin(), Operands.end()); 4275400257dSAlexandros Lamprineas return ConstantFoldCall(&I, F, Ops); 4285400257dSAlexandros Lamprineas } 4295400257dSAlexandros Lamprineas 4304d13896dSAlexandros Lamprineas Constant *InstCostVisitor::visitLoadInst(LoadInst &I) { 431893d3a61SAlexandros Lamprineas assert(LastVisited != KnownConstants.end() && "Invalid iterator!"); 432893d3a61SAlexandros Lamprineas 4334d13896dSAlexandros Lamprineas if (isa<ConstantPointerNull>(LastVisited->second)) 4344d13896dSAlexandros Lamprineas return nullptr; 4354d13896dSAlexandros Lamprineas return ConstantFoldLoadFromConstPtr(LastVisited->second, I.getType(), DL); 4364d13896dSAlexandros Lamprineas } 4374d13896dSAlexandros Lamprineas 4384d13896dSAlexandros Lamprineas Constant *InstCostVisitor::visitGetElementPtrInst(GetElementPtrInst &I) { 4391d0476cbSAlexandros Lamprineas SmallVector<Constant *, 8> Operands; 4404d13896dSAlexandros Lamprineas Operands.reserve(I.getNumOperands()); 4414d13896dSAlexandros Lamprineas 4424d13896dSAlexandros Lamprineas for (unsigned Idx = 0, E = I.getNumOperands(); Idx != E; ++Idx) { 4434d13896dSAlexandros Lamprineas Value *V = I.getOperand(Idx); 44488e9b373SHari Limaye Constant *C = findConstantFor(V); 4454d13896dSAlexandros Lamprineas if (!C) 4464d13896dSAlexandros Lamprineas return nullptr; 4474d13896dSAlexandros Lamprineas Operands.push_back(C); 4484d13896dSAlexandros Lamprineas } 4494d13896dSAlexandros Lamprineas 4501d0476cbSAlexandros Lamprineas auto Ops = ArrayRef(Operands.begin(), Operands.end()); 4511d0476cbSAlexandros Lamprineas return ConstantFoldInstOperands(&I, Ops, DL); 4524d13896dSAlexandros Lamprineas } 4534d13896dSAlexandros Lamprineas 4544d13896dSAlexandros Lamprineas Constant *InstCostVisitor::visitSelectInst(SelectInst &I) { 455893d3a61SAlexandros Lamprineas assert(LastVisited != KnownConstants.end() && "Invalid iterator!"); 456893d3a61SAlexandros Lamprineas 4576472cb1eSAlexandros Lamprineas if (I.getCondition() == LastVisited->first) { 4584d13896dSAlexandros Lamprineas Value *V = LastVisited->second->isZeroValue() ? I.getFalseValue() 4594d13896dSAlexandros Lamprineas : I.getTrueValue(); 46088e9b373SHari Limaye return findConstantFor(V); 4616472cb1eSAlexandros Lamprineas } 46288e9b373SHari Limaye if (Constant *Condition = findConstantFor(I.getCondition())) 4636472cb1eSAlexandros Lamprineas if ((I.getTrueValue() == LastVisited->first && Condition->isOneValue()) || 4646472cb1eSAlexandros Lamprineas (I.getFalseValue() == LastVisited->first && Condition->isZeroValue())) 4656472cb1eSAlexandros Lamprineas return LastVisited->second; 4666472cb1eSAlexandros Lamprineas return nullptr; 4674d13896dSAlexandros Lamprineas } 4684d13896dSAlexandros Lamprineas 4694d13896dSAlexandros Lamprineas Constant *InstCostVisitor::visitCastInst(CastInst &I) { 4704d13896dSAlexandros Lamprineas return ConstantFoldCastOperand(I.getOpcode(), LastVisited->second, 4714d13896dSAlexandros Lamprineas I.getType(), DL); 4724d13896dSAlexandros Lamprineas } 4734d13896dSAlexandros Lamprineas 4744d13896dSAlexandros Lamprineas Constant *InstCostVisitor::visitCmpInst(CmpInst &I) { 475893d3a61SAlexandros Lamprineas assert(LastVisited != KnownConstants.end() && "Invalid iterator!"); 476893d3a61SAlexandros Lamprineas 4774d13896dSAlexandros Lamprineas Constant *Const = LastVisited->second; 4785ed3f463SHari Limaye bool ConstOnRHS = I.getOperand(1) == LastVisited->first; 4795ed3f463SHari Limaye Value *V = ConstOnRHS ? I.getOperand(0) : I.getOperand(1); 48088e9b373SHari Limaye Constant *Other = findConstantFor(V); 4815ed3f463SHari Limaye 4825ed3f463SHari Limaye if (Other) { 4835ed3f463SHari Limaye if (ConstOnRHS) 4845ed3f463SHari Limaye std::swap(Const, Other); 4855ed3f463SHari Limaye return ConstantFoldCompareInstOperands(I.getPredicate(), Const, Other, DL); 4865ed3f463SHari Limaye } 4875ed3f463SHari Limaye 4885ed3f463SHari Limaye // If we haven't found Other to be a specific constant value, we may still be 4895ed3f463SHari Limaye // able to constant fold using information from the lattice value. 4905ed3f463SHari Limaye const ValueLatticeElement &ConstLV = ValueLatticeElement::get(Const); 4915ed3f463SHari Limaye const ValueLatticeElement &OtherLV = Solver.getLatticeValueFor(V); 4925ed3f463SHari Limaye auto &V1State = ConstOnRHS ? OtherLV : ConstLV; 4935ed3f463SHari Limaye auto &V2State = ConstOnRHS ? ConstLV : OtherLV; 4945ed3f463SHari Limaye return V1State.getCompare(I.getPredicate(), I.getType(), V2State, DL); 4954d13896dSAlexandros Lamprineas } 4964d13896dSAlexandros Lamprineas 4974d13896dSAlexandros Lamprineas Constant *InstCostVisitor::visitUnaryOperator(UnaryOperator &I) { 498893d3a61SAlexandros Lamprineas assert(LastVisited != KnownConstants.end() && "Invalid iterator!"); 499893d3a61SAlexandros Lamprineas 5004d13896dSAlexandros Lamprineas return ConstantFoldUnaryOpOperand(I.getOpcode(), LastVisited->second, DL); 5014d13896dSAlexandros Lamprineas } 5024d13896dSAlexandros Lamprineas 5034d13896dSAlexandros Lamprineas Constant *InstCostVisitor::visitBinaryOperator(BinaryOperator &I) { 504893d3a61SAlexandros Lamprineas assert(LastVisited != KnownConstants.end() && "Invalid iterator!"); 505893d3a61SAlexandros Lamprineas 5065f30b1aaSHari Limaye bool ConstOnRHS = I.getOperand(1) == LastVisited->first; 5075f30b1aaSHari Limaye Value *V = ConstOnRHS ? I.getOperand(0) : I.getOperand(1); 50888e9b373SHari Limaye Constant *Other = findConstantFor(V); 5095f30b1aaSHari Limaye Value *OtherVal = Other ? Other : V; 5105f30b1aaSHari Limaye Value *ConstVal = LastVisited->second; 5114d13896dSAlexandros Lamprineas 5125f30b1aaSHari Limaye if (ConstOnRHS) 5135f30b1aaSHari Limaye std::swap(ConstVal, OtherVal); 5145f30b1aaSHari Limaye 5155f30b1aaSHari Limaye return dyn_cast_or_null<Constant>( 5165f30b1aaSHari Limaye simplifyBinOp(I.getOpcode(), ConstVal, OtherVal, SimplifyQuery(DL))); 5174d13896dSAlexandros Lamprineas } 5184d13896dSAlexandros Lamprineas 5198136a017SAlexandros Lamprineas Constant *FunctionSpecializer::getPromotableAlloca(AllocaInst *Alloca, 5208136a017SAlexandros Lamprineas CallInst *Call) { 52130fbb069SSjoerd Meijer Value *StoreValue = nullptr; 52230fbb069SSjoerd Meijer for (auto *User : Alloca->users()) { 52330fbb069SSjoerd Meijer // We can't use llvm::isAllocaPromotable() as that would fail because of 52430fbb069SSjoerd Meijer // the usage in the CallInst, which is what we check here. 52530fbb069SSjoerd Meijer if (User == Call) 52630fbb069SSjoerd Meijer continue; 52730fbb069SSjoerd Meijer 52830fbb069SSjoerd Meijer if (auto *Store = dyn_cast<StoreInst>(User)) { 52930fbb069SSjoerd Meijer // This is a duplicate store, bail out. 53030fbb069SSjoerd Meijer if (StoreValue || Store->isVolatile()) 53130fbb069SSjoerd Meijer return nullptr; 53230fbb069SSjoerd Meijer StoreValue = Store->getValueOperand(); 53330fbb069SSjoerd Meijer continue; 53430fbb069SSjoerd Meijer } 53530fbb069SSjoerd Meijer // Bail if there is any other unknown usage. 53630fbb069SSjoerd Meijer return nullptr; 53730fbb069SSjoerd Meijer } 538dfb98d8eSJonathon Penix 539dfb98d8eSJonathon Penix if (!StoreValue) 540dfb98d8eSJonathon Penix return nullptr; 541dfb98d8eSJonathon Penix 5428136a017SAlexandros Lamprineas return getCandidateConstant(StoreValue); 54330fbb069SSjoerd Meijer } 54430fbb069SSjoerd Meijer 54530fbb069SSjoerd Meijer // A constant stack value is an AllocaInst that has a single constant 54630fbb069SSjoerd Meijer // value stored to it. Return this constant if such an alloca stack value 54730fbb069SSjoerd Meijer // is a function argument. 5488136a017SAlexandros Lamprineas Constant *FunctionSpecializer::getConstantStackValue(CallInst *Call, 5498136a017SAlexandros Lamprineas Value *Val) { 55030fbb069SSjoerd Meijer if (!Val) 55130fbb069SSjoerd Meijer return nullptr; 55230fbb069SSjoerd Meijer Val = Val->stripPointerCasts(); 55330fbb069SSjoerd Meijer if (auto *ConstVal = dyn_cast<ConstantInt>(Val)) 55430fbb069SSjoerd Meijer return ConstVal; 55530fbb069SSjoerd Meijer auto *Alloca = dyn_cast<AllocaInst>(Val); 55630fbb069SSjoerd Meijer if (!Alloca || !Alloca->getAllocatedType()->isIntegerTy()) 55730fbb069SSjoerd Meijer return nullptr; 55830fbb069SSjoerd Meijer return getPromotableAlloca(Alloca, Call); 55930fbb069SSjoerd Meijer } 56030fbb069SSjoerd Meijer 56130fbb069SSjoerd Meijer // To support specializing recursive functions, it is important to propagate 56230fbb069SSjoerd Meijer // constant arguments because after a first iteration of specialisation, a 56330fbb069SSjoerd Meijer // reduced example may look like this: 56430fbb069SSjoerd Meijer // 56530fbb069SSjoerd Meijer // define internal void @RecursiveFn(i32* arg1) { 56630fbb069SSjoerd Meijer // %temp = alloca i32, align 4 56730fbb069SSjoerd Meijer // store i32 2 i32* %temp, align 4 56830fbb069SSjoerd Meijer // call void @RecursiveFn.1(i32* nonnull %temp) 56930fbb069SSjoerd Meijer // ret void 57030fbb069SSjoerd Meijer // } 57130fbb069SSjoerd Meijer // 57230fbb069SSjoerd Meijer // Before a next iteration, we need to propagate the constant like so 57330fbb069SSjoerd Meijer // which allows further specialization in next iterations. 57430fbb069SSjoerd Meijer // 57530fbb069SSjoerd Meijer // @funcspec.arg = internal constant i32 2 57630fbb069SSjoerd Meijer // 57730fbb069SSjoerd Meijer // define internal void @someFunc(i32* arg1) { 57830fbb069SSjoerd Meijer // call void @otherFunc(i32* nonnull @funcspec.arg) 57930fbb069SSjoerd Meijer // ret void 58030fbb069SSjoerd Meijer // } 58130fbb069SSjoerd Meijer // 582ce9d3f09SAlexandros Lamprineas // See if there are any new constant values for the callers of \p F via 583ce9d3f09SAlexandros Lamprineas // stack variables and promote them to global variables. 584ce9d3f09SAlexandros Lamprineas void FunctionSpecializer::promoteConstantStackValues(Function *F) { 585ce9d3f09SAlexandros Lamprineas for (User *U : F->users()) { 58630fbb069SSjoerd Meijer 587ce9d3f09SAlexandros Lamprineas auto *Call = dyn_cast<CallInst>(U); 58830fbb069SSjoerd Meijer if (!Call) 589b4417075SAlexandros Lamprineas continue; 590b4417075SAlexandros Lamprineas 5918136a017SAlexandros Lamprineas if (!Solver.isBlockExecutable(Call->getParent())) 5928136a017SAlexandros Lamprineas continue; 5938136a017SAlexandros Lamprineas 594b4417075SAlexandros Lamprineas for (const Use &U : Call->args()) { 595b4417075SAlexandros Lamprineas unsigned Idx = Call->getArgOperandNo(&U); 596b4417075SAlexandros Lamprineas Value *ArgOp = Call->getArgOperand(Idx); 597b4417075SAlexandros Lamprineas Type *ArgOpType = ArgOp->getType(); 598b4417075SAlexandros Lamprineas 599b4417075SAlexandros Lamprineas if (!Call->onlyReadsMemory(Idx) || !ArgOpType->isPointerTy()) 600b4417075SAlexandros Lamprineas continue; 601b4417075SAlexandros Lamprineas 6028136a017SAlexandros Lamprineas auto *ConstVal = getConstantStackValue(Call, ArgOp); 60330fbb069SSjoerd Meijer if (!ConstVal) 604b4417075SAlexandros Lamprineas continue; 60530fbb069SSjoerd Meijer 60630fbb069SSjoerd Meijer Value *GV = new GlobalVariable(M, ConstVal->getType(), true, 60730fbb069SSjoerd Meijer GlobalValue::InternalLinkage, ConstVal, 608e15d72adSAlexandros Lamprineas "specialized.arg." + Twine(++NGlobals)); 609b4417075SAlexandros Lamprineas Call->setArgOperand(Idx, GV); 61030fbb069SSjoerd Meijer } 61130fbb069SSjoerd Meijer } 61230fbb069SSjoerd Meijer } 61330fbb069SSjoerd Meijer 61430fbb069SSjoerd Meijer // ssa_copy intrinsics are introduced by the SCCP solver. These intrinsics 6158136a017SAlexandros Lamprineas // interfere with the promoteConstantStackValues() optimization. 61630fbb069SSjoerd Meijer static void removeSSACopy(Function &F) { 61730fbb069SSjoerd Meijer for (BasicBlock &BB : F) { 618d9e46beaSKazu Hirata for (Instruction &Inst : llvm::make_early_inc_range(BB)) { 619d9e46beaSKazu Hirata auto *II = dyn_cast<IntrinsicInst>(&Inst); 62030fbb069SSjoerd Meijer if (!II) 62130fbb069SSjoerd Meijer continue; 62230fbb069SSjoerd Meijer if (II->getIntrinsicID() != Intrinsic::ssa_copy) 62330fbb069SSjoerd Meijer continue; 624d9e46beaSKazu Hirata Inst.replaceAllUsesWith(II->getOperand(0)); 625d9e46beaSKazu Hirata Inst.eraseFromParent(); 62630fbb069SSjoerd Meijer } 62730fbb069SSjoerd Meijer } 62830fbb069SSjoerd Meijer } 62930fbb069SSjoerd Meijer 6308136a017SAlexandros Lamprineas /// Remove any ssa_copy intrinsics that may have been introduced. 6318136a017SAlexandros Lamprineas void FunctionSpecializer::cleanUpSSA() { 63267fde2b9SAlexandros Lamprineas for (Function *F : Specializations) 6338136a017SAlexandros Lamprineas removeSSACopy(*F); 63433830326SAlexandros Lamprineas } 63533830326SAlexandros Lamprineas 636e6b9fc4cSMomchil Velikov 637e6b9fc4cSMomchil Velikov template <> struct llvm::DenseMapInfo<SpecSig> { 638e6b9fc4cSMomchil Velikov static inline SpecSig getEmptyKey() { return {~0U, {}}; } 639e6b9fc4cSMomchil Velikov 640e6b9fc4cSMomchil Velikov static inline SpecSig getTombstoneKey() { return {~1U, {}}; } 641e6b9fc4cSMomchil Velikov 642e6b9fc4cSMomchil Velikov static unsigned getHashValue(const SpecSig &S) { 643e6b9fc4cSMomchil Velikov return static_cast<unsigned>(hash_value(S)); 644e6b9fc4cSMomchil Velikov } 645e6b9fc4cSMomchil Velikov 646e6b9fc4cSMomchil Velikov static bool isEqual(const SpecSig &LHS, const SpecSig &RHS) { 647e6b9fc4cSMomchil Velikov return LHS == RHS; 648e6b9fc4cSMomchil Velikov } 649e6b9fc4cSMomchil Velikov }; 650e6b9fc4cSMomchil Velikov 65167fde2b9SAlexandros Lamprineas FunctionSpecializer::~FunctionSpecializer() { 65267fde2b9SAlexandros Lamprineas LLVM_DEBUG( 65367fde2b9SAlexandros Lamprineas if (NumSpecsCreated > 0) 65467fde2b9SAlexandros Lamprineas dbgs() << "FnSpecialization: Created " << NumSpecsCreated 65567fde2b9SAlexandros Lamprineas << " specializations in module " << M.getName() << "\n"); 65667fde2b9SAlexandros Lamprineas // Eliminate dead code. 65767fde2b9SAlexandros Lamprineas removeDeadFunctions(); 65867fde2b9SAlexandros Lamprineas cleanUpSSA(); 65967fde2b9SAlexandros Lamprineas } 66067fde2b9SAlexandros Lamprineas 661e19a5fc6SHari Limaye /// Get the unsigned Value of given Cost object. Assumes the Cost is always 662e19a5fc6SHari Limaye /// non-negative, which is true for both TCK_CodeSize and TCK_Latency, and 663e19a5fc6SHari Limaye /// always Valid. 664e19a5fc6SHari Limaye static unsigned getCostValue(const Cost &C) { 665e19a5fc6SHari Limaye int64_t Value = *C.getValue(); 666e19a5fc6SHari Limaye 667e19a5fc6SHari Limaye assert(Value >= 0 && "CodeSize and Latency cannot be negative"); 668e19a5fc6SHari Limaye // It is safe to down cast since we know the arguments cannot be negative and 669e19a5fc6SHari Limaye // Cost is of type int64_t. 670e19a5fc6SHari Limaye return static_cast<unsigned>(Value); 671e19a5fc6SHari Limaye } 672e19a5fc6SHari Limaye 6739907746fSSjoerd Meijer /// Attempt to specialize functions in the module to enable constant 6749907746fSSjoerd Meijer /// propagation across function boundaries. 6759907746fSSjoerd Meijer /// 6769907746fSSjoerd Meijer /// \returns true if at least one function is specialized. 6778136a017SAlexandros Lamprineas bool FunctionSpecializer::run() { 678e6b9fc4cSMomchil Velikov // Find possible specializations for each function. 679e6b9fc4cSMomchil Velikov SpecMap SM; 680e6b9fc4cSMomchil Velikov SmallVector<Spec, 32> AllSpecs; 681e6b9fc4cSMomchil Velikov unsigned NumCandidates = 0; 6828136a017SAlexandros Lamprineas for (Function &F : M) { 6838136a017SAlexandros Lamprineas if (!isCandidateFunction(&F)) 68489bcfd16SSjoerd Meijer continue; 68589bcfd16SSjoerd Meijer 686ce9d3f09SAlexandros Lamprineas auto [It, Inserted] = FunctionMetrics.try_emplace(&F); 687ce9d3f09SAlexandros Lamprineas CodeMetrics &Metrics = It->second; 688ce9d3f09SAlexandros Lamprineas //Analyze the function. 689ce9d3f09SAlexandros Lamprineas if (Inserted) { 690ce9d3f09SAlexandros Lamprineas SmallPtrSet<const Value *, 32> EphValues; 691ce9d3f09SAlexandros Lamprineas CodeMetrics::collectEphemeralValues(&F, &GetAC(F), EphValues); 692ce9d3f09SAlexandros Lamprineas for (BasicBlock &BB : F) 693ce9d3f09SAlexandros Lamprineas Metrics.analyzeBasicBlock(&BB, GetTTI(F), EphValues); 69489bcfd16SSjoerd Meijer } 69589bcfd16SSjoerd Meijer 6960d1a91e8SHari Limaye // When specializing literal constants is enabled, always require functions 6970d1a91e8SHari Limaye // to be larger than MinFunctionSize, to prevent excessive specialization. 6980d1a91e8SHari Limaye const bool RequireMinSize = 6990d1a91e8SHari Limaye !ForceSpecialization && 7000d1a91e8SHari Limaye (SpecializeLiteralConstant || !F.hasFnAttribute(Attribute::NoInline)); 7010d1a91e8SHari Limaye 702ce9d3f09SAlexandros Lamprineas // If the code metrics reveal that we shouldn't duplicate the function, 703ce9d3f09SAlexandros Lamprineas // or if the code size implies that this function is easy to get inlined, 704ce9d3f09SAlexandros Lamprineas // then we shouldn't specialize it. 705ce9d3f09SAlexandros Lamprineas if (Metrics.notDuplicatable || !Metrics.NumInsts.isValid() || 7060d1a91e8SHari Limaye (RequireMinSize && Metrics.NumInsts < MinFunctionSize)) 707ce9d3f09SAlexandros Lamprineas continue; 708b803aee6SAlexandros Lamprineas 70906664fdcSHari Limaye // When specialization on literal constants is disabled, only consider 71006664fdcSHari Limaye // recursive functions when running multiple times to save wasted analysis, 71106664fdcSHari Limaye // as we will not be able to specialize on any newly found literal constant 71206664fdcSHari Limaye // return values. 71306664fdcSHari Limaye if (!SpecializeLiteralConstant && !Inserted && !Metrics.isRecursive) 714ce9d3f09SAlexandros Lamprineas continue; 715ce9d3f09SAlexandros Lamprineas 7165bfefff1SAlexandros Lamprineas int64_t Sz = *Metrics.NumInsts.getValue(); 7175bfefff1SAlexandros Lamprineas assert(Sz > 0 && "CodeSize should be positive"); 7185bfefff1SAlexandros Lamprineas // It is safe to down cast from int64_t, NumInsts is always positive. 719d1b376fdSAlexandros Lamprineas unsigned FuncSize = static_cast<unsigned>(Sz); 7205bfefff1SAlexandros Lamprineas 721ce9d3f09SAlexandros Lamprineas LLVM_DEBUG(dbgs() << "FnSpecialization: Specialization cost for " 722d1b376fdSAlexandros Lamprineas << F.getName() << " is " << FuncSize << "\n"); 723ce9d3f09SAlexandros Lamprineas 724ce9d3f09SAlexandros Lamprineas if (Inserted && Metrics.isRecursive) 725ce9d3f09SAlexandros Lamprineas promoteConstantStackValues(&F); 726ce9d3f09SAlexandros Lamprineas 727d1b376fdSAlexandros Lamprineas if (!findSpecializations(&F, FuncSize, AllSpecs, SM)) { 72814384c96SMomchil Velikov LLVM_DEBUG( 729e6b9fc4cSMomchil Velikov dbgs() << "FnSpecialization: No possible specializations found for " 730e6b9fc4cSMomchil Velikov << F.getName() << "\n"); 73189bcfd16SSjoerd Meijer continue; 73289bcfd16SSjoerd Meijer } 73389bcfd16SSjoerd Meijer 734e6b9fc4cSMomchil Velikov ++NumCandidates; 735e6b9fc4cSMomchil Velikov } 7368136a017SAlexandros Lamprineas 737e6b9fc4cSMomchil Velikov if (!NumCandidates) { 738e6b9fc4cSMomchil Velikov LLVM_DEBUG( 739e6b9fc4cSMomchil Velikov dbgs() 740e6b9fc4cSMomchil Velikov << "FnSpecialization: No possible specializations found in module\n"); 741e6b9fc4cSMomchil Velikov return false; 742e6b9fc4cSMomchil Velikov } 743e6b9fc4cSMomchil Velikov 744e6b9fc4cSMomchil Velikov // Choose the most profitable specialisations, which fit in the module 745e6b9fc4cSMomchil Velikov // specialization budget, which is derived from maximum number of 746e6b9fc4cSMomchil Velikov // specializations per specialization candidate function. 74793ac2dbeSAlexandros Lamprineas auto CompareScore = [&AllSpecs](unsigned I, unsigned J) { 748575e68e5SHans Wennborg if (AllSpecs[I].Score != AllSpecs[J].Score) 74993ac2dbeSAlexandros Lamprineas return AllSpecs[I].Score > AllSpecs[J].Score; 750575e68e5SHans Wennborg return I > J; 751e6b9fc4cSMomchil Velikov }; 752e6b9fc4cSMomchil Velikov const unsigned NSpecs = 7539627bcdeSAlexandros Lamprineas std::min(NumCandidates * MaxClones, unsigned(AllSpecs.size())); 754e6b9fc4cSMomchil Velikov SmallVector<unsigned> BestSpecs(NSpecs + 1); 755e6b9fc4cSMomchil Velikov std::iota(BestSpecs.begin(), BestSpecs.begin() + NSpecs, 0); 756e6b9fc4cSMomchil Velikov if (AllSpecs.size() > NSpecs) { 757e6b9fc4cSMomchil Velikov LLVM_DEBUG(dbgs() << "FnSpecialization: Number of candidates exceed " 758e6b9fc4cSMomchil Velikov << "the maximum number of clones threshold.\n" 759e6b9fc4cSMomchil Velikov << "FnSpecialization: Specializing the " 760e6b9fc4cSMomchil Velikov << NSpecs 761e6b9fc4cSMomchil Velikov << " most profitable candidates.\n"); 76293ac2dbeSAlexandros Lamprineas std::make_heap(BestSpecs.begin(), BestSpecs.begin() + NSpecs, CompareScore); 763e6b9fc4cSMomchil Velikov for (unsigned I = NSpecs, N = AllSpecs.size(); I < N; ++I) { 764e6b9fc4cSMomchil Velikov BestSpecs[NSpecs] = I; 76593ac2dbeSAlexandros Lamprineas std::push_heap(BestSpecs.begin(), BestSpecs.end(), CompareScore); 76693ac2dbeSAlexandros Lamprineas std::pop_heap(BestSpecs.begin(), BestSpecs.end(), CompareScore); 767e6b9fc4cSMomchil Velikov } 768e6b9fc4cSMomchil Velikov } 769e6b9fc4cSMomchil Velikov 770e6b9fc4cSMomchil Velikov LLVM_DEBUG(dbgs() << "FnSpecialization: List of specializations \n"; 771e6b9fc4cSMomchil Velikov for (unsigned I = 0; I < NSpecs; ++I) { 772e6b9fc4cSMomchil Velikov const Spec &S = AllSpecs[BestSpecs[I]]; 773e6b9fc4cSMomchil Velikov dbgs() << "FnSpecialization: Function " << S.F->getName() 77493ac2dbeSAlexandros Lamprineas << " , score " << S.Score << "\n"; 775e6b9fc4cSMomchil Velikov for (const ArgInfo &Arg : S.Sig.Args) 776e6b9fc4cSMomchil Velikov dbgs() << "FnSpecialization: FormalArg = " 777e6b9fc4cSMomchil Velikov << Arg.Formal->getNameOrAsOperand() 778e6b9fc4cSMomchil Velikov << ", ActualArg = " << Arg.Actual->getNameOrAsOperand() 779e6b9fc4cSMomchil Velikov << "\n"; 780e6b9fc4cSMomchil Velikov }); 781e6b9fc4cSMomchil Velikov 782e6b9fc4cSMomchil Velikov // Create the chosen specializations. 783e6b9fc4cSMomchil Velikov SmallPtrSet<Function *, 8> OriginalFuncs; 784e6b9fc4cSMomchil Velikov SmallVector<Function *> Clones; 785e6b9fc4cSMomchil Velikov for (unsigned I = 0; I < NSpecs; ++I) { 786e6b9fc4cSMomchil Velikov Spec &S = AllSpecs[BestSpecs[I]]; 787e19a5fc6SHari Limaye 788e19a5fc6SHari Limaye // Accumulate the codesize growth for the function, now we are creating the 789e19a5fc6SHari Limaye // specialization. 790e19a5fc6SHari Limaye FunctionGrowth[S.F] += S.CodeSize; 791e19a5fc6SHari Limaye 792e6b9fc4cSMomchil Velikov S.Clone = createSpecialization(S.F, S.Sig); 793e6b9fc4cSMomchil Velikov 794e6b9fc4cSMomchil Velikov // Update the known call sites to call the clone. 795e6b9fc4cSMomchil Velikov for (CallBase *Call : S.CallSites) { 796e6b9fc4cSMomchil Velikov LLVM_DEBUG(dbgs() << "FnSpecialization: Redirecting " << *Call 797e6b9fc4cSMomchil Velikov << " to call " << S.Clone->getName() << "\n"); 798e6b9fc4cSMomchil Velikov Call->setCalledFunction(S.Clone); 799e6b9fc4cSMomchil Velikov } 800e6b9fc4cSMomchil Velikov 801e6b9fc4cSMomchil Velikov Clones.push_back(S.Clone); 802e6b9fc4cSMomchil Velikov OriginalFuncs.insert(S.F); 803e6b9fc4cSMomchil Velikov } 8048136a017SAlexandros Lamprineas 8058136a017SAlexandros Lamprineas Solver.solveWhileResolvedUndefsIn(Clones); 806e6b9fc4cSMomchil Velikov 807e6b9fc4cSMomchil Velikov // Update the rest of the call sites - these are the recursive calls, calls 808e6b9fc4cSMomchil Velikov // to discarded specialisations and calls that may match a specialisation 809e6b9fc4cSMomchil Velikov // after the solver runs. 810e6b9fc4cSMomchil Velikov for (Function *F : OriginalFuncs) { 811e6b9fc4cSMomchil Velikov auto [Begin, End] = SM[F]; 812e6b9fc4cSMomchil Velikov updateCallSites(F, AllSpecs.begin() + Begin, AllSpecs.begin() + End); 8139907746fSSjoerd Meijer } 8149907746fSSjoerd Meijer 81554e5fb78SAlexandros Lamprineas for (Function *F : Clones) { 81654e5fb78SAlexandros Lamprineas if (F->getReturnType()->isVoidTy()) 81754e5fb78SAlexandros Lamprineas continue; 81854e5fb78SAlexandros Lamprineas if (F->getReturnType()->isStructTy()) { 81954e5fb78SAlexandros Lamprineas auto *STy = cast<StructType>(F->getReturnType()); 82054e5fb78SAlexandros Lamprineas if (!Solver.isStructLatticeConstant(F, STy)) 82154e5fb78SAlexandros Lamprineas continue; 82254e5fb78SAlexandros Lamprineas } else { 82354e5fb78SAlexandros Lamprineas auto It = Solver.getTrackedRetVals().find(F); 82454e5fb78SAlexandros Lamprineas assert(It != Solver.getTrackedRetVals().end() && 82554e5fb78SAlexandros Lamprineas "Return value ought to be tracked"); 82654e5fb78SAlexandros Lamprineas if (SCCPSolver::isOverdefined(It->second)) 82754e5fb78SAlexandros Lamprineas continue; 82854e5fb78SAlexandros Lamprineas } 82954e5fb78SAlexandros Lamprineas for (User *U : F->users()) { 83054e5fb78SAlexandros Lamprineas if (auto *CS = dyn_cast<CallBase>(U)) { 83154e5fb78SAlexandros Lamprineas //The user instruction does not call our function. 83254e5fb78SAlexandros Lamprineas if (CS->getCalledFunction() != F) 83354e5fb78SAlexandros Lamprineas continue; 83454e5fb78SAlexandros Lamprineas Solver.resetLatticeValueFor(CS); 83554e5fb78SAlexandros Lamprineas } 83654e5fb78SAlexandros Lamprineas } 83754e5fb78SAlexandros Lamprineas } 83854e5fb78SAlexandros Lamprineas 83954e5fb78SAlexandros Lamprineas // Rerun the solver to notify the users of the modified callsites. 84054e5fb78SAlexandros Lamprineas Solver.solveWhileResolvedUndefs(); 84154e5fb78SAlexandros Lamprineas 842ce9d3f09SAlexandros Lamprineas for (Function *F : OriginalFuncs) 843ce9d3f09SAlexandros Lamprineas if (FunctionMetrics[F].isRecursive) 844ce9d3f09SAlexandros Lamprineas promoteConstantStackValues(F); 845ce9d3f09SAlexandros Lamprineas 846e6b9fc4cSMomchil Velikov return true; 8479907746fSSjoerd Meijer } 8489907746fSSjoerd Meijer 8498136a017SAlexandros Lamprineas void FunctionSpecializer::removeDeadFunctions() { 8508136a017SAlexandros Lamprineas for (Function *F : FullySpecialized) { 85133830326SAlexandros Lamprineas LLVM_DEBUG(dbgs() << "FnSpecialization: Removing dead function " 85233830326SAlexandros Lamprineas << F->getName() << "\n"); 853e7ed43c7SMomchil Velikov if (FAM) 854e7ed43c7SMomchil Velikov FAM->clear(*F, F->getName()); 85533830326SAlexandros Lamprineas F->eraseFromParent(); 85633830326SAlexandros Lamprineas } 85733830326SAlexandros Lamprineas FullySpecialized.clear(); 85833830326SAlexandros Lamprineas } 85933830326SAlexandros Lamprineas 86030fbb069SSjoerd Meijer /// Clone the function \p F and remove the ssa_copy intrinsics added by 86130fbb069SSjoerd Meijer /// the SCCPSolver in the cloned version. 862e15d72adSAlexandros Lamprineas static Function *cloneCandidateFunction(Function *F, unsigned NSpecs) { 8638136a017SAlexandros Lamprineas ValueToValueMapTy Mappings; 864910eb988SAlexandros Lamprineas Function *Clone = CloneFunction(F, Mappings); 865e15d72adSAlexandros Lamprineas Clone->setName(F->getName() + ".specialized." + Twine(NSpecs)); 86630fbb069SSjoerd Meijer removeSSACopy(*Clone); 86730fbb069SSjoerd Meijer return Clone; 86830fbb069SSjoerd Meijer } 86930fbb069SSjoerd Meijer 870d1b376fdSAlexandros Lamprineas bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize, 871e6b9fc4cSMomchil Velikov SmallVectorImpl<Spec> &AllSpecs, 872e6b9fc4cSMomchil Velikov SpecMap &SM) { 873e6b9fc4cSMomchil Velikov // A mapping from a specialisation signature to the index of the respective 874e6b9fc4cSMomchil Velikov // entry in the all specialisation array. Used to ensure uniqueness of 875e6b9fc4cSMomchil Velikov // specialisations. 876929a8c9fSAlexandros Lamprineas DenseMap<SpecSig, unsigned> UniqueSpecs; 877e6b9fc4cSMomchil Velikov 87814384c96SMomchil Velikov // Get a list of interesting arguments. 879e6b9fc4cSMomchil Velikov SmallVector<Argument *> Args; 88014384c96SMomchil Velikov for (Argument &Arg : F->args()) 88114384c96SMomchil Velikov if (isArgumentInteresting(&Arg)) 88214384c96SMomchil Velikov Args.push_back(&Arg); 88314384c96SMomchil Velikov 884e6b9fc4cSMomchil Velikov if (Args.empty()) 88514384c96SMomchil Velikov return false; 88614384c96SMomchil Velikov 88714384c96SMomchil Velikov for (User *U : F->users()) { 88814384c96SMomchil Velikov if (!isa<CallInst>(U) && !isa<InvokeInst>(U)) 88989bcfd16SSjoerd Meijer continue; 89014384c96SMomchil Velikov auto &CS = *cast<CallBase>(U); 8918136a017SAlexandros Lamprineas 892e6b9fc4cSMomchil Velikov // The user instruction does not call our function. 8938136a017SAlexandros Lamprineas if (CS.getCalledFunction() != F) 8948136a017SAlexandros Lamprineas continue; 8958136a017SAlexandros Lamprineas 89614384c96SMomchil Velikov // If the call site has attribute minsize set, that callsite won't be 89714384c96SMomchil Velikov // specialized. 89814384c96SMomchil Velikov if (CS.hasFnAttr(Attribute::MinSize)) 89914384c96SMomchil Velikov continue; 90014384c96SMomchil Velikov 90114384c96SMomchil Velikov // If the parent of the call site will never be executed, we don't need 90214384c96SMomchil Velikov // to worry about the passed value. 90314384c96SMomchil Velikov if (!Solver.isBlockExecutable(CS.getParent())) 90414384c96SMomchil Velikov continue; 90514384c96SMomchil Velikov 906e6b9fc4cSMomchil Velikov // Examine arguments and create a specialisation candidate from the 907e6b9fc4cSMomchil Velikov // constant operands of this call site. 908e6b9fc4cSMomchil Velikov SpecSig S; 90914384c96SMomchil Velikov for (Argument *A : Args) { 91014384c96SMomchil Velikov Constant *C = getCandidateConstant(CS.getArgOperand(A->getArgNo())); 91114384c96SMomchil Velikov if (!C) 91214384c96SMomchil Velikov continue; 913e6b9fc4cSMomchil Velikov LLVM_DEBUG(dbgs() << "FnSpecialization: Found interesting argument " 914e6b9fc4cSMomchil Velikov << A->getName() << " : " << C->getNameOrAsOperand() 915e6b9fc4cSMomchil Velikov << "\n"); 91614384c96SMomchil Velikov S.Args.push_back({A, C}); 9178045bf9dSAlexandros Lamprineas } 918e6b9fc4cSMomchil Velikov 919e6b9fc4cSMomchil Velikov if (S.Args.empty()) 920e6b9fc4cSMomchil Velikov continue; 921e6b9fc4cSMomchil Velikov 922e6b9fc4cSMomchil Velikov // Check if we have encountered the same specialisation already. 923929a8c9fSAlexandros Lamprineas if (auto It = UniqueSpecs.find(S); It != UniqueSpecs.end()) { 924e6b9fc4cSMomchil Velikov // Existing specialisation. Add the call to the list to rewrite, unless 925e6b9fc4cSMomchil Velikov // it's a recursive call. A specialisation, generated because of a 926e6b9fc4cSMomchil Velikov // recursive call may end up as not the best specialisation for all 927e6b9fc4cSMomchil Velikov // the cloned instances of this call, which result from specialising 928e6b9fc4cSMomchil Velikov // functions. Hence we don't rewrite the call directly, but match it with 929e6b9fc4cSMomchil Velikov // the best specialisation once all specialisations are known. 930e6b9fc4cSMomchil Velikov if (CS.getFunction() == F) 931e6b9fc4cSMomchil Velikov continue; 932e6b9fc4cSMomchil Velikov const unsigned Index = It->second; 933e6b9fc4cSMomchil Velikov AllSpecs[Index].CallSites.push_back(&CS); 934e6b9fc4cSMomchil Velikov } else { 935e6b9fc4cSMomchil Velikov // Calculate the specialisation gain. 936c6931c25SHari Limaye Cost CodeSize; 937d1b376fdSAlexandros Lamprineas unsigned Score = 0; 9384d13896dSAlexandros Lamprineas InstCostVisitor Visitor = getInstCostVisitorFor(F); 939d1b376fdSAlexandros Lamprineas for (ArgInfo &A : S.Args) { 940c6931c25SHari Limaye CodeSize += Visitor.getCodeSizeSavingsForArg(A.Formal, A.Actual); 941d1b376fdSAlexandros Lamprineas Score += getInliningBonus(A.Formal, A.Actual); 942d1b376fdSAlexandros Lamprineas } 943c6931c25SHari Limaye CodeSize += Visitor.getCodeSizeSavingsFromPendingPHIs(); 944893d3a61SAlexandros Lamprineas 945e19a5fc6SHari Limaye unsigned CodeSizeSavings = getCostValue(CodeSize); 946e19a5fc6SHari Limaye unsigned SpecSize = FuncSize - CodeSizeSavings; 947e19a5fc6SHari Limaye 948c6931c25SHari Limaye auto IsProfitable = [&]() -> bool { 949d1b376fdSAlexandros Lamprineas // No check required. 950d1b376fdSAlexandros Lamprineas if (ForceSpecialization) 951d1b376fdSAlexandros Lamprineas return true; 952c6931c25SHari Limaye 953c6931c25SHari Limaye LLVM_DEBUG( 954c6931c25SHari Limaye dbgs() << "FnSpecialization: Specialization bonus {Inlining = " 955c6931c25SHari Limaye << Score << " (" << (Score * 100 / FuncSize) << "%)}\n"); 956c6931c25SHari Limaye 957d1b376fdSAlexandros Lamprineas // Minimum inlining bonus. 958d1b376fdSAlexandros Lamprineas if (Score > MinInliningBonus * FuncSize / 100) 959d1b376fdSAlexandros Lamprineas return true; 960c6931c25SHari Limaye 961c6931c25SHari Limaye LLVM_DEBUG( 962c6931c25SHari Limaye dbgs() << "FnSpecialization: Specialization bonus {CodeSize = " 963c6931c25SHari Limaye << CodeSizeSavings << " (" 964c6931c25SHari Limaye << (CodeSizeSavings * 100 / FuncSize) << "%)}\n"); 965c6931c25SHari Limaye 966d1b376fdSAlexandros Lamprineas // Minimum codesize savings. 967c6931c25SHari Limaye if (CodeSizeSavings < MinCodeSizeSavings * FuncSize / 100) 968d1b376fdSAlexandros Lamprineas return false; 969c6931c25SHari Limaye 970c6931c25SHari Limaye // Lazily compute the Latency, to avoid unnecessarily computing BFI. 971c6931c25SHari Limaye unsigned LatencySavings = 972c6931c25SHari Limaye getCostValue(Visitor.getLatencySavingsForKnownConstants()); 973c6931c25SHari Limaye 974c6931c25SHari Limaye LLVM_DEBUG( 975c6931c25SHari Limaye dbgs() << "FnSpecialization: Specialization bonus {Latency = " 976c6931c25SHari Limaye << LatencySavings << " (" 977c6931c25SHari Limaye << (LatencySavings * 100 / FuncSize) << "%)}\n"); 978c6931c25SHari Limaye 979d1b376fdSAlexandros Lamprineas // Minimum latency savings. 980c6931c25SHari Limaye if (LatencySavings < MinLatencySavings * FuncSize / 100) 981d1b376fdSAlexandros Lamprineas return false; 982386aa2abSAlexandros Lamprineas // Maximum codesize growth. 983e19a5fc6SHari Limaye if ((FunctionGrowth[F] + SpecSize) / FuncSize > MaxCodeSizeGrowth) 984386aa2abSAlexandros Lamprineas return false; 985c6931c25SHari Limaye 986c6931c25SHari Limaye Score += std::max(CodeSizeSavings, LatencySavings); 987d1b376fdSAlexandros Lamprineas return true; 988d1b376fdSAlexandros Lamprineas }; 989e6b9fc4cSMomchil Velikov 990e6b9fc4cSMomchil Velikov // Discard unprofitable specialisations. 991c6931c25SHari Limaye if (!IsProfitable()) 992e6b9fc4cSMomchil Velikov continue; 993e6b9fc4cSMomchil Velikov 994e6b9fc4cSMomchil Velikov // Create a new specialisation entry. 995e19a5fc6SHari Limaye auto &Spec = AllSpecs.emplace_back(F, S, Score, SpecSize); 996e6b9fc4cSMomchil Velikov if (CS.getFunction() != F) 997e6b9fc4cSMomchil Velikov Spec.CallSites.push_back(&CS); 998e6b9fc4cSMomchil Velikov const unsigned Index = AllSpecs.size() - 1; 999929a8c9fSAlexandros Lamprineas UniqueSpecs[S] = Index; 1000e6b9fc4cSMomchil Velikov if (auto [It, Inserted] = SM.try_emplace(F, Index, Index + 1); !Inserted) 1001e6b9fc4cSMomchil Velikov It->second.second = Index + 1; 1002e6b9fc4cSMomchil Velikov } 100389bcfd16SSjoerd Meijer } 100489bcfd16SSjoerd Meijer 1005929a8c9fSAlexandros Lamprineas return !UniqueSpecs.empty(); 100689bcfd16SSjoerd Meijer } 100789bcfd16SSjoerd Meijer 10088136a017SAlexandros Lamprineas bool FunctionSpecializer::isCandidateFunction(Function *F) { 1009cc7bb708SMomchil Velikov if (F->isDeclaration() || F->arg_empty()) 10108136a017SAlexandros Lamprineas return false; 10118136a017SAlexandros Lamprineas 10128136a017SAlexandros Lamprineas if (F->hasFnAttribute(Attribute::NoDuplicate)) 10138136a017SAlexandros Lamprineas return false; 10148136a017SAlexandros Lamprineas 10159907746fSSjoerd Meijer // Do not specialize the cloned function again. 101667fde2b9SAlexandros Lamprineas if (Specializations.contains(F)) 10179907746fSSjoerd Meijer return false; 10189907746fSSjoerd Meijer 10199907746fSSjoerd Meijer // If we're optimizing the function for size, we shouldn't specialize it. 10206ab26eabSEllis Hoag if (shouldOptimizeForSize(F, nullptr, nullptr, PGSOQueryType::IRPass)) 10219907746fSSjoerd Meijer return false; 10229907746fSSjoerd Meijer 10239907746fSSjoerd Meijer // Exit if the function is not executable. There's no point in specializing 10249907746fSSjoerd Meijer // a dead function. 10259907746fSSjoerd Meijer if (!Solver.isBlockExecutable(&F->getEntryBlock())) 10269907746fSSjoerd Meijer return false; 10279907746fSSjoerd Meijer 10282556f581SChuanqi Xu // It wastes time to specialize a function which would get inlined finally. 10292556f581SChuanqi Xu if (F->hasFnAttribute(Attribute::AlwaysInline)) 10302556f581SChuanqi Xu return false; 10312556f581SChuanqi Xu 10329907746fSSjoerd Meijer LLVM_DEBUG(dbgs() << "FnSpecialization: Try function: " << F->getName() 10339907746fSSjoerd Meijer << "\n"); 103489bcfd16SSjoerd Meijer return true; 1035cc3f40bbSChuanqi Xu } 1036cc3f40bbSChuanqi Xu 103793ac2dbeSAlexandros Lamprineas Function *FunctionSpecializer::createSpecialization(Function *F, 103893ac2dbeSAlexandros Lamprineas const SpecSig &S) { 1039e15d72adSAlexandros Lamprineas Function *Clone = cloneCandidateFunction(F, Specializations.size() + 1); 10409907746fSSjoerd Meijer 1041cc7bb708SMomchil Velikov // The original function does not neccessarily have internal linkage, but the 1042cc7bb708SMomchil Velikov // clone must. 1043cc7bb708SMomchil Velikov Clone->setLinkage(GlobalValue::InternalLinkage); 1044cc7bb708SMomchil Velikov 10459907746fSSjoerd Meijer // Initialize the lattice state of the arguments of the function clone, 10469907746fSSjoerd Meijer // marking the argument on which we specialized the function constant 10479907746fSSjoerd Meijer // with the given value. 10487ea597eaSAlexandros Lamprineas Solver.setLatticeValueForSpecializationArguments(Clone, S.Args); 10498136a017SAlexandros Lamprineas Solver.markBlockExecutable(&Clone->front()); 105054e5fb78SAlexandros Lamprineas Solver.addArgumentTrackedFunction(Clone); 105154e5fb78SAlexandros Lamprineas Solver.addTrackedFunction(Clone); 10529907746fSSjoerd Meijer 10539907746fSSjoerd Meijer // Mark all the specialized functions 105467fde2b9SAlexandros Lamprineas Specializations.insert(Clone); 105567fde2b9SAlexandros Lamprineas ++NumSpecsCreated; 10569907746fSSjoerd Meijer 10578136a017SAlexandros Lamprineas return Clone; 10589907746fSSjoerd Meijer } 10599907746fSSjoerd Meijer 1060d1b376fdSAlexandros Lamprineas /// Compute the inlining bonus for replacing argument \p A with constant \p C. 1061d1b376fdSAlexandros Lamprineas /// The below heuristic is only concerned with exposing inlining 1062d1b376fdSAlexandros Lamprineas /// opportunities via indirect call promotion. If the argument is not a 1063d1b376fdSAlexandros Lamprineas /// (potentially casted) function pointer, give up. 1064d1b376fdSAlexandros Lamprineas unsigned FunctionSpecializer::getInliningBonus(Argument *A, Constant *C) { 10651b6663a1SNikita Popov Function *CalledFunction = dyn_cast<Function>(C->stripPointerCasts()); 10669907746fSSjoerd Meijer if (!CalledFunction) 1067d1b376fdSAlexandros Lamprineas return 0; 10689907746fSSjoerd Meijer 10699907746fSSjoerd Meijer // Get TTI for the called function (used for the inline cost). 10709907746fSSjoerd Meijer auto &CalleeTTI = (GetTTI)(*CalledFunction); 10719907746fSSjoerd Meijer 10729907746fSSjoerd Meijer // Look at all the call sites whose called value is the argument. 10739907746fSSjoerd Meijer // Specializing the function on the argument would allow these indirect 10749907746fSSjoerd Meijer // calls to be promoted to direct calls. If the indirect call promotion 10759907746fSSjoerd Meijer // would likely enable the called function to be inlined, specializing is a 10769907746fSSjoerd Meijer // good idea. 10775bfefff1SAlexandros Lamprineas int InliningBonus = 0; 10789907746fSSjoerd Meijer for (User *U : A->users()) { 10799907746fSSjoerd Meijer if (!isa<CallInst>(U) && !isa<InvokeInst>(U)) 10809907746fSSjoerd Meijer continue; 10819907746fSSjoerd Meijer auto *CS = cast<CallBase>(U); 10829907746fSSjoerd Meijer if (CS->getCalledOperand() != A) 10839907746fSSjoerd Meijer continue; 108433ecc8a9SAlexandros Lamprineas if (CS->getFunctionType() != CalledFunction->getFunctionType()) 108533ecc8a9SAlexandros Lamprineas continue; 10869907746fSSjoerd Meijer 10879907746fSSjoerd Meijer // Get the cost of inlining the called function at this call site. Note 10889907746fSSjoerd Meijer // that this is only an estimate. The called function may eventually 10899907746fSSjoerd Meijer // change in a way that leads to it not being inlined here, even though 10909907746fSSjoerd Meijer // inlining looks profitable now. For example, one of its called 10919907746fSSjoerd Meijer // functions may be inlined into it, making the called function too large 10929907746fSSjoerd Meijer // to be inlined into this call site. 10939907746fSSjoerd Meijer // 10949907746fSSjoerd Meijer // We apply a boost for performing indirect call promotion by increasing 10959907746fSSjoerd Meijer // the default threshold by the threshold for indirect calls. 10969907746fSSjoerd Meijer auto Params = getInlineParams(); 10979907746fSSjoerd Meijer Params.DefaultThreshold += InlineConstants::IndirectCallThreshold; 10989907746fSSjoerd Meijer InlineCost IC = 10999907746fSSjoerd Meijer getInlineCost(*CS, CalledFunction, Params, CalleeTTI, GetAC, GetTLI); 11009907746fSSjoerd Meijer 11019907746fSSjoerd Meijer // We clamp the bonus for this call to be between zero and the default 11029907746fSSjoerd Meijer // threshold. 11039907746fSSjoerd Meijer if (IC.isAlways()) 11045bfefff1SAlexandros Lamprineas InliningBonus += Params.DefaultThreshold; 11059907746fSSjoerd Meijer else if (IC.isVariable() && IC.getCostDelta() > 0) 11065bfefff1SAlexandros Lamprineas InliningBonus += IC.getCostDelta(); 1107b803aee6SAlexandros Lamprineas 11085bfefff1SAlexandros Lamprineas LLVM_DEBUG(dbgs() << "FnSpecialization: Inlining bonus " << InliningBonus 1109b803aee6SAlexandros Lamprineas << " for user " << *U << "\n"); 11109907746fSSjoerd Meijer } 11119907746fSSjoerd Meijer 1112d1b376fdSAlexandros Lamprineas return InliningBonus > 0 ? static_cast<unsigned>(InliningBonus) : 0; 11139907746fSSjoerd Meijer } 11149907746fSSjoerd Meijer 111514384c96SMomchil Velikov /// Determine if it is possible to specialise the function for constant values 111614384c96SMomchil Velikov /// of the formal parameter \p A. 11178136a017SAlexandros Lamprineas bool FunctionSpecializer::isArgumentInteresting(Argument *A) { 111838f44ccfSMomchil Velikov // No point in specialization if the argument is unused. 111938f44ccfSMomchil Velikov if (A->user_empty()) 1120a8b0f580SMomchil Velikov return false; 1121a8b0f580SMomchil Velikov 11227ea597eaSAlexandros Lamprineas Type *Ty = A->getType(); 11237ea597eaSAlexandros Lamprineas if (!Ty->isPointerTy() && (!SpecializeLiteralConstant || 11247ea597eaSAlexandros Lamprineas (!Ty->isIntegerTy() && !Ty->isFloatingPointTy() && !Ty->isStructTy()))) 112538f44ccfSMomchil Velikov return false; 112638f44ccfSMomchil Velikov 112738f44ccfSMomchil Velikov // SCCP solver does not record an argument that will be constructed on 112838f44ccfSMomchil Velikov // stack. 112938f44ccfSMomchil Velikov if (A->hasByValAttr() && !A->getParent()->onlyReadsMemory()) 113038f44ccfSMomchil Velikov return false; 113138f44ccfSMomchil Velikov 1132cc7bb708SMomchil Velikov // For non-argument-tracked functions every argument is overdefined. 1133cc7bb708SMomchil Velikov if (!Solver.isArgumentTrackedFunction(A->getParent())) 1134cc7bb708SMomchil Velikov return true; 1135cc7bb708SMomchil Velikov 113638f44ccfSMomchil Velikov // Check the lattice value and decide if we should attemt to specialize, 113738f44ccfSMomchil Velikov // based on this argument. No point in specialization, if the lattice value 113838f44ccfSMomchil Velikov // is already a constant. 11397ea597eaSAlexandros Lamprineas bool IsOverdefined = Ty->isStructTy() 11407ea597eaSAlexandros Lamprineas ? any_of(Solver.getStructLatticeValueFor(A), SCCPSolver::isOverdefined) 11417ea597eaSAlexandros Lamprineas : SCCPSolver::isOverdefined(Solver.getLatticeValueFor(A)); 11427ea597eaSAlexandros Lamprineas 11437ea597eaSAlexandros Lamprineas LLVM_DEBUG( 11447ea597eaSAlexandros Lamprineas if (IsOverdefined) 11457ea597eaSAlexandros Lamprineas dbgs() << "FnSpecialization: Found interesting parameter " 11467ea597eaSAlexandros Lamprineas << A->getNameOrAsOperand() << "\n"; 11477ea597eaSAlexandros Lamprineas else 11487ea597eaSAlexandros Lamprineas dbgs() << "FnSpecialization: Nothing to do, parameter " 11497ea597eaSAlexandros Lamprineas << A->getNameOrAsOperand() << " is already constant\n"; 11507ea597eaSAlexandros Lamprineas ); 11517ea597eaSAlexandros Lamprineas return IsOverdefined; 11529907746fSSjoerd Meijer } 11539907746fSSjoerd Meijer 11547ea597eaSAlexandros Lamprineas /// Check if the value \p V (an actual argument) is a constant or can only 115514384c96SMomchil Velikov /// have a constant value. Return that constant. 11568136a017SAlexandros Lamprineas Constant *FunctionSpecializer::getCandidateConstant(Value *V) { 1157eba76056SSjoerd Meijer if (isa<PoisonValue>(V)) 115814384c96SMomchil Velikov return nullptr; 1159fc0fa851SSjoerd Meijer 116014384c96SMomchil Velikov // Select for possible specialisation values that are constants or 116138f44ccfSMomchil Velikov // are deduced to be constants or constant ranges with a single element. 116238f44ccfSMomchil Velikov Constant *C = dyn_cast<Constant>(V); 11637ea597eaSAlexandros Lamprineas if (!C) 11647ea597eaSAlexandros Lamprineas C = Solver.getConstantOrNull(V); 1165de24d084SMomchil Velikov 1166de24d084SMomchil Velikov // Don't specialize on (anything derived from) the address of a non-constant 1167de24d084SMomchil Velikov // global variable, unless explicitly enabled. 1168de24d084SMomchil Velikov if (C && C->getType()->isPointerTy() && !C->isNullValue()) 1169de24d084SMomchil Velikov if (auto *GV = dyn_cast<GlobalVariable>(getUnderlyingObject(C)); 1170de24d084SMomchil Velikov GV && !(GV->isConstant() || SpecializeOnAddress)) 1171de24d084SMomchil Velikov return nullptr; 1172de24d084SMomchil Velikov 117314384c96SMomchil Velikov return C; 11749907746fSSjoerd Meijer } 11759907746fSSjoerd Meijer 1176e6b9fc4cSMomchil Velikov void FunctionSpecializer::updateCallSites(Function *F, const Spec *Begin, 1177e6b9fc4cSMomchil Velikov const Spec *End) { 1178e6b9fc4cSMomchil Velikov // Collect the call sites that need updating. 1179e6b9fc4cSMomchil Velikov SmallVector<CallBase *> ToUpdate; 1180e6b9fc4cSMomchil Velikov for (User *U : F->users()) 1181e6b9fc4cSMomchil Velikov if (auto *CS = dyn_cast<CallBase>(U); 1182e6b9fc4cSMomchil Velikov CS && CS->getCalledFunction() == F && 11838136a017SAlexandros Lamprineas Solver.isBlockExecutable(CS->getParent())) 11848136a017SAlexandros Lamprineas ToUpdate.push_back(CS); 1185b803aee6SAlexandros Lamprineas 11868136a017SAlexandros Lamprineas unsigned NCallsLeft = ToUpdate.size(); 11878136a017SAlexandros Lamprineas for (CallBase *CS : ToUpdate) { 11888136a017SAlexandros Lamprineas bool ShouldDecrementCount = CS->getFunction() == F; 1189b803aee6SAlexandros Lamprineas 1190e6b9fc4cSMomchil Velikov // Find the best matching specialisation. 1191e6b9fc4cSMomchil Velikov const Spec *BestSpec = nullptr; 1192e6b9fc4cSMomchil Velikov for (const Spec &S : make_range(Begin, End)) { 119393ac2dbeSAlexandros Lamprineas if (!S.Clone || (BestSpec && S.Score <= BestSpec->Score)) 1194e6b9fc4cSMomchil Velikov continue; 1195e6b9fc4cSMomchil Velikov 1196e6b9fc4cSMomchil Velikov if (any_of(S.Sig.Args, [CS, this](const ArgInfo &Arg) { 11978045bf9dSAlexandros Lamprineas unsigned ArgNo = Arg.Formal->getArgNo(); 11988136a017SAlexandros Lamprineas return getCandidateConstant(CS->getArgOperand(ArgNo)) != Arg.Actual; 11998136a017SAlexandros Lamprineas })) 12008136a017SAlexandros Lamprineas continue; 12018136a017SAlexandros Lamprineas 1202e6b9fc4cSMomchil Velikov BestSpec = &S; 12030f0cb92cSAlexandros Lamprineas } 1204e6b9fc4cSMomchil Velikov 1205e6b9fc4cSMomchil Velikov if (BestSpec) { 1206e6b9fc4cSMomchil Velikov LLVM_DEBUG(dbgs() << "FnSpecialization: Redirecting " << *CS 1207e6b9fc4cSMomchil Velikov << " to call " << BestSpec->Clone->getName() << "\n"); 1208e6b9fc4cSMomchil Velikov CS->setCalledFunction(BestSpec->Clone); 1209e6b9fc4cSMomchil Velikov ShouldDecrementCount = true; 1210e6b9fc4cSMomchil Velikov } 1211e6b9fc4cSMomchil Velikov 12128136a017SAlexandros Lamprineas if (ShouldDecrementCount) 12138136a017SAlexandros Lamprineas --NCallsLeft; 12140f0cb92cSAlexandros Lamprineas } 12150f0cb92cSAlexandros Lamprineas 12168136a017SAlexandros Lamprineas // If the function has been completely specialized, the original function 12178136a017SAlexandros Lamprineas // is no longer needed. Mark it unreachable. 1218cc7bb708SMomchil Velikov if (NCallsLeft == 0 && Solver.isArgumentTrackedFunction(F)) { 12198136a017SAlexandros Lamprineas Solver.markFunctionUnreachable(F); 12208136a017SAlexandros Lamprineas FullySpecialized.insert(F); 12210f0cb92cSAlexandros Lamprineas } 12220f0cb92cSAlexandros Lamprineas } 1223