xref: /llvm-project/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h (revision 88e9b373c0d7184b08c755024cce0778d18f0306)
1 //===- FunctionSpecialization.h - Function Specialization -----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Overview:
10 // ---------
11 // Function Specialization is a transformation which propagates the constant
12 // parameters of a function call from the caller to the callee. It is part of
13 // the Inter-Procedural Sparse Conditional Constant Propagation (IPSCCP) pass.
14 // The transformation runs iteratively a number of times which is controlled
15 // by the option `funcspec-max-iters`. Running it multiple times is needed
16 // for specializing recursive functions, but also exposes new opportunities
17 // arising from specializations which return constant values or contain calls
18 // which can be specialized.
19 //
20 // Function Specialization supports propagating constant parameters like
21 // function pointers, literal constants and addresses of global variables.
22 // By propagating function pointers, indirect calls become direct calls. This
23 // exposes inlining opportunities which we would have otherwise missed. That's
24 // why function specialization is run before the inliner in the optimization
25 // pipeline; that is by design.
26 //
27 // Cost Model:
28 // -----------
29 // The cost model facilitates a utility for estimating the specialization bonus
30 // from propagating a constant argument. This is the InstCostVisitor, a class
31 // that inherits from the InstVisitor. The bonus itself is expressed as codesize
32 // and latency savings. Codesize savings means the amount of code that becomes
33 // dead in the specialization from propagating the constant, whereas latency
34 // savings represents the cycles we are saving from replacing instructions with
35 // constant values. The InstCostVisitor overrides a set of `visit*` methods to
36 // be able to handle different types of instructions. These attempt to constant-
37 // fold the instruction in which case a constant is returned and propagated
38 // further.
39 //
40 // Function pointers are not handled by the InstCostVisitor. They are treated
41 // separately as they could expose inlining opportunities via indirect call
42 // promotion. The inlining bonus contributes to the total specialization score.
43 //
44 // For a specialization to be profitable its bonus needs to exceed a minimum
45 // threshold. There are three options for controlling the threshold which are
46 // expressed as percentages of the original function size:
47 //  * funcspec-min-codesize-savings
48 //  * funcspec-min-latency-savings
49 //  * funcspec-min-inlining-bonus
50 // There's also an option for controlling the codesize growth from recursive
51 // specializations. That is `funcspec-max-codesize-growth`.
52 //
53 // Once we have all the potential specializations with their score we need to
54 // choose the best ones, which fit in the module specialization budget. That
55 // is controlled by the option `funcspec-max-clones`. To find the best `NSpec`
56 // specializations we use a max-heap. For more details refer to D139346.
57 //
58 // Ideas:
59 // ------
60 // - With a function specialization attribute for arguments, we could have
61 //   a direct way to steer function specialization, avoiding the cost-model,
62 //   and thus control compile-times / code-size.
63 //
64 // - Perhaps a post-inlining function specialization pass could be more
65 //   aggressive on literal constants.
66 //
67 // Limitations:
68 // ------------
69 // - We are unable to consider specializations of functions called from indirect
70 //   callsites whose pointer operand has a lattice value that is known to be
71 //   constant, either from IPSCCP or previous iterations of FuncSpec. This is
72 //   because SCCP has not yet replaced the uses of the known constant.
73 //
74 // References:
75 // -----------
76 // 2021 LLVM Dev Mtg “Introducing function specialisation, and can we enable
77 // it by default?”, https://www.youtube.com/watch?v=zJiCjeXgV5Q
78 //
79 //===----------------------------------------------------------------------===//
80 
81 #ifndef LLVM_TRANSFORMS_IPO_FUNCTIONSPECIALIZATION_H
82 #define LLVM_TRANSFORMS_IPO_FUNCTIONSPECIALIZATION_H
83 
84 #include "llvm/Analysis/BlockFrequencyInfo.h"
85 #include "llvm/Analysis/CodeMetrics.h"
86 #include "llvm/Analysis/InlineCost.h"
87 #include "llvm/Analysis/TargetTransformInfo.h"
88 #include "llvm/IR/InstVisitor.h"
89 #include "llvm/Transforms/Scalar/SCCP.h"
90 #include "llvm/Transforms/Utils/Cloning.h"
91 #include "llvm/Transforms/Utils/SCCPSolver.h"
92 #include "llvm/Transforms/Utils/SizeOpts.h"
93 
94 namespace llvm {
95 // Map of potential specializations for each function. The FunctionSpecializer
96 // keeps the discovered specialisation opportunities for the module in a single
97 // vector, where the specialisations of each function form a contiguous range.
98 // This map's value is the beginning and the end of that range.
99 using SpecMap = DenseMap<Function *, std::pair<unsigned, unsigned>>;
100 
101 // Just a shorter abbreviation to improve indentation.
102 using Cost = InstructionCost;
103 
104 // Map of known constants found during the specialization bonus estimation.
105 using ConstMap = DenseMap<Value *, Constant *>;
106 
107 // Specialization signature, used to uniquely designate a specialization within
108 // a function.
109 struct SpecSig {
110   // Hashing support, used to distinguish between ordinary, empty, or tombstone
111   // keys.
112   unsigned Key = 0;
113   SmallVector<ArgInfo, 4> Args;
114 
115   bool operator==(const SpecSig &Other) const {
116     if (Key != Other.Key)
117       return false;
118     return Args == Other.Args;
119   }
120 
121   friend hash_code hash_value(const SpecSig &S) {
122     return hash_combine(hash_value(S.Key),
123                         hash_combine_range(S.Args.begin(), S.Args.end()));
124   }
125 };
126 
127 // Specialization instance.
128 struct Spec {
129   // Original function.
130   Function *F;
131 
132   // Cloned function, a specialized version of the original one.
133   Function *Clone = nullptr;
134 
135   // Specialization signature.
136   SpecSig Sig;
137 
138   // Profitability of the specialization.
139   unsigned Score;
140 
141   // Number of instructions in the specialization.
142   unsigned CodeSize;
143 
144   // List of call sites, matching this specialization.
145   SmallVector<CallBase *> CallSites;
146 
147   Spec(Function *F, const SpecSig &S, unsigned Score, unsigned CodeSize)
148       : F(F), Sig(S), Score(Score), CodeSize(CodeSize) {}
149   Spec(Function *F, const SpecSig &&S, unsigned Score, unsigned CodeSize)
150       : F(F), Sig(S), Score(Score), CodeSize(CodeSize) {}
151 };
152 
153 class InstCostVisitor : public InstVisitor<InstCostVisitor, Constant *> {
154   std::function<BlockFrequencyInfo &(Function &)> GetBFI;
155   Function *F;
156   const DataLayout &DL;
157   TargetTransformInfo &TTI;
158   const SCCPSolver &Solver;
159 
160   ConstMap KnownConstants;
161   // Basic blocks known to be unreachable after constant propagation.
162   DenseSet<BasicBlock *> DeadBlocks;
163   // PHI nodes we have visited before.
164   DenseSet<Instruction *> VisitedPHIs;
165   // PHI nodes we have visited once without successfully constant folding them.
166   // Once the InstCostVisitor has processed all the specialization arguments,
167   // it should be possible to determine whether those PHIs can be folded
168   // (some of their incoming values may have become constant or dead).
169   SmallVector<Instruction *> PendingPHIs;
170 
171   ConstMap::iterator LastVisited;
172 
173 public:
174   InstCostVisitor(std::function<BlockFrequencyInfo &(Function &)> GetBFI,
175                   Function *F, const DataLayout &DL, TargetTransformInfo &TTI,
176                   SCCPSolver &Solver)
177       : GetBFI(GetBFI), F(F), DL(DL), TTI(TTI), Solver(Solver) {}
178 
179   bool isBlockExecutable(BasicBlock *BB) const {
180     return Solver.isBlockExecutable(BB) && !DeadBlocks.contains(BB);
181   }
182 
183   Cost getCodeSizeSavingsForArg(Argument *A, Constant *C);
184 
185   Cost getCodeSizeSavingsFromPendingPHIs();
186 
187   Cost getLatencySavingsForKnownConstants();
188 
189 private:
190   friend class InstVisitor<InstCostVisitor, Constant *>;
191 
192   Constant *findConstantFor(Value *V) const;
193 
194   bool canEliminateSuccessor(BasicBlock *BB, BasicBlock *Succ) const;
195 
196   Cost getCodeSizeSavingsForUser(Instruction *User, Value *Use = nullptr,
197                                  Constant *C = nullptr);
198 
199   Cost estimateBasicBlocks(SmallVectorImpl<BasicBlock *> &WorkList);
200   Cost estimateSwitchInst(SwitchInst &I);
201   Cost estimateBranchInst(BranchInst &I);
202 
203   // Transitively Incoming Values (TIV) is a set of Values that can "feed" a
204   // value to the initial PHI-node. It is defined like this:
205   //
206   // * the initial PHI-node belongs to TIV.
207   //
208   // * for every PHI-node in TIV, its operands belong to TIV
209   //
210   // If TIV for the initial PHI-node (P) contains more than one constant or a
211   // value that is not a PHI-node, then P cannot be folded to a constant.
212   //
213   // As soon as we detect these cases, we bail, without constructing the
214   // full TIV.
215   // Otherwise P can be folded to the one constant in TIV.
216   bool discoverTransitivelyIncomingValues(Constant *Const, PHINode *Root,
217                                           DenseSet<PHINode *> &TransitivePHIs);
218 
219   Constant *visitInstruction(Instruction &I) { return nullptr; }
220   Constant *visitPHINode(PHINode &I);
221   Constant *visitFreezeInst(FreezeInst &I);
222   Constant *visitCallBase(CallBase &I);
223   Constant *visitLoadInst(LoadInst &I);
224   Constant *visitGetElementPtrInst(GetElementPtrInst &I);
225   Constant *visitSelectInst(SelectInst &I);
226   Constant *visitCastInst(CastInst &I);
227   Constant *visitCmpInst(CmpInst &I);
228   Constant *visitUnaryOperator(UnaryOperator &I);
229   Constant *visitBinaryOperator(BinaryOperator &I);
230 };
231 
232 class FunctionSpecializer {
233 
234   /// The IPSCCP Solver.
235   SCCPSolver &Solver;
236 
237   Module &M;
238 
239   /// Analysis manager, needed to invalidate analyses.
240   FunctionAnalysisManager *FAM;
241 
242   /// Analyses used to help determine if a function should be specialized.
243   std::function<BlockFrequencyInfo &(Function &)> GetBFI;
244   std::function<const TargetLibraryInfo &(Function &)> GetTLI;
245   std::function<TargetTransformInfo &(Function &)> GetTTI;
246   std::function<AssumptionCache &(Function &)> GetAC;
247 
248   SmallPtrSet<Function *, 32> Specializations;
249   SmallPtrSet<Function *, 32> FullySpecialized;
250   DenseMap<Function *, CodeMetrics> FunctionMetrics;
251   DenseMap<Function *, unsigned> FunctionGrowth;
252   unsigned NGlobals = 0;
253 
254 public:
255   FunctionSpecializer(
256       SCCPSolver &Solver, Module &M, FunctionAnalysisManager *FAM,
257       std::function<BlockFrequencyInfo &(Function &)> GetBFI,
258       std::function<const TargetLibraryInfo &(Function &)> GetTLI,
259       std::function<TargetTransformInfo &(Function &)> GetTTI,
260       std::function<AssumptionCache &(Function &)> GetAC)
261       : Solver(Solver), M(M), FAM(FAM), GetBFI(GetBFI), GetTLI(GetTLI),
262         GetTTI(GetTTI), GetAC(GetAC) {}
263 
264   ~FunctionSpecializer();
265 
266   bool run();
267 
268   InstCostVisitor getInstCostVisitorFor(Function *F) {
269     auto &TTI = GetTTI(*F);
270     return InstCostVisitor(GetBFI, F, M.getDataLayout(), TTI, Solver);
271   }
272 
273 private:
274   Constant *getPromotableAlloca(AllocaInst *Alloca, CallInst *Call);
275 
276   /// A constant stack value is an AllocaInst that has a single constant
277   /// value stored to it. Return this constant if such an alloca stack value
278   /// is a function argument.
279   Constant *getConstantStackValue(CallInst *Call, Value *Val);
280 
281   /// See if there are any new constant values for the callers of \p F via
282   /// stack variables and promote them to global variables.
283   void promoteConstantStackValues(Function *F);
284 
285   /// Clean up fully specialized functions.
286   void removeDeadFunctions();
287 
288   /// Remove any ssa_copy intrinsics that may have been introduced.
289   void cleanUpSSA();
290 
291   /// @brief  Find potential specialization opportunities.
292   /// @param F Function to specialize
293   /// @param FuncSize Cost of specializing a function.
294   /// @param AllSpecs A vector to add potential specializations to.
295   /// @param SM  A map for a function's specialisation range
296   /// @return True, if any potential specializations were found
297   bool findSpecializations(Function *F, unsigned FuncSize,
298                            SmallVectorImpl<Spec> &AllSpecs, SpecMap &SM);
299 
300   /// Compute the inlining bonus for replacing argument \p A with constant \p C.
301   unsigned getInliningBonus(Argument *A, Constant *C);
302 
303   bool isCandidateFunction(Function *F);
304 
305   /// @brief Create a specialization of \p F and prime the SCCPSolver
306   /// @param F Function to specialize
307   /// @param S Which specialization to create
308   /// @return The new, cloned function
309   Function *createSpecialization(Function *F, const SpecSig &S);
310 
311   /// Determine if it is possible to specialise the function for constant values
312   /// of the formal parameter \p A.
313   bool isArgumentInteresting(Argument *A);
314 
315   /// Check if the value \p V  (an actual argument) is a constant or can only
316   /// have a constant value. Return that constant.
317   Constant *getCandidateConstant(Value *V);
318 
319   /// @brief Find and update calls to \p F, which match a specialization
320   /// @param F Orginal function
321   /// @param Begin Start of a range of possibly matching specialisations
322   /// @param End End of a range (exclusive) of possibly matching specialisations
323   void updateCallSites(Function *F, const Spec *Begin, const Spec *End);
324 };
325 } // namespace llvm
326 
327 #endif // LLVM_TRANSFORMS_IPO_FUNCTIONSPECIALIZATION_H
328