xref: /freebsd-src/contrib/llvm-project/llvm/lib/Analysis/MLInlineAdvisor.cpp (revision 1323ec571215a77ddd21294f0871979d5ad6b992)
1 //===- MLInlineAdvisor.cpp - machine learned InlineAdvisor ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the interface between the inliner and a learned model.
10 // It delegates model evaluation to either the AOT compiled model (the
11 // 'release' mode) or a runtime-loaded model (the 'development' case).
12 //
13 //===----------------------------------------------------------------------===//
14 #include "llvm/Config/config.h"
15 #if defined(LLVM_HAVE_TF_AOT) || defined(LLVM_HAVE_TF_API)
16 
17 #include <limits>
18 #include <unordered_map>
19 #include <unordered_set>
20 
21 #include "llvm/ADT/SCCIterator.h"
22 #include "llvm/Analysis/CallGraph.h"
23 #include "llvm/Analysis/FunctionPropertiesAnalysis.h"
24 #include "llvm/Analysis/InlineCost.h"
25 #include "llvm/Analysis/MLInlineAdvisor.h"
26 #include "llvm/Analysis/MLModelRunner.h"
27 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
28 #include "llvm/Analysis/TargetLibraryInfo.h"
29 #include "llvm/Analysis/TargetTransformInfo.h"
30 #include "llvm/IR/InstIterator.h"
31 #include "llvm/IR/Instructions.h"
32 #include "llvm/IR/PassManager.h"
33 #include "llvm/Support/CommandLine.h"
34 #include "llvm/Support/Path.h"
35 
36 using namespace llvm;
37 
38 #define DEBUG_TYPE "inline-ml"
39 
40 static cl::opt<float> SizeIncreaseThreshold(
41     "ml-advisor-size-increase-threshold", cl::Hidden,
42     cl::desc("Maximum factor by which expected native size may increase before "
43              "blocking any further inlining."),
44     cl::init(2.0));
45 
46 // clang-format off
47 const std::array<std::string, NumberOfFeatures> llvm::FeatureNameMap{
48 // InlineCost features - these must come first
49 #define POPULATE_NAMES(INDEX_NAME, NAME) NAME,
50   INLINE_COST_FEATURE_ITERATOR(POPULATE_NAMES)
51 #undef POPULATE_NAMES
52 
53 // Non-cost features
54 #define POPULATE_NAMES(INDEX_NAME, NAME, COMMENT) NAME,
55   INLINE_FEATURE_ITERATOR(POPULATE_NAMES)
56 #undef POPULATE_NAMES
57 };
58 // clang-format on
59 
60 const char *const llvm::DecisionName = "inlining_decision";
61 const char *const llvm::DefaultDecisionName = "inlining_default";
62 const char *const llvm::RewardName = "delta_size";
63 
64 CallBase *getInlinableCS(Instruction &I) {
65   if (auto *CS = dyn_cast<CallBase>(&I))
66     if (Function *Callee = CS->getCalledFunction()) {
67       if (!Callee->isDeclaration()) {
68         return CS;
69       }
70     }
71   return nullptr;
72 }
73 
74 MLInlineAdvisor::MLInlineAdvisor(Module &M, ModuleAnalysisManager &MAM,
75                                  std::unique_ptr<MLModelRunner> Runner)
76     : InlineAdvisor(
77           M, MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager()),
78       ModelRunner(std::move(Runner)), CG(new CallGraph(M)),
79       InitialIRSize(getModuleIRSize()), CurrentIRSize(InitialIRSize) {
80   assert(ModelRunner);
81 
82   // Extract the 'call site height' feature - the position of a call site
83   // relative to the farthest statically reachable SCC node. We don't mutate
84   // this value while inlining happens. Empirically, this feature proved
85   // critical in behavioral cloning - i.e. training a model to mimic the manual
86   // heuristic's decisions - and, thus, equally important for training for
87   // improvement.
88   for (auto I = scc_begin(CG.get()); !I.isAtEnd(); ++I) {
89     const std::vector<CallGraphNode *> &CGNodes = *I;
90     unsigned Level = 0;
91     for (auto *CGNode : CGNodes) {
92       Function *F = CGNode->getFunction();
93       if (!F || F->isDeclaration())
94         continue;
95       for (auto &I : instructions(F)) {
96         if (auto *CS = getInlinableCS(I)) {
97           auto *Called = CS->getCalledFunction();
98           auto Pos = FunctionLevels.find(Called);
99           // In bottom up traversal, an inlinable callee is either in the
100           // same SCC, or to a function in a visited SCC. So not finding its
101           // level means we haven't visited it yet, meaning it's in this SCC.
102           if (Pos == FunctionLevels.end())
103             continue;
104           Level = std::max(Level, Pos->second + 1);
105         }
106       }
107     }
108     for (auto *CGNode : CGNodes) {
109       Function *F = CGNode->getFunction();
110       if (F && !F->isDeclaration())
111         FunctionLevels[F] = Level;
112     }
113   }
114 }
115 
116 void MLInlineAdvisor::onPassEntry() {
117   // Function passes executed between InlinerPass runs may have changed the
118   // module-wide features.
119   NodeCount = 0;
120   EdgeCount = 0;
121   for (auto &F : M)
122     if (!F.isDeclaration()) {
123       ++NodeCount;
124       EdgeCount += getLocalCalls(F);
125     }
126 }
127 
128 int64_t MLInlineAdvisor::getLocalCalls(Function &F) {
129   return FAM.getResult<FunctionPropertiesAnalysis>(F)
130       .DirectCallsToDefinedFunctions;
131 }
132 
133 // Update the internal state of the advisor, and force invalidate feature
134 // analysis. Currently, we maintain minimal (and very simple) global state - the
135 // number of functions and the number of static calls. We also keep track of the
136 // total IR size in this module, to stop misbehaving policies at a certain bloat
137 // factor (SizeIncreaseThreshold)
138 void MLInlineAdvisor::onSuccessfulInlining(const MLInlineAdvice &Advice,
139                                            bool CalleeWasDeleted) {
140   assert(!ForceStop);
141   Function *Caller = Advice.getCaller();
142   Function *Callee = Advice.getCallee();
143 
144   // The caller features aren't valid anymore.
145   {
146     PreservedAnalyses PA = PreservedAnalyses::all();
147     PA.abandon<FunctionPropertiesAnalysis>();
148     FAM.invalidate(*Caller, PA);
149   }
150   int64_t IRSizeAfter =
151       getIRSize(*Caller) + (CalleeWasDeleted ? 0 : Advice.CalleeIRSize);
152   CurrentIRSize += IRSizeAfter - (Advice.CallerIRSize + Advice.CalleeIRSize);
153   if (CurrentIRSize > SizeIncreaseThreshold * InitialIRSize)
154     ForceStop = true;
155 
156   // We can delta-update module-wide features. We know the inlining only changed
157   // the caller, and maybe the callee (by deleting the latter).
158   // Nodes are simple to update.
159   // For edges, we 'forget' the edges that the caller and callee used to have
160   // before inlining, and add back what they currently have together.
161   int64_t NewCallerAndCalleeEdges =
162       FAM.getResult<FunctionPropertiesAnalysis>(*Caller)
163           .DirectCallsToDefinedFunctions;
164 
165   if (CalleeWasDeleted)
166     --NodeCount;
167   else
168     NewCallerAndCalleeEdges +=
169         FAM.getResult<FunctionPropertiesAnalysis>(*Callee)
170             .DirectCallsToDefinedFunctions;
171   EdgeCount += (NewCallerAndCalleeEdges - Advice.CallerAndCalleeEdges);
172   assert(CurrentIRSize >= 0 && EdgeCount >= 0 && NodeCount >= 0);
173 }
174 
175 int64_t MLInlineAdvisor::getModuleIRSize() const {
176   int64_t Ret = 0;
177   for (auto &F : CG->getModule())
178     if (!F.isDeclaration())
179       Ret += getIRSize(F);
180   return Ret;
181 }
182 
183 std::unique_ptr<InlineAdvice> MLInlineAdvisor::getAdviceImpl(CallBase &CB) {
184   auto &Caller = *CB.getCaller();
185   auto &Callee = *CB.getCalledFunction();
186 
187   auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
188     return FAM.getResult<AssumptionAnalysis>(F);
189   };
190   auto &TIR = FAM.getResult<TargetIRAnalysis>(Callee);
191   auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(Caller);
192 
193   auto MandatoryKind = InlineAdvisor::getMandatoryKind(CB, FAM, ORE);
194   // If this is a "never inline" case, there won't be any changes to internal
195   // state we need to track, so we can just return the base InlineAdvice, which
196   // will do nothing interesting.
197   // Same thing if this is a recursive case.
198   if (MandatoryKind == InlineAdvisor::MandatoryInliningKind::Never ||
199       &Caller == &Callee)
200     return getMandatoryAdvice(CB, false);
201 
202   bool Mandatory =
203       MandatoryKind == InlineAdvisor::MandatoryInliningKind::Always;
204 
205   // If we need to stop, we won't want to track anymore any state changes, so
206   // we just return the base InlineAdvice, which acts as a noop.
207   if (ForceStop) {
208     ORE.emit([&] {
209       return OptimizationRemarkMissed(DEBUG_TYPE, "ForceStop", &CB)
210              << "Won't attempt inlining because module size grew too much.";
211     });
212     return std::make_unique<InlineAdvice>(this, CB, ORE, Mandatory);
213   }
214 
215   int CostEstimate = 0;
216   if (!Mandatory) {
217     auto IsCallSiteInlinable =
218         llvm::getInliningCostEstimate(CB, TIR, GetAssumptionCache);
219     if (!IsCallSiteInlinable) {
220       // We can't inline this for correctness reasons, so return the base
221       // InlineAdvice, as we don't care about tracking any state changes (which
222       // won't happen).
223       return std::make_unique<InlineAdvice>(this, CB, ORE, false);
224     }
225     CostEstimate = *IsCallSiteInlinable;
226   }
227 
228   const auto CostFeatures =
229       llvm::getInliningCostFeatures(CB, TIR, GetAssumptionCache);
230   if (!CostFeatures) {
231     return std::make_unique<InlineAdvice>(this, CB, ORE, false);
232   }
233 
234   if (Mandatory)
235     return getMandatoryAdvice(CB, true);
236 
237   auto NrCtantParams = 0;
238   for (auto I = CB.arg_begin(), E = CB.arg_end(); I != E; ++I) {
239     NrCtantParams += (isa<Constant>(*I));
240   }
241 
242   auto &CallerBefore = FAM.getResult<FunctionPropertiesAnalysis>(Caller);
243   auto &CalleeBefore = FAM.getResult<FunctionPropertiesAnalysis>(Callee);
244 
245   ModelRunner->setFeature(FeatureIndex::CalleeBasicBlockCount,
246                           CalleeBefore.BasicBlockCount);
247   ModelRunner->setFeature(FeatureIndex::CallSiteHeight,
248                           FunctionLevels[&Caller]);
249   ModelRunner->setFeature(FeatureIndex::NodeCount, NodeCount);
250   ModelRunner->setFeature(FeatureIndex::NrCtantParams, NrCtantParams);
251   ModelRunner->setFeature(FeatureIndex::EdgeCount, EdgeCount);
252   ModelRunner->setFeature(FeatureIndex::CallerUsers, CallerBefore.Uses);
253   ModelRunner->setFeature(FeatureIndex::CallerConditionallyExecutedBlocks,
254                           CallerBefore.BlocksReachedFromConditionalInstruction);
255   ModelRunner->setFeature(FeatureIndex::CallerBasicBlockCount,
256                           CallerBefore.BasicBlockCount);
257   ModelRunner->setFeature(FeatureIndex::CalleeConditionallyExecutedBlocks,
258                           CalleeBefore.BlocksReachedFromConditionalInstruction);
259   ModelRunner->setFeature(FeatureIndex::CalleeUsers, CalleeBefore.Uses);
260   ModelRunner->setFeature(FeatureIndex::CostEstimate, CostEstimate);
261 
262   // Add the cost features
263   for (size_t I = 0;
264        I < static_cast<size_t>(InlineCostFeatureIndex::NumberOfFeatures); ++I) {
265     ModelRunner->setFeature(
266         inlineCostFeatureToMlFeature(static_cast<InlineCostFeatureIndex>(I)),
267         CostFeatures->at(I));
268   }
269 
270   return getAdviceFromModel(CB, ORE);
271 }
272 
273 std::unique_ptr<MLInlineAdvice>
274 MLInlineAdvisor::getAdviceFromModel(CallBase &CB,
275                                     OptimizationRemarkEmitter &ORE) {
276   return std::make_unique<MLInlineAdvice>(this, CB, ORE, ModelRunner->run());
277 }
278 
279 std::unique_ptr<InlineAdvice> MLInlineAdvisor::getMandatoryAdvice(CallBase &CB,
280                                                                   bool Advice) {
281   // Make sure we track inlinings in all cases - mandatory or not.
282   if (Advice && !ForceStop)
283     return getMandatoryAdviceImpl(CB);
284 
285   // If this is a "never inline" case, there won't be any changes to internal
286   // state we need to track, so we can just return the base InlineAdvice, which
287   // will do nothing interesting.
288   // Same if we are forced to stop - we don't track anymore.
289   return std::make_unique<InlineAdvice>(this, CB, getCallerORE(CB), Advice);
290 }
291 
292 std::unique_ptr<MLInlineAdvice>
293 MLInlineAdvisor::getMandatoryAdviceImpl(CallBase &CB) {
294   return std::make_unique<MLInlineAdvice>(this, CB, getCallerORE(CB), true);
295 }
296 
297 void MLInlineAdvice::reportContextForRemark(
298     DiagnosticInfoOptimizationBase &OR) {
299   using namespace ore;
300   OR << NV("Callee", Callee->getName());
301   for (size_t I = 0; I < NumberOfFeatures; ++I)
302     OR << NV(FeatureNameMap[I], getAdvisor()->getModelRunner().getFeature(I));
303   OR << NV("ShouldInline", isInliningRecommended());
304 }
305 
306 void MLInlineAdvice::recordInliningImpl() {
307   ORE.emit([&]() {
308     OptimizationRemark R(DEBUG_TYPE, "InliningSuccess", DLoc, Block);
309     reportContextForRemark(R);
310     return R;
311   });
312   getAdvisor()->onSuccessfulInlining(*this, /*CalleeWasDeleted*/ false);
313 }
314 
315 void MLInlineAdvice::recordInliningWithCalleeDeletedImpl() {
316   ORE.emit([&]() {
317     OptimizationRemark R(DEBUG_TYPE, "InliningSuccessWithCalleeDeleted", DLoc,
318                          Block);
319     reportContextForRemark(R);
320     return R;
321   });
322   getAdvisor()->onSuccessfulInlining(*this, /*CalleeWasDeleted*/ true);
323 }
324 
325 void MLInlineAdvice::recordUnsuccessfulInliningImpl(
326     const InlineResult &Result) {
327   ORE.emit([&]() {
328     OptimizationRemarkMissed R(DEBUG_TYPE, "InliningAttemptedAndUnsuccessful",
329                                DLoc, Block);
330     reportContextForRemark(R);
331     return R;
332   });
333 }
334 void MLInlineAdvice::recordUnattemptedInliningImpl() {
335   ORE.emit([&]() {
336     OptimizationRemarkMissed R(DEBUG_TYPE, "IniningNotAttempted", DLoc, Block);
337     reportContextForRemark(R);
338     return R;
339   });
340 }
341 #endif // defined(LLVM_HAVE_TF_AOT) || defined(LLVM_HAVE_TF_API)
342