//===- bolt/Core/BinaryFunctionCallGraph.cpp ----------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the BinaryFunctionCallGraph class.
//
//===----------------------------------------------------------------------===//

#include "bolt/Core/BinaryFunctionCallGraph.h"
#include "bolt/Core/BinaryContext.h"
#include "bolt/Core/BinaryFunction.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Timer.h"
#include <stack>

#define DEBUG_TYPE "callgraph"

using namespace llvm;

namespace opts {

extern cl::opt<bool> TimeOpts;
extern cl::opt<unsigned> Verbosity;
extern cl::OptionCategory BoltCategory;
static cl::opt<std::string>
    DumpCGDot("dump-cg", cl::desc("dump callgraph to the given file"),
              cl::cat(BoltCategory));

} // namespace opts

namespace llvm {
namespace bolt {

CallGraph::NodeId BinaryFunctionCallGraph::addNode(BinaryFunction *BF,
                                                   uint32_t Size,
                                                   uint64_t Samples) {
  NodeId Id = CallGraph::addNode(Size, Samples);
  assert(size_t(Id) == Funcs.size());
  Funcs.push_back(BF);
  FuncToNodeId[BF] = Id;
  assert(Funcs[Id] == BF);
  return Id;
}

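// Build a post-order over the call graph with an iterative DFS: every node is
// pushed once as NEW, then re-pushed in VISITING state behind its callees.
// When a node is popped the second time, all of its reachable callees have
// already been emitted, so callees appear before their callers in the
// returned order; edges to nodes already in VISITING state (cycles) are
// simply skipped.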
std::deque<BinaryFunction *> BinaryFunctionCallGraph::buildTraversalOrder() {
  NamedRegionTimer T1("buildcgorder", "Build cg traversal order",
                      "CG breakdown", "CG breakdown", opts::TimeOpts);
  std::deque<BinaryFunction *> TopologicalOrder;
  enum NodeStatus { NEW, VISITING, VISITED };
  std::vector<NodeStatus> NodeStatus(Funcs.size());
  std::stack<NodeId> Worklist;

  for (BinaryFunction *Func : Funcs) {
    auto It = FuncToNodeId.find(Func);
    assert(It != FuncToNodeId.end());
    const NodeId Id = It->second;
    Worklist.push(Id);
    NodeStatus[Id] = NEW;
  }

  while (!Worklist.empty()) {
    const NodeId FuncId = Worklist.top();
    Worklist.pop();

    if (NodeStatus[FuncId] == VISITED)
      continue;

    if (NodeStatus[FuncId] == VISITING) {
      TopologicalOrder.push_back(Funcs[FuncId]);
      NodeStatus[FuncId] = VISITED;
      continue;
    }

    assert(NodeStatus[FuncId] == NEW);
    NodeStatus[FuncId] = VISITING;
    Worklist.push(FuncId);
    for (const NodeId Callee : successors(FuncId)) {
      if (NodeStatus[Callee] == VISITING || NodeStatus[Callee] == VISITED)
        continue;
      Worklist.push(Callee);
    }
  }

  return TopologicalOrder;
}

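// Typical usage (an illustrative sketch only; the real call sites live in the
// passes that consume the graph, e.g. function reordering):
//
//   BinaryFunctionCallGraph CG = buildCallGraph(
//       BC,
//       /*Filter=*/[](const BinaryFunction &BF) { return !BF.hasValidProfile(); },
//       /*CgFromPerfData=*/true, /*IncludeSplitCalls=*/false,
//       /*UseFunctionHotSize=*/true, /*UseSplitHotSize=*/false,
//       /*UseEdgeCounts=*/false, /*IgnoreRecursiveCalls=*/true);
//   for (BinaryFunction *BF : CG.buildTraversalOrder())
//     ...;
//
// Note that Filter returns true for functions that should be *excluded* from
// the graph.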
BinaryFunctionCallGraph
buildCallGraph(BinaryContext &BC, CgFilterFunction Filter, bool CgFromPerfData,
               bool IncludeSplitCalls, bool UseFunctionHotSize,
               bool UseSplitHotSize, bool UseEdgeCounts,
               bool IgnoreRecursiveCalls) {
  NamedRegionTimer T1("buildcg", "Callgraph construction", "CG breakdown",
                      "CG breakdown", opts::TimeOpts);
  BinaryFunctionCallGraph Cg;
  static constexpr uint64_t COUNT_NO_PROFILE =
      BinaryBasicBlock::COUNT_NO_PROFILE;

  // Compute function size.
  auto functionSize = [&](const BinaryFunction *Function) {
    return UseFunctionHotSize && Function->isSplit()
               ? Function->estimateHotSize(UseSplitHotSize)
               : Function->estimateSize();
  };

  // Add call graph nodes.
  auto lookupNode = [&](BinaryFunction *Function) {
    const CallGraph::NodeId Id = Cg.maybeGetNodeId(Function);
    if (Id == CallGraph::InvalidId) {
      // It's ok to use the hot size here when the function is split.  This is
      // because emitFunctions will emit the hot part first in the order that is
      // computed by ReorderFunctions.  The cold part will be emitted with the
      // rest of the cold functions and code.
      const size_t Size = functionSize(Function);
      // NOTE: for functions without a profile, we set the number of samples
      // to zero.  This will keep these functions from appearing in the hot
      // section.  This is a little weird because we wouldn't be trying to
      // create a node for a function unless it was the target of a call from
      // a hot block.  The alternative would be to set the count to one or
      // accumulate the number of calls from the callsite into the function
      // samples.  Results from performance testing seem to favor the zero
      // count though, so I'm leaving it this way for now.
      return Cg.addNode(Function, Size, Function->getKnownExecutionCount());
    }
    return Id;
  };

  // Add call graph edges.
  uint64_t NotProcessed = 0;
  uint64_t TotalCallsites = 0;
  uint64_t NoProfileCallsites = 0;
  uint64_t NumFallbacks = 0;
  uint64_t RecursiveCallsites = 0;
  for (auto &It : BC.getBinaryFunctions()) {
    BinaryFunction *Function = &It.second;

    if (Filter(*Function))
      continue;

    const CallGraph::NodeId SrcId = lookupNode(Function);
    // Offset of the current basic block from the beginning of the function.
    uint64_t Offset = 0;

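    // Record one call edge from the current function to DestSymbol's function,
    // weighted by Count.  Returns false when no edge is added: the symbol does
    // not resolve to a function, the callee is filtered out, or the call is
    // recursive and IgnoreRecursiveCalls is set.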
    auto recordCall = [&](const MCSymbol *DestSymbol, const uint64_t Count) {
      BinaryFunction *DstFunc =
          DestSymbol ? BC.getFunctionForSymbol(DestSymbol) : nullptr;
      if (!DstFunc) {
        LLVM_DEBUG(if (opts::Verbosity > 1) dbgs()
                   << "BOLT-DEBUG: buildCallGraph: no function for symbol\n");
        return false;
      }

      if (DstFunc == Function) {
        LLVM_DEBUG(dbgs() << "BOLT-INFO: recursive call detected in "
                          << *DstFunc << "\n");
        ++RecursiveCallsites;
        if (IgnoreRecursiveCalls)
          return false;
      }
      if (Filter(*DstFunc)) {
        LLVM_DEBUG(if (opts::Verbosity > 1) dbgs()
                   << "BOLT-DEBUG: buildCallGraph: filtered " << *DstFunc
                   << '\n');
        return false;
      }

      const CallGraph::NodeId DstId = lookupNode(DstFunc);
      const bool IsValidCount = Count != COUNT_NO_PROFILE;
      const uint64_t AdjCount = UseEdgeCounts && IsValidCount ? Count : 1;
      if (!IsValidCount)
        ++NoProfileCallsites;
      Cg.incArcWeight(SrcId, DstId, AdjCount, Offset);
      LLVM_DEBUG(if (opts::Verbosity > 1) {
        dbgs() << "BOLT-DEBUG: buildCallGraph: call " << *Function << " -> "
               << *DstFunc << " @ " << Offset << "\n";
      });
      return true;
    };

    // Pairs of (symbol, count) for each target at this callsite.
    using TargetDesc = std::pair<const MCSymbol *, uint64_t>;
    using CallInfoTy = std::vector<TargetDesc>;

    // Get pairs of (symbol, count) for each target at this callsite.
    // If the call is to an unknown function the symbol will be nullptr.
    // If there is no profiling data the count will be COUNT_NO_PROFILE.
    auto getCallInfo = [&](const BinaryBasicBlock *BB, const MCInst &Inst) {
      CallInfoTy Counts;
      const MCSymbol *DstSym = BC.MIB->getTargetSymbol(Inst);

      // If this is an indirect call use perf data directly.
      if (!DstSym && BC.MIB->hasAnnotation(Inst, "CallProfile")) {
        const auto &ICSP = BC.MIB->getAnnotationAs<IndirectCallSiteProfile>(
            Inst, "CallProfile");
        for (const IndirectCallProfile &CSI : ICSP)
          if (CSI.Symbol)
            Counts.emplace_back(CSI.Symbol, CSI.Count);
      } else {
        const uint64_t Count = BB->getExecutionCount();
        Counts.emplace_back(DstSym, Count);
      }

      return Counts;
    };

    // If the function has an invalid profile, try to use the perf data
    // directly (if requested).  If there is no perf data for this function,
    // fall back to the CFG walker which attempts to handle missing data.
    if (!Function->hasValidProfile() && CgFromPerfData &&
        !Function->getAllCallSites().empty()) {
      LLVM_DEBUG(
          dbgs() << "BOLT-DEBUG: buildCallGraph: Falling back to perf data"
                 << " for " << *Function << "\n");
      ++NumFallbacks;
      const size_t Size = functionSize(Function);
      for (const IndirectCallProfile &CSI : Function->getAllCallSites()) {
        ++TotalCallsites;

        if (!CSI.Symbol)
          continue;

        // The computed offset may exceed the hot part of the function; hence,
        // bound it by the size.
        Offset = CSI.Offset;
        if (Offset > Size)
          Offset = Size;

        if (!recordCall(CSI.Symbol, CSI.Count))
          ++NotProcessed;
      }
    } else {
      for (BinaryBasicBlock *BB : Function->getLayout().blocks()) {
        // Don't count calls from split blocks unless requested.
        if (BB->isSplit() && !IncludeSplitCalls)
          continue;

        // Determine whether the block is included in Function's (hot) size.
        // See BinaryFunction::estimateHotSize.
        bool BBIncludedInFunctionSize = false;
        if (UseFunctionHotSize && Function->isSplit()) {
          if (UseSplitHotSize)
            BBIncludedInFunctionSize = !BB->isSplit();
          else
            BBIncludedInFunctionSize = BB->getKnownExecutionCount() != 0;
        } else {
          BBIncludedInFunctionSize = true;
        }

        for (MCInst &Inst : *BB) {
          // Find call instructions and extract target symbols from each one.
          if (BC.MIB->isCall(Inst)) {
            const CallInfoTy CallInfo = getCallInfo(BB, Inst);

            if (!CallInfo.empty()) {
              for (const TargetDesc &CI : CallInfo) {
                ++TotalCallsites;
                if (!recordCall(CI.first, CI.second))
                  ++NotProcessed;
              }
            } else {
              ++TotalCallsites;
              ++NotProcessed;
            }
          }
          // Increase Offset if needed.
          if (BBIncludedInFunctionSize)
            Offset += BC.computeCodeSize(&Inst, &Inst + 1);
        }
      }
    }
  }

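  // Print a one-line summary of the constructed graph when the "callgraph"
  // debug type is active (e.g. --debug-only=callgraph in asserts builds) or
  // when verbosity is raised above zero.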
#ifndef NDEBUG
  bool PrintInfo = DebugFlag && isCurrentDebugType("callgraph");
#else
  bool PrintInfo = false;
#endif
  if (PrintInfo || opts::Verbosity > 0)
    BC.outs() << format("BOLT-INFO: buildCallGraph: %u nodes, %u callsites "
                        "(%u recursive), density = %.6lf, %u callsites not "
                        "processed, %u callsites with invalid profile, "
                        "used perf data for %u stale functions.\n",
                        Cg.numNodes(), TotalCallsites, RecursiveCallsites,
                        Cg.density(), NotProcessed, NoProfileCallsites,
                        NumFallbacks);

  if (opts::DumpCGDot.getNumOccurrences()) {
    Cg.printDot(opts::DumpCGDot, [&](CallGraph::NodeId Id) {
      return Cg.nodeIdToFunc(Id)->getPrintName();
    });
  }

  return Cg;
}

} // namespace bolt
} // namespace llvm