1*2430a354Sshaw young //===- bolt/Core/BinaryFunctionCallGraph.cpp ----------------------------===//
2*2430a354Sshaw young //
3*2430a354Sshaw young // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*2430a354Sshaw young // See https://llvm.org/LICENSE.txt for license information.
5*2430a354Sshaw young // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*2430a354Sshaw young //
7*2430a354Sshaw young //===----------------------------------------------------------------------===//
8*2430a354Sshaw young //
9*2430a354Sshaw young // This file implements the BinaryFunctionCallGraph class.
10*2430a354Sshaw young //
11*2430a354Sshaw young //===----------------------------------------------------------------------===//
12*2430a354Sshaw young
13*2430a354Sshaw young #include "bolt/Core/BinaryFunctionCallGraph.h"
14*2430a354Sshaw young #include "bolt/Core/BinaryContext.h"
15*2430a354Sshaw young #include "bolt/Core/BinaryFunction.h"
16*2430a354Sshaw young #include "llvm/Support/CommandLine.h"
17*2430a354Sshaw young #include "llvm/Support/Timer.h"
18*2430a354Sshaw young #include <stack>
19*2430a354Sshaw young
20*2430a354Sshaw young #define DEBUG_TYPE "callgraph"
21*2430a354Sshaw young
22*2430a354Sshaw young using namespace llvm;
23*2430a354Sshaw young
24*2430a354Sshaw young namespace opts {
25*2430a354Sshaw young
26*2430a354Sshaw young extern cl::opt<bool> TimeOpts;
27*2430a354Sshaw young extern cl::opt<unsigned> Verbosity;
28*2430a354Sshaw young extern cl::OptionCategory BoltCategory;
29*2430a354Sshaw young
30*2430a354Sshaw young static cl::opt<std::string>
31*2430a354Sshaw young DumpCGDot("dump-cg", cl::desc("dump callgraph to the given file"),
32*2430a354Sshaw young cl::cat(BoltCategory));
33*2430a354Sshaw young
34*2430a354Sshaw young } // namespace opts
35*2430a354Sshaw young
36*2430a354Sshaw young namespace llvm {
37*2430a354Sshaw young namespace bolt {
38*2430a354Sshaw young
addNode(BinaryFunction * BF,uint32_t Size,uint64_t Samples)39*2430a354Sshaw young CallGraph::NodeId BinaryFunctionCallGraph::addNode(BinaryFunction *BF,
40*2430a354Sshaw young uint32_t Size,
41*2430a354Sshaw young uint64_t Samples) {
42*2430a354Sshaw young NodeId Id = CallGraph::addNode(Size, Samples);
43*2430a354Sshaw young assert(size_t(Id) == Funcs.size());
44*2430a354Sshaw young Funcs.push_back(BF);
45*2430a354Sshaw young FuncToNodeId[BF] = Id;
46*2430a354Sshaw young assert(Funcs[Id] == BF);
47*2430a354Sshaw young return Id;
48*2430a354Sshaw young }
49*2430a354Sshaw young
buildTraversalOrder()50*2430a354Sshaw young std::deque<BinaryFunction *> BinaryFunctionCallGraph::buildTraversalOrder() {
51*2430a354Sshaw young NamedRegionTimer T1("buildcgorder", "Build cg traversal order",
52*2430a354Sshaw young "CG breakdown", "CG breakdown", opts::TimeOpts);
53*2430a354Sshaw young std::deque<BinaryFunction *> TopologicalOrder;
54*2430a354Sshaw young enum NodeStatus { NEW, VISITING, VISITED };
55*2430a354Sshaw young std::vector<NodeStatus> NodeStatus(Funcs.size());
56*2430a354Sshaw young std::stack<NodeId> Worklist;
57*2430a354Sshaw young
58*2430a354Sshaw young for (BinaryFunction *Func : Funcs) {
59*2430a354Sshaw young auto It = FuncToNodeId.find(Func);
60*2430a354Sshaw young assert(It != FuncToNodeId.end());
61*2430a354Sshaw young const NodeId Id = It->second;
62*2430a354Sshaw young Worklist.push(Id);
63*2430a354Sshaw young NodeStatus[Id] = NEW;
64*2430a354Sshaw young }
65*2430a354Sshaw young
66*2430a354Sshaw young while (!Worklist.empty()) {
67*2430a354Sshaw young const NodeId FuncId = Worklist.top();
68*2430a354Sshaw young Worklist.pop();
69*2430a354Sshaw young
70*2430a354Sshaw young if (NodeStatus[FuncId] == VISITED)
71*2430a354Sshaw young continue;
72*2430a354Sshaw young
73*2430a354Sshaw young if (NodeStatus[FuncId] == VISITING) {
74*2430a354Sshaw young TopologicalOrder.push_back(Funcs[FuncId]);
75*2430a354Sshaw young NodeStatus[FuncId] = VISITED;
76*2430a354Sshaw young continue;
77*2430a354Sshaw young }
78*2430a354Sshaw young
79*2430a354Sshaw young assert(NodeStatus[FuncId] == NEW);
80*2430a354Sshaw young NodeStatus[FuncId] = VISITING;
81*2430a354Sshaw young Worklist.push(FuncId);
82*2430a354Sshaw young for (const NodeId Callee : successors(FuncId)) {
83*2430a354Sshaw young if (NodeStatus[Callee] == VISITING || NodeStatus[Callee] == VISITED)
84*2430a354Sshaw young continue;
85*2430a354Sshaw young Worklist.push(Callee);
86*2430a354Sshaw young }
87*2430a354Sshaw young }
88*2430a354Sshaw young
89*2430a354Sshaw young return TopologicalOrder;
90*2430a354Sshaw young }
91*2430a354Sshaw young
92*2430a354Sshaw young BinaryFunctionCallGraph
buildCallGraph(BinaryContext & BC,CgFilterFunction Filter,bool CgFromPerfData,bool IncludeSplitCalls,bool UseFunctionHotSize,bool UseSplitHotSize,bool UseEdgeCounts,bool IgnoreRecursiveCalls)93*2430a354Sshaw young buildCallGraph(BinaryContext &BC, CgFilterFunction Filter, bool CgFromPerfData,
94*2430a354Sshaw young bool IncludeSplitCalls, bool UseFunctionHotSize,
95*2430a354Sshaw young bool UseSplitHotSize, bool UseEdgeCounts,
96*2430a354Sshaw young bool IgnoreRecursiveCalls) {
97*2430a354Sshaw young NamedRegionTimer T1("buildcg", "Callgraph construction", "CG breakdown",
98*2430a354Sshaw young "CG breakdown", opts::TimeOpts);
99*2430a354Sshaw young BinaryFunctionCallGraph Cg;
100*2430a354Sshaw young static constexpr uint64_t COUNT_NO_PROFILE =
101*2430a354Sshaw young BinaryBasicBlock::COUNT_NO_PROFILE;
102*2430a354Sshaw young
103*2430a354Sshaw young // Compute function size
104*2430a354Sshaw young auto functionSize = [&](const BinaryFunction *Function) {
105*2430a354Sshaw young return UseFunctionHotSize && Function->isSplit()
106*2430a354Sshaw young ? Function->estimateHotSize(UseSplitHotSize)
107*2430a354Sshaw young : Function->estimateSize();
108*2430a354Sshaw young };
109*2430a354Sshaw young
110*2430a354Sshaw young // Add call graph nodes.
111*2430a354Sshaw young auto lookupNode = [&](BinaryFunction *Function) {
112*2430a354Sshaw young const CallGraph::NodeId Id = Cg.maybeGetNodeId(Function);
113*2430a354Sshaw young if (Id == CallGraph::InvalidId) {
114*2430a354Sshaw young // It's ok to use the hot size here when the function is split. This is
115*2430a354Sshaw young // because emitFunctions will emit the hot part first in the order that is
116*2430a354Sshaw young // computed by ReorderFunctions. The cold part will be emitted with the
117*2430a354Sshaw young // rest of the cold functions and code.
118*2430a354Sshaw young const size_t Size = functionSize(Function);
119*2430a354Sshaw young // NOTE: for functions without a profile, we set the number of samples
120*2430a354Sshaw young // to zero. This will keep these functions from appearing in the hot
121*2430a354Sshaw young // section. This is a little weird because we wouldn't be trying to
122*2430a354Sshaw young // create a node for a function unless it was the target of a call from
123*2430a354Sshaw young // a hot block. The alternative would be to set the count to one or
124*2430a354Sshaw young // accumulate the number of calls from the callsite into the function
125*2430a354Sshaw young // samples. Results from perfomance testing seem to favor the zero
126*2430a354Sshaw young // count though, so I'm leaving it this way for now.
127*2430a354Sshaw young return Cg.addNode(Function, Size, Function->getKnownExecutionCount());
128*2430a354Sshaw young }
129*2430a354Sshaw young return Id;
130*2430a354Sshaw young };
131*2430a354Sshaw young
132*2430a354Sshaw young // Add call graph edges.
133*2430a354Sshaw young uint64_t NotProcessed = 0;
134*2430a354Sshaw young uint64_t TotalCallsites = 0;
135*2430a354Sshaw young uint64_t NoProfileCallsites = 0;
136*2430a354Sshaw young uint64_t NumFallbacks = 0;
137*2430a354Sshaw young uint64_t RecursiveCallsites = 0;
138*2430a354Sshaw young for (auto &It : BC.getBinaryFunctions()) {
139*2430a354Sshaw young BinaryFunction *Function = &It.second;
140*2430a354Sshaw young
141*2430a354Sshaw young if (Filter(*Function))
142*2430a354Sshaw young continue;
143*2430a354Sshaw young
144*2430a354Sshaw young const CallGraph::NodeId SrcId = lookupNode(Function);
145*2430a354Sshaw young // Offset of the current basic block from the beginning of the function
146*2430a354Sshaw young uint64_t Offset = 0;
147*2430a354Sshaw young
148*2430a354Sshaw young auto recordCall = [&](const MCSymbol *DestSymbol, const uint64_t Count) {
149*2430a354Sshaw young BinaryFunction *DstFunc =
150*2430a354Sshaw young DestSymbol ? BC.getFunctionForSymbol(DestSymbol) : nullptr;
151*2430a354Sshaw young if (!DstFunc) {
152*2430a354Sshaw young LLVM_DEBUG(if (opts::Verbosity > 1) dbgs()
153*2430a354Sshaw young << "BOLT-DEBUG: buildCallGraph: no function for symbol\n");
154*2430a354Sshaw young return false;
155*2430a354Sshaw young }
156*2430a354Sshaw young
157*2430a354Sshaw young if (DstFunc == Function) {
158*2430a354Sshaw young LLVM_DEBUG(dbgs() << "BOLT-INFO: recursive call detected in "
159*2430a354Sshaw young << *DstFunc << "\n");
160*2430a354Sshaw young ++RecursiveCallsites;
161*2430a354Sshaw young if (IgnoreRecursiveCalls)
162*2430a354Sshaw young return false;
163*2430a354Sshaw young }
164*2430a354Sshaw young if (Filter(*DstFunc)) {
165*2430a354Sshaw young LLVM_DEBUG(if (opts::Verbosity > 1) dbgs()
166*2430a354Sshaw young << "BOLT-DEBUG: buildCallGraph: filtered " << *DstFunc
167*2430a354Sshaw young << '\n');
168*2430a354Sshaw young return false;
169*2430a354Sshaw young }
170*2430a354Sshaw young
171*2430a354Sshaw young const CallGraph::NodeId DstId = lookupNode(DstFunc);
172*2430a354Sshaw young const bool IsValidCount = Count != COUNT_NO_PROFILE;
173*2430a354Sshaw young const uint64_t AdjCount = UseEdgeCounts && IsValidCount ? Count : 1;
174*2430a354Sshaw young if (!IsValidCount)
175*2430a354Sshaw young ++NoProfileCallsites;
176*2430a354Sshaw young Cg.incArcWeight(SrcId, DstId, AdjCount, Offset);
177*2430a354Sshaw young LLVM_DEBUG(if (opts::Verbosity > 1) {
178*2430a354Sshaw young dbgs() << "BOLT-DEBUG: buildCallGraph: call " << *Function << " -> "
179*2430a354Sshaw young << *DstFunc << " @ " << Offset << "\n";
180*2430a354Sshaw young });
181*2430a354Sshaw young return true;
182*2430a354Sshaw young };
183*2430a354Sshaw young
184*2430a354Sshaw young // Pairs of (symbol, count) for each target at this callsite.
185*2430a354Sshaw young using TargetDesc = std::pair<const MCSymbol *, uint64_t>;
186*2430a354Sshaw young using CallInfoTy = std::vector<TargetDesc>;
187*2430a354Sshaw young
188*2430a354Sshaw young // Get pairs of (symbol, count) for each target at this callsite.
189*2430a354Sshaw young // If the call is to an unknown function the symbol will be nullptr.
190*2430a354Sshaw young // If there is no profiling data the count will be COUNT_NO_PROFILE.
191*2430a354Sshaw young auto getCallInfo = [&](const BinaryBasicBlock *BB, const MCInst &Inst) {
192*2430a354Sshaw young CallInfoTy Counts;
193*2430a354Sshaw young const MCSymbol *DstSym = BC.MIB->getTargetSymbol(Inst);
194*2430a354Sshaw young
195*2430a354Sshaw young // If this is an indirect call use perf data directly.
196*2430a354Sshaw young if (!DstSym && BC.MIB->hasAnnotation(Inst, "CallProfile")) {
197*2430a354Sshaw young const auto &ICSP = BC.MIB->getAnnotationAs<IndirectCallSiteProfile>(
198*2430a354Sshaw young Inst, "CallProfile");
199*2430a354Sshaw young for (const IndirectCallProfile &CSI : ICSP)
200*2430a354Sshaw young if (CSI.Symbol)
201*2430a354Sshaw young Counts.emplace_back(CSI.Symbol, CSI.Count);
202*2430a354Sshaw young } else {
203*2430a354Sshaw young const uint64_t Count = BB->getExecutionCount();
204*2430a354Sshaw young Counts.emplace_back(DstSym, Count);
205*2430a354Sshaw young }
206*2430a354Sshaw young
207*2430a354Sshaw young return Counts;
208*2430a354Sshaw young };
209*2430a354Sshaw young
210*2430a354Sshaw young // If the function has an invalid profile, try to use the perf data
211*2430a354Sshaw young // directly (if requested). If there is no perf data for this function,
212*2430a354Sshaw young // fall back to the CFG walker which attempts to handle missing data.
213*2430a354Sshaw young if (!Function->hasValidProfile() && CgFromPerfData &&
214*2430a354Sshaw young !Function->getAllCallSites().empty()) {
215*2430a354Sshaw young LLVM_DEBUG(
216*2430a354Sshaw young dbgs() << "BOLT-DEBUG: buildCallGraph: Falling back to perf data"
217*2430a354Sshaw young << " for " << *Function << "\n");
218*2430a354Sshaw young ++NumFallbacks;
219*2430a354Sshaw young const size_t Size = functionSize(Function);
220*2430a354Sshaw young for (const IndirectCallProfile &CSI : Function->getAllCallSites()) {
221*2430a354Sshaw young ++TotalCallsites;
222*2430a354Sshaw young
223*2430a354Sshaw young if (!CSI.Symbol)
224*2430a354Sshaw young continue;
225*2430a354Sshaw young
226*2430a354Sshaw young // The computed offset may exceed the hot part of the function; hence,
227*2430a354Sshaw young // bound it by the size.
228*2430a354Sshaw young Offset = CSI.Offset;
229*2430a354Sshaw young if (Offset > Size)
230*2430a354Sshaw young Offset = Size;
231*2430a354Sshaw young
232*2430a354Sshaw young if (!recordCall(CSI.Symbol, CSI.Count))
233*2430a354Sshaw young ++NotProcessed;
234*2430a354Sshaw young }
235*2430a354Sshaw young } else {
236*2430a354Sshaw young for (BinaryBasicBlock *BB : Function->getLayout().blocks()) {
237*2430a354Sshaw young // Don't count calls from split blocks unless requested.
238*2430a354Sshaw young if (BB->isSplit() && !IncludeSplitCalls)
239*2430a354Sshaw young continue;
240*2430a354Sshaw young
241*2430a354Sshaw young // Determine whether the block is included in Function's (hot) size
242*2430a354Sshaw young // See BinaryFunction::estimateHotSize
243*2430a354Sshaw young bool BBIncludedInFunctionSize = false;
244*2430a354Sshaw young if (UseFunctionHotSize && Function->isSplit()) {
245*2430a354Sshaw young if (UseSplitHotSize)
246*2430a354Sshaw young BBIncludedInFunctionSize = !BB->isSplit();
247*2430a354Sshaw young else
248*2430a354Sshaw young BBIncludedInFunctionSize = BB->getKnownExecutionCount() != 0;
249*2430a354Sshaw young } else {
250*2430a354Sshaw young BBIncludedInFunctionSize = true;
251*2430a354Sshaw young }
252*2430a354Sshaw young
253*2430a354Sshaw young for (MCInst &Inst : *BB) {
254*2430a354Sshaw young // Find call instructions and extract target symbols from each one.
255*2430a354Sshaw young if (BC.MIB->isCall(Inst)) {
256*2430a354Sshaw young const CallInfoTy CallInfo = getCallInfo(BB, Inst);
257*2430a354Sshaw young
258*2430a354Sshaw young if (!CallInfo.empty()) {
259*2430a354Sshaw young for (const TargetDesc &CI : CallInfo) {
260*2430a354Sshaw young ++TotalCallsites;
261*2430a354Sshaw young if (!recordCall(CI.first, CI.second))
262*2430a354Sshaw young ++NotProcessed;
263*2430a354Sshaw young }
264*2430a354Sshaw young } else {
265*2430a354Sshaw young ++TotalCallsites;
266*2430a354Sshaw young ++NotProcessed;
267*2430a354Sshaw young }
268*2430a354Sshaw young }
269*2430a354Sshaw young // Increase Offset if needed
270*2430a354Sshaw young if (BBIncludedInFunctionSize)
271*2430a354Sshaw young Offset += BC.computeCodeSize(&Inst, &Inst + 1);
272*2430a354Sshaw young }
273*2430a354Sshaw young }
274*2430a354Sshaw young }
275*2430a354Sshaw young }
276*2430a354Sshaw young
277*2430a354Sshaw young #ifndef NDEBUG
278*2430a354Sshaw young bool PrintInfo = DebugFlag && isCurrentDebugType("callgraph");
279*2430a354Sshaw young #else
280*2430a354Sshaw young bool PrintInfo = false;
281*2430a354Sshaw young #endif
282*2430a354Sshaw young if (PrintInfo || opts::Verbosity > 0)
283*2430a354Sshaw young BC.outs() << format("BOLT-INFO: buildCallGraph: %u nodes, %u callsites "
284*2430a354Sshaw young "(%u recursive), density = %.6lf, %u callsites not "
285*2430a354Sshaw young "processed, %u callsites with invalid profile, "
286*2430a354Sshaw young "used perf data for %u stale functions.\n",
287*2430a354Sshaw young Cg.numNodes(), TotalCallsites, RecursiveCallsites,
288*2430a354Sshaw young Cg.density(), NotProcessed, NoProfileCallsites,
289*2430a354Sshaw young NumFallbacks);
290*2430a354Sshaw young
291*2430a354Sshaw young if (opts::DumpCGDot.getNumOccurrences()) {
292*2430a354Sshaw young Cg.printDot(opts::DumpCGDot, [&](CallGraph::NodeId Id) {
293*2430a354Sshaw young return Cg.nodeIdToFunc(Id)->getPrintName();
294*2430a354Sshaw young });
295*2430a354Sshaw young }
296*2430a354Sshaw young
297*2430a354Sshaw young return Cg;
298*2430a354Sshaw young }
299*2430a354Sshaw young
300*2430a354Sshaw young } // namespace bolt
301*2430a354Sshaw young } // namespace llvm
302