xref: /freebsd-src/contrib/llvm-project/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp (revision 06c3fb2749bda94cb5201f81ffdb8fa6c3161b2e)
1*06c3fb27SDimitry Andric //==-- MemProfContextDisambiguation.cpp - Disambiguate contexts -------------=//
2*06c3fb27SDimitry Andric //
3*06c3fb27SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*06c3fb27SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5*06c3fb27SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*06c3fb27SDimitry Andric //
7*06c3fb27SDimitry Andric //===----------------------------------------------------------------------===//
8*06c3fb27SDimitry Andric //
9*06c3fb27SDimitry Andric // This file implements support for context disambiguation of allocation
10*06c3fb27SDimitry Andric // calls for profile guided heap optimization. Specifically, it uses Memprof
11*06c3fb27SDimitry Andric // profiles which indicate context specific allocation behavior (currently
12*06c3fb27SDimitry Andric // distinguishing cold vs hot memory allocations). Cloning is performed to
13*06c3fb27SDimitry Andric // expose the cold allocation call contexts, and the allocation calls are
14*06c3fb27SDimitry Andric // subsequently annotated with an attribute for later transformation.
15*06c3fb27SDimitry Andric //
16*06c3fb27SDimitry Andric // The transformations can be performed either directly on IR (regular LTO), or
17*06c3fb27SDimitry Andric // on a ThinLTO index (and later applied to the IR during the ThinLTO backend).
18*06c3fb27SDimitry Andric // Both types of LTO operate on the same base graph representation, which
19*06c3fb27SDimitry Andric // uses CRTP to support either IR or Index formats.
20*06c3fb27SDimitry Andric //
21*06c3fb27SDimitry Andric //===----------------------------------------------------------------------===//
22*06c3fb27SDimitry Andric 
23*06c3fb27SDimitry Andric #include "llvm/Transforms/IPO/MemProfContextDisambiguation.h"
24*06c3fb27SDimitry Andric #include "llvm/ADT/DenseMap.h"
25*06c3fb27SDimitry Andric #include "llvm/ADT/DenseSet.h"
26*06c3fb27SDimitry Andric #include "llvm/ADT/MapVector.h"
27*06c3fb27SDimitry Andric #include "llvm/ADT/SetOperations.h"
28*06c3fb27SDimitry Andric #include "llvm/ADT/SmallPtrSet.h"
29*06c3fb27SDimitry Andric #include "llvm/ADT/SmallSet.h"
30*06c3fb27SDimitry Andric #include "llvm/ADT/SmallVector.h"
31*06c3fb27SDimitry Andric #include "llvm/ADT/Statistic.h"
32*06c3fb27SDimitry Andric #include "llvm/Analysis/MemoryProfileInfo.h"
33*06c3fb27SDimitry Andric #include "llvm/Analysis/ModuleSummaryAnalysis.h"
34*06c3fb27SDimitry Andric #include "llvm/Analysis/OptimizationRemarkEmitter.h"
35*06c3fb27SDimitry Andric #include "llvm/Bitcode/BitcodeReader.h"
36*06c3fb27SDimitry Andric #include "llvm/IR/Constants.h"
37*06c3fb27SDimitry Andric #include "llvm/IR/Instructions.h"
38*06c3fb27SDimitry Andric #include "llvm/IR/Module.h"
39*06c3fb27SDimitry Andric #include "llvm/IR/ModuleSummaryIndex.h"
40*06c3fb27SDimitry Andric #include "llvm/Pass.h"
41*06c3fb27SDimitry Andric #include "llvm/Support/CommandLine.h"
42*06c3fb27SDimitry Andric #include "llvm/Support/FileSystem.h"
43*06c3fb27SDimitry Andric #include "llvm/Support/GraphWriter.h"
44*06c3fb27SDimitry Andric #include "llvm/Support/raw_ostream.h"
45*06c3fb27SDimitry Andric #include "llvm/Transforms/IPO.h"
46*06c3fb27SDimitry Andric #include "llvm/Transforms/Utils/Cloning.h"
47*06c3fb27SDimitry Andric #include <sstream>
48*06c3fb27SDimitry Andric #include <vector>
49*06c3fb27SDimitry Andric using namespace llvm;
50*06c3fb27SDimitry Andric using namespace llvm::memprof;
51*06c3fb27SDimitry Andric 
52*06c3fb27SDimitry Andric #define DEBUG_TYPE "memprof-context-disambiguation"
53*06c3fb27SDimitry Andric 
54*06c3fb27SDimitry Andric STATISTIC(FunctionClonesAnalysis,
55*06c3fb27SDimitry Andric           "Number of function clones created during whole program analysis");
56*06c3fb27SDimitry Andric STATISTIC(FunctionClonesThinBackend,
57*06c3fb27SDimitry Andric           "Number of function clones created during ThinLTO backend");
58*06c3fb27SDimitry Andric STATISTIC(FunctionsClonedThinBackend,
59*06c3fb27SDimitry Andric           "Number of functions that had clones created during ThinLTO backend");
60*06c3fb27SDimitry Andric STATISTIC(AllocTypeNotCold, "Number of not cold static allocations (possibly "
61*06c3fb27SDimitry Andric                             "cloned) during whole program analysis");
62*06c3fb27SDimitry Andric STATISTIC(AllocTypeCold, "Number of cold static allocations (possibly cloned) "
63*06c3fb27SDimitry Andric                          "during whole program analysis");
64*06c3fb27SDimitry Andric STATISTIC(AllocTypeNotColdThinBackend,
65*06c3fb27SDimitry Andric           "Number of not cold static allocations (possibly cloned) during "
66*06c3fb27SDimitry Andric           "ThinLTO backend");
67*06c3fb27SDimitry Andric STATISTIC(AllocTypeColdThinBackend, "Number of cold static allocations "
68*06c3fb27SDimitry Andric                                     "(possibly cloned) during ThinLTO backend");
69*06c3fb27SDimitry Andric STATISTIC(OrigAllocsThinBackend,
70*06c3fb27SDimitry Andric           "Number of original (not cloned) allocations with memprof profiles "
71*06c3fb27SDimitry Andric           "during ThinLTO backend");
72*06c3fb27SDimitry Andric STATISTIC(
73*06c3fb27SDimitry Andric     AllocVersionsThinBackend,
74*06c3fb27SDimitry Andric     "Number of allocation versions (including clones) during ThinLTO backend");
75*06c3fb27SDimitry Andric STATISTIC(MaxAllocVersionsThinBackend,
76*06c3fb27SDimitry Andric           "Maximum number of allocation versions created for an original "
77*06c3fb27SDimitry Andric           "allocation during ThinLTO backend");
78*06c3fb27SDimitry Andric STATISTIC(UnclonableAllocsThinBackend,
79*06c3fb27SDimitry Andric           "Number of unclonable ambiguous allocations during ThinLTO backend");
80*06c3fb27SDimitry Andric 
// Command-line options (all hidden) controlling dot-file export, CCG
// dumping/verification, and testing support for this pass.
81*06c3fb27SDimitry Andric static cl::opt<std::string> DotFilePathPrefix(
82*06c3fb27SDimitry Andric     "memprof-dot-file-path-prefix", cl::init(""), cl::Hidden,
83*06c3fb27SDimitry Andric     cl::value_desc("filename"),
84*06c3fb27SDimitry Andric     cl::desc("Specify the path prefix of the MemProf dot files."));
85*06c3fb27SDimitry Andric 
86*06c3fb27SDimitry Andric static cl::opt<bool> ExportToDot("memprof-export-to-dot", cl::init(false),
87*06c3fb27SDimitry Andric                                  cl::Hidden,
88*06c3fb27SDimitry Andric                                  cl::desc("Export graph to dot files."));
89*06c3fb27SDimitry Andric 
90*06c3fb27SDimitry Andric static cl::opt<bool>
91*06c3fb27SDimitry Andric     DumpCCG("memprof-dump-ccg", cl::init(false), cl::Hidden,
92*06c3fb27SDimitry Andric             cl::desc("Dump CallingContextGraph to stdout after each stage."));
93*06c3fb27SDimitry Andric 
94*06c3fb27SDimitry Andric static cl::opt<bool>
95*06c3fb27SDimitry Andric     VerifyCCG("memprof-verify-ccg", cl::init(false), cl::Hidden,
96*06c3fb27SDimitry Andric               cl::desc("Perform verification checks on CallingContextGraph."));
97*06c3fb27SDimitry Andric 
98*06c3fb27SDimitry Andric static cl::opt<bool>
99*06c3fb27SDimitry Andric     VerifyNodes("memprof-verify-nodes", cl::init(false), cl::Hidden,
100*06c3fb27SDimitry Andric                 cl::desc("Perform frequent verification checks on nodes."));
101*06c3fb27SDimitry Andric 
102*06c3fb27SDimitry Andric static cl::opt<std::string> MemProfImportSummary(
103*06c3fb27SDimitry Andric     "memprof-import-summary",
104*06c3fb27SDimitry Andric     cl::desc("Import summary to use for testing the ThinLTO backend via opt"),
105*06c3fb27SDimitry Andric     cl::Hidden);
106*06c3fb27SDimitry Andric 
107*06c3fb27SDimitry Andric // Indicate we are linking with an allocator that supports hot/cold operator
108*06c3fb27SDimitry Andric // new interfaces.
109*06c3fb27SDimitry Andric cl::opt<bool> SupportsHotColdNew(
110*06c3fb27SDimitry Andric     "supports-hot-cold-new", cl::init(false), cl::Hidden,
111*06c3fb27SDimitry Andric     cl::desc("Linking with hot/cold operator new interfaces"));
112*06c3fb27SDimitry Andric 
113*06c3fb27SDimitry Andric namespace {
114*06c3fb27SDimitry Andric /// CRTP base for graphs built from either IR or ThinLTO summary index.
115*06c3fb27SDimitry Andric ///
116*06c3fb27SDimitry Andric /// The graph represents the call contexts in all memprof metadata on allocation
117*06c3fb27SDimitry Andric /// calls, with nodes for the allocations themselves, as well as for the calls
118*06c3fb27SDimitry Andric /// in each context. The graph is initially built from the allocation memprof
119*06c3fb27SDimitry Andric /// metadata (or summary) MIBs. It is then updated to match calls with callsite
120*06c3fb27SDimitry Andric /// metadata onto the nodes, updating it to reflect any inlining performed on
121*06c3fb27SDimitry Andric /// those calls.
122*06c3fb27SDimitry Andric ///
123*06c3fb27SDimitry Andric /// Each MIB (representing an allocation's call context with allocation
124*06c3fb27SDimitry Andric /// behavior) is assigned a unique context id during the graph build. The edges
125*06c3fb27SDimitry Andric /// and nodes in the graph are decorated with the context ids they carry. This
126*06c3fb27SDimitry Andric /// is used to correctly update the graph when cloning is performed so that we
127*06c3fb27SDimitry Andric /// can uniquify the context for a single (possibly cloned) allocation.
128*06c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
129*06c3fb27SDimitry Andric class CallsiteContextGraph {
130*06c3fb27SDimitry Andric public:
131*06c3fb27SDimitry Andric   CallsiteContextGraph() = default;
132*06c3fb27SDimitry Andric   CallsiteContextGraph(const CallsiteContextGraph &) = default;
133*06c3fb27SDimitry Andric   CallsiteContextGraph(CallsiteContextGraph &&) = default;
134*06c3fb27SDimitry Andric 
135*06c3fb27SDimitry Andric   /// Main entry point to perform analysis and transformations on graph.
136*06c3fb27SDimitry Andric   bool process();
137*06c3fb27SDimitry Andric 
138*06c3fb27SDimitry Andric   /// Perform cloning on the graph necessary to uniquely identify the allocation
139*06c3fb27SDimitry Andric   /// behavior of an allocation based on its context.
140*06c3fb27SDimitry Andric   void identifyClones();
141*06c3fb27SDimitry Andric 
142*06c3fb27SDimitry Andric   /// Assign callsite clones to functions, cloning functions as needed to
143*06c3fb27SDimitry Andric   /// accommodate the combinations of their callsite clones reached by callers.
144*06c3fb27SDimitry Andric   /// For regular LTO this clones functions and callsites in the IR, but for
145*06c3fb27SDimitry Andric   /// ThinLTO the cloning decisions are noted in the summaries and later applied
146*06c3fb27SDimitry Andric   /// in applyImport.
147*06c3fb27SDimitry Andric   bool assignFunctions();
148*06c3fb27SDimitry Andric 
149*06c3fb27SDimitry Andric   void dump() const;
150*06c3fb27SDimitry Andric   void print(raw_ostream &OS) const;
151*06c3fb27SDimitry Andric 
152*06c3fb27SDimitry Andric   friend raw_ostream &operator<<(raw_ostream &OS,
153*06c3fb27SDimitry Andric                                  const CallsiteContextGraph &CCG) {
154*06c3fb27SDimitry Andric     CCG.print(OS);
155*06c3fb27SDimitry Andric     return OS;
156*06c3fb27SDimitry Andric   }
157*06c3fb27SDimitry Andric 
158*06c3fb27SDimitry Andric   friend struct GraphTraits<
159*06c3fb27SDimitry Andric       const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>;
160*06c3fb27SDimitry Andric   friend struct DOTGraphTraits<
161*06c3fb27SDimitry Andric       const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>;
162*06c3fb27SDimitry Andric 
163*06c3fb27SDimitry Andric   void exportToDot(std::string Label) const;
164*06c3fb27SDimitry Andric 
165*06c3fb27SDimitry Andric   /// Represents a function clone via FuncTy pointer and clone number pair (clone number 0 denotes the original function).
166*06c3fb27SDimitry Andric   struct FuncInfo final
167*06c3fb27SDimitry Andric       : public std::pair<FuncTy *, unsigned /*Clone number*/> {
168*06c3fb27SDimitry Andric     using Base = std::pair<FuncTy *, unsigned>;
169*06c3fb27SDimitry Andric     FuncInfo(const Base &B) : Base(B) {} // Implicit conversion from the underlying pair.
170*06c3fb27SDimitry Andric     FuncInfo(FuncTy *F = nullptr, unsigned CloneNo = 0) : Base(F, CloneNo) {}
171*06c3fb27SDimitry Andric     explicit operator bool() const { return this->first != nullptr; } // True iff a function is set.
172*06c3fb27SDimitry Andric     FuncTy *func() const { return this->first; }
173*06c3fb27SDimitry Andric     unsigned cloneNo() const { return this->second; }
174*06c3fb27SDimitry Andric   };
175*06c3fb27SDimitry Andric 
176*06c3fb27SDimitry Andric   /// Represents a callsite clone via CallTy and clone number pair (clone number 0 denotes the original callsite).
177*06c3fb27SDimitry Andric   struct CallInfo final : public std::pair<CallTy, unsigned /*Clone number*/> {
178*06c3fb27SDimitry Andric     using Base = std::pair<CallTy, unsigned>;
179*06c3fb27SDimitry Andric     CallInfo(const Base &B) : Base(B) {} // Implicit conversion from the underlying pair.
180*06c3fb27SDimitry Andric     CallInfo(CallTy Call = nullptr, unsigned CloneNo = 0)
181*06c3fb27SDimitry Andric         : Base(Call, CloneNo) {}
182*06c3fb27SDimitry Andric     explicit operator bool() const { return (bool)this->first; } // True iff a call is set.
183*06c3fb27SDimitry Andric     CallTy call() const { return this->first; }
184*06c3fb27SDimitry Andric     unsigned cloneNo() const { return this->second; }
185*06c3fb27SDimitry Andric     void setCloneNo(unsigned N) { this->second = N; }
186*06c3fb27SDimitry Andric     void print(raw_ostream &OS) const {
187*06c3fb27SDimitry Andric       if (!operator bool()) {
188*06c3fb27SDimitry Andric         assert(!cloneNo()); // A null call should never carry a clone number.
189*06c3fb27SDimitry Andric         OS << "null Call";
190*06c3fb27SDimitry Andric         return;
191*06c3fb27SDimitry Andric       }
192*06c3fb27SDimitry Andric       call()->print(OS);
193*06c3fb27SDimitry Andric       OS << "\t(clone " << cloneNo() << ")";
194*06c3fb27SDimitry Andric     }
195*06c3fb27SDimitry Andric     void dump() const {
196*06c3fb27SDimitry Andric       print(dbgs());
197*06c3fb27SDimitry Andric       dbgs() << "\n";
198*06c3fb27SDimitry Andric     }
199*06c3fb27SDimitry Andric     friend raw_ostream &operator<<(raw_ostream &OS, const CallInfo &Call) {
200*06c3fb27SDimitry Andric       Call.print(OS);
201*06c3fb27SDimitry Andric       return OS;
202*06c3fb27SDimitry Andric     }
203*06c3fb27SDimitry Andric   };
204*06c3fb27SDimitry Andric 
205*06c3fb27SDimitry Andric   struct ContextEdge;
206*06c3fb27SDimitry Andric 
207*06c3fb27SDimitry Andric   /// Node in the Callsite Context Graph
208*06c3fb27SDimitry Andric   struct ContextNode {
209*06c3fb27SDimitry Andric     // Keep this for now since in the IR case where we have an Instruction* it
210*06c3fb27SDimitry Andric     // is not as immediately discoverable. Used for printing richer information
211*06c3fb27SDimitry Andric     // when dumping graph.
212*06c3fb27SDimitry Andric     bool IsAllocation;
213*06c3fb27SDimitry Andric 
214*06c3fb27SDimitry Andric     // Keeps track of when the Call was reset to null because there was
215*06c3fb27SDimitry Andric     // recursion.
216*06c3fb27SDimitry Andric     bool Recursive = false;
217*06c3fb27SDimitry Andric 
218*06c3fb27SDimitry Andric     // The corresponding allocation or interior call.
219*06c3fb27SDimitry Andric     CallInfo Call;
220*06c3fb27SDimitry Andric 
221*06c3fb27SDimitry Andric     // For alloc nodes this is a unique id assigned when constructed, and for
222*06c3fb27SDimitry Andric     // callsite stack nodes it is the original stack id when the node is
223*06c3fb27SDimitry Andric     // constructed from the memprof MIB metadata on the alloc nodes. Note that
224*06c3fb27SDimitry Andric     // this is only used when matching callsite metadata onto the stack nodes
225*06c3fb27SDimitry Andric     // created when processing the allocation memprof MIBs, and for labeling
226*06c3fb27SDimitry Andric     // nodes in the dot graph. Therefore we don't bother to assign a value for
227*06c3fb27SDimitry Andric     // clones.
228*06c3fb27SDimitry Andric     uint64_t OrigStackOrAllocId = 0;
229*06c3fb27SDimitry Andric 
230*06c3fb27SDimitry Andric     // This will be formed by ORing together the AllocationType enum values
231*06c3fb27SDimitry Andric     // for contexts including this node.
232*06c3fb27SDimitry Andric     uint8_t AllocTypes = 0;
233*06c3fb27SDimitry Andric 
234*06c3fb27SDimitry Andric     // Edges to all callees in the profiled call stacks.
235*06c3fb27SDimitry Andric     // TODO: Should this be a map (from Callee node) for more efficient lookup?
236*06c3fb27SDimitry Andric     std::vector<std::shared_ptr<ContextEdge>> CalleeEdges;
237*06c3fb27SDimitry Andric 
238*06c3fb27SDimitry Andric     // Edges to all callers in the profiled call stacks.
239*06c3fb27SDimitry Andric     // TODO: Should this be a map (from Caller node) for more efficient lookup?
240*06c3fb27SDimitry Andric     std::vector<std::shared_ptr<ContextEdge>> CallerEdges;
241*06c3fb27SDimitry Andric 
242*06c3fb27SDimitry Andric     // The set of IDs for contexts including this node.
243*06c3fb27SDimitry Andric     DenseSet<uint32_t> ContextIds;
244*06c3fb27SDimitry Andric 
245*06c3fb27SDimitry Andric     // List of clones of this ContextNode, initially empty.
246*06c3fb27SDimitry Andric     std::vector<ContextNode *> Clones;
247*06c3fb27SDimitry Andric 
248*06c3fb27SDimitry Andric     // If a clone, points to the original uncloned node.
249*06c3fb27SDimitry Andric     ContextNode *CloneOf = nullptr;
250*06c3fb27SDimitry Andric 
251*06c3fb27SDimitry Andric     ContextNode(bool IsAllocation) : IsAllocation(IsAllocation), Call() {}
252*06c3fb27SDimitry Andric 
253*06c3fb27SDimitry Andric     ContextNode(bool IsAllocation, CallInfo C)
254*06c3fb27SDimitry Andric         : IsAllocation(IsAllocation), Call(C) {}
255*06c3fb27SDimitry Andric 
256*06c3fb27SDimitry Andric     void addClone(ContextNode *Clone) { // Clones are always recorded on the original node: a clone-of-a-clone is chained through CloneOf to the original's Clones list.
257*06c3fb27SDimitry Andric       if (CloneOf) {
258*06c3fb27SDimitry Andric         CloneOf->Clones.push_back(Clone);
259*06c3fb27SDimitry Andric         Clone->CloneOf = CloneOf;
260*06c3fb27SDimitry Andric       } else {
261*06c3fb27SDimitry Andric         Clones.push_back(Clone);
262*06c3fb27SDimitry Andric         assert(!Clone->CloneOf);
263*06c3fb27SDimitry Andric         Clone->CloneOf = this;
264*06c3fb27SDimitry Andric       }
265*06c3fb27SDimitry Andric     }
266*06c3fb27SDimitry Andric 
267*06c3fb27SDimitry Andric     ContextNode *getOrigNode() { // Resolve a clone back to its original (uncloned) node; identity for non-clones.
268*06c3fb27SDimitry Andric       if (!CloneOf)
269*06c3fb27SDimitry Andric         return this;
270*06c3fb27SDimitry Andric       return CloneOf;
271*06c3fb27SDimitry Andric     }
272*06c3fb27SDimitry Andric 
273*06c3fb27SDimitry Andric     void addOrUpdateCallerEdge(ContextNode *Caller, AllocationType AllocType,
274*06c3fb27SDimitry Andric                                unsigned int ContextId);
275*06c3fb27SDimitry Andric 
276*06c3fb27SDimitry Andric     ContextEdge *findEdgeFromCallee(const ContextNode *Callee);
277*06c3fb27SDimitry Andric     ContextEdge *findEdgeFromCaller(const ContextNode *Caller);
278*06c3fb27SDimitry Andric     void eraseCalleeEdge(const ContextEdge *Edge);
279*06c3fb27SDimitry Andric     void eraseCallerEdge(const ContextEdge *Edge);
280*06c3fb27SDimitry Andric 
281*06c3fb27SDimitry Andric     void setCall(CallInfo C) { Call = C; }
282*06c3fb27SDimitry Andric 
283*06c3fb27SDimitry Andric     bool hasCall() const { return (bool)Call.call(); }
284*06c3fb27SDimitry Andric 
285*06c3fb27SDimitry Andric     void printCall(raw_ostream &OS) const { Call.print(OS); }
286*06c3fb27SDimitry Andric 
287*06c3fb27SDimitry Andric     // True if this node was effectively removed from the graph, in which case
288*06c3fb27SDimitry Andric     // its context id set, caller edges, and callee edges should all be empty.
289*06c3fb27SDimitry Andric     bool isRemoved() const {
290*06c3fb27SDimitry Andric       assert(ContextIds.empty() ==
291*06c3fb27SDimitry Andric              (CalleeEdges.empty() && CallerEdges.empty()));
292*06c3fb27SDimitry Andric       return ContextIds.empty();
293*06c3fb27SDimitry Andric     }
294*06c3fb27SDimitry Andric 
295*06c3fb27SDimitry Andric     void dump() const;
296*06c3fb27SDimitry Andric     void print(raw_ostream &OS) const;
297*06c3fb27SDimitry Andric 
298*06c3fb27SDimitry Andric     friend raw_ostream &operator<<(raw_ostream &OS, const ContextNode &Node) {
299*06c3fb27SDimitry Andric       Node.print(OS);
300*06c3fb27SDimitry Andric       return OS;
301*06c3fb27SDimitry Andric     }
302*06c3fb27SDimitry Andric   };
303*06c3fb27SDimitry Andric 
304*06c3fb27SDimitry Andric   /// Edge in the Callsite Context Graph from a ContextNode N to a caller or
305*06c3fb27SDimitry Andric   /// callee.
306*06c3fb27SDimitry Andric   struct ContextEdge {
307*06c3fb27SDimitry Andric     ContextNode *Callee;
308*06c3fb27SDimitry Andric     ContextNode *Caller;
309*06c3fb27SDimitry Andric 
310*06c3fb27SDimitry Andric     // This will be formed by ORing together the AllocationType enum values
311*06c3fb27SDimitry Andric     // for contexts including this edge.
312*06c3fb27SDimitry Andric     uint8_t AllocTypes = 0;
313*06c3fb27SDimitry Andric 
314*06c3fb27SDimitry Andric     // The set of IDs for contexts including this edge.
315*06c3fb27SDimitry Andric     DenseSet<uint32_t> ContextIds;
316*06c3fb27SDimitry Andric 
317*06c3fb27SDimitry Andric     ContextEdge(ContextNode *Callee, ContextNode *Caller, uint8_t AllocType,
318*06c3fb27SDimitry Andric                 DenseSet<uint32_t> ContextIds)
319*06c3fb27SDimitry Andric         : Callee(Callee), Caller(Caller), AllocTypes(AllocType),
320*06c3fb27SDimitry Andric           ContextIds(ContextIds) {} // NOTE(review): the by-value ContextIds parameter is copied again here; std::move would avoid duplicating a potentially large DenseSet.
321*06c3fb27SDimitry Andric 
322*06c3fb27SDimitry Andric     DenseSet<uint32_t> &getContextIds() { return ContextIds; }
323*06c3fb27SDimitry Andric 
324*06c3fb27SDimitry Andric     void dump() const;
325*06c3fb27SDimitry Andric     void print(raw_ostream &OS) const;
326*06c3fb27SDimitry Andric 
327*06c3fb27SDimitry Andric     friend raw_ostream &operator<<(raw_ostream &OS, const ContextEdge &Edge) {
328*06c3fb27SDimitry Andric       Edge.print(OS);
329*06c3fb27SDimitry Andric       return OS;
330*06c3fb27SDimitry Andric     }
331*06c3fb27SDimitry Andric   };
332*06c3fb27SDimitry Andric 
333*06c3fb27SDimitry Andric   /// Helper to remove callee edges that have allocation type None (due to not
334*06c3fb27SDimitry Andric   /// carrying any context ids) after transformations.
335*06c3fb27SDimitry Andric   void removeNoneTypeCalleeEdges(ContextNode *Node);
336*06c3fb27SDimitry Andric 
337*06c3fb27SDimitry Andric protected:
338*06c3fb27SDimitry Andric   /// Get a list of nodes corresponding to the stack ids in the given callsite
339*06c3fb27SDimitry Andric   /// context.
340*06c3fb27SDimitry Andric   template <class NodeT, class IteratorT>
341*06c3fb27SDimitry Andric   std::vector<uint64_t>
342*06c3fb27SDimitry Andric   getStackIdsWithContextNodes(CallStack<NodeT, IteratorT> &CallsiteContext);
343*06c3fb27SDimitry Andric 
344*06c3fb27SDimitry Andric   /// Adds nodes for the given allocation and any stack ids on its memprof MIB
345*06c3fb27SDimitry Andric   /// metadata (or summary).
346*06c3fb27SDimitry Andric   ContextNode *addAllocNode(CallInfo Call, const FuncTy *F);
347*06c3fb27SDimitry Andric 
348*06c3fb27SDimitry Andric   /// Adds nodes for the given MIB stack ids.
349*06c3fb27SDimitry Andric   template <class NodeT, class IteratorT>
350*06c3fb27SDimitry Andric   void addStackNodesForMIB(ContextNode *AllocNode,
351*06c3fb27SDimitry Andric                            CallStack<NodeT, IteratorT> &StackContext,
352*06c3fb27SDimitry Andric                            CallStack<NodeT, IteratorT> &CallsiteContext,
353*06c3fb27SDimitry Andric                            AllocationType AllocType);
354*06c3fb27SDimitry Andric 
355*06c3fb27SDimitry Andric   /// Matches all callsite metadata (or summary) to the nodes created for
356*06c3fb27SDimitry Andric   /// allocation memprof MIB metadata, synthesizing new nodes to reflect any
357*06c3fb27SDimitry Andric   /// inlining performed on those callsite instructions.
358*06c3fb27SDimitry Andric   void updateStackNodes();
359*06c3fb27SDimitry Andric 
360*06c3fb27SDimitry Andric   /// Update graph to conservatively handle any callsite stack nodes that target
361*06c3fb27SDimitry Andric   /// multiple different callee target functions.
362*06c3fb27SDimitry Andric   void handleCallsitesWithMultipleTargets();
363*06c3fb27SDimitry Andric 
364*06c3fb27SDimitry Andric   /// Save lists of calls with MemProf metadata in each function, for faster
365*06c3fb27SDimitry Andric   /// iteration.
366*06c3fb27SDimitry Andric   std::vector<std::pair<FuncTy *, std::vector<CallInfo>>>
367*06c3fb27SDimitry Andric       FuncToCallsWithMetadata;
368*06c3fb27SDimitry Andric 
369*06c3fb27SDimitry Andric   /// Map from callsite node to the enclosing caller function.
370*06c3fb27SDimitry Andric   std::map<const ContextNode *, const FuncTy *> NodeToCallingFunc;
371*06c3fb27SDimitry Andric 
372*06c3fb27SDimitry Andric private:
373*06c3fb27SDimitry Andric   using EdgeIter = typename std::vector<std::shared_ptr<ContextEdge>>::iterator;
374*06c3fb27SDimitry Andric 
375*06c3fb27SDimitry Andric   using CallContextInfo = std::tuple<CallTy, std::vector<uint64_t>,
376*06c3fb27SDimitry Andric                                      const FuncTy *, DenseSet<uint32_t>>;
377*06c3fb27SDimitry Andric 
378*06c3fb27SDimitry Andric   /// Assigns the given Node to calls at or inlined into the location with
379*06c3fb27SDimitry Andric   /// the Node's stack id, after post order traversing and processing its
380*06c3fb27SDimitry Andric   /// caller nodes. Uses the call information recorded in the given
381*06c3fb27SDimitry Andric   /// StackIdToMatchingCalls map, and creates new nodes for inlined sequences
382*06c3fb27SDimitry Andric   /// as needed. Called by updateStackNodes which sets up the given
383*06c3fb27SDimitry Andric   /// StackIdToMatchingCalls map.
384*06c3fb27SDimitry Andric   void assignStackNodesPostOrder(
385*06c3fb27SDimitry Andric       ContextNode *Node, DenseSet<const ContextNode *> &Visited,
386*06c3fb27SDimitry Andric       DenseMap<uint64_t, std::vector<CallContextInfo>> &StackIdToMatchingCalls);
387*06c3fb27SDimitry Andric 
388*06c3fb27SDimitry Andric   /// Duplicates the given set of context ids, updating the provided
389*06c3fb27SDimitry Andric   /// map from each original id with the newly generated context ids,
390*06c3fb27SDimitry Andric   /// and returning the new duplicated id set.
391*06c3fb27SDimitry Andric   DenseSet<uint32_t> duplicateContextIds(
392*06c3fb27SDimitry Andric       const DenseSet<uint32_t> &StackSequenceContextIds,
393*06c3fb27SDimitry Andric       DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds);
394*06c3fb27SDimitry Andric 
395*06c3fb27SDimitry Andric   /// Propagates all duplicated context ids across the graph.
396*06c3fb27SDimitry Andric   void propagateDuplicateContextIds(
397*06c3fb27SDimitry Andric       const DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds);
398*06c3fb27SDimitry Andric 
399*06c3fb27SDimitry Andric   /// Connect the NewNode to OrigNode's callees if TowardsCallee is true,
400*06c3fb27SDimitry Andric   /// else to its callers. Also updates OrigNode's edges to remove any context
401*06c3fb27SDimitry Andric   /// ids moved to the newly created edge.
402*06c3fb27SDimitry Andric   void connectNewNode(ContextNode *NewNode, ContextNode *OrigNode,
403*06c3fb27SDimitry Andric                       bool TowardsCallee);
404*06c3fb27SDimitry Andric 
405*06c3fb27SDimitry Andric   /// Get the stack id corresponding to the given Id or Index (for IR this will
406*06c3fb27SDimitry Andric   /// return itself, for a summary index this will return the id recorded in the
407*06c3fb27SDimitry Andric   /// index for that stack id index value).
408*06c3fb27SDimitry Andric   uint64_t getStackId(uint64_t IdOrIndex) const {
409*06c3fb27SDimitry Andric     return static_cast<const DerivedCCG *>(this)->getStackId(IdOrIndex);
410*06c3fb27SDimitry Andric   }
411*06c3fb27SDimitry Andric 
412*06c3fb27SDimitry Andric   /// Returns true if the given call targets the given function.
413*06c3fb27SDimitry Andric   bool calleeMatchesFunc(CallTy Call, const FuncTy *Func) {
414*06c3fb27SDimitry Andric     return static_cast<DerivedCCG *>(this)->calleeMatchesFunc(Call, Func);
415*06c3fb27SDimitry Andric   }
416*06c3fb27SDimitry Andric 
417*06c3fb27SDimitry Andric   /// Get a list of nodes corresponding to the stack ids in the given
418*06c3fb27SDimitry Andric   /// callsite's context.
419*06c3fb27SDimitry Andric   std::vector<uint64_t> getStackIdsWithContextNodesForCall(CallTy Call) {
420*06c3fb27SDimitry Andric     return static_cast<DerivedCCG *>(this)->getStackIdsWithContextNodesForCall(
421*06c3fb27SDimitry Andric         Call);
422*06c3fb27SDimitry Andric   }
423*06c3fb27SDimitry Andric 
424*06c3fb27SDimitry Andric   /// Get the last stack id in the context for callsite.
425*06c3fb27SDimitry Andric   uint64_t getLastStackId(CallTy Call) {
426*06c3fb27SDimitry Andric     return static_cast<DerivedCCG *>(this)->getLastStackId(Call);
427*06c3fb27SDimitry Andric   }
428*06c3fb27SDimitry Andric 
429*06c3fb27SDimitry Andric   /// Update the allocation call to record type of allocated memory.
430*06c3fb27SDimitry Andric   void updateAllocationCall(CallInfo &Call, AllocationType AllocType) {
431*06c3fb27SDimitry Andric     AllocType == AllocationType::Cold ? AllocTypeCold++ : AllocTypeNotCold++;
432*06c3fb27SDimitry Andric     static_cast<DerivedCCG *>(this)->updateAllocationCall(Call, AllocType);
433*06c3fb27SDimitry Andric   }
434*06c3fb27SDimitry Andric 
435*06c3fb27SDimitry Andric   /// Update non-allocation call to invoke (possibly cloned) function
436*06c3fb27SDimitry Andric   /// CalleeFunc.
437*06c3fb27SDimitry Andric   void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc) {
438*06c3fb27SDimitry Andric     static_cast<DerivedCCG *>(this)->updateCall(CallerCall, CalleeFunc);
439*06c3fb27SDimitry Andric   }
440*06c3fb27SDimitry Andric 
441*06c3fb27SDimitry Andric   /// Clone the given function for the given callsite, recording mapping of all
442*06c3fb27SDimitry Andric   /// of the functions tracked calls to their new versions in the CallMap.
443*06c3fb27SDimitry Andric   /// Assigns new clones to clone number CloneNo.
444*06c3fb27SDimitry Andric   FuncInfo cloneFunctionForCallsite(
445*06c3fb27SDimitry Andric       FuncInfo &Func, CallInfo &Call, std::map<CallInfo, CallInfo> &CallMap,
446*06c3fb27SDimitry Andric       std::vector<CallInfo> &CallsWithMetadataInFunc, unsigned CloneNo) {
447*06c3fb27SDimitry Andric     return static_cast<DerivedCCG *>(this)->cloneFunctionForCallsite(
448*06c3fb27SDimitry Andric         Func, Call, CallMap, CallsWithMetadataInFunc, CloneNo);
449*06c3fb27SDimitry Andric   }
450*06c3fb27SDimitry Andric 
451*06c3fb27SDimitry Andric   /// Gets a label to use in the dot graph for the given call clone in the given
452*06c3fb27SDimitry Andric   /// function.
453*06c3fb27SDimitry Andric   std::string getLabel(const FuncTy *Func, const CallTy Call,
454*06c3fb27SDimitry Andric                        unsigned CloneNo) const {
455*06c3fb27SDimitry Andric     return static_cast<const DerivedCCG *>(this)->getLabel(Func, Call, CloneNo);
456*06c3fb27SDimitry Andric   }
457*06c3fb27SDimitry Andric 
458*06c3fb27SDimitry Andric   /// Helpers to find the node corresponding to the given call or stackid.
459*06c3fb27SDimitry Andric   ContextNode *getNodeForInst(const CallInfo &C);
460*06c3fb27SDimitry Andric   ContextNode *getNodeForAlloc(const CallInfo &C);
461*06c3fb27SDimitry Andric   ContextNode *getNodeForStackId(uint64_t StackId);
462*06c3fb27SDimitry Andric 
463*06c3fb27SDimitry Andric   /// Removes the node information recorded for the given call.
464*06c3fb27SDimitry Andric   void unsetNodeForInst(const CallInfo &C);
465*06c3fb27SDimitry Andric 
466*06c3fb27SDimitry Andric   /// Computes the alloc type corresponding to the given context ids, by
467*06c3fb27SDimitry Andric   /// unioning their recorded alloc types.
468*06c3fb27SDimitry Andric   uint8_t computeAllocType(DenseSet<uint32_t> &ContextIds);
469*06c3fb27SDimitry Andric 
470*06c3fb27SDimitry Andric   /// Returns the alloction type of the intersection of the contexts of two
471*06c3fb27SDimitry Andric   /// nodes (based on their provided context id sets), optimized for the case
472*06c3fb27SDimitry Andric   /// when Node1Ids is smaller than Node2Ids.
473*06c3fb27SDimitry Andric   uint8_t intersectAllocTypesImpl(const DenseSet<uint32_t> &Node1Ids,
474*06c3fb27SDimitry Andric                                   const DenseSet<uint32_t> &Node2Ids);
475*06c3fb27SDimitry Andric 
476*06c3fb27SDimitry Andric   /// Returns the alloction type of the intersection of the contexts of two
477*06c3fb27SDimitry Andric   /// nodes (based on their provided context id sets).
478*06c3fb27SDimitry Andric   uint8_t intersectAllocTypes(const DenseSet<uint32_t> &Node1Ids,
479*06c3fb27SDimitry Andric                               const DenseSet<uint32_t> &Node2Ids);
480*06c3fb27SDimitry Andric 
481*06c3fb27SDimitry Andric   /// Create a clone of Edge's callee and move Edge to that new callee node,
482*06c3fb27SDimitry Andric   /// performing the necessary context id and allocation type updates.
483*06c3fb27SDimitry Andric   /// If callee's caller edge iterator is supplied, it is updated when removing
484*06c3fb27SDimitry Andric   /// the edge from that list.
485*06c3fb27SDimitry Andric   ContextNode *
486*06c3fb27SDimitry Andric   moveEdgeToNewCalleeClone(const std::shared_ptr<ContextEdge> &Edge,
487*06c3fb27SDimitry Andric                            EdgeIter *CallerEdgeI = nullptr);
488*06c3fb27SDimitry Andric 
489*06c3fb27SDimitry Andric   /// Change the callee of Edge to existing callee clone NewCallee, performing
490*06c3fb27SDimitry Andric   /// the necessary context id and allocation type updates.
491*06c3fb27SDimitry Andric   /// If callee's caller edge iterator is supplied, it is updated when removing
492*06c3fb27SDimitry Andric   /// the edge from that list.
493*06c3fb27SDimitry Andric   void moveEdgeToExistingCalleeClone(const std::shared_ptr<ContextEdge> &Edge,
494*06c3fb27SDimitry Andric                                      ContextNode *NewCallee,
495*06c3fb27SDimitry Andric                                      EdgeIter *CallerEdgeI = nullptr,
496*06c3fb27SDimitry Andric                                      bool NewClone = false);
497*06c3fb27SDimitry Andric 
498*06c3fb27SDimitry Andric   /// Recursively perform cloning on the graph for the given Node and its
499*06c3fb27SDimitry Andric   /// callers, in order to uniquely identify the allocation behavior of an
500*06c3fb27SDimitry Andric   /// allocation given its context.
501*06c3fb27SDimitry Andric   void identifyClones(ContextNode *Node,
502*06c3fb27SDimitry Andric                       DenseSet<const ContextNode *> &Visited);
503*06c3fb27SDimitry Andric 
504*06c3fb27SDimitry Andric   /// Map from each context ID to the AllocationType assigned to that context.
505*06c3fb27SDimitry Andric   std::map<uint32_t, AllocationType> ContextIdToAllocationType;
506*06c3fb27SDimitry Andric 
507*06c3fb27SDimitry Andric   /// Identifies the context node created for a stack id when adding the MIB
508*06c3fb27SDimitry Andric   /// contexts to the graph. This is used to locate the context nodes when
509*06c3fb27SDimitry Andric   /// trying to assign the corresponding callsites with those stack ids to these
510*06c3fb27SDimitry Andric   /// nodes.
511*06c3fb27SDimitry Andric   std::map<uint64_t, ContextNode *> StackEntryIdToContextNodeMap;
512*06c3fb27SDimitry Andric 
513*06c3fb27SDimitry Andric   /// Maps to track the calls to their corresponding nodes in the graph.
514*06c3fb27SDimitry Andric   MapVector<CallInfo, ContextNode *> AllocationCallToContextNodeMap;
515*06c3fb27SDimitry Andric   MapVector<CallInfo, ContextNode *> NonAllocationCallToContextNodeMap;
516*06c3fb27SDimitry Andric 
517*06c3fb27SDimitry Andric   /// Owner of all ContextNode unique_ptrs.
518*06c3fb27SDimitry Andric   std::vector<std::unique_ptr<ContextNode>> NodeOwner;
519*06c3fb27SDimitry Andric 
520*06c3fb27SDimitry Andric   /// Perform sanity checks on graph when requested.
521*06c3fb27SDimitry Andric   void check() const;
522*06c3fb27SDimitry Andric 
523*06c3fb27SDimitry Andric   /// Keeps track of the last unique context id assigned.
524*06c3fb27SDimitry Andric   unsigned int LastContextId = 0;
525*06c3fb27SDimitry Andric };
526*06c3fb27SDimitry Andric 
527*06c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
528*06c3fb27SDimitry Andric using ContextNode =
529*06c3fb27SDimitry Andric     typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode;
530*06c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
531*06c3fb27SDimitry Andric using ContextEdge =
532*06c3fb27SDimitry Andric     typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge;
533*06c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
534*06c3fb27SDimitry Andric using FuncInfo =
535*06c3fb27SDimitry Andric     typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::FuncInfo;
536*06c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
537*06c3fb27SDimitry Andric using CallInfo =
538*06c3fb27SDimitry Andric     typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::CallInfo;
539*06c3fb27SDimitry Andric 
540*06c3fb27SDimitry Andric /// CRTP derived class for graphs built from IR (regular LTO).
541*06c3fb27SDimitry Andric class ModuleCallsiteContextGraph
542*06c3fb27SDimitry Andric     : public CallsiteContextGraph<ModuleCallsiteContextGraph, Function,
543*06c3fb27SDimitry Andric                                   Instruction *> {
544*06c3fb27SDimitry Andric public:
545*06c3fb27SDimitry Andric   ModuleCallsiteContextGraph(
546*06c3fb27SDimitry Andric       Module &M,
547*06c3fb27SDimitry Andric       function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter);
548*06c3fb27SDimitry Andric 
549*06c3fb27SDimitry Andric private:
550*06c3fb27SDimitry Andric   friend CallsiteContextGraph<ModuleCallsiteContextGraph, Function,
551*06c3fb27SDimitry Andric                               Instruction *>;
552*06c3fb27SDimitry Andric 
553*06c3fb27SDimitry Andric   uint64_t getStackId(uint64_t IdOrIndex) const;
554*06c3fb27SDimitry Andric   bool calleeMatchesFunc(Instruction *Call, const Function *Func);
555*06c3fb27SDimitry Andric   uint64_t getLastStackId(Instruction *Call);
556*06c3fb27SDimitry Andric   std::vector<uint64_t> getStackIdsWithContextNodesForCall(Instruction *Call);
557*06c3fb27SDimitry Andric   void updateAllocationCall(CallInfo &Call, AllocationType AllocType);
558*06c3fb27SDimitry Andric   void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc);
559*06c3fb27SDimitry Andric   CallsiteContextGraph<ModuleCallsiteContextGraph, Function,
560*06c3fb27SDimitry Andric                        Instruction *>::FuncInfo
561*06c3fb27SDimitry Andric   cloneFunctionForCallsite(FuncInfo &Func, CallInfo &Call,
562*06c3fb27SDimitry Andric                            std::map<CallInfo, CallInfo> &CallMap,
563*06c3fb27SDimitry Andric                            std::vector<CallInfo> &CallsWithMetadataInFunc,
564*06c3fb27SDimitry Andric                            unsigned CloneNo);
565*06c3fb27SDimitry Andric   std::string getLabel(const Function *Func, const Instruction *Call,
566*06c3fb27SDimitry Andric                        unsigned CloneNo) const;
567*06c3fb27SDimitry Andric 
568*06c3fb27SDimitry Andric   const Module &Mod;
569*06c3fb27SDimitry Andric   function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter;
570*06c3fb27SDimitry Andric };
571*06c3fb27SDimitry Andric 
572*06c3fb27SDimitry Andric /// Represents a call in the summary index graph, which can either be an
573*06c3fb27SDimitry Andric /// allocation or an interior callsite node in an allocation's context.
574*06c3fb27SDimitry Andric /// Holds a pointer to the corresponding data structure in the index.
575*06c3fb27SDimitry Andric struct IndexCall : public PointerUnion<CallsiteInfo *, AllocInfo *> {
576*06c3fb27SDimitry Andric   IndexCall() : PointerUnion() {}
577*06c3fb27SDimitry Andric   IndexCall(std::nullptr_t) : IndexCall() {}
578*06c3fb27SDimitry Andric   IndexCall(CallsiteInfo *StackNode) : PointerUnion(StackNode) {}
579*06c3fb27SDimitry Andric   IndexCall(AllocInfo *AllocNode) : PointerUnion(AllocNode) {}
580*06c3fb27SDimitry Andric   IndexCall(PointerUnion PT) : PointerUnion(PT) {}
581*06c3fb27SDimitry Andric 
582*06c3fb27SDimitry Andric   IndexCall *operator->() { return this; }
583*06c3fb27SDimitry Andric 
584*06c3fb27SDimitry Andric   PointerUnion<CallsiteInfo *, AllocInfo *> getBase() const { return *this; }
585*06c3fb27SDimitry Andric 
586*06c3fb27SDimitry Andric   void print(raw_ostream &OS) const {
587*06c3fb27SDimitry Andric     if (auto *AI = llvm::dyn_cast_if_present<AllocInfo *>(getBase())) {
588*06c3fb27SDimitry Andric       OS << *AI;
589*06c3fb27SDimitry Andric     } else {
590*06c3fb27SDimitry Andric       auto *CI = llvm::dyn_cast_if_present<CallsiteInfo *>(getBase());
591*06c3fb27SDimitry Andric       assert(CI);
592*06c3fb27SDimitry Andric       OS << *CI;
593*06c3fb27SDimitry Andric     }
594*06c3fb27SDimitry Andric   }
595*06c3fb27SDimitry Andric };
596*06c3fb27SDimitry Andric 
597*06c3fb27SDimitry Andric /// CRTP derived class for graphs built from summary index (ThinLTO).
598*06c3fb27SDimitry Andric class IndexCallsiteContextGraph
599*06c3fb27SDimitry Andric     : public CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
600*06c3fb27SDimitry Andric                                   IndexCall> {
601*06c3fb27SDimitry Andric public:
602*06c3fb27SDimitry Andric   IndexCallsiteContextGraph(
603*06c3fb27SDimitry Andric       ModuleSummaryIndex &Index,
604*06c3fb27SDimitry Andric       function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
605*06c3fb27SDimitry Andric           isPrevailing);
606*06c3fb27SDimitry Andric 
607*06c3fb27SDimitry Andric private:
608*06c3fb27SDimitry Andric   friend CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
609*06c3fb27SDimitry Andric                               IndexCall>;
610*06c3fb27SDimitry Andric 
611*06c3fb27SDimitry Andric   uint64_t getStackId(uint64_t IdOrIndex) const;
612*06c3fb27SDimitry Andric   bool calleeMatchesFunc(IndexCall &Call, const FunctionSummary *Func);
613*06c3fb27SDimitry Andric   uint64_t getLastStackId(IndexCall &Call);
614*06c3fb27SDimitry Andric   std::vector<uint64_t> getStackIdsWithContextNodesForCall(IndexCall &Call);
615*06c3fb27SDimitry Andric   void updateAllocationCall(CallInfo &Call, AllocationType AllocType);
616*06c3fb27SDimitry Andric   void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc);
617*06c3fb27SDimitry Andric   CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
618*06c3fb27SDimitry Andric                        IndexCall>::FuncInfo
619*06c3fb27SDimitry Andric   cloneFunctionForCallsite(FuncInfo &Func, CallInfo &Call,
620*06c3fb27SDimitry Andric                            std::map<CallInfo, CallInfo> &CallMap,
621*06c3fb27SDimitry Andric                            std::vector<CallInfo> &CallsWithMetadataInFunc,
622*06c3fb27SDimitry Andric                            unsigned CloneNo);
623*06c3fb27SDimitry Andric   std::string getLabel(const FunctionSummary *Func, const IndexCall &Call,
624*06c3fb27SDimitry Andric                        unsigned CloneNo) const;
625*06c3fb27SDimitry Andric 
626*06c3fb27SDimitry Andric   // Saves mapping from function summaries containing memprof records back to
627*06c3fb27SDimitry Andric   // its VI, for use in checking and debugging.
628*06c3fb27SDimitry Andric   std::map<const FunctionSummary *, ValueInfo> FSToVIMap;
629*06c3fb27SDimitry Andric 
630*06c3fb27SDimitry Andric   const ModuleSummaryIndex &Index;
631*06c3fb27SDimitry Andric };
632*06c3fb27SDimitry Andric } // namespace
633*06c3fb27SDimitry Andric 
634*06c3fb27SDimitry Andric namespace llvm {
635*06c3fb27SDimitry Andric template <>
636*06c3fb27SDimitry Andric struct DenseMapInfo<typename CallsiteContextGraph<
637*06c3fb27SDimitry Andric     ModuleCallsiteContextGraph, Function, Instruction *>::CallInfo>
638*06c3fb27SDimitry Andric     : public DenseMapInfo<std::pair<Instruction *, unsigned>> {};
639*06c3fb27SDimitry Andric template <>
640*06c3fb27SDimitry Andric struct DenseMapInfo<typename CallsiteContextGraph<
641*06c3fb27SDimitry Andric     IndexCallsiteContextGraph, FunctionSummary, IndexCall>::CallInfo>
642*06c3fb27SDimitry Andric     : public DenseMapInfo<std::pair<IndexCall, unsigned>> {};
643*06c3fb27SDimitry Andric template <>
644*06c3fb27SDimitry Andric struct DenseMapInfo<IndexCall>
645*06c3fb27SDimitry Andric     : public DenseMapInfo<PointerUnion<CallsiteInfo *, AllocInfo *>> {};
646*06c3fb27SDimitry Andric } // end namespace llvm
647*06c3fb27SDimitry Andric 
648*06c3fb27SDimitry Andric namespace {
649*06c3fb27SDimitry Andric 
650*06c3fb27SDimitry Andric struct FieldSeparator {
651*06c3fb27SDimitry Andric   bool Skip = true;
652*06c3fb27SDimitry Andric   const char *Sep;
653*06c3fb27SDimitry Andric 
654*06c3fb27SDimitry Andric   FieldSeparator(const char *Sep = ", ") : Sep(Sep) {}
655*06c3fb27SDimitry Andric };
656*06c3fb27SDimitry Andric 
657*06c3fb27SDimitry Andric raw_ostream &operator<<(raw_ostream &OS, FieldSeparator &FS) {
658*06c3fb27SDimitry Andric   if (FS.Skip) {
659*06c3fb27SDimitry Andric     FS.Skip = false;
660*06c3fb27SDimitry Andric     return OS;
661*06c3fb27SDimitry Andric   }
662*06c3fb27SDimitry Andric   return OS << FS.Sep;
663*06c3fb27SDimitry Andric }
664*06c3fb27SDimitry Andric 
665*06c3fb27SDimitry Andric // Map the uint8_t alloc types (which may contain NotCold|Cold) to the alloc
666*06c3fb27SDimitry Andric // type we should actually use on the corresponding allocation.
667*06c3fb27SDimitry Andric // If we can't clone a node that has NotCold+Cold alloc type, we will fall
668*06c3fb27SDimitry Andric // back to using NotCold. So don't bother cloning to distinguish NotCold+Cold
669*06c3fb27SDimitry Andric // from NotCold.
670*06c3fb27SDimitry Andric AllocationType allocTypeToUse(uint8_t AllocTypes) {
671*06c3fb27SDimitry Andric   assert(AllocTypes != (uint8_t)AllocationType::None);
672*06c3fb27SDimitry Andric   if (AllocTypes ==
673*06c3fb27SDimitry Andric       ((uint8_t)AllocationType::NotCold | (uint8_t)AllocationType::Cold))
674*06c3fb27SDimitry Andric     return AllocationType::NotCold;
675*06c3fb27SDimitry Andric   else
676*06c3fb27SDimitry Andric     return (AllocationType)AllocTypes;
677*06c3fb27SDimitry Andric }
678*06c3fb27SDimitry Andric 
679*06c3fb27SDimitry Andric // Helper to check if the alloc types for all edges recorded in the
680*06c3fb27SDimitry Andric // InAllocTypes vector match the alloc types for all edges in the Edges
681*06c3fb27SDimitry Andric // vector.
682*06c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
683*06c3fb27SDimitry Andric bool allocTypesMatch(
684*06c3fb27SDimitry Andric     const std::vector<uint8_t> &InAllocTypes,
685*06c3fb27SDimitry Andric     const std::vector<std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>>
686*06c3fb27SDimitry Andric         &Edges) {
687*06c3fb27SDimitry Andric   return std::equal(
688*06c3fb27SDimitry Andric       InAllocTypes.begin(), InAllocTypes.end(), Edges.begin(),
689*06c3fb27SDimitry Andric       [](const uint8_t &l,
690*06c3fb27SDimitry Andric          const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &r) {
691*06c3fb27SDimitry Andric         // Can share if one of the edges is None type - don't
692*06c3fb27SDimitry Andric         // care about the type along that edge as it doesn't
693*06c3fb27SDimitry Andric         // exist for those context ids.
694*06c3fb27SDimitry Andric         if (l == (uint8_t)AllocationType::None ||
695*06c3fb27SDimitry Andric             r->AllocTypes == (uint8_t)AllocationType::None)
696*06c3fb27SDimitry Andric           return true;
697*06c3fb27SDimitry Andric         return allocTypeToUse(l) == allocTypeToUse(r->AllocTypes);
698*06c3fb27SDimitry Andric       });
699*06c3fb27SDimitry Andric }
700*06c3fb27SDimitry Andric 
701*06c3fb27SDimitry Andric } // end anonymous namespace
702*06c3fb27SDimitry Andric 
703*06c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
704*06c3fb27SDimitry Andric typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
705*06c3fb27SDimitry Andric CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForInst(
706*06c3fb27SDimitry Andric     const CallInfo &C) {
707*06c3fb27SDimitry Andric   ContextNode *Node = getNodeForAlloc(C);
708*06c3fb27SDimitry Andric   if (Node)
709*06c3fb27SDimitry Andric     return Node;
710*06c3fb27SDimitry Andric 
711*06c3fb27SDimitry Andric   return NonAllocationCallToContextNodeMap.lookup(C);
712*06c3fb27SDimitry Andric }
713*06c3fb27SDimitry Andric 
714*06c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
715*06c3fb27SDimitry Andric typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
716*06c3fb27SDimitry Andric CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForAlloc(
717*06c3fb27SDimitry Andric     const CallInfo &C) {
718*06c3fb27SDimitry Andric   return AllocationCallToContextNodeMap.lookup(C);
719*06c3fb27SDimitry Andric }
720*06c3fb27SDimitry Andric 
721*06c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
722*06c3fb27SDimitry Andric typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
723*06c3fb27SDimitry Andric CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForStackId(
724*06c3fb27SDimitry Andric     uint64_t StackId) {
725*06c3fb27SDimitry Andric   auto StackEntryNode = StackEntryIdToContextNodeMap.find(StackId);
726*06c3fb27SDimitry Andric   if (StackEntryNode != StackEntryIdToContextNodeMap.end())
727*06c3fb27SDimitry Andric     return StackEntryNode->second;
728*06c3fb27SDimitry Andric   return nullptr;
729*06c3fb27SDimitry Andric }
730*06c3fb27SDimitry Andric 
731*06c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
732*06c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::unsetNodeForInst(
733*06c3fb27SDimitry Andric     const CallInfo &C) {
734*06c3fb27SDimitry Andric   AllocationCallToContextNodeMap.erase(C) ||
735*06c3fb27SDimitry Andric       NonAllocationCallToContextNodeMap.erase(C);
736*06c3fb27SDimitry Andric   assert(!AllocationCallToContextNodeMap.count(C) &&
737*06c3fb27SDimitry Andric          !NonAllocationCallToContextNodeMap.count(C));
738*06c3fb27SDimitry Andric }
739*06c3fb27SDimitry Andric 
740*06c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
741*06c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
742*06c3fb27SDimitry Andric     addOrUpdateCallerEdge(ContextNode *Caller, AllocationType AllocType,
743*06c3fb27SDimitry Andric                           unsigned int ContextId) {
744*06c3fb27SDimitry Andric   for (auto &Edge : CallerEdges) {
745*06c3fb27SDimitry Andric     if (Edge->Caller == Caller) {
746*06c3fb27SDimitry Andric       Edge->AllocTypes |= (uint8_t)AllocType;
747*06c3fb27SDimitry Andric       Edge->getContextIds().insert(ContextId);
748*06c3fb27SDimitry Andric       return;
749*06c3fb27SDimitry Andric     }
750*06c3fb27SDimitry Andric   }
751*06c3fb27SDimitry Andric   std::shared_ptr<ContextEdge> Edge = std::make_shared<ContextEdge>(
752*06c3fb27SDimitry Andric       this, Caller, (uint8_t)AllocType, DenseSet<uint32_t>({ContextId}));
753*06c3fb27SDimitry Andric   CallerEdges.push_back(Edge);
754*06c3fb27SDimitry Andric   Caller->CalleeEdges.push_back(Edge);
755*06c3fb27SDimitry Andric }
756*06c3fb27SDimitry Andric 
757*06c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
758*06c3fb27SDimitry Andric void CallsiteContextGraph<
759*06c3fb27SDimitry Andric     DerivedCCG, FuncTy, CallTy>::removeNoneTypeCalleeEdges(ContextNode *Node) {
760*06c3fb27SDimitry Andric   for (auto EI = Node->CalleeEdges.begin(); EI != Node->CalleeEdges.end();) {
761*06c3fb27SDimitry Andric     auto Edge = *EI;
762*06c3fb27SDimitry Andric     if (Edge->AllocTypes == (uint8_t)AllocationType::None) {
763*06c3fb27SDimitry Andric       assert(Edge->ContextIds.empty());
764*06c3fb27SDimitry Andric       Edge->Callee->eraseCallerEdge(Edge.get());
765*06c3fb27SDimitry Andric       EI = Node->CalleeEdges.erase(EI);
766*06c3fb27SDimitry Andric     } else
767*06c3fb27SDimitry Andric       ++EI;
768*06c3fb27SDimitry Andric   }
769*06c3fb27SDimitry Andric }
770*06c3fb27SDimitry Andric 
771*06c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
772*06c3fb27SDimitry Andric typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge *
773*06c3fb27SDimitry Andric CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
774*06c3fb27SDimitry Andric     findEdgeFromCallee(const ContextNode *Callee) {
775*06c3fb27SDimitry Andric   for (const auto &Edge : CalleeEdges)
776*06c3fb27SDimitry Andric     if (Edge->Callee == Callee)
777*06c3fb27SDimitry Andric       return Edge.get();
778*06c3fb27SDimitry Andric   return nullptr;
779*06c3fb27SDimitry Andric }
780*06c3fb27SDimitry Andric 
781*06c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
782*06c3fb27SDimitry Andric typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge *
783*06c3fb27SDimitry Andric CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
784*06c3fb27SDimitry Andric     findEdgeFromCaller(const ContextNode *Caller) {
785*06c3fb27SDimitry Andric   for (const auto &Edge : CallerEdges)
786*06c3fb27SDimitry Andric     if (Edge->Caller == Caller)
787*06c3fb27SDimitry Andric       return Edge.get();
788*06c3fb27SDimitry Andric   return nullptr;
789*06c3fb27SDimitry Andric }
790*06c3fb27SDimitry Andric 
791*06c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
792*06c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
793*06c3fb27SDimitry Andric     eraseCalleeEdge(const ContextEdge *Edge) {
794*06c3fb27SDimitry Andric   auto EI =
795*06c3fb27SDimitry Andric       std::find_if(CalleeEdges.begin(), CalleeEdges.end(),
796*06c3fb27SDimitry Andric                    [Edge](const std::shared_ptr<ContextEdge> &CalleeEdge) {
797*06c3fb27SDimitry Andric                      return CalleeEdge.get() == Edge;
798*06c3fb27SDimitry Andric                    });
799*06c3fb27SDimitry Andric   assert(EI != CalleeEdges.end());
800*06c3fb27SDimitry Andric   CalleeEdges.erase(EI);
801*06c3fb27SDimitry Andric }
802*06c3fb27SDimitry Andric 
803*06c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
804*06c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
805*06c3fb27SDimitry Andric     eraseCallerEdge(const ContextEdge *Edge) {
806*06c3fb27SDimitry Andric   auto EI =
807*06c3fb27SDimitry Andric       std::find_if(CallerEdges.begin(), CallerEdges.end(),
808*06c3fb27SDimitry Andric                    [Edge](const std::shared_ptr<ContextEdge> &CallerEdge) {
809*06c3fb27SDimitry Andric                      return CallerEdge.get() == Edge;
810*06c3fb27SDimitry Andric                    });
811*06c3fb27SDimitry Andric   assert(EI != CallerEdges.end());
812*06c3fb27SDimitry Andric   CallerEdges.erase(EI);
813*06c3fb27SDimitry Andric }
814*06c3fb27SDimitry Andric 
815*06c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
816*06c3fb27SDimitry Andric uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::computeAllocType(
817*06c3fb27SDimitry Andric     DenseSet<uint32_t> &ContextIds) {
818*06c3fb27SDimitry Andric   uint8_t BothTypes =
819*06c3fb27SDimitry Andric       (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
820*06c3fb27SDimitry Andric   uint8_t AllocType = (uint8_t)AllocationType::None;
821*06c3fb27SDimitry Andric   for (auto Id : ContextIds) {
822*06c3fb27SDimitry Andric     AllocType |= (uint8_t)ContextIdToAllocationType[Id];
823*06c3fb27SDimitry Andric     // Bail early if alloc type reached both, no further refinement.
824*06c3fb27SDimitry Andric     if (AllocType == BothTypes)
825*06c3fb27SDimitry Andric       return AllocType;
826*06c3fb27SDimitry Andric   }
827*06c3fb27SDimitry Andric   return AllocType;
828*06c3fb27SDimitry Andric }
829*06c3fb27SDimitry Andric 
830*06c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
831*06c3fb27SDimitry Andric uint8_t
832*06c3fb27SDimitry Andric CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypesImpl(
833*06c3fb27SDimitry Andric     const DenseSet<uint32_t> &Node1Ids, const DenseSet<uint32_t> &Node2Ids) {
834*06c3fb27SDimitry Andric   uint8_t BothTypes =
835*06c3fb27SDimitry Andric       (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
836*06c3fb27SDimitry Andric   uint8_t AllocType = (uint8_t)AllocationType::None;
837*06c3fb27SDimitry Andric   for (auto Id : Node1Ids) {
838*06c3fb27SDimitry Andric     if (!Node2Ids.count(Id))
839*06c3fb27SDimitry Andric       continue;
840*06c3fb27SDimitry Andric     AllocType |= (uint8_t)ContextIdToAllocationType[Id];
841*06c3fb27SDimitry Andric     // Bail early if alloc type reached both, no further refinement.
842*06c3fb27SDimitry Andric     if (AllocType == BothTypes)
843*06c3fb27SDimitry Andric       return AllocType;
844*06c3fb27SDimitry Andric   }
845*06c3fb27SDimitry Andric   return AllocType;
846*06c3fb27SDimitry Andric }
847*06c3fb27SDimitry Andric 
848*06c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
849*06c3fb27SDimitry Andric uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypes(
850*06c3fb27SDimitry Andric     const DenseSet<uint32_t> &Node1Ids, const DenseSet<uint32_t> &Node2Ids) {
851*06c3fb27SDimitry Andric   if (Node1Ids.size() < Node2Ids.size())
852*06c3fb27SDimitry Andric     return intersectAllocTypesImpl(Node1Ids, Node2Ids);
853*06c3fb27SDimitry Andric   else
854*06c3fb27SDimitry Andric     return intersectAllocTypesImpl(Node2Ids, Node1Ids);
855*06c3fb27SDimitry Andric }
856*06c3fb27SDimitry Andric 
857*06c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
858*06c3fb27SDimitry Andric typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
859*06c3fb27SDimitry Andric CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addAllocNode(
860*06c3fb27SDimitry Andric     CallInfo Call, const FuncTy *F) {
861*06c3fb27SDimitry Andric   assert(!getNodeForAlloc(Call));
862*06c3fb27SDimitry Andric   NodeOwner.push_back(
863*06c3fb27SDimitry Andric       std::make_unique<ContextNode>(/*IsAllocation=*/true, Call));
864*06c3fb27SDimitry Andric   ContextNode *AllocNode = NodeOwner.back().get();
865*06c3fb27SDimitry Andric   AllocationCallToContextNodeMap[Call] = AllocNode;
866*06c3fb27SDimitry Andric   NodeToCallingFunc[AllocNode] = F;
867*06c3fb27SDimitry Andric   // Use LastContextId as a uniq id for MIB allocation nodes.
868*06c3fb27SDimitry Andric   AllocNode->OrigStackOrAllocId = LastContextId;
869*06c3fb27SDimitry Andric   // Alloc type should be updated as we add in the MIBs. We should assert
870*06c3fb27SDimitry Andric   // afterwards that it is not still None.
871*06c3fb27SDimitry Andric   AllocNode->AllocTypes = (uint8_t)AllocationType::None;
872*06c3fb27SDimitry Andric 
873*06c3fb27SDimitry Andric   return AllocNode;
874*06c3fb27SDimitry Andric }
875*06c3fb27SDimitry Andric 
876*06c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
877*06c3fb27SDimitry Andric template <class NodeT, class IteratorT>
878*06c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addStackNodesForMIB(
879*06c3fb27SDimitry Andric     ContextNode *AllocNode, CallStack<NodeT, IteratorT> &StackContext,
880*06c3fb27SDimitry Andric     CallStack<NodeT, IteratorT> &CallsiteContext, AllocationType AllocType) {
881*06c3fb27SDimitry Andric   // Treating the hot alloc type as NotCold before the disambiguation for "hot"
882*06c3fb27SDimitry Andric   // is done.
883*06c3fb27SDimitry Andric   if (AllocType == AllocationType::Hot)
884*06c3fb27SDimitry Andric     AllocType = AllocationType::NotCold;
885*06c3fb27SDimitry Andric 
886*06c3fb27SDimitry Andric   ContextIdToAllocationType[++LastContextId] = AllocType;
887*06c3fb27SDimitry Andric 
888*06c3fb27SDimitry Andric   // Update alloc type and context ids for this MIB.
889*06c3fb27SDimitry Andric   AllocNode->AllocTypes |= (uint8_t)AllocType;
890*06c3fb27SDimitry Andric   AllocNode->ContextIds.insert(LastContextId);
891*06c3fb27SDimitry Andric 
892*06c3fb27SDimitry Andric   // Now add or update nodes for each stack id in alloc's context.
893*06c3fb27SDimitry Andric   // Later when processing the stack ids on non-alloc callsites we will adjust
894*06c3fb27SDimitry Andric   // for any inlining in the context.
895*06c3fb27SDimitry Andric   ContextNode *PrevNode = AllocNode;
896*06c3fb27SDimitry Andric   // Look for recursion (direct recursion should have been collapsed by
897*06c3fb27SDimitry Andric   // module summary analysis, here we should just be detecting mutual
898*06c3fb27SDimitry Andric   // recursion). Mark these nodes so we don't try to clone.
899*06c3fb27SDimitry Andric   SmallSet<uint64_t, 8> StackIdSet;
900*06c3fb27SDimitry Andric   // Skip any on the allocation call (inlining).
901*06c3fb27SDimitry Andric   for (auto ContextIter = StackContext.beginAfterSharedPrefix(CallsiteContext);
902*06c3fb27SDimitry Andric        ContextIter != StackContext.end(); ++ContextIter) {
903*06c3fb27SDimitry Andric     auto StackId = getStackId(*ContextIter);
904*06c3fb27SDimitry Andric     ContextNode *StackNode = getNodeForStackId(StackId);
905*06c3fb27SDimitry Andric     if (!StackNode) {
906*06c3fb27SDimitry Andric       NodeOwner.push_back(
907*06c3fb27SDimitry Andric           std::make_unique<ContextNode>(/*IsAllocation=*/false));
908*06c3fb27SDimitry Andric       StackNode = NodeOwner.back().get();
909*06c3fb27SDimitry Andric       StackEntryIdToContextNodeMap[StackId] = StackNode;
910*06c3fb27SDimitry Andric       StackNode->OrigStackOrAllocId = StackId;
911*06c3fb27SDimitry Andric     }
912*06c3fb27SDimitry Andric     auto Ins = StackIdSet.insert(StackId);
913*06c3fb27SDimitry Andric     if (!Ins.second)
914*06c3fb27SDimitry Andric       StackNode->Recursive = true;
915*06c3fb27SDimitry Andric     StackNode->ContextIds.insert(LastContextId);
916*06c3fb27SDimitry Andric     StackNode->AllocTypes |= (uint8_t)AllocType;
917*06c3fb27SDimitry Andric     PrevNode->addOrUpdateCallerEdge(StackNode, AllocType, LastContextId);
918*06c3fb27SDimitry Andric     PrevNode = StackNode;
919*06c3fb27SDimitry Andric   }
920*06c3fb27SDimitry Andric }
921*06c3fb27SDimitry Andric 
922*06c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
923*06c3fb27SDimitry Andric DenseSet<uint32_t>
924*06c3fb27SDimitry Andric CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::duplicateContextIds(
925*06c3fb27SDimitry Andric     const DenseSet<uint32_t> &StackSequenceContextIds,
926*06c3fb27SDimitry Andric     DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds) {
927*06c3fb27SDimitry Andric   DenseSet<uint32_t> NewContextIds;
928*06c3fb27SDimitry Andric   for (auto OldId : StackSequenceContextIds) {
929*06c3fb27SDimitry Andric     NewContextIds.insert(++LastContextId);
930*06c3fb27SDimitry Andric     OldToNewContextIds[OldId].insert(LastContextId);
931*06c3fb27SDimitry Andric     assert(ContextIdToAllocationType.count(OldId));
932*06c3fb27SDimitry Andric     // The new context has the same allocation type as original.
933*06c3fb27SDimitry Andric     ContextIdToAllocationType[LastContextId] = ContextIdToAllocationType[OldId];
934*06c3fb27SDimitry Andric   }
935*06c3fb27SDimitry Andric   return NewContextIds;
936*06c3fb27SDimitry Andric }
937*06c3fb27SDimitry Andric 
938*06c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
939*06c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
940*06c3fb27SDimitry Andric     propagateDuplicateContextIds(
941*06c3fb27SDimitry Andric         const DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds) {
942*06c3fb27SDimitry Andric   // Build a set of duplicated context ids corresponding to the input id set.
943*06c3fb27SDimitry Andric   auto GetNewIds = [&OldToNewContextIds](const DenseSet<uint32_t> &ContextIds) {
944*06c3fb27SDimitry Andric     DenseSet<uint32_t> NewIds;
945*06c3fb27SDimitry Andric     for (auto Id : ContextIds)
946*06c3fb27SDimitry Andric       if (auto NewId = OldToNewContextIds.find(Id);
947*06c3fb27SDimitry Andric           NewId != OldToNewContextIds.end())
948*06c3fb27SDimitry Andric         NewIds.insert(NewId->second.begin(), NewId->second.end());
949*06c3fb27SDimitry Andric     return NewIds;
950*06c3fb27SDimitry Andric   };
951*06c3fb27SDimitry Andric 
952*06c3fb27SDimitry Andric   // Recursively update context ids sets along caller edges.
953*06c3fb27SDimitry Andric   auto UpdateCallers = [&](ContextNode *Node,
954*06c3fb27SDimitry Andric                            DenseSet<const ContextEdge *> &Visited,
955*06c3fb27SDimitry Andric                            auto &&UpdateCallers) -> void {
956*06c3fb27SDimitry Andric     for (const auto &Edge : Node->CallerEdges) {
957*06c3fb27SDimitry Andric       auto Inserted = Visited.insert(Edge.get());
958*06c3fb27SDimitry Andric       if (!Inserted.second)
959*06c3fb27SDimitry Andric         continue;
960*06c3fb27SDimitry Andric       ContextNode *NextNode = Edge->Caller;
961*06c3fb27SDimitry Andric       DenseSet<uint32_t> NewIdsToAdd = GetNewIds(Edge->getContextIds());
962*06c3fb27SDimitry Andric       // Only need to recursively iterate to NextNode via this caller edge if
963*06c3fb27SDimitry Andric       // it resulted in any added ids to NextNode.
964*06c3fb27SDimitry Andric       if (!NewIdsToAdd.empty()) {
965*06c3fb27SDimitry Andric         Edge->getContextIds().insert(NewIdsToAdd.begin(), NewIdsToAdd.end());
966*06c3fb27SDimitry Andric         NextNode->ContextIds.insert(NewIdsToAdd.begin(), NewIdsToAdd.end());
967*06c3fb27SDimitry Andric         UpdateCallers(NextNode, Visited, UpdateCallers);
968*06c3fb27SDimitry Andric       }
969*06c3fb27SDimitry Andric     }
970*06c3fb27SDimitry Andric   };
971*06c3fb27SDimitry Andric 
972*06c3fb27SDimitry Andric   DenseSet<const ContextEdge *> Visited;
973*06c3fb27SDimitry Andric   for (auto &Entry : AllocationCallToContextNodeMap) {
974*06c3fb27SDimitry Andric     auto *Node = Entry.second;
975*06c3fb27SDimitry Andric     // Update ids on the allocation nodes before calling the recursive
976*06c3fb27SDimitry Andric     // update along caller edges, since this simplifies the logic during
977*06c3fb27SDimitry Andric     // that traversal.
978*06c3fb27SDimitry Andric     DenseSet<uint32_t> NewIdsToAdd = GetNewIds(Node->ContextIds);
979*06c3fb27SDimitry Andric     Node->ContextIds.insert(NewIdsToAdd.begin(), NewIdsToAdd.end());
980*06c3fb27SDimitry Andric     UpdateCallers(Node, Visited, UpdateCallers);
981*06c3fb27SDimitry Andric   }
982*06c3fb27SDimitry Andric }
983*06c3fb27SDimitry Andric 
984*06c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
985*06c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::connectNewNode(
986*06c3fb27SDimitry Andric     ContextNode *NewNode, ContextNode *OrigNode, bool TowardsCallee) {
987*06c3fb27SDimitry Andric   // Make a copy of the context ids, since this will be adjusted below as they
988*06c3fb27SDimitry Andric   // are moved.
989*06c3fb27SDimitry Andric   DenseSet<uint32_t> RemainingContextIds = NewNode->ContextIds;
990*06c3fb27SDimitry Andric   auto &OrigEdges =
991*06c3fb27SDimitry Andric       TowardsCallee ? OrigNode->CalleeEdges : OrigNode->CallerEdges;
992*06c3fb27SDimitry Andric   // Increment iterator in loop so that we can remove edges as needed.
993*06c3fb27SDimitry Andric   for (auto EI = OrigEdges.begin(); EI != OrigEdges.end();) {
994*06c3fb27SDimitry Andric     auto Edge = *EI;
995*06c3fb27SDimitry Andric     // Remove any matching context ids from Edge, return set that were found and
996*06c3fb27SDimitry Andric     // removed, these are the new edge's context ids. Also update the remaining
997*06c3fb27SDimitry Andric     // (not found ids).
998*06c3fb27SDimitry Andric     DenseSet<uint32_t> NewEdgeContextIds, NotFoundContextIds;
999*06c3fb27SDimitry Andric     set_subtract(Edge->getContextIds(), RemainingContextIds, NewEdgeContextIds,
1000*06c3fb27SDimitry Andric                  NotFoundContextIds);
1001*06c3fb27SDimitry Andric     RemainingContextIds.swap(NotFoundContextIds);
1002*06c3fb27SDimitry Andric     // If no matching context ids for this edge, skip it.
1003*06c3fb27SDimitry Andric     if (NewEdgeContextIds.empty()) {
1004*06c3fb27SDimitry Andric       ++EI;
1005*06c3fb27SDimitry Andric       continue;
1006*06c3fb27SDimitry Andric     }
1007*06c3fb27SDimitry Andric     if (TowardsCallee) {
1008*06c3fb27SDimitry Andric       auto NewEdge = std::make_shared<ContextEdge>(
1009*06c3fb27SDimitry Andric           Edge->Callee, NewNode, computeAllocType(NewEdgeContextIds),
1010*06c3fb27SDimitry Andric           NewEdgeContextIds);
1011*06c3fb27SDimitry Andric       NewNode->CalleeEdges.push_back(NewEdge);
1012*06c3fb27SDimitry Andric       NewEdge->Callee->CallerEdges.push_back(NewEdge);
1013*06c3fb27SDimitry Andric     } else {
1014*06c3fb27SDimitry Andric       auto NewEdge = std::make_shared<ContextEdge>(
1015*06c3fb27SDimitry Andric           NewNode, Edge->Caller, computeAllocType(NewEdgeContextIds),
1016*06c3fb27SDimitry Andric           NewEdgeContextIds);
1017*06c3fb27SDimitry Andric       NewNode->CallerEdges.push_back(NewEdge);
1018*06c3fb27SDimitry Andric       NewEdge->Caller->CalleeEdges.push_back(NewEdge);
1019*06c3fb27SDimitry Andric     }
1020*06c3fb27SDimitry Andric     // Remove old edge if context ids empty.
1021*06c3fb27SDimitry Andric     if (Edge->getContextIds().empty()) {
1022*06c3fb27SDimitry Andric       if (TowardsCallee) {
1023*06c3fb27SDimitry Andric         Edge->Callee->eraseCallerEdge(Edge.get());
1024*06c3fb27SDimitry Andric         EI = OrigNode->CalleeEdges.erase(EI);
1025*06c3fb27SDimitry Andric       } else {
1026*06c3fb27SDimitry Andric         Edge->Caller->eraseCalleeEdge(Edge.get());
1027*06c3fb27SDimitry Andric         EI = OrigNode->CallerEdges.erase(EI);
1028*06c3fb27SDimitry Andric       }
1029*06c3fb27SDimitry Andric       continue;
1030*06c3fb27SDimitry Andric     }
1031*06c3fb27SDimitry Andric     ++EI;
1032*06c3fb27SDimitry Andric   }
1033*06c3fb27SDimitry Andric }
1034*06c3fb27SDimitry Andric 
1035*06c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
1036*06c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
1037*06c3fb27SDimitry Andric     assignStackNodesPostOrder(ContextNode *Node,
1038*06c3fb27SDimitry Andric                               DenseSet<const ContextNode *> &Visited,
1039*06c3fb27SDimitry Andric                               DenseMap<uint64_t, std::vector<CallContextInfo>>
1040*06c3fb27SDimitry Andric                                   &StackIdToMatchingCalls) {
1041*06c3fb27SDimitry Andric   auto Inserted = Visited.insert(Node);
1042*06c3fb27SDimitry Andric   if (!Inserted.second)
1043*06c3fb27SDimitry Andric     return;
1044*06c3fb27SDimitry Andric   // Post order traversal. Iterate over a copy since we may add nodes and
1045*06c3fb27SDimitry Andric   // therefore new callers during the recursive call, invalidating any
1046*06c3fb27SDimitry Andric   // iterator over the original edge vector. We don't need to process these
1047*06c3fb27SDimitry Andric   // new nodes as they were already processed on creation.
1048*06c3fb27SDimitry Andric   auto CallerEdges = Node->CallerEdges;
1049*06c3fb27SDimitry Andric   for (auto &Edge : CallerEdges) {
1050*06c3fb27SDimitry Andric     // Skip any that have been removed during the recursion.
1051*06c3fb27SDimitry Andric     if (!Edge)
1052*06c3fb27SDimitry Andric       continue;
1053*06c3fb27SDimitry Andric     assignStackNodesPostOrder(Edge->Caller, Visited, StackIdToMatchingCalls);
1054*06c3fb27SDimitry Andric   }
1055*06c3fb27SDimitry Andric 
1056*06c3fb27SDimitry Andric   // If this node's stack id is in the map, update the graph to contain new
1057*06c3fb27SDimitry Andric   // nodes representing any inlining at interior callsites. Note we move the
1058*06c3fb27SDimitry Andric   // associated context ids over to the new nodes.
1059*06c3fb27SDimitry Andric 
1060*06c3fb27SDimitry Andric   // Ignore this node if it is for an allocation or we didn't record any
1061*06c3fb27SDimitry Andric   // stack id lists ending at it.
1062*06c3fb27SDimitry Andric   if (Node->IsAllocation ||
1063*06c3fb27SDimitry Andric       !StackIdToMatchingCalls.count(Node->OrigStackOrAllocId))
1064*06c3fb27SDimitry Andric     return;
1065*06c3fb27SDimitry Andric 
1066*06c3fb27SDimitry Andric   auto &Calls = StackIdToMatchingCalls[Node->OrigStackOrAllocId];
1067*06c3fb27SDimitry Andric   // Handle the simple case first. A single call with a single stack id.
1068*06c3fb27SDimitry Andric   // In this case there is no need to create any new context nodes, simply
1069*06c3fb27SDimitry Andric   // assign the context node for stack id to this Call.
1070*06c3fb27SDimitry Andric   if (Calls.size() == 1) {
1071*06c3fb27SDimitry Andric     auto &[Call, Ids, Func, SavedContextIds] = Calls[0];
1072*06c3fb27SDimitry Andric     if (Ids.size() == 1) {
1073*06c3fb27SDimitry Andric       assert(SavedContextIds.empty());
1074*06c3fb27SDimitry Andric       // It should be this Node
1075*06c3fb27SDimitry Andric       assert(Node == getNodeForStackId(Ids[0]));
1076*06c3fb27SDimitry Andric       if (Node->Recursive)
1077*06c3fb27SDimitry Andric         return;
1078*06c3fb27SDimitry Andric       Node->setCall(Call);
1079*06c3fb27SDimitry Andric       NonAllocationCallToContextNodeMap[Call] = Node;
1080*06c3fb27SDimitry Andric       NodeToCallingFunc[Node] = Func;
1081*06c3fb27SDimitry Andric       return;
1082*06c3fb27SDimitry Andric     }
1083*06c3fb27SDimitry Andric   }
1084*06c3fb27SDimitry Andric 
1085*06c3fb27SDimitry Andric   // Find the node for the last stack id, which should be the same
1086*06c3fb27SDimitry Andric   // across all calls recorded for this id, and is this node's id.
1087*06c3fb27SDimitry Andric   uint64_t LastId = Node->OrigStackOrAllocId;
1088*06c3fb27SDimitry Andric   ContextNode *LastNode = getNodeForStackId(LastId);
1089*06c3fb27SDimitry Andric   // We should only have kept stack ids that had nodes.
1090*06c3fb27SDimitry Andric   assert(LastNode);
1091*06c3fb27SDimitry Andric 
1092*06c3fb27SDimitry Andric   for (unsigned I = 0; I < Calls.size(); I++) {
1093*06c3fb27SDimitry Andric     auto &[Call, Ids, Func, SavedContextIds] = Calls[I];
1094*06c3fb27SDimitry Andric     // Skip any for which we didn't assign any ids, these don't get a node in
1095*06c3fb27SDimitry Andric     // the graph.
1096*06c3fb27SDimitry Andric     if (SavedContextIds.empty())
1097*06c3fb27SDimitry Andric       continue;
1098*06c3fb27SDimitry Andric 
1099*06c3fb27SDimitry Andric     assert(LastId == Ids.back());
1100*06c3fb27SDimitry Andric 
1101*06c3fb27SDimitry Andric     ContextNode *FirstNode = getNodeForStackId(Ids[0]);
1102*06c3fb27SDimitry Andric     assert(FirstNode);
1103*06c3fb27SDimitry Andric 
1104*06c3fb27SDimitry Andric     // Recompute the context ids for this stack id sequence (the
1105*06c3fb27SDimitry Andric     // intersection of the context ids of the corresponding nodes).
1106*06c3fb27SDimitry Andric     // Start with the ids we saved in the map for this call, which could be
1107*06c3fb27SDimitry Andric     // duplicated context ids. We have to recompute as we might have overlap
1108*06c3fb27SDimitry Andric     // overlap between the saved context ids for different last nodes, and
1109*06c3fb27SDimitry Andric     // removed them already during the post order traversal.
1110*06c3fb27SDimitry Andric     set_intersect(SavedContextIds, FirstNode->ContextIds);
1111*06c3fb27SDimitry Andric     ContextNode *PrevNode = nullptr;
1112*06c3fb27SDimitry Andric     for (auto Id : Ids) {
1113*06c3fb27SDimitry Andric       ContextNode *CurNode = getNodeForStackId(Id);
1114*06c3fb27SDimitry Andric       // We should only have kept stack ids that had nodes and weren't
1115*06c3fb27SDimitry Andric       // recursive.
1116*06c3fb27SDimitry Andric       assert(CurNode);
1117*06c3fb27SDimitry Andric       assert(!CurNode->Recursive);
1118*06c3fb27SDimitry Andric       if (!PrevNode) {
1119*06c3fb27SDimitry Andric         PrevNode = CurNode;
1120*06c3fb27SDimitry Andric         continue;
1121*06c3fb27SDimitry Andric       }
1122*06c3fb27SDimitry Andric       auto *Edge = CurNode->findEdgeFromCallee(PrevNode);
1123*06c3fb27SDimitry Andric       if (!Edge) {
1124*06c3fb27SDimitry Andric         SavedContextIds.clear();
1125*06c3fb27SDimitry Andric         break;
1126*06c3fb27SDimitry Andric       }
1127*06c3fb27SDimitry Andric       PrevNode = CurNode;
1128*06c3fb27SDimitry Andric       set_intersect(SavedContextIds, Edge->getContextIds());
1129*06c3fb27SDimitry Andric 
1130*06c3fb27SDimitry Andric       // If we now have no context ids for clone, skip this call.
1131*06c3fb27SDimitry Andric       if (SavedContextIds.empty())
1132*06c3fb27SDimitry Andric         break;
1133*06c3fb27SDimitry Andric     }
1134*06c3fb27SDimitry Andric     if (SavedContextIds.empty())
1135*06c3fb27SDimitry Andric       continue;
1136*06c3fb27SDimitry Andric 
1137*06c3fb27SDimitry Andric     // Create new context node.
1138*06c3fb27SDimitry Andric     NodeOwner.push_back(
1139*06c3fb27SDimitry Andric         std::make_unique<ContextNode>(/*IsAllocation=*/false, Call));
1140*06c3fb27SDimitry Andric     ContextNode *NewNode = NodeOwner.back().get();
1141*06c3fb27SDimitry Andric     NodeToCallingFunc[NewNode] = Func;
1142*06c3fb27SDimitry Andric     NonAllocationCallToContextNodeMap[Call] = NewNode;
1143*06c3fb27SDimitry Andric     NewNode->ContextIds = SavedContextIds;
1144*06c3fb27SDimitry Andric     NewNode->AllocTypes = computeAllocType(NewNode->ContextIds);
1145*06c3fb27SDimitry Andric 
1146*06c3fb27SDimitry Andric     // Connect to callees of innermost stack frame in inlined call chain.
1147*06c3fb27SDimitry Andric     // This updates context ids for FirstNode's callee's to reflect those
1148*06c3fb27SDimitry Andric     // moved to NewNode.
1149*06c3fb27SDimitry Andric     connectNewNode(NewNode, FirstNode, /*TowardsCallee=*/true);
1150*06c3fb27SDimitry Andric 
1151*06c3fb27SDimitry Andric     // Connect to callers of outermost stack frame in inlined call chain.
1152*06c3fb27SDimitry Andric     // This updates context ids for FirstNode's caller's to reflect those
1153*06c3fb27SDimitry Andric     // moved to NewNode.
1154*06c3fb27SDimitry Andric     connectNewNode(NewNode, LastNode, /*TowardsCallee=*/false);
1155*06c3fb27SDimitry Andric 
1156*06c3fb27SDimitry Andric     // Now we need to remove context ids from edges/nodes between First and
1157*06c3fb27SDimitry Andric     // Last Node.
1158*06c3fb27SDimitry Andric     PrevNode = nullptr;
1159*06c3fb27SDimitry Andric     for (auto Id : Ids) {
1160*06c3fb27SDimitry Andric       ContextNode *CurNode = getNodeForStackId(Id);
1161*06c3fb27SDimitry Andric       // We should only have kept stack ids that had nodes.
1162*06c3fb27SDimitry Andric       assert(CurNode);
1163*06c3fb27SDimitry Andric 
1164*06c3fb27SDimitry Andric       // Remove the context ids moved to NewNode from CurNode, and the
1165*06c3fb27SDimitry Andric       // edge from the prior node.
1166*06c3fb27SDimitry Andric       set_subtract(CurNode->ContextIds, NewNode->ContextIds);
1167*06c3fb27SDimitry Andric       if (PrevNode) {
1168*06c3fb27SDimitry Andric         auto *PrevEdge = CurNode->findEdgeFromCallee(PrevNode);
1169*06c3fb27SDimitry Andric         assert(PrevEdge);
1170*06c3fb27SDimitry Andric         set_subtract(PrevEdge->getContextIds(), NewNode->ContextIds);
1171*06c3fb27SDimitry Andric         if (PrevEdge->getContextIds().empty()) {
1172*06c3fb27SDimitry Andric           PrevNode->eraseCallerEdge(PrevEdge);
1173*06c3fb27SDimitry Andric           CurNode->eraseCalleeEdge(PrevEdge);
1174*06c3fb27SDimitry Andric         }
1175*06c3fb27SDimitry Andric       }
1176*06c3fb27SDimitry Andric       PrevNode = CurNode;
1177*06c3fb27SDimitry Andric     }
1178*06c3fb27SDimitry Andric   }
1179*06c3fb27SDimitry Andric }
1180*06c3fb27SDimitry Andric 
1181*06c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
1182*06c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
1183*06c3fb27SDimitry Andric   // Map of stack id to all calls with that as the last (outermost caller)
1184*06c3fb27SDimitry Andric   // callsite id that has a context node (some might not due to pruning
1185*06c3fb27SDimitry Andric   // performed during matching of the allocation profile contexts).
1186*06c3fb27SDimitry Andric   // The CallContextInfo contains the Call and a list of its stack ids with
1187*06c3fb27SDimitry Andric   // ContextNodes, the function containing Call, and the set of context ids
1188*06c3fb27SDimitry Andric   // the analysis will eventually identify for use in any new node created
1189*06c3fb27SDimitry Andric   // for that callsite.
1190*06c3fb27SDimitry Andric   DenseMap<uint64_t, std::vector<CallContextInfo>> StackIdToMatchingCalls;
1191*06c3fb27SDimitry Andric   for (auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) {
1192*06c3fb27SDimitry Andric     for (auto &Call : CallsWithMetadata) {
1193*06c3fb27SDimitry Andric       // Ignore allocations, already handled.
1194*06c3fb27SDimitry Andric       if (AllocationCallToContextNodeMap.count(Call))
1195*06c3fb27SDimitry Andric         continue;
1196*06c3fb27SDimitry Andric       auto StackIdsWithContextNodes =
1197*06c3fb27SDimitry Andric           getStackIdsWithContextNodesForCall(Call.call());
1198*06c3fb27SDimitry Andric       // If there were no nodes created for MIBs on allocs (maybe this was in
1199*06c3fb27SDimitry Andric       // the unambiguous part of the MIB stack that was pruned), ignore.
1200*06c3fb27SDimitry Andric       if (StackIdsWithContextNodes.empty())
1201*06c3fb27SDimitry Andric         continue;
1202*06c3fb27SDimitry Andric       // Otherwise, record this Call along with the list of ids for the last
1203*06c3fb27SDimitry Andric       // (outermost caller) stack id with a node.
1204*06c3fb27SDimitry Andric       StackIdToMatchingCalls[StackIdsWithContextNodes.back()].push_back(
1205*06c3fb27SDimitry Andric           {Call.call(), StackIdsWithContextNodes, Func, {}});
1206*06c3fb27SDimitry Andric     }
1207*06c3fb27SDimitry Andric   }
1208*06c3fb27SDimitry Andric 
1209*06c3fb27SDimitry Andric   // First make a pass through all stack ids that correspond to a call,
1210*06c3fb27SDimitry Andric   // as identified in the above loop. Compute the context ids corresponding to
1211*06c3fb27SDimitry Andric   // each of these calls when they correspond to multiple stack ids due to
1212*06c3fb27SDimitry Andric   // due to inlining. Perform any duplication of context ids required when
1213*06c3fb27SDimitry Andric   // there is more than one call with the same stack ids. Their (possibly newly
1214*06c3fb27SDimitry Andric   // duplicated) context ids are saved in the StackIdToMatchingCalls map.
1215*06c3fb27SDimitry Andric   DenseMap<uint32_t, DenseSet<uint32_t>> OldToNewContextIds;
1216*06c3fb27SDimitry Andric   for (auto &It : StackIdToMatchingCalls) {
1217*06c3fb27SDimitry Andric     auto &Calls = It.getSecond();
1218*06c3fb27SDimitry Andric     // Skip single calls with a single stack id. These don't need a new node.
1219*06c3fb27SDimitry Andric     if (Calls.size() == 1) {
1220*06c3fb27SDimitry Andric       auto &Ids = std::get<1>(Calls[0]);
1221*06c3fb27SDimitry Andric       if (Ids.size() == 1)
1222*06c3fb27SDimitry Andric         continue;
1223*06c3fb27SDimitry Andric     }
1224*06c3fb27SDimitry Andric     // In order to do the best and maximal matching of inlined calls to context
1225*06c3fb27SDimitry Andric     // node sequences we will sort the vectors of stack ids in descending order
1226*06c3fb27SDimitry Andric     // of length, and within each length, lexicographically by stack id. The
1227*06c3fb27SDimitry Andric     // latter is so that we can specially handle calls that have identical stack
1228*06c3fb27SDimitry Andric     // id sequences (either due to cloning or artificially because of the MIB
1229*06c3fb27SDimitry Andric     // context pruning).
1230*06c3fb27SDimitry Andric     std::stable_sort(Calls.begin(), Calls.end(),
1231*06c3fb27SDimitry Andric                      [](const CallContextInfo &A, const CallContextInfo &B) {
1232*06c3fb27SDimitry Andric                        auto &IdsA = std::get<1>(A);
1233*06c3fb27SDimitry Andric                        auto &IdsB = std::get<1>(B);
1234*06c3fb27SDimitry Andric                        return IdsA.size() > IdsB.size() ||
1235*06c3fb27SDimitry Andric                               (IdsA.size() == IdsB.size() && IdsA < IdsB);
1236*06c3fb27SDimitry Andric                      });
1237*06c3fb27SDimitry Andric 
1238*06c3fb27SDimitry Andric     // Find the node for the last stack id, which should be the same
1239*06c3fb27SDimitry Andric     // across all calls recorded for this id, and is the id for this
1240*06c3fb27SDimitry Andric     // entry in the StackIdToMatchingCalls map.
1241*06c3fb27SDimitry Andric     uint64_t LastId = It.getFirst();
1242*06c3fb27SDimitry Andric     ContextNode *LastNode = getNodeForStackId(LastId);
1243*06c3fb27SDimitry Andric     // We should only have kept stack ids that had nodes.
1244*06c3fb27SDimitry Andric     assert(LastNode);
1245*06c3fb27SDimitry Andric 
1246*06c3fb27SDimitry Andric     if (LastNode->Recursive)
1247*06c3fb27SDimitry Andric       continue;
1248*06c3fb27SDimitry Andric 
1249*06c3fb27SDimitry Andric     // Initialize the context ids with the last node's. We will subsequently
1250*06c3fb27SDimitry Andric     // refine the context ids by computing the intersection along all edges.
1251*06c3fb27SDimitry Andric     DenseSet<uint32_t> LastNodeContextIds = LastNode->ContextIds;
1252*06c3fb27SDimitry Andric     assert(!LastNodeContextIds.empty());
1253*06c3fb27SDimitry Andric 
1254*06c3fb27SDimitry Andric     for (unsigned I = 0; I < Calls.size(); I++) {
1255*06c3fb27SDimitry Andric       auto &[Call, Ids, Func, SavedContextIds] = Calls[I];
1256*06c3fb27SDimitry Andric       assert(SavedContextIds.empty());
1257*06c3fb27SDimitry Andric       assert(LastId == Ids.back());
1258*06c3fb27SDimitry Andric 
1259*06c3fb27SDimitry Andric       // First compute the context ids for this stack id sequence (the
1260*06c3fb27SDimitry Andric       // intersection of the context ids of the corresponding nodes).
1261*06c3fb27SDimitry Andric       // Start with the remaining saved ids for the last node.
1262*06c3fb27SDimitry Andric       assert(!LastNodeContextIds.empty());
1263*06c3fb27SDimitry Andric       DenseSet<uint32_t> StackSequenceContextIds = LastNodeContextIds;
1264*06c3fb27SDimitry Andric 
1265*06c3fb27SDimitry Andric       ContextNode *PrevNode = LastNode;
1266*06c3fb27SDimitry Andric       ContextNode *CurNode = LastNode;
1267*06c3fb27SDimitry Andric       bool Skip = false;
1268*06c3fb27SDimitry Andric 
1269*06c3fb27SDimitry Andric       // Iterate backwards through the stack Ids, starting after the last Id
1270*06c3fb27SDimitry Andric       // in the list, which was handled once outside for all Calls.
1271*06c3fb27SDimitry Andric       for (auto IdIter = Ids.rbegin() + 1; IdIter != Ids.rend(); IdIter++) {
1272*06c3fb27SDimitry Andric         auto Id = *IdIter;
1273*06c3fb27SDimitry Andric         CurNode = getNodeForStackId(Id);
1274*06c3fb27SDimitry Andric         // We should only have kept stack ids that had nodes.
1275*06c3fb27SDimitry Andric         assert(CurNode);
1276*06c3fb27SDimitry Andric 
1277*06c3fb27SDimitry Andric         if (CurNode->Recursive) {
1278*06c3fb27SDimitry Andric           Skip = true;
1279*06c3fb27SDimitry Andric           break;
1280*06c3fb27SDimitry Andric         }
1281*06c3fb27SDimitry Andric 
1282*06c3fb27SDimitry Andric         auto *Edge = CurNode->findEdgeFromCaller(PrevNode);
1283*06c3fb27SDimitry Andric         // If there is no edge then the nodes belong to different MIB contexts,
1284*06c3fb27SDimitry Andric         // and we should skip this inlined context sequence. For example, this
1285*06c3fb27SDimitry Andric         // particular inlined context may include stack ids A->B, and we may
1286*06c3fb27SDimitry Andric         // indeed have nodes for both A and B, but it is possible that they were
1287*06c3fb27SDimitry Andric         // never profiled in sequence in a single MIB for any allocation (i.e.
1288*06c3fb27SDimitry Andric         // we might have profiled an allocation that involves the callsite A,
1289*06c3fb27SDimitry Andric         // but through a different one of its callee callsites, and we might
1290*06c3fb27SDimitry Andric         // have profiled an allocation that involves callsite B, but reached
1291*06c3fb27SDimitry Andric         // from a different caller callsite).
1292*06c3fb27SDimitry Andric         if (!Edge) {
1293*06c3fb27SDimitry Andric           Skip = true;
1294*06c3fb27SDimitry Andric           break;
1295*06c3fb27SDimitry Andric         }
1296*06c3fb27SDimitry Andric         PrevNode = CurNode;
1297*06c3fb27SDimitry Andric 
1298*06c3fb27SDimitry Andric         // Update the context ids, which is the intersection of the ids along
1299*06c3fb27SDimitry Andric         // all edges in the sequence.
1300*06c3fb27SDimitry Andric         set_intersect(StackSequenceContextIds, Edge->getContextIds());
1301*06c3fb27SDimitry Andric 
1302*06c3fb27SDimitry Andric         // If we now have no context ids for clone, skip this call.
1303*06c3fb27SDimitry Andric         if (StackSequenceContextIds.empty()) {
1304*06c3fb27SDimitry Andric           Skip = true;
1305*06c3fb27SDimitry Andric           break;
1306*06c3fb27SDimitry Andric         }
1307*06c3fb27SDimitry Andric       }
1308*06c3fb27SDimitry Andric       if (Skip)
1309*06c3fb27SDimitry Andric         continue;
1310*06c3fb27SDimitry Andric 
1311*06c3fb27SDimitry Andric       // If some of this call's stack ids did not have corresponding nodes (due
1312*06c3fb27SDimitry Andric       // to pruning), don't include any context ids for contexts that extend
1313*06c3fb27SDimitry Andric       // beyond these nodes. Otherwise we would be matching part of unrelated /
1314*06c3fb27SDimitry Andric       // not fully matching stack contexts. To do this, subtract any context ids
1315*06c3fb27SDimitry Andric       // found in caller nodes of the last node found above.
1316*06c3fb27SDimitry Andric       if (Ids.back() != getLastStackId(Call)) {
1317*06c3fb27SDimitry Andric         for (const auto &PE : CurNode->CallerEdges) {
1318*06c3fb27SDimitry Andric           set_subtract(StackSequenceContextIds, PE->getContextIds());
1319*06c3fb27SDimitry Andric           if (StackSequenceContextIds.empty())
1320*06c3fb27SDimitry Andric             break;
1321*06c3fb27SDimitry Andric         }
1322*06c3fb27SDimitry Andric         // If we now have no context ids for clone, skip this call.
1323*06c3fb27SDimitry Andric         if (StackSequenceContextIds.empty())
1324*06c3fb27SDimitry Andric           continue;
1325*06c3fb27SDimitry Andric       }
1326*06c3fb27SDimitry Andric 
1327*06c3fb27SDimitry Andric       // Check if the next set of stack ids is the same (since the Calls vector
1328*06c3fb27SDimitry Andric       // of tuples is sorted by the stack ids we can just look at the next one).
1329*06c3fb27SDimitry Andric       bool DuplicateContextIds = false;
1330*06c3fb27SDimitry Andric       if (I + 1 < Calls.size()) {
1331*06c3fb27SDimitry Andric         auto NextIds = std::get<1>(Calls[I + 1]);
1332*06c3fb27SDimitry Andric         DuplicateContextIds = Ids == NextIds;
1333*06c3fb27SDimitry Andric       }
1334*06c3fb27SDimitry Andric 
1335*06c3fb27SDimitry Andric       // If we don't have duplicate context ids, then we can assign all the
1336*06c3fb27SDimitry Andric       // context ids computed for the original node sequence to this call.
1337*06c3fb27SDimitry Andric       // If there are duplicate calls with the same stack ids then we synthesize
1338*06c3fb27SDimitry Andric       // new context ids that are duplicates of the originals. These are
1339*06c3fb27SDimitry Andric       // assigned to SavedContextIds, which is a reference into the map entry
1340*06c3fb27SDimitry Andric       // for this call, allowing us to access these ids later on.
1341*06c3fb27SDimitry Andric       OldToNewContextIds.reserve(OldToNewContextIds.size() +
1342*06c3fb27SDimitry Andric                                  StackSequenceContextIds.size());
1343*06c3fb27SDimitry Andric       SavedContextIds =
1344*06c3fb27SDimitry Andric           DuplicateContextIds
1345*06c3fb27SDimitry Andric               ? duplicateContextIds(StackSequenceContextIds, OldToNewContextIds)
1346*06c3fb27SDimitry Andric               : StackSequenceContextIds;
1347*06c3fb27SDimitry Andric       assert(!SavedContextIds.empty());
1348*06c3fb27SDimitry Andric 
1349*06c3fb27SDimitry Andric       if (!DuplicateContextIds) {
1350*06c3fb27SDimitry Andric         // Update saved last node's context ids to remove those that are
1351*06c3fb27SDimitry Andric         // assigned to other calls, so that it is ready for the next call at
1352*06c3fb27SDimitry Andric         // this stack id.
1353*06c3fb27SDimitry Andric         set_subtract(LastNodeContextIds, StackSequenceContextIds);
1354*06c3fb27SDimitry Andric         if (LastNodeContextIds.empty())
1355*06c3fb27SDimitry Andric           break;
1356*06c3fb27SDimitry Andric       }
1357*06c3fb27SDimitry Andric     }
1358*06c3fb27SDimitry Andric   }
1359*06c3fb27SDimitry Andric 
1360*06c3fb27SDimitry Andric   // Propagate the duplicate context ids over the graph.
1361*06c3fb27SDimitry Andric   propagateDuplicateContextIds(OldToNewContextIds);
1362*06c3fb27SDimitry Andric 
1363*06c3fb27SDimitry Andric   if (VerifyCCG)
1364*06c3fb27SDimitry Andric     check();
1365*06c3fb27SDimitry Andric 
1366*06c3fb27SDimitry Andric   // Now perform a post-order traversal over the graph, starting with the
1367*06c3fb27SDimitry Andric   // allocation nodes, essentially processing nodes from callers to callees.
1368*06c3fb27SDimitry Andric   // For any that contains an id in the map, update the graph to contain new
1369*06c3fb27SDimitry Andric   // nodes representing any inlining at interior callsites. Note we move the
1370*06c3fb27SDimitry Andric   // associated context ids over to the new nodes.
1371*06c3fb27SDimitry Andric   DenseSet<const ContextNode *> Visited;
1372*06c3fb27SDimitry Andric   for (auto &Entry : AllocationCallToContextNodeMap)
1373*06c3fb27SDimitry Andric     assignStackNodesPostOrder(Entry.second, Visited, StackIdToMatchingCalls);
1374*06c3fb27SDimitry Andric }
1375*06c3fb27SDimitry Andric 
1376*06c3fb27SDimitry Andric uint64_t ModuleCallsiteContextGraph::getLastStackId(Instruction *Call) {
1377*06c3fb27SDimitry Andric   CallStack<MDNode, MDNode::op_iterator> CallsiteContext(
1378*06c3fb27SDimitry Andric       Call->getMetadata(LLVMContext::MD_callsite));
1379*06c3fb27SDimitry Andric   return CallsiteContext.back();
1380*06c3fb27SDimitry Andric }
1381*06c3fb27SDimitry Andric 
1382*06c3fb27SDimitry Andric uint64_t IndexCallsiteContextGraph::getLastStackId(IndexCall &Call) {
1383*06c3fb27SDimitry Andric   assert(isa<CallsiteInfo *>(Call.getBase()));
1384*06c3fb27SDimitry Andric   CallStack<CallsiteInfo, SmallVector<unsigned>::const_iterator>
1385*06c3fb27SDimitry Andric       CallsiteContext(dyn_cast_if_present<CallsiteInfo *>(Call.getBase()));
1386*06c3fb27SDimitry Andric   // Need to convert index into stack id.
1387*06c3fb27SDimitry Andric   return Index.getStackIdAtIndex(CallsiteContext.back());
1388*06c3fb27SDimitry Andric }
1389*06c3fb27SDimitry Andric 
// Suffix appended (followed by the clone number) to function names to form
// the names of memprof clones; see getMemProfFuncName.
static const std::string MemProfCloneSuffix = ".memprof.";
1391*06c3fb27SDimitry Andric 
1392*06c3fb27SDimitry Andric static std::string getMemProfFuncName(Twine Base, unsigned CloneNo) {
1393*06c3fb27SDimitry Andric   // We use CloneNo == 0 to refer to the original version, which doesn't get
1394*06c3fb27SDimitry Andric   // renamed with a suffix.
1395*06c3fb27SDimitry Andric   if (!CloneNo)
1396*06c3fb27SDimitry Andric     return Base.str();
1397*06c3fb27SDimitry Andric   return (Base + MemProfCloneSuffix + Twine(CloneNo)).str();
1398*06c3fb27SDimitry Andric }
1399*06c3fb27SDimitry Andric 
1400*06c3fb27SDimitry Andric std::string ModuleCallsiteContextGraph::getLabel(const Function *Func,
1401*06c3fb27SDimitry Andric                                                  const Instruction *Call,
1402*06c3fb27SDimitry Andric                                                  unsigned CloneNo) const {
1403*06c3fb27SDimitry Andric   return (Twine(Call->getFunction()->getName()) + " -> " +
1404*06c3fb27SDimitry Andric           cast<CallBase>(Call)->getCalledFunction()->getName())
1405*06c3fb27SDimitry Andric       .str();
1406*06c3fb27SDimitry Andric }
1407*06c3fb27SDimitry Andric 
std::string IndexCallsiteContextGraph::getLabel(const FunctionSummary *Func,
                                                const IndexCall &Call,
                                                unsigned CloneNo) const {
  // Build a "caller -> callee" label (or "caller -> alloc" for allocation
  // records) used for debug printing and dot export. The caller name comes
  // from the ValueInfo recorded for the summary in FSToVIMap.
  auto VI = FSToVIMap.find(Func);
  assert(VI != FSToVIMap.end());
  if (isa<AllocInfo *>(Call.getBase()))
    return (VI->second.name() + " -> alloc").str();
  else {
    auto *Callsite = dyn_cast_if_present<CallsiteInfo *>(Call.getBase());
    // Name the callee using the clone assigned for this caller version.
    // NOTE(review): assumes CloneNo < Callsite->Clones.size() — confirm
    // callers guarantee this.
    return (VI->second.name() + " -> " +
            getMemProfFuncName(Callsite->Callee.name(),
                               Callsite->Clones[CloneNo]))
        .str();
  }
}
1423*06c3fb27SDimitry Andric 
1424*06c3fb27SDimitry Andric std::vector<uint64_t>
1425*06c3fb27SDimitry Andric ModuleCallsiteContextGraph::getStackIdsWithContextNodesForCall(
1426*06c3fb27SDimitry Andric     Instruction *Call) {
1427*06c3fb27SDimitry Andric   CallStack<MDNode, MDNode::op_iterator> CallsiteContext(
1428*06c3fb27SDimitry Andric       Call->getMetadata(LLVMContext::MD_callsite));
1429*06c3fb27SDimitry Andric   return getStackIdsWithContextNodes<MDNode, MDNode::op_iterator>(
1430*06c3fb27SDimitry Andric       CallsiteContext);
1431*06c3fb27SDimitry Andric }
1432*06c3fb27SDimitry Andric 
1433*06c3fb27SDimitry Andric std::vector<uint64_t>
1434*06c3fb27SDimitry Andric IndexCallsiteContextGraph::getStackIdsWithContextNodesForCall(IndexCall &Call) {
1435*06c3fb27SDimitry Andric   assert(isa<CallsiteInfo *>(Call.getBase()));
1436*06c3fb27SDimitry Andric   CallStack<CallsiteInfo, SmallVector<unsigned>::const_iterator>
1437*06c3fb27SDimitry Andric       CallsiteContext(dyn_cast_if_present<CallsiteInfo *>(Call.getBase()));
1438*06c3fb27SDimitry Andric   return getStackIdsWithContextNodes<CallsiteInfo,
1439*06c3fb27SDimitry Andric                                      SmallVector<unsigned>::const_iterator>(
1440*06c3fb27SDimitry Andric       CallsiteContext);
1441*06c3fb27SDimitry Andric }
1442*06c3fb27SDimitry Andric 
1443*06c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
1444*06c3fb27SDimitry Andric template <class NodeT, class IteratorT>
1445*06c3fb27SDimitry Andric std::vector<uint64_t>
1446*06c3fb27SDimitry Andric CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getStackIdsWithContextNodes(
1447*06c3fb27SDimitry Andric     CallStack<NodeT, IteratorT> &CallsiteContext) {
1448*06c3fb27SDimitry Andric   std::vector<uint64_t> StackIds;
1449*06c3fb27SDimitry Andric   for (auto IdOrIndex : CallsiteContext) {
1450*06c3fb27SDimitry Andric     auto StackId = getStackId(IdOrIndex);
1451*06c3fb27SDimitry Andric     ContextNode *Node = getNodeForStackId(StackId);
1452*06c3fb27SDimitry Andric     if (!Node)
1453*06c3fb27SDimitry Andric       break;
1454*06c3fb27SDimitry Andric     StackIds.push_back(StackId);
1455*06c3fb27SDimitry Andric   }
1456*06c3fb27SDimitry Andric   return StackIds;
1457*06c3fb27SDimitry Andric }
1458*06c3fb27SDimitry Andric 
ModuleCallsiteContextGraph::ModuleCallsiteContextGraph(
    Module &M, function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter)
    : Mod(M), OREGetter(OREGetter) {
  // Build the initial graph from the IR: walk every call in the module,
  // creating allocation nodes (with stack nodes for each MIB context) from
  // !memprof metadata, and recording calls that carry !callsite metadata for
  // the later stack node update.
  for (auto &F : M) {
    std::vector<CallInfo> CallsWithMetadata;
    for (auto &BB : F) {
      for (auto &I : BB) {
        if (!isa<CallBase>(I))
          continue;
        if (auto *MemProfMD = I.getMetadata(LLVMContext::MD_memprof)) {
          CallsWithMetadata.push_back(&I);
          auto *AllocNode = addAllocNode(&I, &F);
          // Allocations with !memprof are expected to also carry !callsite
          // describing the allocation call's own inline stack.
          auto *CallsiteMD = I.getMetadata(LLVMContext::MD_callsite);
          assert(CallsiteMD);
          CallStack<MDNode, MDNode::op_iterator> CallsiteContext(CallsiteMD);
          // Add all of the MIBs and their stack nodes.
          for (auto &MDOp : MemProfMD->operands()) {
            auto *MIBMD = cast<const MDNode>(MDOp);
            MDNode *StackNode = getMIBStackNode(MIBMD);
            assert(StackNode);
            CallStack<MDNode, MDNode::op_iterator> StackContext(StackNode);
            addStackNodesForMIB<MDNode, MDNode::op_iterator>(
                AllocNode, StackContext, CallsiteContext,
                getMIBAllocType(MIBMD));
          }
          // Every MIB should have contributed a (non-None) alloc type.
          assert(AllocNode->AllocTypes != (uint8_t)AllocationType::None);
          // Memprof and callsite metadata on memory allocations no longer
          // needed.
          I.setMetadata(LLVMContext::MD_memprof, nullptr);
          I.setMetadata(LLVMContext::MD_callsite, nullptr);
        }
        // For callsite metadata, add to list for this function for later use.
        else if (I.getMetadata(LLVMContext::MD_callsite))
          CallsWithMetadata.push_back(&I);
      }
    }
    if (!CallsWithMetadata.empty())
      FuncToCallsWithMetadata.push_back({&F, CallsWithMetadata});
  }

  if (DumpCCG) {
    dbgs() << "CCG before updating call stack chains:\n";
    dbgs() << *this;
  }

  if (ExportToDot)
    exportToDot("prestackupdate");

  // Match interior stack ids against the recorded callsite calls, updating
  // the graph to reflect any inlining.
  updateStackNodes();

  handleCallsitesWithMultipleTargets();

  // Strip off remaining callsite metadata, no longer needed.
  for (auto &FuncEntry : FuncToCallsWithMetadata)
    for (auto &Call : FuncEntry.second)
      Call.call()->setMetadata(LLVMContext::MD_callsite, nullptr);
}
1516*06c3fb27SDimitry Andric 
IndexCallsiteContextGraph::IndexCallsiteContextGraph(
    ModuleSummaryIndex &Index,
    function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
        isPrevailing)
    : Index(Index) {
  // Build the initial graph from the summary index: create allocation nodes
  // (with stack nodes for each MIB context) from the alloc records, and
  // collect callsite records for the later stack node update.
  for (auto &I : Index) {
    auto VI = Index.getValueInfo(I);
    for (auto &S : VI.getSummaryList()) {
      // We should only add the prevailing nodes. Otherwise we may try to clone
      // in a weak copy that won't be linked (and may be different than the
      // prevailing version).
      // We only keep the memprof summary on the prevailing copy now when
      // building the combined index, as a space optimization, however don't
      // rely on this optimization. The linker doesn't resolve local linkage
      // values so don't check whether those are prevailing.
      if (!GlobalValue::isLocalLinkage(S->linkage()) &&
          !isPrevailing(VI.getGUID(), S.get()))
        continue;
      // Only function summaries can carry alloc/callsite records.
      auto *FS = dyn_cast<FunctionSummary>(S.get());
      if (!FS)
        continue;
      std::vector<CallInfo> CallsWithMetadata;
      if (!FS->allocs().empty()) {
        for (auto &AN : FS->mutableAllocs()) {
          // This can happen because of recursion elimination handling that
          // currently exists in ModuleSummaryAnalysis. Skip these for now.
          // We still added them to the summary because we need to be able to
          // correlate properly in applyImport in the backends.
          if (AN.MIBs.empty())
            continue;
          CallsWithMetadata.push_back({&AN});
          auto *AllocNode = addAllocNode({&AN}, FS);
          // Pass an empty CallStack to the CallsiteContext (second)
          // parameter, since for ThinLTO we already collapsed out the inlined
          // stack ids on the allocation call during ModuleSummaryAnalysis.
          CallStack<MIBInfo, SmallVector<unsigned>::const_iterator>
              EmptyContext;
          // Now add all of the MIBs and their stack nodes.
          for (auto &MIB : AN.MIBs) {
            CallStack<MIBInfo, SmallVector<unsigned>::const_iterator>
                StackContext(&MIB);
            addStackNodesForMIB<MIBInfo, SmallVector<unsigned>::const_iterator>(
                AllocNode, StackContext, EmptyContext, MIB.AllocType);
          }
          // Every MIB should have contributed a (non-None) alloc type.
          assert(AllocNode->AllocTypes != (uint8_t)AllocationType::None);
          // Initialize version 0 on the summary alloc node to the current alloc
          // type, unless it has both types in which case make it default, so
          // that in the case where we aren't able to clone the original version
          // always ends up with the default allocation behavior.
          AN.Versions[0] = (uint8_t)allocTypeToUse(AllocNode->AllocTypes);
        }
      }
      // For callsite metadata, add to list for this function for later use.
      if (!FS->callsites().empty())
        for (auto &SN : FS->mutableCallsites())
          CallsWithMetadata.push_back({&SN});

      if (!CallsWithMetadata.empty())
        FuncToCallsWithMetadata.push_back({FS, CallsWithMetadata});

      // Record the summary's ValueInfo for later name lookups (e.g. labels,
      // callee matching).
      if (!FS->allocs().empty() || !FS->callsites().empty())
        FSToVIMap[FS] = VI;
    }
  }

  if (DumpCCG) {
    dbgs() << "CCG before updating call stack chains:\n";
    dbgs() << *this;
  }

  if (ExportToDot)
    exportToDot("prestackupdate");

  // Match interior stack ids against the recorded callsite records, updating
  // the graph to reflect any inlining.
  updateStackNodes();

  handleCallsitesWithMultipleTargets();
}
1594*06c3fb27SDimitry Andric 
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy,
                          CallTy>::handleCallsitesWithMultipleTargets() {
  // Look for and workaround callsites that call multiple functions.
  // This can happen for indirect calls, which needs better handling, and in
  // more rare cases (e.g. macro expansion).
  // TODO: To fix this for indirect calls we will want to perform speculative
  // devirtualization using either the normal PGO info with ICP, or using the
  // information in the profiled MemProf contexts. We can do this prior to
  // this transformation for regular LTO, and for ThinLTO we can simulate that
  // effect in the summary and perform the actual speculative devirtualization
  // while cloning in the ThinLTO backend.
  for (auto Entry = NonAllocationCallToContextNodeMap.begin();
       Entry != NonAllocationCallToContextNodeMap.end();) {
    auto *Node = Entry->second;
    // Runs before any cloning, so no node should have clones yet.
    assert(Node->Clones.empty());
    // Check all node callees and see if in the same function.
    bool Removed = false;
    auto Call = Node->Call.call();
    for (auto &Edge : Node->CalleeEdges) {
      // Skip callee nodes that don't have an associated call to match against.
      if (!Edge->Callee->hasCall())
        continue;
      assert(NodeToCallingFunc.count(Edge->Callee));
      // Check if the called function matches that of the callee node.
      if (calleeMatchesFunc(Call, NodeToCallingFunc[Edge->Callee]))
        continue;
      // Work around by setting Node to have a null call, so it gets
      // skipped during cloning. Otherwise assignFunctions will assert
      // because its data structures are not designed to handle this case.
      // erase() returns the iterator to the next map entry.
      Entry = NonAllocationCallToContextNodeMap.erase(Entry);
      Node->setCall(CallInfo());
      Removed = true;
      break;
    }
    // Only advance the iterator if erase() didn't already do so above.
    if (!Removed)
      Entry++;
  }
}
1633*06c3fb27SDimitry Andric 
uint64_t ModuleCallsiteContextGraph::getStackId(uint64_t IdOrIndex) const {
  // In the Module (IR) case this is already the Id, since the metadata
  // stores full stack ids directly (no indirection through the index).
  return IdOrIndex;
}
1638*06c3fb27SDimitry Andric 
uint64_t IndexCallsiteContextGraph::getStackId(uint64_t IdOrIndex) const {
  // In the Index case this is an index into the stack id list in the summary
  // index, convert it to an Id (the counterpart of the Module overload,
  // allowing shared template code to treat both uniformly).
  return Index.getStackIdAtIndex(IdOrIndex);
}
1644*06c3fb27SDimitry Andric 
1645*06c3fb27SDimitry Andric bool ModuleCallsiteContextGraph::calleeMatchesFunc(Instruction *Call,
1646*06c3fb27SDimitry Andric                                                    const Function *Func) {
1647*06c3fb27SDimitry Andric   auto *CB = dyn_cast<CallBase>(Call);
1648*06c3fb27SDimitry Andric   if (!CB->getCalledOperand())
1649*06c3fb27SDimitry Andric     return false;
1650*06c3fb27SDimitry Andric   auto *CalleeVal = CB->getCalledOperand()->stripPointerCasts();
1651*06c3fb27SDimitry Andric   auto *CalleeFunc = dyn_cast<Function>(CalleeVal);
1652*06c3fb27SDimitry Andric   if (CalleeFunc == Func)
1653*06c3fb27SDimitry Andric     return true;
1654*06c3fb27SDimitry Andric   auto *Alias = dyn_cast<GlobalAlias>(CalleeVal);
1655*06c3fb27SDimitry Andric   return Alias && Alias->getAliasee() == Func;
1656*06c3fb27SDimitry Andric }
1657*06c3fb27SDimitry Andric 
bool IndexCallsiteContextGraph::calleeMatchesFunc(IndexCall &Call,
                                                  const FunctionSummary *Func) {
  // Returns true if the callsite record Call targets Func's ValueInfo,
  // either directly or through an alias summary.
  // NOTE(review): assumes Call's base is a CallsiteInfo (the result of
  // dyn_cast_if_present is dereferenced unchecked) — confirm callers only
  // pass callsite records here.
  ValueInfo Callee =
      dyn_cast_if_present<CallsiteInfo *>(Call.getBase())->Callee;
  // If there is no summary list then this is a call to an externally defined
  // symbol.
  AliasSummary *Alias =
      Callee.getSummaryList().empty()
          ? nullptr
          : dyn_cast<AliasSummary>(Callee.getSummaryList()[0].get());
  assert(FSToVIMap.count(Func));
  return Callee == FSToVIMap[Func] ||
         // If callee is an alias, check the aliasee, since only function
         // summary base objects will contain the stack node summaries and thus
         // get a context node.
         (Alias && Alias->getAliaseeVI() == FSToVIMap[Func]);
}
1675*06c3fb27SDimitry Andric 
1676*06c3fb27SDimitry Andric static std::string getAllocTypeString(uint8_t AllocTypes) {
1677*06c3fb27SDimitry Andric   if (!AllocTypes)
1678*06c3fb27SDimitry Andric     return "None";
1679*06c3fb27SDimitry Andric   std::string Str;
1680*06c3fb27SDimitry Andric   if (AllocTypes & (uint8_t)AllocationType::NotCold)
1681*06c3fb27SDimitry Andric     Str += "NotCold";
1682*06c3fb27SDimitry Andric   if (AllocTypes & (uint8_t)AllocationType::Cold)
1683*06c3fb27SDimitry Andric     Str += "Cold";
1684*06c3fb27SDimitry Andric   return Str;
1685*06c3fb27SDimitry Andric }
1686*06c3fb27SDimitry Andric 
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::dump()
    const {
  // Debug helper: print this node to the debug stream, newline-terminated.
  print(dbgs());
  dbgs() << "\n";
}
1693*06c3fb27SDimitry Andric 
1694*06c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
1695*06c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::print(
1696*06c3fb27SDimitry Andric     raw_ostream &OS) const {
1697*06c3fb27SDimitry Andric   OS << "Node " << this << "\n";
1698*06c3fb27SDimitry Andric   OS << "\t";
1699*06c3fb27SDimitry Andric   printCall(OS);
1700*06c3fb27SDimitry Andric   if (Recursive)
1701*06c3fb27SDimitry Andric     OS << " (recursive)";
1702*06c3fb27SDimitry Andric   OS << "\n";
1703*06c3fb27SDimitry Andric   OS << "\tAllocTypes: " << getAllocTypeString(AllocTypes) << "\n";
1704*06c3fb27SDimitry Andric   OS << "\tContextIds:";
1705*06c3fb27SDimitry Andric   std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
1706*06c3fb27SDimitry Andric   std::sort(SortedIds.begin(), SortedIds.end());
1707*06c3fb27SDimitry Andric   for (auto Id : SortedIds)
1708*06c3fb27SDimitry Andric     OS << " " << Id;
1709*06c3fb27SDimitry Andric   OS << "\n";
1710*06c3fb27SDimitry Andric   OS << "\tCalleeEdges:\n";
1711*06c3fb27SDimitry Andric   for (auto &Edge : CalleeEdges)
1712*06c3fb27SDimitry Andric     OS << "\t\t" << *Edge << "\n";
1713*06c3fb27SDimitry Andric   OS << "\tCallerEdges:\n";
1714*06c3fb27SDimitry Andric   for (auto &Edge : CallerEdges)
1715*06c3fb27SDimitry Andric     OS << "\t\t" << *Edge << "\n";
1716*06c3fb27SDimitry Andric   if (!Clones.empty()) {
1717*06c3fb27SDimitry Andric     OS << "\tClones: ";
1718*06c3fb27SDimitry Andric     FieldSeparator FS;
1719*06c3fb27SDimitry Andric     for (auto *Clone : Clones)
1720*06c3fb27SDimitry Andric       OS << FS << Clone;
1721*06c3fb27SDimitry Andric     OS << "\n";
1722*06c3fb27SDimitry Andric   } else if (CloneOf) {
1723*06c3fb27SDimitry Andric     OS << "\tClone of " << CloneOf << "\n";
1724*06c3fb27SDimitry Andric   }
1725*06c3fb27SDimitry Andric }
1726*06c3fb27SDimitry Andric 
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::dump()
    const {
  // Debug helper: print this edge to the debug stream, newline-terminated.
  print(dbgs());
  dbgs() << "\n";
}
1733*06c3fb27SDimitry Andric 
1734*06c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
1735*06c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::print(
1736*06c3fb27SDimitry Andric     raw_ostream &OS) const {
1737*06c3fb27SDimitry Andric   OS << "Edge from Callee " << Callee << " to Caller: " << Caller
1738*06c3fb27SDimitry Andric      << " AllocTypes: " << getAllocTypeString(AllocTypes);
1739*06c3fb27SDimitry Andric   OS << " ContextIds:";
1740*06c3fb27SDimitry Andric   std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
1741*06c3fb27SDimitry Andric   std::sort(SortedIds.begin(), SortedIds.end());
1742*06c3fb27SDimitry Andric   for (auto Id : SortedIds)
1743*06c3fb27SDimitry Andric     OS << " " << Id;
1744*06c3fb27SDimitry Andric }
1745*06c3fb27SDimitry Andric 
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::dump() const {
  // Debug helper: print the whole graph to the debug stream.
  print(dbgs());
}
1750*06c3fb27SDimitry Andric 
1751*06c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
1752*06c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::print(
1753*06c3fb27SDimitry Andric     raw_ostream &OS) const {
1754*06c3fb27SDimitry Andric   OS << "Callsite Context Graph:\n";
1755*06c3fb27SDimitry Andric   using GraphType = const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
1756*06c3fb27SDimitry Andric   for (const auto Node : nodes<GraphType>(this)) {
1757*06c3fb27SDimitry Andric     if (Node->isRemoved())
1758*06c3fb27SDimitry Andric       continue;
1759*06c3fb27SDimitry Andric     Node->print(OS);
1760*06c3fb27SDimitry Andric     OS << "\n";
1761*06c3fb27SDimitry Andric   }
1762*06c3fb27SDimitry Andric }
1763*06c3fb27SDimitry Andric 
template <typename DerivedCCG, typename FuncTy, typename CallTy>
static void checkEdge(
    const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &Edge) {
  // Confirm that alloc type is not None and that we have at least one context
  // id, i.e. that the edge still carries live context information.
  assert(Edge->AllocTypes != (uint8_t)AllocationType::None);
  assert(!Edge->ContextIds.empty());
}
1772*06c3fb27SDimitry Andric 
// Verify a node's context id invariants against its incident edges, and
// optionally (CheckEdges) the per-edge invariants via checkEdge. Removed
// nodes are skipped.
template <typename DerivedCCG, typename FuncTy, typename CallTy>
static void checkNode(const ContextNode<DerivedCCG, FuncTy, CallTy> *Node,
                      bool CheckEdges = true) {
  if (Node->isRemoved())
    return;
  // Node's context ids should be the union of both its callee and caller edge
  // context ids.
  if (Node->CallerEdges.size()) {
    auto EI = Node->CallerEdges.begin();
    auto &FirstEdge = *EI;
    EI++;
    // Seed the union with the first edge's ids, then fold in the rest.
    DenseSet<uint32_t> CallerEdgeContextIds(FirstEdge->ContextIds);
    for (; EI != Node->CallerEdges.end(); EI++) {
      const auto &Edge = *EI;
      if (CheckEdges)
        checkEdge<DerivedCCG, FuncTy, CallTy>(Edge);
      set_union(CallerEdgeContextIds, Edge->ContextIds);
    }
    // Node can have more context ids than callers if some contexts terminate at
    // node and some are longer.
    assert(Node->ContextIds == CallerEdgeContextIds ||
           set_is_subset(CallerEdgeContextIds, Node->ContextIds));
  }
  if (Node->CalleeEdges.size()) {
    auto EI = Node->CalleeEdges.begin();
    auto &FirstEdge = *EI;
    EI++;
    DenseSet<uint32_t> CalleeEdgeContextIds(FirstEdge->ContextIds);
    for (; EI != Node->CalleeEdges.end(); EI++) {
      const auto &Edge = *EI;
      if (CheckEdges)
        checkEdge<DerivedCCG, FuncTy, CallTy>(Edge);
      set_union(CalleeEdgeContextIds, Edge->ContextIds);
    }
    // Unlike the caller case, callee edges must account for exactly the
    // node's context ids.
    assert(Node->ContextIds == CalleeEdgeContextIds);
  }
}
1810*06c3fb27SDimitry Andric 
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::check() const {
  using GraphType = const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
  // Verify every node's invariants. Per-edge checks are done here over the
  // caller edge lists, so checkNode is told to skip them (CheckEdges=false)
  // to avoid checking each edge twice.
  for (const auto Node : nodes<GraphType>(this)) {
    checkNode<DerivedCCG, FuncTy, CallTy>(Node, /*CheckEdges=*/false);
    for (auto &Edge : Node->CallerEdges)
      checkEdge<DerivedCCG, FuncTy, CallTy>(Edge);
  }
}
1820*06c3fb27SDimitry Andric 
// GraphTraits specialization letting LLVM's generic graph utilities (the
// nodes<> range used by print/check, depth-first iterators, dot export)
// operate on a const CallsiteContextGraph. Children of a node are the
// callees reached via its CalleeEdges.
template <typename DerivedCCG, typename FuncTy, typename CallTy>
struct GraphTraits<const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *> {
  using GraphType = const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
  using NodeRef = const ContextNode<DerivedCCG, FuncTy, CallTy> *;

  // The graph owns its nodes via unique_ptr (NodeOwner); expose plain
  // pointers to clients.
  using NodePtrTy = std::unique_ptr<ContextNode<DerivedCCG, FuncTy, CallTy>>;
  static NodeRef getNode(const NodePtrTy &P) { return P.get(); }

  using nodes_iterator =
      mapped_iterator<typename std::vector<NodePtrTy>::const_iterator,
                      decltype(&getNode)>;

  static nodes_iterator nodes_begin(GraphType G) {
    return nodes_iterator(G->NodeOwner.begin(), &getNode);
  }

  static nodes_iterator nodes_end(GraphType G) {
    return nodes_iterator(G->NodeOwner.end(), &getNode);
  }

  static NodeRef getEntryNode(GraphType G) {
    return G->NodeOwner.begin()->get();
  }

  // Edges are held via shared_ptr; child iteration maps each callee edge to
  // the callee node it points at.
  using EdgePtrTy = std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>;
  static const ContextNode<DerivedCCG, FuncTy, CallTy> *
  GetCallee(const EdgePtrTy &P) {
    return P->Callee;
  }

  using ChildIteratorType =
      mapped_iterator<typename std::vector<std::shared_ptr<ContextEdge<
                          DerivedCCG, FuncTy, CallTy>>>::const_iterator,
                      decltype(&GetCallee)>;

  static ChildIteratorType child_begin(NodeRef N) {
    return ChildIteratorType(N->CalleeEdges.begin(), &GetCallee);
  }

  static ChildIteratorType child_end(NodeRef N) {
    return ChildIteratorType(N->CalleeEdges.end(), &GetCallee);
  }
};
1864*06c3fb27SDimitry Andric 
1865*06c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
1866*06c3fb27SDimitry Andric struct DOTGraphTraits<const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>
1867*06c3fb27SDimitry Andric     : public DefaultDOTGraphTraits {
1868*06c3fb27SDimitry Andric   DOTGraphTraits(bool IsSimple = false) : DefaultDOTGraphTraits(IsSimple) {}
1869*06c3fb27SDimitry Andric 
1870*06c3fb27SDimitry Andric   using GraphType = const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
1871*06c3fb27SDimitry Andric   using GTraits = GraphTraits<GraphType>;
1872*06c3fb27SDimitry Andric   using NodeRef = typename GTraits::NodeRef;
1873*06c3fb27SDimitry Andric   using ChildIteratorType = typename GTraits::ChildIteratorType;
1874*06c3fb27SDimitry Andric 
1875*06c3fb27SDimitry Andric   static std::string getNodeLabel(NodeRef Node, GraphType G) {
1876*06c3fb27SDimitry Andric     std::string LabelString =
1877*06c3fb27SDimitry Andric         (Twine("OrigId: ") + (Node->IsAllocation ? "Alloc" : "") +
1878*06c3fb27SDimitry Andric          Twine(Node->OrigStackOrAllocId))
1879*06c3fb27SDimitry Andric             .str();
1880*06c3fb27SDimitry Andric     LabelString += "\n";
1881*06c3fb27SDimitry Andric     if (Node->hasCall()) {
1882*06c3fb27SDimitry Andric       auto Func = G->NodeToCallingFunc.find(Node);
1883*06c3fb27SDimitry Andric       assert(Func != G->NodeToCallingFunc.end());
1884*06c3fb27SDimitry Andric       LabelString +=
1885*06c3fb27SDimitry Andric           G->getLabel(Func->second, Node->Call.call(), Node->Call.cloneNo());
1886*06c3fb27SDimitry Andric     } else {
1887*06c3fb27SDimitry Andric       LabelString += "null call";
1888*06c3fb27SDimitry Andric       if (Node->Recursive)
1889*06c3fb27SDimitry Andric         LabelString += " (recursive)";
1890*06c3fb27SDimitry Andric       else
1891*06c3fb27SDimitry Andric         LabelString += " (external)";
1892*06c3fb27SDimitry Andric     }
1893*06c3fb27SDimitry Andric     return LabelString;
1894*06c3fb27SDimitry Andric   }
1895*06c3fb27SDimitry Andric 
1896*06c3fb27SDimitry Andric   static std::string getNodeAttributes(NodeRef Node, GraphType) {
1897*06c3fb27SDimitry Andric     std::string AttributeString = (Twine("tooltip=\"") + getNodeId(Node) + " " +
1898*06c3fb27SDimitry Andric                                    getContextIds(Node->ContextIds) + "\"")
1899*06c3fb27SDimitry Andric                                       .str();
1900*06c3fb27SDimitry Andric     AttributeString +=
1901*06c3fb27SDimitry Andric         (Twine(",fillcolor=\"") + getColor(Node->AllocTypes) + "\"").str();
1902*06c3fb27SDimitry Andric     AttributeString += ",style=\"filled\"";
1903*06c3fb27SDimitry Andric     if (Node->CloneOf) {
1904*06c3fb27SDimitry Andric       AttributeString += ",color=\"blue\"";
1905*06c3fb27SDimitry Andric       AttributeString += ",style=\"filled,bold,dashed\"";
1906*06c3fb27SDimitry Andric     } else
1907*06c3fb27SDimitry Andric       AttributeString += ",style=\"filled\"";
1908*06c3fb27SDimitry Andric     return AttributeString;
1909*06c3fb27SDimitry Andric   }
1910*06c3fb27SDimitry Andric 
1911*06c3fb27SDimitry Andric   static std::string getEdgeAttributes(NodeRef, ChildIteratorType ChildIter,
1912*06c3fb27SDimitry Andric                                        GraphType) {
1913*06c3fb27SDimitry Andric     auto &Edge = *(ChildIter.getCurrent());
1914*06c3fb27SDimitry Andric     return (Twine("tooltip=\"") + getContextIds(Edge->ContextIds) + "\"" +
1915*06c3fb27SDimitry Andric             Twine(",fillcolor=\"") + getColor(Edge->AllocTypes) + "\"")
1916*06c3fb27SDimitry Andric         .str();
1917*06c3fb27SDimitry Andric   }
1918*06c3fb27SDimitry Andric 
1919*06c3fb27SDimitry Andric   // Since the NodeOwners list includes nodes that are no longer connected to
1920*06c3fb27SDimitry Andric   // the graph, skip them here.
1921*06c3fb27SDimitry Andric   static bool isNodeHidden(NodeRef Node, GraphType) {
1922*06c3fb27SDimitry Andric     return Node->isRemoved();
1923*06c3fb27SDimitry Andric   }
1924*06c3fb27SDimitry Andric 
1925*06c3fb27SDimitry Andric private:
1926*06c3fb27SDimitry Andric   static std::string getContextIds(const DenseSet<uint32_t> &ContextIds) {
1927*06c3fb27SDimitry Andric     std::string IdString = "ContextIds:";
1928*06c3fb27SDimitry Andric     if (ContextIds.size() < 100) {
1929*06c3fb27SDimitry Andric       std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
1930*06c3fb27SDimitry Andric       std::sort(SortedIds.begin(), SortedIds.end());
1931*06c3fb27SDimitry Andric       for (auto Id : SortedIds)
1932*06c3fb27SDimitry Andric         IdString += (" " + Twine(Id)).str();
1933*06c3fb27SDimitry Andric     } else {
1934*06c3fb27SDimitry Andric       IdString += (" (" + Twine(ContextIds.size()) + " ids)").str();
1935*06c3fb27SDimitry Andric     }
1936*06c3fb27SDimitry Andric     return IdString;
1937*06c3fb27SDimitry Andric   }
1938*06c3fb27SDimitry Andric 
1939*06c3fb27SDimitry Andric   static std::string getColor(uint8_t AllocTypes) {
1940*06c3fb27SDimitry Andric     if (AllocTypes == (uint8_t)AllocationType::NotCold)
1941*06c3fb27SDimitry Andric       // Color "brown1" actually looks like a lighter red.
1942*06c3fb27SDimitry Andric       return "brown1";
1943*06c3fb27SDimitry Andric     if (AllocTypes == (uint8_t)AllocationType::Cold)
1944*06c3fb27SDimitry Andric       return "cyan";
1945*06c3fb27SDimitry Andric     if (AllocTypes ==
1946*06c3fb27SDimitry Andric         ((uint8_t)AllocationType::NotCold | (uint8_t)AllocationType::Cold))
1947*06c3fb27SDimitry Andric       // Lighter purple.
1948*06c3fb27SDimitry Andric       return "mediumorchid1";
1949*06c3fb27SDimitry Andric     return "gray";
1950*06c3fb27SDimitry Andric   }
1951*06c3fb27SDimitry Andric 
1952*06c3fb27SDimitry Andric   static std::string getNodeId(NodeRef Node) {
1953*06c3fb27SDimitry Andric     std::stringstream SStream;
1954*06c3fb27SDimitry Andric     SStream << std::hex << "N0x" << (unsigned long long)Node;
1955*06c3fb27SDimitry Andric     std::string Result = SStream.str();
1956*06c3fb27SDimitry Andric     return Result;
1957*06c3fb27SDimitry Andric   }
1958*06c3fb27SDimitry Andric };
1959*06c3fb27SDimitry Andric 
1960*06c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
1961*06c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::exportToDot(
1962*06c3fb27SDimitry Andric     std::string Label) const {
1963*06c3fb27SDimitry Andric   WriteGraph(this, "", false, Label,
1964*06c3fb27SDimitry Andric              DotFilePathPrefix + "ccg." + Label + ".dot");
1965*06c3fb27SDimitry Andric }
1966*06c3fb27SDimitry Andric 
1967*06c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
1968*06c3fb27SDimitry Andric typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1969*06c3fb27SDimitry Andric CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::moveEdgeToNewCalleeClone(
1970*06c3fb27SDimitry Andric     const std::shared_ptr<ContextEdge> &Edge, EdgeIter *CallerEdgeI) {
1971*06c3fb27SDimitry Andric   ContextNode *Node = Edge->Callee;
1972*06c3fb27SDimitry Andric   NodeOwner.push_back(
1973*06c3fb27SDimitry Andric       std::make_unique<ContextNode>(Node->IsAllocation, Node->Call));
1974*06c3fb27SDimitry Andric   ContextNode *Clone = NodeOwner.back().get();
1975*06c3fb27SDimitry Andric   Node->addClone(Clone);
1976*06c3fb27SDimitry Andric   assert(NodeToCallingFunc.count(Node));
1977*06c3fb27SDimitry Andric   NodeToCallingFunc[Clone] = NodeToCallingFunc[Node];
1978*06c3fb27SDimitry Andric   moveEdgeToExistingCalleeClone(Edge, Clone, CallerEdgeI, /*NewClone=*/true);
1979*06c3fb27SDimitry Andric   return Clone;
1980*06c3fb27SDimitry Andric }
1981*06c3fb27SDimitry Andric 
// Move the caller->callee Edge from its current callee node onto NewCallee,
// which must be a clone of the same original node (Edge's current callee may
// itself be the original). Transfers Edge's context ids from the old callee
// node (and from the old callee's callee edges) over to NewCallee, updating
// alloc types from the moved ids. If CallerEdgeI is non-null it points at
// Edge within the old callee's CallerEdges and is advanced past the erased
// entry; otherwise Edge is located and erased directly. NewClone is true
// when NewCallee was just created for this move, in which case it cannot yet
// have any callee edges and new ones are always created below.
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
    moveEdgeToExistingCalleeClone(const std::shared_ptr<ContextEdge> &Edge,
                                  ContextNode *NewCallee, EdgeIter *CallerEdgeI,
                                  bool NewClone) {
  // NewCallee and Edge's current callee must be clones of the same original
  // node (Edge's current callee may be the original node too).
  assert(NewCallee->getOrigNode() == Edge->Callee->getOrigNode());
  auto &EdgeContextIds = Edge->getContextIds();
  ContextNode *OldCallee = Edge->Callee;
  // Detach Edge from the old callee's caller list, via the iterator when the
  // caller is actively iterating over that list.
  if (CallerEdgeI)
    *CallerEdgeI = OldCallee->CallerEdges.erase(*CallerEdgeI);
  else
    OldCallee->eraseCallerEdge(Edge.get());
  Edge->Callee = NewCallee;
  NewCallee->CallerEdges.push_back(Edge);
  // Don't need to update Edge's context ids since we are simply reconnecting
  // it.
  set_subtract(OldCallee->ContextIds, EdgeContextIds);
  NewCallee->ContextIds.insert(EdgeContextIds.begin(), EdgeContextIds.end());
  NewCallee->AllocTypes |= Edge->AllocTypes;
  OldCallee->AllocTypes = computeAllocType(OldCallee->ContextIds);
  // OldCallee alloc type should be None iff its context id set is now empty.
  assert((OldCallee->AllocTypes == (uint8_t)AllocationType::None) ==
         OldCallee->ContextIds.empty());
  // Now walk the old callee node's callee edges and move Edge's context ids
  // over to the corresponding edge into the clone (which is created here if
  // this is a newly created clone).
  for (auto &OldCalleeEdge : OldCallee->CalleeEdges) {
    // The context ids moving to the new callee are the subset of this edge's
    // context ids and the context ids on the caller edge being moved.
    DenseSet<uint32_t> EdgeContextIdsToMove =
        set_intersection(OldCalleeEdge->getContextIds(), EdgeContextIds);
    set_subtract(OldCalleeEdge->getContextIds(), EdgeContextIdsToMove);
    OldCalleeEdge->AllocTypes =
        computeAllocType(OldCalleeEdge->getContextIds());
    if (!NewClone) {
      // Update context ids / alloc type on corresponding edge to NewCallee.
      // There is a chance this may not exist if we are reusing an existing
      // clone, specifically during function assignment, where we would have
      // removed none type edges after creating the clone. If we can't find
      // a corresponding edge there, fall through to the cloning below.
      if (auto *NewCalleeEdge =
              NewCallee->findEdgeFromCallee(OldCalleeEdge->Callee)) {
        NewCalleeEdge->getContextIds().insert(EdgeContextIdsToMove.begin(),
                                              EdgeContextIdsToMove.end());
        NewCalleeEdge->AllocTypes |= computeAllocType(EdgeContextIdsToMove);
        continue;
      }
    }
    // No existing edge from NewCallee to this callee: create one carrying
    // the moved context ids and a matching alloc type.
    auto NewEdge = std::make_shared<ContextEdge>(
        OldCalleeEdge->Callee, NewCallee,
        computeAllocType(EdgeContextIdsToMove), EdgeContextIdsToMove);
    NewCallee->CalleeEdges.push_back(NewEdge);
    NewEdge->Callee->CallerEdges.push_back(NewEdge);
  }
  if (VerifyCCG) {
    // Edge checks are skipped (CheckEdges=false) since surrounding edge
    // lists may still be mid-update at this point.
    checkNode<DerivedCCG, FuncTy, CallTy>(OldCallee, /*CheckEdges=*/false);
    checkNode<DerivedCCG, FuncTy, CallTy>(NewCallee, /*CheckEdges=*/false);
    for (const auto &OldCalleeEdge : OldCallee->CalleeEdges)
      checkNode<DerivedCCG, FuncTy, CallTy>(OldCalleeEdge->Callee,
                                            /*CheckEdges=*/false);
    for (const auto &NewCalleeEdge : NewCallee->CalleeEdges)
      checkNode<DerivedCCG, FuncTy, CallTy>(NewCalleeEdge->Callee,
                                            /*CheckEdges=*/false);
  }
}
2049*06c3fb27SDimitry Andric 
2050*06c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
2051*06c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones() {
2052*06c3fb27SDimitry Andric   DenseSet<const ContextNode *> Visited;
2053*06c3fb27SDimitry Andric   for (auto &Entry : AllocationCallToContextNodeMap)
2054*06c3fb27SDimitry Andric     identifyClones(Entry.second, Visited);
2055*06c3fb27SDimitry Andric }
2056*06c3fb27SDimitry Andric 
2057*06c3fb27SDimitry Andric // helper function to check an AllocType is cold or notcold or both.
2058*06c3fb27SDimitry Andric bool checkColdOrNotCold(uint8_t AllocType) {
2059*06c3fb27SDimitry Andric   return (AllocType == (uint8_t)AllocationType::Cold) ||
2060*06c3fb27SDimitry Andric          (AllocType == (uint8_t)AllocationType::NotCold) ||
2061*06c3fb27SDimitry Andric          (AllocType ==
2062*06c3fb27SDimitry Andric           ((uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold));
2063*06c3fb27SDimitry Andric }
2064*06c3fb27SDimitry Andric 
// Recursively perform cloning starting at Node: first recurse into Node's
// callers (bottom-up from an allocation), then greedily move caller edges
// with differing alloc types onto (new or existing) clones of Node until the
// remaining caller edges share a single allocation type. Visited guards
// against reprocessing a node reachable via multiple caller chains.
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones(
    ContextNode *Node, DenseSet<const ContextNode *> &Visited) {
  if (VerifyNodes)
    checkNode<DerivedCCG, FuncTy, CallTy>(Node);
  // Cloning is only ever initiated on original (non-clone) nodes.
  assert(!Node->CloneOf);

  // If Node has a null call, then either it wasn't found in the module
  // (regular LTO) or summary index (ThinLTO), or there were other conditions
  // blocking cloning (e.g. recursion, calls multiple targets, etc).
  // Do this here so that we don't try to recursively clone callers below,
  // which isn't useful at least for this node.
  if (!Node->hasCall())
    return;

#ifndef NDEBUG
  auto Insert =
#endif
      Visited.insert(Node);
  // We should not have visited this node yet.
  assert(Insert.second);
  // The recursive call to identifyClones may delete the current edge from the
  // CallerEdges vector. Make a copy and iterate on that, simpler than passing
  // in an iterator and having recursive call erase from it. Other edges may
  // also get removed during the recursion, which will have null Callee and
  // Caller pointers (and are deleted later), so we skip those below.
  {
    auto CallerEdges = Node->CallerEdges;
    for (auto &Edge : CallerEdges) {
      // Skip any that have been removed by an earlier recursive call.
      if (Edge->Callee == nullptr && Edge->Caller == nullptr) {
        assert(!std::count(Node->CallerEdges.begin(), Node->CallerEdges.end(),
                           Edge));
        continue;
      }
      // Ignore any caller we previously visited via another edge.
      if (!Visited.count(Edge->Caller) && !Edge->Caller->CloneOf) {
        identifyClones(Edge->Caller, Visited);
      }
    }
  }

  // Check if we reached an unambiguous call or have only a single caller.
  if (hasSingleAllocType(Node->AllocTypes) || Node->CallerEdges.size() <= 1)
    return;

  // We need to clone.

  // Try to keep the original version as alloc type NotCold. This will make
  // cases with indirect calls or any other situation with an unknown call to
  // the original function get the default behavior. We do this by sorting the
  // CallerEdges of the Node we will clone by alloc type.
  //
  // Give NotCold edge the lowest sort priority so those edges are at the end of
  // the caller edges vector, and stay on the original version (since the below
  // code clones greedily until it finds all remaining edges have the same type
  // and leaves the remaining ones on the original Node).
  //
  // We shouldn't actually have any None type edges, so the sorting priority for
  // that is arbitrary, and we assert in that case below.
  const unsigned AllocTypeCloningPriority[] = {/*None*/ 3, /*NotCold*/ 4,
                                               /*Cold*/ 1,
                                               /*NotColdCold*/ 2};
  std::stable_sort(Node->CallerEdges.begin(), Node->CallerEdges.end(),
                   [&](const std::shared_ptr<ContextEdge> &A,
                       const std::shared_ptr<ContextEdge> &B) {
                     assert(checkColdOrNotCold(A->AllocTypes) &&
                            checkColdOrNotCold(B->AllocTypes));

                     if (A->AllocTypes == B->AllocTypes)
                       // Use the first context id for each edge as a
                       // tie-breaker.
                       return *A->ContextIds.begin() < *B->ContextIds.begin();
                     return AllocTypeCloningPriority[A->AllocTypes] <
                            AllocTypeCloningPriority[B->AllocTypes];
                   });

  assert(Node->AllocTypes != (uint8_t)AllocationType::None);

  // Iterate until we find no more opportunities for disambiguating the alloc
  // types via cloning. In most cases this loop will terminate once the Node
  // has a single allocation type, in which case no more cloning is needed.
  // We need to be able to remove Edge from CallerEdges, so need to adjust
  // iterator inside the loop.
  for (auto EI = Node->CallerEdges.begin(); EI != Node->CallerEdges.end();) {
    auto CallerEdge = *EI;

    // See if cloning the prior caller edge left this node with a single alloc
    // type or a single caller. In that case no more cloning of Node is needed.
    if (hasSingleAllocType(Node->AllocTypes) || Node->CallerEdges.size() <= 1)
      break;

    // Compute the node callee edge alloc types corresponding to the context ids
    // for this caller edge.
    std::vector<uint8_t> CalleeEdgeAllocTypesForCallerEdge;
    CalleeEdgeAllocTypesForCallerEdge.reserve(Node->CalleeEdges.size());
    for (auto &CalleeEdge : Node->CalleeEdges)
      CalleeEdgeAllocTypesForCallerEdge.push_back(intersectAllocTypes(
          CalleeEdge->getContextIds(), CallerEdge->getContextIds()));

    // Don't clone if doing so will not disambiguate any alloc types amongst
    // caller edges (including the callee edges that would be cloned).
    // Otherwise we will simply move all edges to the clone.
    //
    // First check if by cloning we will disambiguate the caller allocation
    // type from node's allocation type. Query allocTypeToUse so that we don't
    // bother cloning to distinguish NotCold+Cold from NotCold. Note that
    // neither of these should be None type.
    //
    // Then check if by cloning node at least one of the callee edges will be
    // disambiguated by splitting out different context ids.
    assert(CallerEdge->AllocTypes != (uint8_t)AllocationType::None);
    assert(Node->AllocTypes != (uint8_t)AllocationType::None);
    if (allocTypeToUse(CallerEdge->AllocTypes) ==
            allocTypeToUse(Node->AllocTypes) &&
        allocTypesMatch<DerivedCCG, FuncTy, CallTy>(
            CalleeEdgeAllocTypesForCallerEdge, Node->CalleeEdges)) {
      ++EI;
      continue;
    }

    // First see if we can use an existing clone. Check each clone and its
    // callee edges for matching alloc types.
    ContextNode *Clone = nullptr;
    for (auto *CurClone : Node->Clones) {
      if (allocTypeToUse(CurClone->AllocTypes) !=
          allocTypeToUse(CallerEdge->AllocTypes))
        continue;

      if (!allocTypesMatch<DerivedCCG, FuncTy, CallTy>(
              CalleeEdgeAllocTypesForCallerEdge, CurClone->CalleeEdges))
        continue;
      Clone = CurClone;
      break;
    }

    // The edge iterator is adjusted when we move the CallerEdge to the clone.
    if (Clone)
      moveEdgeToExistingCalleeClone(CallerEdge, Clone, &EI);
    else
      Clone = moveEdgeToNewCalleeClone(CallerEdge, &EI);

    assert(EI == Node->CallerEdges.end() ||
           Node->AllocTypes != (uint8_t)AllocationType::None);
    // Sanity check that no alloc types on clone or its edges are None.
    assert(Clone->AllocTypes != (uint8_t)AllocationType::None);
    assert(llvm::none_of(
        Clone->CallerEdges, [&](const std::shared_ptr<ContextEdge> &E) {
          return E->AllocTypes == (uint8_t)AllocationType::None;
        }));
  }

  // Cloning may have resulted in some cloned callee edges with type None,
  // because they aren't carrying any contexts. Remove those edges.
  for (auto *Clone : Node->Clones) {
    removeNoneTypeCalleeEdges(Clone);
    if (VerifyNodes)
      checkNode<DerivedCCG, FuncTy, CallTy>(Clone);
  }
  // We should still have some context ids on the original Node.
  assert(!Node->ContextIds.empty());

  // Remove any callee edges that ended up with alloc type None after creating
  // clones and updating callee edges.
  removeNoneTypeCalleeEdges(Node);

  // Sanity check that no alloc types on node or edges are None.
  assert(Node->AllocTypes != (uint8_t)AllocationType::None);
  assert(llvm::none_of(Node->CalleeEdges,
                       [&](const std::shared_ptr<ContextEdge> &E) {
                         return E->AllocTypes == (uint8_t)AllocationType::None;
                       }));
  assert(llvm::none_of(Node->CallerEdges,
                       [&](const std::shared_ptr<ContextEdge> &E) {
                         return E->AllocTypes == (uint8_t)AllocationType::None;
                       }));

  if (VerifyNodes)
    checkNode<DerivedCCG, FuncTy, CallTy>(Node);
}
2245*06c3fb27SDimitry Andric 
2246*06c3fb27SDimitry Andric void ModuleCallsiteContextGraph::updateAllocationCall(
2247*06c3fb27SDimitry Andric     CallInfo &Call, AllocationType AllocType) {
2248*06c3fb27SDimitry Andric   std::string AllocTypeString = getAllocTypeAttributeString(AllocType);
2249*06c3fb27SDimitry Andric   auto A = llvm::Attribute::get(Call.call()->getFunction()->getContext(),
2250*06c3fb27SDimitry Andric                                 "memprof", AllocTypeString);
2251*06c3fb27SDimitry Andric   cast<CallBase>(Call.call())->addFnAttr(A);
2252*06c3fb27SDimitry Andric   OREGetter(Call.call()->getFunction())
2253*06c3fb27SDimitry Andric       .emit(OptimizationRemark(DEBUG_TYPE, "MemprofAttribute", Call.call())
2254*06c3fb27SDimitry Andric             << ore::NV("AllocationCall", Call.call()) << " in clone "
2255*06c3fb27SDimitry Andric             << ore::NV("Caller", Call.call()->getFunction())
2256*06c3fb27SDimitry Andric             << " marked with memprof allocation attribute "
2257*06c3fb27SDimitry Andric             << ore::NV("Attribute", AllocTypeString));
2258*06c3fb27SDimitry Andric }
2259*06c3fb27SDimitry Andric 
2260*06c3fb27SDimitry Andric void IndexCallsiteContextGraph::updateAllocationCall(CallInfo &Call,
2261*06c3fb27SDimitry Andric                                                      AllocationType AllocType) {
2262*06c3fb27SDimitry Andric   auto *AI = Call.call().dyn_cast<AllocInfo *>();
2263*06c3fb27SDimitry Andric   assert(AI);
2264*06c3fb27SDimitry Andric   assert(AI->Versions.size() > Call.cloneNo());
2265*06c3fb27SDimitry Andric   AI->Versions[Call.cloneNo()] = (uint8_t)AllocType;
2266*06c3fb27SDimitry Andric }
2267*06c3fb27SDimitry Andric 
2268*06c3fb27SDimitry Andric void ModuleCallsiteContextGraph::updateCall(CallInfo &CallerCall,
2269*06c3fb27SDimitry Andric                                             FuncInfo CalleeFunc) {
2270*06c3fb27SDimitry Andric   if (CalleeFunc.cloneNo() > 0)
2271*06c3fb27SDimitry Andric     cast<CallBase>(CallerCall.call())->setCalledFunction(CalleeFunc.func());
2272*06c3fb27SDimitry Andric   OREGetter(CallerCall.call()->getFunction())
2273*06c3fb27SDimitry Andric       .emit(OptimizationRemark(DEBUG_TYPE, "MemprofCall", CallerCall.call())
2274*06c3fb27SDimitry Andric             << ore::NV("Call", CallerCall.call()) << " in clone "
2275*06c3fb27SDimitry Andric             << ore::NV("Caller", CallerCall.call()->getFunction())
2276*06c3fb27SDimitry Andric             << " assigned to call function clone "
2277*06c3fb27SDimitry Andric             << ore::NV("Callee", CalleeFunc.func()));
2278*06c3fb27SDimitry Andric }
2279*06c3fb27SDimitry Andric 
2280*06c3fb27SDimitry Andric void IndexCallsiteContextGraph::updateCall(CallInfo &CallerCall,
2281*06c3fb27SDimitry Andric                                            FuncInfo CalleeFunc) {
2282*06c3fb27SDimitry Andric   auto *CI = CallerCall.call().dyn_cast<CallsiteInfo *>();
2283*06c3fb27SDimitry Andric   assert(CI &&
2284*06c3fb27SDimitry Andric          "Caller cannot be an allocation which should not have profiled calls");
2285*06c3fb27SDimitry Andric   assert(CI->Clones.size() > CallerCall.cloneNo());
2286*06c3fb27SDimitry Andric   CI->Clones[CallerCall.cloneNo()] = CalleeFunc.cloneNo();
2287*06c3fb27SDimitry Andric }
2288*06c3fb27SDimitry Andric 
2289*06c3fb27SDimitry Andric CallsiteContextGraph<ModuleCallsiteContextGraph, Function,
2290*06c3fb27SDimitry Andric                      Instruction *>::FuncInfo
2291*06c3fb27SDimitry Andric ModuleCallsiteContextGraph::cloneFunctionForCallsite(
2292*06c3fb27SDimitry Andric     FuncInfo &Func, CallInfo &Call, std::map<CallInfo, CallInfo> &CallMap,
2293*06c3fb27SDimitry Andric     std::vector<CallInfo> &CallsWithMetadataInFunc, unsigned CloneNo) {
2294*06c3fb27SDimitry Andric   // Use existing LLVM facilities for cloning and obtaining Call in clone
2295*06c3fb27SDimitry Andric   ValueToValueMapTy VMap;
2296*06c3fb27SDimitry Andric   auto *NewFunc = CloneFunction(Func.func(), VMap);
2297*06c3fb27SDimitry Andric   std::string Name = getMemProfFuncName(Func.func()->getName(), CloneNo);
2298*06c3fb27SDimitry Andric   assert(!Func.func()->getParent()->getFunction(Name));
2299*06c3fb27SDimitry Andric   NewFunc->setName(Name);
2300*06c3fb27SDimitry Andric   for (auto &Inst : CallsWithMetadataInFunc) {
2301*06c3fb27SDimitry Andric     // This map always has the initial version in it.
2302*06c3fb27SDimitry Andric     assert(Inst.cloneNo() == 0);
2303*06c3fb27SDimitry Andric     CallMap[Inst] = {cast<Instruction>(VMap[Inst.call()]), CloneNo};
2304*06c3fb27SDimitry Andric   }
2305*06c3fb27SDimitry Andric   OREGetter(Func.func())
2306*06c3fb27SDimitry Andric       .emit(OptimizationRemark(DEBUG_TYPE, "MemprofClone", Func.func())
2307*06c3fb27SDimitry Andric             << "created clone " << ore::NV("NewFunction", NewFunc));
2308*06c3fb27SDimitry Andric   return {NewFunc, CloneNo};
2309*06c3fb27SDimitry Andric }
2310*06c3fb27SDimitry Andric 
2311*06c3fb27SDimitry Andric CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
2312*06c3fb27SDimitry Andric                      IndexCall>::FuncInfo
2313*06c3fb27SDimitry Andric IndexCallsiteContextGraph::cloneFunctionForCallsite(
2314*06c3fb27SDimitry Andric     FuncInfo &Func, CallInfo &Call, std::map<CallInfo, CallInfo> &CallMap,
2315*06c3fb27SDimitry Andric     std::vector<CallInfo> &CallsWithMetadataInFunc, unsigned CloneNo) {
2316*06c3fb27SDimitry Andric   // Check how many clones we have of Call (and therefore function).
2317*06c3fb27SDimitry Andric   // The next clone number is the current size of versions array.
2318*06c3fb27SDimitry Andric   // Confirm this matches the CloneNo provided by the caller, which is based on
2319*06c3fb27SDimitry Andric   // the number of function clones we have.
2320*06c3fb27SDimitry Andric   assert(CloneNo ==
2321*06c3fb27SDimitry Andric          (Call.call().is<AllocInfo *>()
2322*06c3fb27SDimitry Andric               ? Call.call().dyn_cast<AllocInfo *>()->Versions.size()
2323*06c3fb27SDimitry Andric               : Call.call().dyn_cast<CallsiteInfo *>()->Clones.size()));
2324*06c3fb27SDimitry Andric   // Walk all the instructions in this function. Create a new version for
2325*06c3fb27SDimitry Andric   // each (by adding an entry to the Versions/Clones summary array), and copy
2326*06c3fb27SDimitry Andric   // over the version being called for the function clone being cloned here.
2327*06c3fb27SDimitry Andric   // Additionally, add an entry to the CallMap for the new function clone,
2328*06c3fb27SDimitry Andric   // mapping the original call (clone 0, what is in CallsWithMetadataInFunc)
2329*06c3fb27SDimitry Andric   // to the new call clone.
2330*06c3fb27SDimitry Andric   for (auto &Inst : CallsWithMetadataInFunc) {
2331*06c3fb27SDimitry Andric     // This map always has the initial version in it.
2332*06c3fb27SDimitry Andric     assert(Inst.cloneNo() == 0);
2333*06c3fb27SDimitry Andric     if (auto *AI = Inst.call().dyn_cast<AllocInfo *>()) {
2334*06c3fb27SDimitry Andric       assert(AI->Versions.size() == CloneNo);
2335*06c3fb27SDimitry Andric       // We assign the allocation type later (in updateAllocationCall), just add
2336*06c3fb27SDimitry Andric       // an entry for it here.
2337*06c3fb27SDimitry Andric       AI->Versions.push_back(0);
2338*06c3fb27SDimitry Andric     } else {
2339*06c3fb27SDimitry Andric       auto *CI = Inst.call().dyn_cast<CallsiteInfo *>();
2340*06c3fb27SDimitry Andric       assert(CI && CI->Clones.size() == CloneNo);
2341*06c3fb27SDimitry Andric       // We assign the clone number later (in updateCall), just add an entry for
2342*06c3fb27SDimitry Andric       // it here.
2343*06c3fb27SDimitry Andric       CI->Clones.push_back(0);
2344*06c3fb27SDimitry Andric     }
2345*06c3fb27SDimitry Andric     CallMap[Inst] = {Inst.call(), CloneNo};
2346*06c3fb27SDimitry Andric   }
2347*06c3fb27SDimitry Andric   return {Func.func(), CloneNo};
2348*06c3fb27SDimitry Andric }
2349*06c3fb27SDimitry Andric 
2350*06c3fb27SDimitry Andric // This method assigns cloned callsites to functions, cloning the functions as
2351*06c3fb27SDimitry Andric // needed. The assignment is greedy and proceeds roughly as follows:
2352*06c3fb27SDimitry Andric //
2353*06c3fb27SDimitry Andric // For each function Func:
2354*06c3fb27SDimitry Andric //   For each call with graph Node having clones:
2355*06c3fb27SDimitry Andric //     Initialize ClonesWorklist to Node and its clones
2356*06c3fb27SDimitry Andric //     Initialize NodeCloneCount to 0
2357*06c3fb27SDimitry Andric //     While ClonesWorklist is not empty:
2358*06c3fb27SDimitry Andric //        Clone = pop front ClonesWorklist
2359*06c3fb27SDimitry Andric //        NodeCloneCount++
2360*06c3fb27SDimitry Andric //        If Func has been cloned less than NodeCloneCount times:
2361*06c3fb27SDimitry Andric //           If NodeCloneCount is 1:
2362*06c3fb27SDimitry Andric //             Assign Clone to original Func
2363*06c3fb27SDimitry Andric //             Continue
2364*06c3fb27SDimitry Andric //           Create a new function clone
2365*06c3fb27SDimitry Andric //           If other callers not assigned to call a function clone yet:
2366*06c3fb27SDimitry Andric //              Assign them to call new function clone
2367*06c3fb27SDimitry Andric //              Continue
2368*06c3fb27SDimitry Andric //           Assign any other caller calling the cloned version to new clone
2369*06c3fb27SDimitry Andric //
2370*06c3fb27SDimitry Andric //        For each caller of Clone:
2371*06c3fb27SDimitry Andric //           If caller is assigned to call a specific function clone:
2372*06c3fb27SDimitry Andric //             If we cannot assign Clone to that function clone:
2373*06c3fb27SDimitry Andric //               Create new callsite Clone NewClone
2374*06c3fb27SDimitry Andric //               Add NewClone to ClonesWorklist
2375*06c3fb27SDimitry Andric //               Continue
2376*06c3fb27SDimitry Andric //             Assign Clone to existing caller's called function clone
2377*06c3fb27SDimitry Andric //           Else:
2378*06c3fb27SDimitry Andric //             If Clone not already assigned to a function clone:
2379*06c3fb27SDimitry Andric //                Assign to first function clone without assignment
2380*06c3fb27SDimitry Andric //             Assign caller to selected function clone
2381*06c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
2382*06c3fb27SDimitry Andric bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
2383*06c3fb27SDimitry Andric   bool Changed = false;
2384*06c3fb27SDimitry Andric 
2385*06c3fb27SDimitry Andric   // Keep track of the assignment of nodes (callsites) to function clones they
2386*06c3fb27SDimitry Andric   // call.
2387*06c3fb27SDimitry Andric   DenseMap<ContextNode *, FuncInfo> CallsiteToCalleeFuncCloneMap;
2388*06c3fb27SDimitry Andric 
2389*06c3fb27SDimitry Andric   // Update caller node to call function version CalleeFunc, by recording the
2390*06c3fb27SDimitry Andric   // assignment in CallsiteToCalleeFuncCloneMap.
2391*06c3fb27SDimitry Andric   auto RecordCalleeFuncOfCallsite = [&](ContextNode *Caller,
2392*06c3fb27SDimitry Andric                                         const FuncInfo &CalleeFunc) {
2393*06c3fb27SDimitry Andric     assert(Caller->hasCall());
2394*06c3fb27SDimitry Andric     CallsiteToCalleeFuncCloneMap[Caller] = CalleeFunc;
2395*06c3fb27SDimitry Andric   };
2396*06c3fb27SDimitry Andric 
2397*06c3fb27SDimitry Andric   // Walk all functions for which we saw calls with memprof metadata, and handle
2398*06c3fb27SDimitry Andric   // cloning for each of its calls.
2399*06c3fb27SDimitry Andric   for (auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) {
2400*06c3fb27SDimitry Andric     FuncInfo OrigFunc(Func);
2401*06c3fb27SDimitry Andric     // Map from each clone of OrigFunc to a map of remappings of each call of
2402*06c3fb27SDimitry Andric     // interest (from original uncloned call to the corresponding cloned call in
2403*06c3fb27SDimitry Andric     // that function clone).
2404*06c3fb27SDimitry Andric     std::map<FuncInfo, std::map<CallInfo, CallInfo>> FuncClonesToCallMap;
2405*06c3fb27SDimitry Andric     for (auto &Call : CallsWithMetadata) {
2406*06c3fb27SDimitry Andric       ContextNode *Node = getNodeForInst(Call);
2407*06c3fb27SDimitry Andric       // Skip call if we do not have a node for it (all uses of its stack ids
2408*06c3fb27SDimitry Andric       // were either on inlined chains or pruned from the MIBs), or if we did
2409*06c3fb27SDimitry Andric       // not create any clones for it.
2410*06c3fb27SDimitry Andric       if (!Node || Node->Clones.empty())
2411*06c3fb27SDimitry Andric         continue;
2412*06c3fb27SDimitry Andric       assert(Node->hasCall() &&
2413*06c3fb27SDimitry Andric              "Not having a call should have prevented cloning");
2414*06c3fb27SDimitry Andric 
2415*06c3fb27SDimitry Andric       // Track the assignment of function clones to clones of the current
2416*06c3fb27SDimitry Andric       // callsite Node being handled.
2417*06c3fb27SDimitry Andric       std::map<FuncInfo, ContextNode *> FuncCloneToCurNodeCloneMap;
2418*06c3fb27SDimitry Andric 
2419*06c3fb27SDimitry Andric       // Assign callsite version CallsiteClone to function version FuncClone,
2420*06c3fb27SDimitry Andric       // and also assign (possibly cloned) Call to CallsiteClone.
2421*06c3fb27SDimitry Andric       auto AssignCallsiteCloneToFuncClone = [&](const FuncInfo &FuncClone,
2422*06c3fb27SDimitry Andric                                                 CallInfo &Call,
2423*06c3fb27SDimitry Andric                                                 ContextNode *CallsiteClone,
2424*06c3fb27SDimitry Andric                                                 bool IsAlloc) {
2425*06c3fb27SDimitry Andric         // Record the clone of callsite node assigned to this function clone.
2426*06c3fb27SDimitry Andric         FuncCloneToCurNodeCloneMap[FuncClone] = CallsiteClone;
2427*06c3fb27SDimitry Andric 
2428*06c3fb27SDimitry Andric         assert(FuncClonesToCallMap.count(FuncClone));
2429*06c3fb27SDimitry Andric         std::map<CallInfo, CallInfo> &CallMap = FuncClonesToCallMap[FuncClone];
2430*06c3fb27SDimitry Andric         CallInfo CallClone(Call);
2431*06c3fb27SDimitry Andric         if (CallMap.count(Call))
2432*06c3fb27SDimitry Andric           CallClone = CallMap[Call];
2433*06c3fb27SDimitry Andric         CallsiteClone->setCall(CallClone);
2434*06c3fb27SDimitry Andric       };
2435*06c3fb27SDimitry Andric 
2436*06c3fb27SDimitry Andric       // Keep track of the clones of callsite Node that need to be assigned to
2437*06c3fb27SDimitry Andric       // function clones. This list may be expanded in the loop body below if we
2438*06c3fb27SDimitry Andric       // find additional cloning is required.
2439*06c3fb27SDimitry Andric       std::deque<ContextNode *> ClonesWorklist;
2440*06c3fb27SDimitry Andric       // Ignore original Node if we moved all of its contexts to clones.
2441*06c3fb27SDimitry Andric       if (!Node->ContextIds.empty())
2442*06c3fb27SDimitry Andric         ClonesWorklist.push_back(Node);
2443*06c3fb27SDimitry Andric       ClonesWorklist.insert(ClonesWorklist.end(), Node->Clones.begin(),
2444*06c3fb27SDimitry Andric                             Node->Clones.end());
2445*06c3fb27SDimitry Andric 
2446*06c3fb27SDimitry Andric       // Now walk through all of the clones of this callsite Node that we need,
2447*06c3fb27SDimitry Andric       // and determine the assignment to a corresponding clone of the current
2448*06c3fb27SDimitry Andric       // function (creating new function clones as needed).
2449*06c3fb27SDimitry Andric       unsigned NodeCloneCount = 0;
2450*06c3fb27SDimitry Andric       while (!ClonesWorklist.empty()) {
2451*06c3fb27SDimitry Andric         ContextNode *Clone = ClonesWorklist.front();
2452*06c3fb27SDimitry Andric         ClonesWorklist.pop_front();
2453*06c3fb27SDimitry Andric         NodeCloneCount++;
2454*06c3fb27SDimitry Andric         if (VerifyNodes)
2455*06c3fb27SDimitry Andric           checkNode<DerivedCCG, FuncTy, CallTy>(Clone);
2456*06c3fb27SDimitry Andric 
2457*06c3fb27SDimitry Andric         // Need to create a new function clone if we have more callsite clones
2458*06c3fb27SDimitry Andric         // than existing function clones, which would have been assigned to an
2459*06c3fb27SDimitry Andric         // earlier clone in the list (we assign callsite clones to function
2460*06c3fb27SDimitry Andric         // clones greedily).
2461*06c3fb27SDimitry Andric         if (FuncClonesToCallMap.size() < NodeCloneCount) {
2462*06c3fb27SDimitry Andric           // If this is the first callsite copy, assign to original function.
2463*06c3fb27SDimitry Andric           if (NodeCloneCount == 1) {
2464*06c3fb27SDimitry Andric             // Since FuncClonesToCallMap is empty in this case, no clones have
2465*06c3fb27SDimitry Andric             // been created for this function yet, and no callers should have
2466*06c3fb27SDimitry Andric             // been assigned a function clone for this callee node yet.
2467*06c3fb27SDimitry Andric             assert(llvm::none_of(
2468*06c3fb27SDimitry Andric                 Clone->CallerEdges, [&](const std::shared_ptr<ContextEdge> &E) {
2469*06c3fb27SDimitry Andric                   return CallsiteToCalleeFuncCloneMap.count(E->Caller);
2470*06c3fb27SDimitry Andric                 }));
2471*06c3fb27SDimitry Andric             // Initialize with empty call map, assign Clone to original function
2472*06c3fb27SDimitry Andric             // and its callers, and skip to the next clone.
2473*06c3fb27SDimitry Andric             FuncClonesToCallMap[OrigFunc] = {};
2474*06c3fb27SDimitry Andric             AssignCallsiteCloneToFuncClone(
2475*06c3fb27SDimitry Andric                 OrigFunc, Call, Clone,
2476*06c3fb27SDimitry Andric                 AllocationCallToContextNodeMap.count(Call));
2477*06c3fb27SDimitry Andric             for (auto &CE : Clone->CallerEdges) {
2478*06c3fb27SDimitry Andric               // Ignore any caller that does not have a recorded callsite Call.
2479*06c3fb27SDimitry Andric               if (!CE->Caller->hasCall())
2480*06c3fb27SDimitry Andric                 continue;
2481*06c3fb27SDimitry Andric               RecordCalleeFuncOfCallsite(CE->Caller, OrigFunc);
2482*06c3fb27SDimitry Andric             }
2483*06c3fb27SDimitry Andric             continue;
2484*06c3fb27SDimitry Andric           }
2485*06c3fb27SDimitry Andric 
2486*06c3fb27SDimitry Andric           // First locate which copy of OrigFunc to clone again. If a caller
2487*06c3fb27SDimitry Andric           // of this callsite clone was already assigned to call a particular
2488*06c3fb27SDimitry Andric           // function clone, we need to redirect all of those callers to the
2489*06c3fb27SDimitry Andric           // new function clone, and update their other callees within this
2490*06c3fb27SDimitry Andric           // function.
2491*06c3fb27SDimitry Andric           FuncInfo PreviousAssignedFuncClone;
2492*06c3fb27SDimitry Andric           auto EI = llvm::find_if(
2493*06c3fb27SDimitry Andric               Clone->CallerEdges, [&](const std::shared_ptr<ContextEdge> &E) {
2494*06c3fb27SDimitry Andric                 return CallsiteToCalleeFuncCloneMap.count(E->Caller);
2495*06c3fb27SDimitry Andric               });
2496*06c3fb27SDimitry Andric           bool CallerAssignedToCloneOfFunc = false;
2497*06c3fb27SDimitry Andric           if (EI != Clone->CallerEdges.end()) {
2498*06c3fb27SDimitry Andric             const std::shared_ptr<ContextEdge> &Edge = *EI;
2499*06c3fb27SDimitry Andric             PreviousAssignedFuncClone =
2500*06c3fb27SDimitry Andric                 CallsiteToCalleeFuncCloneMap[Edge->Caller];
2501*06c3fb27SDimitry Andric             CallerAssignedToCloneOfFunc = true;
2502*06c3fb27SDimitry Andric           }
2503*06c3fb27SDimitry Andric 
2504*06c3fb27SDimitry Andric           // Clone function and save it along with the CallInfo map created
2505*06c3fb27SDimitry Andric           // during cloning in the FuncClonesToCallMap.
2506*06c3fb27SDimitry Andric           std::map<CallInfo, CallInfo> NewCallMap;
2507*06c3fb27SDimitry Andric           unsigned CloneNo = FuncClonesToCallMap.size();
2508*06c3fb27SDimitry Andric           assert(CloneNo > 0 && "Clone 0 is the original function, which "
2509*06c3fb27SDimitry Andric                                 "should already exist in the map");
2510*06c3fb27SDimitry Andric           FuncInfo NewFuncClone = cloneFunctionForCallsite(
2511*06c3fb27SDimitry Andric               OrigFunc, Call, NewCallMap, CallsWithMetadata, CloneNo);
2512*06c3fb27SDimitry Andric           FuncClonesToCallMap.emplace(NewFuncClone, std::move(NewCallMap));
2513*06c3fb27SDimitry Andric           FunctionClonesAnalysis++;
2514*06c3fb27SDimitry Andric           Changed = true;
2515*06c3fb27SDimitry Andric 
2516*06c3fb27SDimitry Andric           // If no caller callsites were already assigned to a clone of this
2517*06c3fb27SDimitry Andric           // function, we can simply assign this clone to the new func clone
2518*06c3fb27SDimitry Andric           // and update all callers to it, then skip to the next clone.
2519*06c3fb27SDimitry Andric           if (!CallerAssignedToCloneOfFunc) {
2520*06c3fb27SDimitry Andric             AssignCallsiteCloneToFuncClone(
2521*06c3fb27SDimitry Andric                 NewFuncClone, Call, Clone,
2522*06c3fb27SDimitry Andric                 AllocationCallToContextNodeMap.count(Call));
2523*06c3fb27SDimitry Andric             for (auto &CE : Clone->CallerEdges) {
2524*06c3fb27SDimitry Andric               // Ignore any caller that does not have a recorded callsite Call.
2525*06c3fb27SDimitry Andric               if (!CE->Caller->hasCall())
2526*06c3fb27SDimitry Andric                 continue;
2527*06c3fb27SDimitry Andric               RecordCalleeFuncOfCallsite(CE->Caller, NewFuncClone);
2528*06c3fb27SDimitry Andric             }
2529*06c3fb27SDimitry Andric             continue;
2530*06c3fb27SDimitry Andric           }
2531*06c3fb27SDimitry Andric 
2532*06c3fb27SDimitry Andric           // We may need to do additional node cloning in this case.
2533*06c3fb27SDimitry Andric           // Reset the CallsiteToCalleeFuncCloneMap entry for any callers
2534*06c3fb27SDimitry Andric           // that were previously assigned to call PreviousAssignedFuncClone,
2535*06c3fb27SDimitry Andric           // to record that they now call NewFuncClone.
2536*06c3fb27SDimitry Andric           for (auto CE : Clone->CallerEdges) {
2537*06c3fb27SDimitry Andric             // Ignore any caller that does not have a recorded callsite Call.
2538*06c3fb27SDimitry Andric             if (!CE->Caller->hasCall())
2539*06c3fb27SDimitry Andric               continue;
2540*06c3fb27SDimitry Andric 
2541*06c3fb27SDimitry Andric             if (!CallsiteToCalleeFuncCloneMap.count(CE->Caller) ||
2542*06c3fb27SDimitry Andric                 // We subsequently fall through to later handling that
2543*06c3fb27SDimitry Andric                 // will perform any additional cloning required for
2544*06c3fb27SDimitry Andric                 // callers that were calling other function clones.
2545*06c3fb27SDimitry Andric                 CallsiteToCalleeFuncCloneMap[CE->Caller] !=
2546*06c3fb27SDimitry Andric                     PreviousAssignedFuncClone)
2547*06c3fb27SDimitry Andric               continue;
2548*06c3fb27SDimitry Andric 
2549*06c3fb27SDimitry Andric             RecordCalleeFuncOfCallsite(CE->Caller, NewFuncClone);
2550*06c3fb27SDimitry Andric 
2551*06c3fb27SDimitry Andric             // If we are cloning a function that was already assigned to some
2552*06c3fb27SDimitry Andric             // callers, then essentially we are creating new callsite clones
2553*06c3fb27SDimitry Andric             // of the other callsites in that function that are reached by those
2554*06c3fb27SDimitry Andric             // callers. Clone the other callees of the current callsite's caller
2555*06c3fb27SDimitry Andric             // that were already assigned to PreviousAssignedFuncClone
2556*06c3fb27SDimitry Andric             // accordingly. This is important since we subsequently update the
2557*06c3fb27SDimitry Andric             // calls from the nodes in the graph and their assignments to callee
2558*06c3fb27SDimitry Andric             // functions recorded in CallsiteToCalleeFuncCloneMap.
2559*06c3fb27SDimitry Andric             for (auto CalleeEdge : CE->Caller->CalleeEdges) {
2560*06c3fb27SDimitry Andric               // Skip any that have been removed on an earlier iteration when
2561*06c3fb27SDimitry Andric               // cleaning up newly None type callee edges.
2562*06c3fb27SDimitry Andric               if (!CalleeEdge)
2563*06c3fb27SDimitry Andric                 continue;
2564*06c3fb27SDimitry Andric               ContextNode *Callee = CalleeEdge->Callee;
2565*06c3fb27SDimitry Andric               // Skip the current callsite, we are looking for other
2566*06c3fb27SDimitry Andric               // callsites Caller calls, as well as any that does not have a
2567*06c3fb27SDimitry Andric               // recorded callsite Call.
2568*06c3fb27SDimitry Andric               if (Callee == Clone || !Callee->hasCall())
2569*06c3fb27SDimitry Andric                 continue;
2570*06c3fb27SDimitry Andric               ContextNode *NewClone = moveEdgeToNewCalleeClone(CalleeEdge);
2571*06c3fb27SDimitry Andric               removeNoneTypeCalleeEdges(NewClone);
2572*06c3fb27SDimitry Andric               // Moving the edge may have resulted in some none type
2573*06c3fb27SDimitry Andric               // callee edges on the original Callee.
2574*06c3fb27SDimitry Andric               removeNoneTypeCalleeEdges(Callee);
2575*06c3fb27SDimitry Andric               assert(NewClone->AllocTypes != (uint8_t)AllocationType::None);
2576*06c3fb27SDimitry Andric               // If the Callee node was already assigned to call a specific
2577*06c3fb27SDimitry Andric               // function version, make sure its new clone is assigned to call
2578*06c3fb27SDimitry Andric               // that same function clone.
2579*06c3fb27SDimitry Andric               if (CallsiteToCalleeFuncCloneMap.count(Callee))
2580*06c3fb27SDimitry Andric                 RecordCalleeFuncOfCallsite(
2581*06c3fb27SDimitry Andric                     NewClone, CallsiteToCalleeFuncCloneMap[Callee]);
2582*06c3fb27SDimitry Andric               // Update NewClone with the new Call clone of this callsite's Call
2583*06c3fb27SDimitry Andric               // created for the new function clone created earlier.
2584*06c3fb27SDimitry Andric               // Recall that we have already ensured when building the graph
2585*06c3fb27SDimitry Andric               // that each caller can only call callsites within the same
2586*06c3fb27SDimitry Andric               // function, so we are guaranteed that Callee Call is in the
2587*06c3fb27SDimitry Andric               // current OrigFunc.
2588*06c3fb27SDimitry Andric               // CallMap is set up as indexed by original Call at clone 0.
2589*06c3fb27SDimitry Andric               CallInfo OrigCall(Callee->getOrigNode()->Call);
2590*06c3fb27SDimitry Andric               OrigCall.setCloneNo(0);
2591*06c3fb27SDimitry Andric               std::map<CallInfo, CallInfo> &CallMap =
2592*06c3fb27SDimitry Andric                   FuncClonesToCallMap[NewFuncClone];
2593*06c3fb27SDimitry Andric               assert(CallMap.count(OrigCall));
2594*06c3fb27SDimitry Andric               CallInfo NewCall(CallMap[OrigCall]);
2595*06c3fb27SDimitry Andric               assert(NewCall);
2596*06c3fb27SDimitry Andric               NewClone->setCall(NewCall);
2597*06c3fb27SDimitry Andric             }
2598*06c3fb27SDimitry Andric           }
2599*06c3fb27SDimitry Andric           // Fall through to handling below to perform the recording of the
2600*06c3fb27SDimitry Andric           // function for this callsite clone. This enables handling of cases
2601*06c3fb27SDimitry Andric           // where the callers were assigned to different clones of a function.
2602*06c3fb27SDimitry Andric         }
2603*06c3fb27SDimitry Andric 
2604*06c3fb27SDimitry Andric         // See if we can use existing function clone. Walk through
2605*06c3fb27SDimitry Andric         // all caller edges to see if any have already been assigned to
2606*06c3fb27SDimitry Andric         // a clone of this callsite's function. If we can use it, do so. If not,
2607*06c3fb27SDimitry Andric         // because that function clone is already assigned to a different clone
2608*06c3fb27SDimitry Andric         // of this callsite, then we need to clone again.
2609*06c3fb27SDimitry Andric         // Basically, this checking is needed to handle the case where different
2610*06c3fb27SDimitry Andric         // caller functions/callsites may need versions of this function
2611*06c3fb27SDimitry Andric         // containing different mixes of callsite clones across the different
2612*06c3fb27SDimitry Andric         // callsites within the function. If that happens, we need to create
2613*06c3fb27SDimitry Andric         // additional function clones to handle the various combinations.
2614*06c3fb27SDimitry Andric         //
2615*06c3fb27SDimitry Andric         // Keep track of any new clones of this callsite created by the
2616*06c3fb27SDimitry Andric         // following loop, as well as any existing clone that we decided to
2617*06c3fb27SDimitry Andric         // assign this clone to.
2618*06c3fb27SDimitry Andric         std::map<FuncInfo, ContextNode *> FuncCloneToNewCallsiteCloneMap;
2619*06c3fb27SDimitry Andric         FuncInfo FuncCloneAssignedToCurCallsiteClone;
2620*06c3fb27SDimitry Andric         // We need to be able to remove Edge from CallerEdges, so need to adjust
2621*06c3fb27SDimitry Andric         // iterator in the loop.
2622*06c3fb27SDimitry Andric         for (auto EI = Clone->CallerEdges.begin();
2623*06c3fb27SDimitry Andric              EI != Clone->CallerEdges.end();) {
2624*06c3fb27SDimitry Andric           auto Edge = *EI;
2625*06c3fb27SDimitry Andric           // Ignore any caller that does not have a recorded callsite Call.
2626*06c3fb27SDimitry Andric           if (!Edge->Caller->hasCall()) {
2627*06c3fb27SDimitry Andric             EI++;
2628*06c3fb27SDimitry Andric             continue;
2629*06c3fb27SDimitry Andric           }
2630*06c3fb27SDimitry Andric           // If this caller already assigned to call a version of OrigFunc, need
2631*06c3fb27SDimitry Andric           // to ensure we can assign this callsite clone to that function clone.
2632*06c3fb27SDimitry Andric           if (CallsiteToCalleeFuncCloneMap.count(Edge->Caller)) {
2633*06c3fb27SDimitry Andric             FuncInfo FuncCloneCalledByCaller =
2634*06c3fb27SDimitry Andric                 CallsiteToCalleeFuncCloneMap[Edge->Caller];
2635*06c3fb27SDimitry Andric             // First we need to confirm that this function clone is available
2636*06c3fb27SDimitry Andric             // for use by this callsite node clone.
2637*06c3fb27SDimitry Andric             //
2638*06c3fb27SDimitry Andric             // While FuncCloneToCurNodeCloneMap is built only for this Node and
2639*06c3fb27SDimitry Andric             // its callsite clones, one of those callsite clones X could have
2640*06c3fb27SDimitry Andric             // been assigned to the same function clone called by Edge's caller
2641*06c3fb27SDimitry Andric             // - if Edge's caller calls another callsite within Node's original
2642*06c3fb27SDimitry Andric             // function, and that callsite has another caller reaching clone X.
2643*06c3fb27SDimitry Andric             // We need to clone Node again in this case.
2644*06c3fb27SDimitry Andric             if ((FuncCloneToCurNodeCloneMap.count(FuncCloneCalledByCaller) &&
2645*06c3fb27SDimitry Andric                  FuncCloneToCurNodeCloneMap[FuncCloneCalledByCaller] !=
2646*06c3fb27SDimitry Andric                      Clone) ||
2647*06c3fb27SDimitry Andric                 // Detect when we have multiple callers of this callsite that
2648*06c3fb27SDimitry Andric                 // have already been assigned to specific, and different, clones
2649*06c3fb27SDimitry Andric                 // of OrigFunc (due to other unrelated callsites in Func they
2650*06c3fb27SDimitry Andric                 // reach via call contexts). Is this Clone of callsite Node
2651*06c3fb27SDimitry Andric                 // assigned to a different clone of OrigFunc? If so, clone Node
2652*06c3fb27SDimitry Andric                 // again.
2653*06c3fb27SDimitry Andric                 (FuncCloneAssignedToCurCallsiteClone &&
2654*06c3fb27SDimitry Andric                  FuncCloneAssignedToCurCallsiteClone !=
2655*06c3fb27SDimitry Andric                      FuncCloneCalledByCaller)) {
2656*06c3fb27SDimitry Andric               // We need to use a different newly created callsite clone, in
2657*06c3fb27SDimitry Andric               // order to assign it to another new function clone on a
2658*06c3fb27SDimitry Andric               // subsequent iteration over the Clones array (adjusted below).
2659*06c3fb27SDimitry Andric               // Note we specifically do not reset the
2660*06c3fb27SDimitry Andric               // CallsiteToCalleeFuncCloneMap entry for this caller, so that
2661*06c3fb27SDimitry Andric               // when this new clone is processed later we know which version of
2662*06c3fb27SDimitry Andric               // the function to copy (so that other callsite clones we have
2663*06c3fb27SDimitry Andric               // assigned to that function clone are properly cloned over). See
2664*06c3fb27SDimitry Andric               // comments in the function cloning handling earlier.
2665*06c3fb27SDimitry Andric 
2666*06c3fb27SDimitry Andric               // Check if we already have cloned this callsite again while
2667*06c3fb27SDimitry Andric               // walking through caller edges, for a caller calling the same
2668*06c3fb27SDimitry Andric               // function clone. If so, we can move this edge to that new clone
2669*06c3fb27SDimitry Andric               // rather than creating yet another new clone.
2670*06c3fb27SDimitry Andric               if (FuncCloneToNewCallsiteCloneMap.count(
2671*06c3fb27SDimitry Andric                       FuncCloneCalledByCaller)) {
2672*06c3fb27SDimitry Andric                 ContextNode *NewClone =
2673*06c3fb27SDimitry Andric                     FuncCloneToNewCallsiteCloneMap[FuncCloneCalledByCaller];
2674*06c3fb27SDimitry Andric                 moveEdgeToExistingCalleeClone(Edge, NewClone, &EI);
2675*06c3fb27SDimitry Andric                 // Cleanup any none type edges cloned over.
2676*06c3fb27SDimitry Andric                 removeNoneTypeCalleeEdges(NewClone);
2677*06c3fb27SDimitry Andric               } else {
2678*06c3fb27SDimitry Andric                 // Create a new callsite clone.
2679*06c3fb27SDimitry Andric                 ContextNode *NewClone = moveEdgeToNewCalleeClone(Edge, &EI);
2680*06c3fb27SDimitry Andric                 removeNoneTypeCalleeEdges(NewClone);
2681*06c3fb27SDimitry Andric                 FuncCloneToNewCallsiteCloneMap[FuncCloneCalledByCaller] =
2682*06c3fb27SDimitry Andric                     NewClone;
2683*06c3fb27SDimitry Andric                 // Add to list of clones and process later.
2684*06c3fb27SDimitry Andric                 ClonesWorklist.push_back(NewClone);
2685*06c3fb27SDimitry Andric                 assert(EI == Clone->CallerEdges.end() ||
2686*06c3fb27SDimitry Andric                        Clone->AllocTypes != (uint8_t)AllocationType::None);
2687*06c3fb27SDimitry Andric                 assert(NewClone->AllocTypes != (uint8_t)AllocationType::None);
2688*06c3fb27SDimitry Andric               }
2689*06c3fb27SDimitry Andric               // Moving the caller edge may have resulted in some none type
2690*06c3fb27SDimitry Andric               // callee edges.
2691*06c3fb27SDimitry Andric               removeNoneTypeCalleeEdges(Clone);
2692*06c3fb27SDimitry Andric               // We will handle the newly created callsite clone in a subsequent
2693*06c3fb27SDimitry Andric               // iteration over this Node's Clones. Continue here since we
2694*06c3fb27SDimitry Andric               // already adjusted iterator EI while moving the edge.
2695*06c3fb27SDimitry Andric               continue;
2696*06c3fb27SDimitry Andric             }
2697*06c3fb27SDimitry Andric 
2698*06c3fb27SDimitry Andric             // Otherwise, we can use the function clone already assigned to this
2699*06c3fb27SDimitry Andric             // caller.
2700*06c3fb27SDimitry Andric             if (!FuncCloneAssignedToCurCallsiteClone) {
2701*06c3fb27SDimitry Andric               FuncCloneAssignedToCurCallsiteClone = FuncCloneCalledByCaller;
2702*06c3fb27SDimitry Andric               // Assign Clone to FuncCloneCalledByCaller
2703*06c3fb27SDimitry Andric               AssignCallsiteCloneToFuncClone(
2704*06c3fb27SDimitry Andric                   FuncCloneCalledByCaller, Call, Clone,
2705*06c3fb27SDimitry Andric                   AllocationCallToContextNodeMap.count(Call));
2706*06c3fb27SDimitry Andric             } else
2707*06c3fb27SDimitry Andric               // Don't need to do anything - callsite is already calling this
2708*06c3fb27SDimitry Andric               // function clone.
2709*06c3fb27SDimitry Andric               assert(FuncCloneAssignedToCurCallsiteClone ==
2710*06c3fb27SDimitry Andric                      FuncCloneCalledByCaller);
2711*06c3fb27SDimitry Andric 
2712*06c3fb27SDimitry Andric           } else {
2713*06c3fb27SDimitry Andric             // We have not already assigned this caller to a version of
2714*06c3fb27SDimitry Andric             // OrigFunc. Do the assignment now.
2715*06c3fb27SDimitry Andric 
2716*06c3fb27SDimitry Andric             // First check if we have already assigned this callsite clone to a
2717*06c3fb27SDimitry Andric             // clone of OrigFunc for another caller during this iteration over
2718*06c3fb27SDimitry Andric             // its caller edges.
2719*06c3fb27SDimitry Andric             if (!FuncCloneAssignedToCurCallsiteClone) {
2720*06c3fb27SDimitry Andric               // Find first function in FuncClonesToCallMap without an assigned
2721*06c3fb27SDimitry Andric               // clone of this callsite Node. We should always have one
2722*06c3fb27SDimitry Andric               // available at this point due to the earlier cloning when the
2723*06c3fb27SDimitry Andric               // FuncClonesToCallMap size was smaller than the clone number.
2724*06c3fb27SDimitry Andric               for (auto &CF : FuncClonesToCallMap) {
2725*06c3fb27SDimitry Andric                 if (!FuncCloneToCurNodeCloneMap.count(CF.first)) {
2726*06c3fb27SDimitry Andric                   FuncCloneAssignedToCurCallsiteClone = CF.first;
2727*06c3fb27SDimitry Andric                   break;
2728*06c3fb27SDimitry Andric                 }
2729*06c3fb27SDimitry Andric               }
2730*06c3fb27SDimitry Andric               assert(FuncCloneAssignedToCurCallsiteClone);
2731*06c3fb27SDimitry Andric               // Assign Clone to FuncCloneAssignedToCurCallsiteClone
2732*06c3fb27SDimitry Andric               AssignCallsiteCloneToFuncClone(
2733*06c3fb27SDimitry Andric                   FuncCloneAssignedToCurCallsiteClone, Call, Clone,
2734*06c3fb27SDimitry Andric                   AllocationCallToContextNodeMap.count(Call));
2735*06c3fb27SDimitry Andric             } else
2736*06c3fb27SDimitry Andric               assert(FuncCloneToCurNodeCloneMap
2737*06c3fb27SDimitry Andric                          [FuncCloneAssignedToCurCallsiteClone] == Clone);
2738*06c3fb27SDimitry Andric             // Update callers to record function version called.
2739*06c3fb27SDimitry Andric             RecordCalleeFuncOfCallsite(Edge->Caller,
2740*06c3fb27SDimitry Andric                                        FuncCloneAssignedToCurCallsiteClone);
2741*06c3fb27SDimitry Andric           }
2742*06c3fb27SDimitry Andric 
2743*06c3fb27SDimitry Andric           EI++;
2744*06c3fb27SDimitry Andric         }
2745*06c3fb27SDimitry Andric       }
2746*06c3fb27SDimitry Andric       if (VerifyCCG) {
2747*06c3fb27SDimitry Andric         checkNode<DerivedCCG, FuncTy, CallTy>(Node);
2748*06c3fb27SDimitry Andric         for (const auto &PE : Node->CalleeEdges)
2749*06c3fb27SDimitry Andric           checkNode<DerivedCCG, FuncTy, CallTy>(PE->Callee);
2750*06c3fb27SDimitry Andric         for (const auto &CE : Node->CallerEdges)
2751*06c3fb27SDimitry Andric           checkNode<DerivedCCG, FuncTy, CallTy>(CE->Caller);
2752*06c3fb27SDimitry Andric         for (auto *Clone : Node->Clones) {
2753*06c3fb27SDimitry Andric           checkNode<DerivedCCG, FuncTy, CallTy>(Clone);
2754*06c3fb27SDimitry Andric           for (const auto &PE : Clone->CalleeEdges)
2755*06c3fb27SDimitry Andric             checkNode<DerivedCCG, FuncTy, CallTy>(PE->Callee);
2756*06c3fb27SDimitry Andric           for (const auto &CE : Clone->CallerEdges)
2757*06c3fb27SDimitry Andric             checkNode<DerivedCCG, FuncTy, CallTy>(CE->Caller);
2758*06c3fb27SDimitry Andric         }
2759*06c3fb27SDimitry Andric       }
2760*06c3fb27SDimitry Andric     }
2761*06c3fb27SDimitry Andric   }
2762*06c3fb27SDimitry Andric 
2763*06c3fb27SDimitry Andric   auto UpdateCalls = [&](ContextNode *Node,
2764*06c3fb27SDimitry Andric                          DenseSet<const ContextNode *> &Visited,
2765*06c3fb27SDimitry Andric                          auto &&UpdateCalls) {
2766*06c3fb27SDimitry Andric     auto Inserted = Visited.insert(Node);
2767*06c3fb27SDimitry Andric     if (!Inserted.second)
2768*06c3fb27SDimitry Andric       return;
2769*06c3fb27SDimitry Andric 
2770*06c3fb27SDimitry Andric     for (auto *Clone : Node->Clones)
2771*06c3fb27SDimitry Andric       UpdateCalls(Clone, Visited, UpdateCalls);
2772*06c3fb27SDimitry Andric 
2773*06c3fb27SDimitry Andric     for (auto &Edge : Node->CallerEdges)
2774*06c3fb27SDimitry Andric       UpdateCalls(Edge->Caller, Visited, UpdateCalls);
2775*06c3fb27SDimitry Andric 
2776*06c3fb27SDimitry Andric     // Skip if either no call to update, or if we ended up with no context ids
2777*06c3fb27SDimitry Andric     // (we moved all edges onto other clones).
2778*06c3fb27SDimitry Andric     if (!Node->hasCall() || Node->ContextIds.empty())
2779*06c3fb27SDimitry Andric       return;
2780*06c3fb27SDimitry Andric 
2781*06c3fb27SDimitry Andric     if (Node->IsAllocation) {
2782*06c3fb27SDimitry Andric       updateAllocationCall(Node->Call, allocTypeToUse(Node->AllocTypes));
2783*06c3fb27SDimitry Andric       return;
2784*06c3fb27SDimitry Andric     }
2785*06c3fb27SDimitry Andric 
2786*06c3fb27SDimitry Andric     if (!CallsiteToCalleeFuncCloneMap.count(Node))
2787*06c3fb27SDimitry Andric       return;
2788*06c3fb27SDimitry Andric 
2789*06c3fb27SDimitry Andric     auto CalleeFunc = CallsiteToCalleeFuncCloneMap[Node];
2790*06c3fb27SDimitry Andric     updateCall(Node->Call, CalleeFunc);
2791*06c3fb27SDimitry Andric   };
2792*06c3fb27SDimitry Andric 
2793*06c3fb27SDimitry Andric   // Performs DFS traversal starting from allocation nodes to update calls to
2794*06c3fb27SDimitry Andric   // reflect cloning decisions recorded earlier. For regular LTO this will
2795*06c3fb27SDimitry Andric   // update the actual calls in the IR to call the appropriate function clone
2796*06c3fb27SDimitry Andric   // (and add attributes to allocation calls), whereas for ThinLTO the decisions
2797*06c3fb27SDimitry Andric   // are recorded in the summary entries.
2798*06c3fb27SDimitry Andric   DenseSet<const ContextNode *> Visited;
2799*06c3fb27SDimitry Andric   for (auto &Entry : AllocationCallToContextNodeMap)
2800*06c3fb27SDimitry Andric     UpdateCalls(Entry.second, Visited, UpdateCalls);
2801*06c3fb27SDimitry Andric 
2802*06c3fb27SDimitry Andric   return Changed;
2803*06c3fb27SDimitry Andric }
2804*06c3fb27SDimitry Andric 
2805*06c3fb27SDimitry Andric static SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> createFunctionClones(
2806*06c3fb27SDimitry Andric     Function &F, unsigned NumClones, Module &M, OptimizationRemarkEmitter &ORE,
2807*06c3fb27SDimitry Andric     std::map<const Function *, SmallPtrSet<const GlobalAlias *, 1>>
2808*06c3fb27SDimitry Andric         &FuncToAliasMap) {
2809*06c3fb27SDimitry Andric   // The first "clone" is the original copy, we should only call this if we
2810*06c3fb27SDimitry Andric   // needed to create new clones.
2811*06c3fb27SDimitry Andric   assert(NumClones > 1);
2812*06c3fb27SDimitry Andric   SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> VMaps;
2813*06c3fb27SDimitry Andric   VMaps.reserve(NumClones - 1);
2814*06c3fb27SDimitry Andric   FunctionsClonedThinBackend++;
2815*06c3fb27SDimitry Andric   for (unsigned I = 1; I < NumClones; I++) {
2816*06c3fb27SDimitry Andric     VMaps.emplace_back(std::make_unique<ValueToValueMapTy>());
2817*06c3fb27SDimitry Andric     auto *NewF = CloneFunction(&F, *VMaps.back());
2818*06c3fb27SDimitry Andric     FunctionClonesThinBackend++;
2819*06c3fb27SDimitry Andric     // Strip memprof and callsite metadata from clone as they are no longer
2820*06c3fb27SDimitry Andric     // needed.
2821*06c3fb27SDimitry Andric     for (auto &BB : *NewF) {
2822*06c3fb27SDimitry Andric       for (auto &Inst : BB) {
2823*06c3fb27SDimitry Andric         Inst.setMetadata(LLVMContext::MD_memprof, nullptr);
2824*06c3fb27SDimitry Andric         Inst.setMetadata(LLVMContext::MD_callsite, nullptr);
2825*06c3fb27SDimitry Andric       }
2826*06c3fb27SDimitry Andric     }
2827*06c3fb27SDimitry Andric     std::string Name = getMemProfFuncName(F.getName(), I);
2828*06c3fb27SDimitry Andric     auto *PrevF = M.getFunction(Name);
2829*06c3fb27SDimitry Andric     if (PrevF) {
2830*06c3fb27SDimitry Andric       // We might have created this when adjusting callsite in another
2831*06c3fb27SDimitry Andric       // function. It should be a declaration.
2832*06c3fb27SDimitry Andric       assert(PrevF->isDeclaration());
2833*06c3fb27SDimitry Andric       NewF->takeName(PrevF);
2834*06c3fb27SDimitry Andric       PrevF->replaceAllUsesWith(NewF);
2835*06c3fb27SDimitry Andric       PrevF->eraseFromParent();
2836*06c3fb27SDimitry Andric     } else
2837*06c3fb27SDimitry Andric       NewF->setName(Name);
2838*06c3fb27SDimitry Andric     ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofClone", &F)
2839*06c3fb27SDimitry Andric              << "created clone " << ore::NV("NewFunction", NewF));
2840*06c3fb27SDimitry Andric 
2841*06c3fb27SDimitry Andric     // Now handle aliases to this function, and clone those as well.
2842*06c3fb27SDimitry Andric     if (!FuncToAliasMap.count(&F))
2843*06c3fb27SDimitry Andric       continue;
2844*06c3fb27SDimitry Andric     for (auto *A : FuncToAliasMap[&F]) {
2845*06c3fb27SDimitry Andric       std::string Name = getMemProfFuncName(A->getName(), I);
2846*06c3fb27SDimitry Andric       auto *PrevA = M.getNamedAlias(Name);
2847*06c3fb27SDimitry Andric       auto *NewA = GlobalAlias::create(A->getValueType(),
2848*06c3fb27SDimitry Andric                                        A->getType()->getPointerAddressSpace(),
2849*06c3fb27SDimitry Andric                                        A->getLinkage(), Name, NewF);
2850*06c3fb27SDimitry Andric       NewA->copyAttributesFrom(A);
2851*06c3fb27SDimitry Andric       if (PrevA) {
2852*06c3fb27SDimitry Andric         // We might have created this when adjusting callsite in another
2853*06c3fb27SDimitry Andric         // function. It should be a declaration.
2854*06c3fb27SDimitry Andric         assert(PrevA->isDeclaration());
2855*06c3fb27SDimitry Andric         NewA->takeName(PrevA);
2856*06c3fb27SDimitry Andric         PrevA->replaceAllUsesWith(NewA);
2857*06c3fb27SDimitry Andric         PrevA->eraseFromParent();
2858*06c3fb27SDimitry Andric       }
2859*06c3fb27SDimitry Andric     }
2860*06c3fb27SDimitry Andric   }
2861*06c3fb27SDimitry Andric   return VMaps;
2862*06c3fb27SDimitry Andric }
2863*06c3fb27SDimitry Andric 
2864*06c3fb27SDimitry Andric // Locate the summary for F. This is complicated by the fact that it might
2865*06c3fb27SDimitry Andric // have been internalized or promoted.
2866*06c3fb27SDimitry Andric static ValueInfo findValueInfoForFunc(const Function &F, const Module &M,
2867*06c3fb27SDimitry Andric                                       const ModuleSummaryIndex *ImportSummary) {
2868*06c3fb27SDimitry Andric   // FIXME: Ideally we would retain the original GUID in some fashion on the
2869*06c3fb27SDimitry Andric   // function (e.g. as metadata), but for now do our best to locate the
2870*06c3fb27SDimitry Andric   // summary without that information.
2871*06c3fb27SDimitry Andric   ValueInfo TheFnVI = ImportSummary->getValueInfo(F.getGUID());
2872*06c3fb27SDimitry Andric   if (!TheFnVI)
2873*06c3fb27SDimitry Andric     // See if theFn was internalized, by checking index directly with
2874*06c3fb27SDimitry Andric     // original name (this avoids the name adjustment done by getGUID() for
2875*06c3fb27SDimitry Andric     // internal symbols).
2876*06c3fb27SDimitry Andric     TheFnVI = ImportSummary->getValueInfo(GlobalValue::getGUID(F.getName()));
2877*06c3fb27SDimitry Andric   if (TheFnVI)
2878*06c3fb27SDimitry Andric     return TheFnVI;
2879*06c3fb27SDimitry Andric   // Now query with the original name before any promotion was performed.
2880*06c3fb27SDimitry Andric   StringRef OrigName =
2881*06c3fb27SDimitry Andric       ModuleSummaryIndex::getOriginalNameBeforePromote(F.getName());
2882*06c3fb27SDimitry Andric   std::string OrigId = GlobalValue::getGlobalIdentifier(
2883*06c3fb27SDimitry Andric       OrigName, GlobalValue::InternalLinkage, M.getSourceFileName());
2884*06c3fb27SDimitry Andric   TheFnVI = ImportSummary->getValueInfo(GlobalValue::getGUID(OrigId));
2885*06c3fb27SDimitry Andric   if (TheFnVI)
2886*06c3fb27SDimitry Andric     return TheFnVI;
2887*06c3fb27SDimitry Andric   // Could be a promoted local imported from another module. We need to pass
2888*06c3fb27SDimitry Andric   // down more info here to find the original module id. For now, try with
2889*06c3fb27SDimitry Andric   // the OrigName which might have been stored in the OidGuidMap in the
2890*06c3fb27SDimitry Andric   // index. This would not work if there were same-named locals in multiple
2891*06c3fb27SDimitry Andric   // modules, however.
2892*06c3fb27SDimitry Andric   auto OrigGUID =
2893*06c3fb27SDimitry Andric       ImportSummary->getGUIDFromOriginalID(GlobalValue::getGUID(OrigName));
2894*06c3fb27SDimitry Andric   if (OrigGUID)
2895*06c3fb27SDimitry Andric     TheFnVI = ImportSummary->getValueInfo(OrigGUID);
2896*06c3fb27SDimitry Andric   return TheFnVI;
2897*06c3fb27SDimitry Andric }
2898*06c3fb27SDimitry Andric 
2899*06c3fb27SDimitry Andric bool MemProfContextDisambiguation::applyImport(Module &M) {
2900*06c3fb27SDimitry Andric   assert(ImportSummary);
2901*06c3fb27SDimitry Andric   bool Changed = false;
2902*06c3fb27SDimitry Andric 
2903*06c3fb27SDimitry Andric   auto IsMemProfClone = [](const Function &F) {
2904*06c3fb27SDimitry Andric     return F.getName().contains(MemProfCloneSuffix);
2905*06c3fb27SDimitry Andric   };
2906*06c3fb27SDimitry Andric 
2907*06c3fb27SDimitry Andric   // We also need to clone any aliases that reference cloned functions, because
2908*06c3fb27SDimitry Andric   // the modified callsites may invoke via the alias. Keep track of the aliases
2909*06c3fb27SDimitry Andric   // for each function.
2910*06c3fb27SDimitry Andric   std::map<const Function *, SmallPtrSet<const GlobalAlias *, 1>>
2911*06c3fb27SDimitry Andric       FuncToAliasMap;
2912*06c3fb27SDimitry Andric   for (auto &A : M.aliases()) {
2913*06c3fb27SDimitry Andric     auto *Aliasee = A.getAliaseeObject();
2914*06c3fb27SDimitry Andric     if (auto *F = dyn_cast<Function>(Aliasee))
2915*06c3fb27SDimitry Andric       FuncToAliasMap[F].insert(&A);
2916*06c3fb27SDimitry Andric   }
2917*06c3fb27SDimitry Andric 
2918*06c3fb27SDimitry Andric   for (auto &F : M) {
2919*06c3fb27SDimitry Andric     if (F.isDeclaration() || IsMemProfClone(F))
2920*06c3fb27SDimitry Andric       continue;
2921*06c3fb27SDimitry Andric 
2922*06c3fb27SDimitry Andric     OptimizationRemarkEmitter ORE(&F);
2923*06c3fb27SDimitry Andric 
2924*06c3fb27SDimitry Andric     SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> VMaps;
2925*06c3fb27SDimitry Andric     bool ClonesCreated = false;
2926*06c3fb27SDimitry Andric     unsigned NumClonesCreated = 0;
2927*06c3fb27SDimitry Andric     auto CloneFuncIfNeeded = [&](unsigned NumClones) {
2928*06c3fb27SDimitry Andric       // We should at least have version 0 which is the original copy.
2929*06c3fb27SDimitry Andric       assert(NumClones > 0);
2930*06c3fb27SDimitry Andric       // If only one copy needed use original.
2931*06c3fb27SDimitry Andric       if (NumClones == 1)
2932*06c3fb27SDimitry Andric         return;
2933*06c3fb27SDimitry Andric       // If we already performed cloning of this function, confirm that the
2934*06c3fb27SDimitry Andric       // requested number of clones matches (the thin link should ensure the
2935*06c3fb27SDimitry Andric       // number of clones for each constituent callsite is consistent within
2936*06c3fb27SDimitry Andric       // each function), before returning.
2937*06c3fb27SDimitry Andric       if (ClonesCreated) {
2938*06c3fb27SDimitry Andric         assert(NumClonesCreated == NumClones);
2939*06c3fb27SDimitry Andric         return;
2940*06c3fb27SDimitry Andric       }
2941*06c3fb27SDimitry Andric       VMaps = createFunctionClones(F, NumClones, M, ORE, FuncToAliasMap);
2942*06c3fb27SDimitry Andric       // The first "clone" is the original copy, which doesn't have a VMap.
2943*06c3fb27SDimitry Andric       assert(VMaps.size() == NumClones - 1);
2944*06c3fb27SDimitry Andric       Changed = true;
2945*06c3fb27SDimitry Andric       ClonesCreated = true;
2946*06c3fb27SDimitry Andric       NumClonesCreated = NumClones;
2947*06c3fb27SDimitry Andric     };
2948*06c3fb27SDimitry Andric 
2949*06c3fb27SDimitry Andric     // Locate the summary for F.
2950*06c3fb27SDimitry Andric     ValueInfo TheFnVI = findValueInfoForFunc(F, M, ImportSummary);
2951*06c3fb27SDimitry Andric     // If not found, this could be an imported local (see comment in
2952*06c3fb27SDimitry Andric     // findValueInfoForFunc). Skip for now as it will be cloned in its original
2953*06c3fb27SDimitry Andric     // module (where it would have been promoted to global scope so should
2954*06c3fb27SDimitry Andric     // satisfy any reference in this module).
2955*06c3fb27SDimitry Andric     if (!TheFnVI)
2956*06c3fb27SDimitry Andric       continue;
2957*06c3fb27SDimitry Andric 
2958*06c3fb27SDimitry Andric     auto *GVSummary =
2959*06c3fb27SDimitry Andric         ImportSummary->findSummaryInModule(TheFnVI, M.getModuleIdentifier());
2960*06c3fb27SDimitry Andric     if (!GVSummary)
2961*06c3fb27SDimitry Andric       // Must have been imported, use the first summary (might be multiple if
2962*06c3fb27SDimitry Andric       // this was a linkonce_odr).
2963*06c3fb27SDimitry Andric       GVSummary = TheFnVI.getSummaryList().front().get();
2964*06c3fb27SDimitry Andric 
2965*06c3fb27SDimitry Andric     // If this was an imported alias skip it as we won't have the function
2966*06c3fb27SDimitry Andric     // summary, and it should be cloned in the original module.
2967*06c3fb27SDimitry Andric     if (isa<AliasSummary>(GVSummary))
2968*06c3fb27SDimitry Andric       continue;
2969*06c3fb27SDimitry Andric 
2970*06c3fb27SDimitry Andric     auto *FS = cast<FunctionSummary>(GVSummary->getBaseObject());
2971*06c3fb27SDimitry Andric 
2972*06c3fb27SDimitry Andric     if (FS->allocs().empty() && FS->callsites().empty())
2973*06c3fb27SDimitry Andric       continue;
2974*06c3fb27SDimitry Andric 
2975*06c3fb27SDimitry Andric     auto SI = FS->callsites().begin();
2976*06c3fb27SDimitry Andric     auto AI = FS->allocs().begin();
2977*06c3fb27SDimitry Andric 
2978*06c3fb27SDimitry Andric     // Assume for now that the instructions are in the exact same order
2979*06c3fb27SDimitry Andric     // as when the summary was created, but confirm this is correct by
2980*06c3fb27SDimitry Andric     // matching the stack ids.
2981*06c3fb27SDimitry Andric     for (auto &BB : F) {
2982*06c3fb27SDimitry Andric       for (auto &I : BB) {
2983*06c3fb27SDimitry Andric         auto *CB = dyn_cast<CallBase>(&I);
2984*06c3fb27SDimitry Andric         // Same handling as when creating module summary.
2985*06c3fb27SDimitry Andric         if (!mayHaveMemprofSummary(CB))
2986*06c3fb27SDimitry Andric           continue;
2987*06c3fb27SDimitry Andric 
2988*06c3fb27SDimitry Andric         CallStack<MDNode, MDNode::op_iterator> CallsiteContext(
2989*06c3fb27SDimitry Andric             I.getMetadata(LLVMContext::MD_callsite));
2990*06c3fb27SDimitry Andric         auto *MemProfMD = I.getMetadata(LLVMContext::MD_memprof);
2991*06c3fb27SDimitry Andric 
2992*06c3fb27SDimitry Andric         // Include allocs that were already assigned a memprof function
2993*06c3fb27SDimitry Andric         // attribute in the statistics.
2994*06c3fb27SDimitry Andric         if (CB->getAttributes().hasFnAttr("memprof")) {
2995*06c3fb27SDimitry Andric           assert(!MemProfMD);
2996*06c3fb27SDimitry Andric           CB->getAttributes().getFnAttr("memprof").getValueAsString() == "cold"
2997*06c3fb27SDimitry Andric               ? AllocTypeColdThinBackend++
2998*06c3fb27SDimitry Andric               : AllocTypeNotColdThinBackend++;
2999*06c3fb27SDimitry Andric           OrigAllocsThinBackend++;
3000*06c3fb27SDimitry Andric           AllocVersionsThinBackend++;
3001*06c3fb27SDimitry Andric           if (!MaxAllocVersionsThinBackend)
3002*06c3fb27SDimitry Andric             MaxAllocVersionsThinBackend = 1;
3003*06c3fb27SDimitry Andric           // Remove any remaining callsite metadata and we can skip the rest of
3004*06c3fb27SDimitry Andric           // the handling for this instruction, since no cloning needed.
3005*06c3fb27SDimitry Andric           I.setMetadata(LLVMContext::MD_callsite, nullptr);
3006*06c3fb27SDimitry Andric           continue;
3007*06c3fb27SDimitry Andric         }
3008*06c3fb27SDimitry Andric 
3009*06c3fb27SDimitry Andric         if (MemProfMD) {
3010*06c3fb27SDimitry Andric           // Consult the next alloc node.
3011*06c3fb27SDimitry Andric           assert(AI != FS->allocs().end());
3012*06c3fb27SDimitry Andric           auto &AllocNode = *(AI++);
3013*06c3fb27SDimitry Andric 
3014*06c3fb27SDimitry Andric           // Sanity check that the MIB stack ids match between the summary and
3015*06c3fb27SDimitry Andric           // instruction metadata.
3016*06c3fb27SDimitry Andric           auto MIBIter = AllocNode.MIBs.begin();
3017*06c3fb27SDimitry Andric           for (auto &MDOp : MemProfMD->operands()) {
3018*06c3fb27SDimitry Andric             assert(MIBIter != AllocNode.MIBs.end());
3019*06c3fb27SDimitry Andric             LLVM_ATTRIBUTE_UNUSED auto StackIdIndexIter =
3020*06c3fb27SDimitry Andric                 MIBIter->StackIdIndices.begin();
3021*06c3fb27SDimitry Andric             auto *MIBMD = cast<const MDNode>(MDOp);
3022*06c3fb27SDimitry Andric             MDNode *StackMDNode = getMIBStackNode(MIBMD);
3023*06c3fb27SDimitry Andric             assert(StackMDNode);
3024*06c3fb27SDimitry Andric             SmallVector<unsigned> StackIdsFromMetadata;
3025*06c3fb27SDimitry Andric             CallStack<MDNode, MDNode::op_iterator> StackContext(StackMDNode);
3026*06c3fb27SDimitry Andric             for (auto ContextIter =
3027*06c3fb27SDimitry Andric                      StackContext.beginAfterSharedPrefix(CallsiteContext);
3028*06c3fb27SDimitry Andric                  ContextIter != StackContext.end(); ++ContextIter) {
3029*06c3fb27SDimitry Andric               // If this is a direct recursion, simply skip the duplicate
3030*06c3fb27SDimitry Andric               // entries, to be consistent with how the summary ids were
3031*06c3fb27SDimitry Andric               // generated during ModuleSummaryAnalysis.
3032*06c3fb27SDimitry Andric               if (!StackIdsFromMetadata.empty() &&
3033*06c3fb27SDimitry Andric                   StackIdsFromMetadata.back() == *ContextIter)
3034*06c3fb27SDimitry Andric                 continue;
3035*06c3fb27SDimitry Andric               assert(StackIdIndexIter != MIBIter->StackIdIndices.end());
3036*06c3fb27SDimitry Andric               assert(ImportSummary->getStackIdAtIndex(*StackIdIndexIter) ==
3037*06c3fb27SDimitry Andric                      *ContextIter);
3038*06c3fb27SDimitry Andric               StackIdIndexIter++;
3039*06c3fb27SDimitry Andric             }
3040*06c3fb27SDimitry Andric             MIBIter++;
3041*06c3fb27SDimitry Andric           }
3042*06c3fb27SDimitry Andric 
3043*06c3fb27SDimitry Andric           // Perform cloning if not yet done.
3044*06c3fb27SDimitry Andric           CloneFuncIfNeeded(/*NumClones=*/AllocNode.Versions.size());
3045*06c3fb27SDimitry Andric 
3046*06c3fb27SDimitry Andric           OrigAllocsThinBackend++;
3047*06c3fb27SDimitry Andric           AllocVersionsThinBackend += AllocNode.Versions.size();
3048*06c3fb27SDimitry Andric           if (MaxAllocVersionsThinBackend < AllocNode.Versions.size())
3049*06c3fb27SDimitry Andric             MaxAllocVersionsThinBackend = AllocNode.Versions.size();
3050*06c3fb27SDimitry Andric 
3051*06c3fb27SDimitry Andric           // If there is only one version that means we didn't end up
3052*06c3fb27SDimitry Andric           // considering this function for cloning, and in that case the alloc
3053*06c3fb27SDimitry Andric           // will still be none type or should have gotten the default NotCold.
3054*06c3fb27SDimitry Andric           // Skip that after calling clone helper since that does some sanity
3055*06c3fb27SDimitry Andric           // checks that confirm we haven't decided yet that we need cloning.
3056*06c3fb27SDimitry Andric           if (AllocNode.Versions.size() == 1) {
3057*06c3fb27SDimitry Andric             assert((AllocationType)AllocNode.Versions[0] ==
3058*06c3fb27SDimitry Andric                        AllocationType::NotCold ||
3059*06c3fb27SDimitry Andric                    (AllocationType)AllocNode.Versions[0] ==
3060*06c3fb27SDimitry Andric                        AllocationType::None);
3061*06c3fb27SDimitry Andric             UnclonableAllocsThinBackend++;
3062*06c3fb27SDimitry Andric             continue;
3063*06c3fb27SDimitry Andric           }
3064*06c3fb27SDimitry Andric 
3065*06c3fb27SDimitry Andric           // All versions should have a singular allocation type.
3066*06c3fb27SDimitry Andric           assert(llvm::none_of(AllocNode.Versions, [](uint8_t Type) {
3067*06c3fb27SDimitry Andric             return Type == ((uint8_t)AllocationType::NotCold |
3068*06c3fb27SDimitry Andric                             (uint8_t)AllocationType::Cold);
3069*06c3fb27SDimitry Andric           }));
3070*06c3fb27SDimitry Andric 
3071*06c3fb27SDimitry Andric           // Update the allocation types per the summary info.
3072*06c3fb27SDimitry Andric           for (unsigned J = 0; J < AllocNode.Versions.size(); J++) {
3073*06c3fb27SDimitry Andric             // Ignore any that didn't get an assigned allocation type.
3074*06c3fb27SDimitry Andric             if (AllocNode.Versions[J] == (uint8_t)AllocationType::None)
3075*06c3fb27SDimitry Andric               continue;
3076*06c3fb27SDimitry Andric             AllocationType AllocTy = (AllocationType)AllocNode.Versions[J];
3077*06c3fb27SDimitry Andric             AllocTy == AllocationType::Cold ? AllocTypeColdThinBackend++
3078*06c3fb27SDimitry Andric                                             : AllocTypeNotColdThinBackend++;
3079*06c3fb27SDimitry Andric             std::string AllocTypeString = getAllocTypeAttributeString(AllocTy);
3080*06c3fb27SDimitry Andric             auto A = llvm::Attribute::get(F.getContext(), "memprof",
3081*06c3fb27SDimitry Andric                                           AllocTypeString);
3082*06c3fb27SDimitry Andric             CallBase *CBClone;
3083*06c3fb27SDimitry Andric             // Copy 0 is the original function.
3084*06c3fb27SDimitry Andric             if (!J)
3085*06c3fb27SDimitry Andric               CBClone = CB;
3086*06c3fb27SDimitry Andric             else
3087*06c3fb27SDimitry Andric               // Since VMaps are only created for new clones, we index with
3088*06c3fb27SDimitry Andric               // clone J-1 (J==0 is the original clone and does not have a VMaps
3089*06c3fb27SDimitry Andric               // entry).
3090*06c3fb27SDimitry Andric               CBClone = cast<CallBase>((*VMaps[J - 1])[CB]);
3091*06c3fb27SDimitry Andric             CBClone->addFnAttr(A);
3092*06c3fb27SDimitry Andric             ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofAttribute", CBClone)
3093*06c3fb27SDimitry Andric                      << ore::NV("AllocationCall", CBClone) << " in clone "
3094*06c3fb27SDimitry Andric                      << ore::NV("Caller", CBClone->getFunction())
3095*06c3fb27SDimitry Andric                      << " marked with memprof allocation attribute "
3096*06c3fb27SDimitry Andric                      << ore::NV("Attribute", AllocTypeString));
3097*06c3fb27SDimitry Andric           }
3098*06c3fb27SDimitry Andric         } else if (!CallsiteContext.empty()) {
3099*06c3fb27SDimitry Andric           // Consult the next callsite node.
3100*06c3fb27SDimitry Andric           assert(SI != FS->callsites().end());
3101*06c3fb27SDimitry Andric           auto &StackNode = *(SI++);
3102*06c3fb27SDimitry Andric 
3103*06c3fb27SDimitry Andric #ifndef NDEBUG
3104*06c3fb27SDimitry Andric           // Sanity check that the stack ids match between the summary and
3105*06c3fb27SDimitry Andric           // instruction metadata.
3106*06c3fb27SDimitry Andric           auto StackIdIndexIter = StackNode.StackIdIndices.begin();
3107*06c3fb27SDimitry Andric           for (auto StackId : CallsiteContext) {
3108*06c3fb27SDimitry Andric             assert(StackIdIndexIter != StackNode.StackIdIndices.end());
3109*06c3fb27SDimitry Andric             assert(ImportSummary->getStackIdAtIndex(*StackIdIndexIter) ==
3110*06c3fb27SDimitry Andric                    StackId);
3111*06c3fb27SDimitry Andric             StackIdIndexIter++;
3112*06c3fb27SDimitry Andric           }
3113*06c3fb27SDimitry Andric #endif
3114*06c3fb27SDimitry Andric 
3115*06c3fb27SDimitry Andric           // Perform cloning if not yet done.
3116*06c3fb27SDimitry Andric           CloneFuncIfNeeded(/*NumClones=*/StackNode.Clones.size());
3117*06c3fb27SDimitry Andric 
3118*06c3fb27SDimitry Andric           // Should have skipped indirect calls via mayHaveMemprofSummary.
3119*06c3fb27SDimitry Andric           assert(CB->getCalledFunction());
3120*06c3fb27SDimitry Andric           assert(!IsMemProfClone(*CB->getCalledFunction()));
3121*06c3fb27SDimitry Andric 
3122*06c3fb27SDimitry Andric           // Update the calls per the summary info.
3123*06c3fb27SDimitry Andric           // Save orig name since it gets updated in the first iteration
3124*06c3fb27SDimitry Andric           // below.
3125*06c3fb27SDimitry Andric           auto CalleeOrigName = CB->getCalledFunction()->getName();
3126*06c3fb27SDimitry Andric           for (unsigned J = 0; J < StackNode.Clones.size(); J++) {
3127*06c3fb27SDimitry Andric             // Do nothing if this version calls the original version of its
3128*06c3fb27SDimitry Andric             // callee.
3129*06c3fb27SDimitry Andric             if (!StackNode.Clones[J])
3130*06c3fb27SDimitry Andric               continue;
3131*06c3fb27SDimitry Andric             auto NewF = M.getOrInsertFunction(
3132*06c3fb27SDimitry Andric                 getMemProfFuncName(CalleeOrigName, StackNode.Clones[J]),
3133*06c3fb27SDimitry Andric                 CB->getCalledFunction()->getFunctionType());
3134*06c3fb27SDimitry Andric             CallBase *CBClone;
3135*06c3fb27SDimitry Andric             // Copy 0 is the original function.
3136*06c3fb27SDimitry Andric             if (!J)
3137*06c3fb27SDimitry Andric               CBClone = CB;
3138*06c3fb27SDimitry Andric             else
3139*06c3fb27SDimitry Andric               CBClone = cast<CallBase>((*VMaps[J - 1])[CB]);
3140*06c3fb27SDimitry Andric             CBClone->setCalledFunction(NewF);
3141*06c3fb27SDimitry Andric             ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofCall", CBClone)
3142*06c3fb27SDimitry Andric                      << ore::NV("Call", CBClone) << " in clone "
3143*06c3fb27SDimitry Andric                      << ore::NV("Caller", CBClone->getFunction())
3144*06c3fb27SDimitry Andric                      << " assigned to call function clone "
3145*06c3fb27SDimitry Andric                      << ore::NV("Callee", NewF.getCallee()));
3146*06c3fb27SDimitry Andric           }
3147*06c3fb27SDimitry Andric         }
3148*06c3fb27SDimitry Andric         // Memprof and callsite metadata on memory allocations no longer needed.
3149*06c3fb27SDimitry Andric         I.setMetadata(LLVMContext::MD_memprof, nullptr);
3150*06c3fb27SDimitry Andric         I.setMetadata(LLVMContext::MD_callsite, nullptr);
3151*06c3fb27SDimitry Andric       }
3152*06c3fb27SDimitry Andric     }
3153*06c3fb27SDimitry Andric   }
3154*06c3fb27SDimitry Andric 
3155*06c3fb27SDimitry Andric   return Changed;
3156*06c3fb27SDimitry Andric }
3157*06c3fb27SDimitry Andric 
3158*06c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
3159*06c3fb27SDimitry Andric bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::process() {
3160*06c3fb27SDimitry Andric   if (DumpCCG) {
3161*06c3fb27SDimitry Andric     dbgs() << "CCG before cloning:\n";
3162*06c3fb27SDimitry Andric     dbgs() << *this;
3163*06c3fb27SDimitry Andric   }
3164*06c3fb27SDimitry Andric   if (ExportToDot)
3165*06c3fb27SDimitry Andric     exportToDot("postbuild");
3166*06c3fb27SDimitry Andric 
3167*06c3fb27SDimitry Andric   if (VerifyCCG) {
3168*06c3fb27SDimitry Andric     check();
3169*06c3fb27SDimitry Andric   }
3170*06c3fb27SDimitry Andric 
3171*06c3fb27SDimitry Andric   identifyClones();
3172*06c3fb27SDimitry Andric 
3173*06c3fb27SDimitry Andric   if (VerifyCCG) {
3174*06c3fb27SDimitry Andric     check();
3175*06c3fb27SDimitry Andric   }
3176*06c3fb27SDimitry Andric 
3177*06c3fb27SDimitry Andric   if (DumpCCG) {
3178*06c3fb27SDimitry Andric     dbgs() << "CCG after cloning:\n";
3179*06c3fb27SDimitry Andric     dbgs() << *this;
3180*06c3fb27SDimitry Andric   }
3181*06c3fb27SDimitry Andric   if (ExportToDot)
3182*06c3fb27SDimitry Andric     exportToDot("cloned");
3183*06c3fb27SDimitry Andric 
3184*06c3fb27SDimitry Andric   bool Changed = assignFunctions();
3185*06c3fb27SDimitry Andric 
3186*06c3fb27SDimitry Andric   if (DumpCCG) {
3187*06c3fb27SDimitry Andric     dbgs() << "CCG after assigning function clones:\n";
3188*06c3fb27SDimitry Andric     dbgs() << *this;
3189*06c3fb27SDimitry Andric   }
3190*06c3fb27SDimitry Andric   if (ExportToDot)
3191*06c3fb27SDimitry Andric     exportToDot("clonefuncassign");
3192*06c3fb27SDimitry Andric 
3193*06c3fb27SDimitry Andric   return Changed;
3194*06c3fb27SDimitry Andric }
3195*06c3fb27SDimitry Andric 
3196*06c3fb27SDimitry Andric bool MemProfContextDisambiguation::processModule(
3197*06c3fb27SDimitry Andric     Module &M,
3198*06c3fb27SDimitry Andric     function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter) {
3199*06c3fb27SDimitry Andric 
3200*06c3fb27SDimitry Andric   // If we have an import summary, then the cloning decisions were made during
3201*06c3fb27SDimitry Andric   // the thin link on the index. Apply them and return.
3202*06c3fb27SDimitry Andric   if (ImportSummary)
3203*06c3fb27SDimitry Andric     return applyImport(M);
3204*06c3fb27SDimitry Andric 
3205*06c3fb27SDimitry Andric   // TODO: If/when other types of memprof cloning are enabled beyond just for
3206*06c3fb27SDimitry Andric   // hot and cold, we will need to change this to individually control the
3207*06c3fb27SDimitry Andric   // AllocationType passed to addStackNodesForMIB during CCG construction.
3208*06c3fb27SDimitry Andric   // Note that we specifically check this after applying imports above, so that
3209*06c3fb27SDimitry Andric   // the option isn't needed to be passed to distributed ThinLTO backend
3210*06c3fb27SDimitry Andric   // clang processes, which won't necessarily have visibility into the linker
3211*06c3fb27SDimitry Andric   // dependences. Instead the information is communicated from the LTO link to
3212*06c3fb27SDimitry Andric   // the backends via the combined summary index.
3213*06c3fb27SDimitry Andric   if (!SupportsHotColdNew)
3214*06c3fb27SDimitry Andric     return false;
3215*06c3fb27SDimitry Andric 
3216*06c3fb27SDimitry Andric   ModuleCallsiteContextGraph CCG(M, OREGetter);
3217*06c3fb27SDimitry Andric   return CCG.process();
3218*06c3fb27SDimitry Andric }
3219*06c3fb27SDimitry Andric 
3220*06c3fb27SDimitry Andric MemProfContextDisambiguation::MemProfContextDisambiguation(
3221*06c3fb27SDimitry Andric     const ModuleSummaryIndex *Summary)
3222*06c3fb27SDimitry Andric     : ImportSummary(Summary) {
3223*06c3fb27SDimitry Andric   if (ImportSummary) {
3224*06c3fb27SDimitry Andric     // The MemProfImportSummary should only be used for testing ThinLTO
3225*06c3fb27SDimitry Andric     // distributed backend handling via opt, in which case we don't have a
3226*06c3fb27SDimitry Andric     // summary from the pass pipeline.
3227*06c3fb27SDimitry Andric     assert(MemProfImportSummary.empty());
3228*06c3fb27SDimitry Andric     return;
3229*06c3fb27SDimitry Andric   }
3230*06c3fb27SDimitry Andric   if (MemProfImportSummary.empty())
3231*06c3fb27SDimitry Andric     return;
3232*06c3fb27SDimitry Andric 
3233*06c3fb27SDimitry Andric   auto ReadSummaryFile =
3234*06c3fb27SDimitry Andric       errorOrToExpected(MemoryBuffer::getFile(MemProfImportSummary));
3235*06c3fb27SDimitry Andric   if (!ReadSummaryFile) {
3236*06c3fb27SDimitry Andric     logAllUnhandledErrors(ReadSummaryFile.takeError(), errs(),
3237*06c3fb27SDimitry Andric                           "Error loading file '" + MemProfImportSummary +
3238*06c3fb27SDimitry Andric                               "': ");
3239*06c3fb27SDimitry Andric     return;
3240*06c3fb27SDimitry Andric   }
3241*06c3fb27SDimitry Andric   auto ImportSummaryForTestingOrErr = getModuleSummaryIndex(**ReadSummaryFile);
3242*06c3fb27SDimitry Andric   if (!ImportSummaryForTestingOrErr) {
3243*06c3fb27SDimitry Andric     logAllUnhandledErrors(ImportSummaryForTestingOrErr.takeError(), errs(),
3244*06c3fb27SDimitry Andric                           "Error parsing file '" + MemProfImportSummary +
3245*06c3fb27SDimitry Andric                               "': ");
3246*06c3fb27SDimitry Andric     return;
3247*06c3fb27SDimitry Andric   }
3248*06c3fb27SDimitry Andric   ImportSummaryForTesting = std::move(*ImportSummaryForTestingOrErr);
3249*06c3fb27SDimitry Andric   ImportSummary = ImportSummaryForTesting.get();
3250*06c3fb27SDimitry Andric }
3251*06c3fb27SDimitry Andric 
3252*06c3fb27SDimitry Andric PreservedAnalyses MemProfContextDisambiguation::run(Module &M,
3253*06c3fb27SDimitry Andric                                                     ModuleAnalysisManager &AM) {
3254*06c3fb27SDimitry Andric   auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
3255*06c3fb27SDimitry Andric   auto OREGetter = [&](Function *F) -> OptimizationRemarkEmitter & {
3256*06c3fb27SDimitry Andric     return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*F);
3257*06c3fb27SDimitry Andric   };
3258*06c3fb27SDimitry Andric   if (!processModule(M, OREGetter))
3259*06c3fb27SDimitry Andric     return PreservedAnalyses::all();
3260*06c3fb27SDimitry Andric   return PreservedAnalyses::none();
3261*06c3fb27SDimitry Andric }
3262*06c3fb27SDimitry Andric 
3263*06c3fb27SDimitry Andric void MemProfContextDisambiguation::run(
3264*06c3fb27SDimitry Andric     ModuleSummaryIndex &Index,
3265*06c3fb27SDimitry Andric     function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
3266*06c3fb27SDimitry Andric         isPrevailing) {
3267*06c3fb27SDimitry Andric   // TODO: If/when other types of memprof cloning are enabled beyond just for
3268*06c3fb27SDimitry Andric   // hot and cold, we will need to change this to individually control the
3269*06c3fb27SDimitry Andric   // AllocationType passed to addStackNodesForMIB during CCG construction.
3270*06c3fb27SDimitry Andric   // The index was set from the option, so these should be in sync.
3271*06c3fb27SDimitry Andric   assert(Index.withSupportsHotColdNew() == SupportsHotColdNew);
3272*06c3fb27SDimitry Andric   if (!SupportsHotColdNew)
3273*06c3fb27SDimitry Andric     return;
3274*06c3fb27SDimitry Andric 
3275*06c3fb27SDimitry Andric   IndexCallsiteContextGraph CCG(Index, isPrevailing);
3276*06c3fb27SDimitry Andric   CCG.process();
3277*06c3fb27SDimitry Andric }
3278