xref: /freebsd-src/contrib/llvm-project/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
106c3fb27SDimitry Andric //==-- MemProfContextDisambiguation.cpp - Disambiguate contexts -------------=//
206c3fb27SDimitry Andric //
306c3fb27SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
406c3fb27SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
506c3fb27SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
606c3fb27SDimitry Andric //
706c3fb27SDimitry Andric //===----------------------------------------------------------------------===//
806c3fb27SDimitry Andric //
906c3fb27SDimitry Andric // This file implements support for context disambiguation of allocation
1006c3fb27SDimitry Andric // calls for profile guided heap optimization. Specifically, it uses Memprof
1106c3fb27SDimitry Andric // profiles which indicate context specific allocation behavior (currently
1206c3fb27SDimitry Andric // distinguishing cold vs hot memory allocations). Cloning is performed to
1306c3fb27SDimitry Andric // expose the cold allocation call contexts, and the allocation calls are
1406c3fb27SDimitry Andric // subsequently annotated with an attribute for later transformation.
1506c3fb27SDimitry Andric //
1606c3fb27SDimitry Andric // The transformations can be performed either directly on IR (regular LTO), or
1706c3fb27SDimitry Andric // on a ThinLTO index (and later applied to the IR during the ThinLTO backend).
1806c3fb27SDimitry Andric // Both types of LTO operate on a the same base graph representation, which
1906c3fb27SDimitry Andric // uses CRTP to support either IR or Index formats.
2006c3fb27SDimitry Andric //
2106c3fb27SDimitry Andric //===----------------------------------------------------------------------===//
2206c3fb27SDimitry Andric 
2306c3fb27SDimitry Andric #include "llvm/Transforms/IPO/MemProfContextDisambiguation.h"
2406c3fb27SDimitry Andric #include "llvm/ADT/DenseMap.h"
2506c3fb27SDimitry Andric #include "llvm/ADT/DenseSet.h"
2606c3fb27SDimitry Andric #include "llvm/ADT/MapVector.h"
2706c3fb27SDimitry Andric #include "llvm/ADT/SetOperations.h"
2806c3fb27SDimitry Andric #include "llvm/ADT/SmallPtrSet.h"
2906c3fb27SDimitry Andric #include "llvm/ADT/SmallSet.h"
3006c3fb27SDimitry Andric #include "llvm/ADT/SmallVector.h"
3106c3fb27SDimitry Andric #include "llvm/ADT/Statistic.h"
3206c3fb27SDimitry Andric #include "llvm/Analysis/MemoryProfileInfo.h"
3306c3fb27SDimitry Andric #include "llvm/Analysis/ModuleSummaryAnalysis.h"
3406c3fb27SDimitry Andric #include "llvm/Analysis/OptimizationRemarkEmitter.h"
3506c3fb27SDimitry Andric #include "llvm/Bitcode/BitcodeReader.h"
3606c3fb27SDimitry Andric #include "llvm/IR/Constants.h"
3706c3fb27SDimitry Andric #include "llvm/IR/Instructions.h"
3806c3fb27SDimitry Andric #include "llvm/IR/Module.h"
3906c3fb27SDimitry Andric #include "llvm/IR/ModuleSummaryIndex.h"
4006c3fb27SDimitry Andric #include "llvm/Pass.h"
4106c3fb27SDimitry Andric #include "llvm/Support/CommandLine.h"
4206c3fb27SDimitry Andric #include "llvm/Support/FileSystem.h"
4306c3fb27SDimitry Andric #include "llvm/Support/GraphWriter.h"
4406c3fb27SDimitry Andric #include "llvm/Support/raw_ostream.h"
4506c3fb27SDimitry Andric #include "llvm/Transforms/IPO.h"
4606c3fb27SDimitry Andric #include "llvm/Transforms/Utils/Cloning.h"
47*0fca6ea1SDimitry Andric #include <deque>
4806c3fb27SDimitry Andric #include <sstream>
497a6dacacSDimitry Andric #include <unordered_map>
5006c3fb27SDimitry Andric #include <vector>
5106c3fb27SDimitry Andric using namespace llvm;
5206c3fb27SDimitry Andric using namespace llvm::memprof;
5306c3fb27SDimitry Andric 
5406c3fb27SDimitry Andric #define DEBUG_TYPE "memprof-context-disambiguation"
5506c3fb27SDimitry Andric 
5606c3fb27SDimitry Andric STATISTIC(FunctionClonesAnalysis,
5706c3fb27SDimitry Andric           "Number of function clones created during whole program analysis");
5806c3fb27SDimitry Andric STATISTIC(FunctionClonesThinBackend,
5906c3fb27SDimitry Andric           "Number of function clones created during ThinLTO backend");
6006c3fb27SDimitry Andric STATISTIC(FunctionsClonedThinBackend,
6106c3fb27SDimitry Andric           "Number of functions that had clones created during ThinLTO backend");
6206c3fb27SDimitry Andric STATISTIC(AllocTypeNotCold, "Number of not cold static allocations (possibly "
6306c3fb27SDimitry Andric                             "cloned) during whole program analysis");
6406c3fb27SDimitry Andric STATISTIC(AllocTypeCold, "Number of cold static allocations (possibly cloned) "
6506c3fb27SDimitry Andric                          "during whole program analysis");
6606c3fb27SDimitry Andric STATISTIC(AllocTypeNotColdThinBackend,
6706c3fb27SDimitry Andric           "Number of not cold static allocations (possibly cloned) during "
6806c3fb27SDimitry Andric           "ThinLTO backend");
6906c3fb27SDimitry Andric STATISTIC(AllocTypeColdThinBackend, "Number of cold static allocations "
7006c3fb27SDimitry Andric                                     "(possibly cloned) during ThinLTO backend");
7106c3fb27SDimitry Andric STATISTIC(OrigAllocsThinBackend,
7206c3fb27SDimitry Andric           "Number of original (not cloned) allocations with memprof profiles "
7306c3fb27SDimitry Andric           "during ThinLTO backend");
7406c3fb27SDimitry Andric STATISTIC(
7506c3fb27SDimitry Andric     AllocVersionsThinBackend,
7606c3fb27SDimitry Andric     "Number of allocation versions (including clones) during ThinLTO backend");
7706c3fb27SDimitry Andric STATISTIC(MaxAllocVersionsThinBackend,
7806c3fb27SDimitry Andric           "Maximum number of allocation versions created for an original "
7906c3fb27SDimitry Andric           "allocation during ThinLTO backend");
8006c3fb27SDimitry Andric STATISTIC(UnclonableAllocsThinBackend,
8106c3fb27SDimitry Andric           "Number of unclonable ambigous allocations during ThinLTO backend");
82297eecfbSDimitry Andric STATISTIC(RemovedEdgesWithMismatchedCallees,
83297eecfbSDimitry Andric           "Number of edges removed due to mismatched callees (profiled vs IR)");
84297eecfbSDimitry Andric STATISTIC(FoundProfiledCalleeCount,
85297eecfbSDimitry Andric           "Number of profiled callees found via tail calls");
86297eecfbSDimitry Andric STATISTIC(FoundProfiledCalleeDepth,
87297eecfbSDimitry Andric           "Aggregate depth of profiled callees found via tail calls");
88297eecfbSDimitry Andric STATISTIC(FoundProfiledCalleeMaxDepth,
89297eecfbSDimitry Andric           "Maximum depth of profiled callees found via tail calls");
90297eecfbSDimitry Andric STATISTIC(FoundProfiledCalleeNonUniquelyCount,
91297eecfbSDimitry Andric           "Number of profiled callees found via multiple tail call chains");
9206c3fb27SDimitry Andric 
9306c3fb27SDimitry Andric static cl::opt<std::string> DotFilePathPrefix(
9406c3fb27SDimitry Andric     "memprof-dot-file-path-prefix", cl::init(""), cl::Hidden,
9506c3fb27SDimitry Andric     cl::value_desc("filename"),
9606c3fb27SDimitry Andric     cl::desc("Specify the path prefix of the MemProf dot files."));
9706c3fb27SDimitry Andric 
9806c3fb27SDimitry Andric static cl::opt<bool> ExportToDot("memprof-export-to-dot", cl::init(false),
9906c3fb27SDimitry Andric                                  cl::Hidden,
10006c3fb27SDimitry Andric                                  cl::desc("Export graph to dot files."));
10106c3fb27SDimitry Andric 
10206c3fb27SDimitry Andric static cl::opt<bool>
10306c3fb27SDimitry Andric     DumpCCG("memprof-dump-ccg", cl::init(false), cl::Hidden,
10406c3fb27SDimitry Andric             cl::desc("Dump CallingContextGraph to stdout after each stage."));
10506c3fb27SDimitry Andric 
10606c3fb27SDimitry Andric static cl::opt<bool>
10706c3fb27SDimitry Andric     VerifyCCG("memprof-verify-ccg", cl::init(false), cl::Hidden,
10806c3fb27SDimitry Andric               cl::desc("Perform verification checks on CallingContextGraph."));
10906c3fb27SDimitry Andric 
11006c3fb27SDimitry Andric static cl::opt<bool>
11106c3fb27SDimitry Andric     VerifyNodes("memprof-verify-nodes", cl::init(false), cl::Hidden,
11206c3fb27SDimitry Andric                 cl::desc("Perform frequent verification checks on nodes."));
11306c3fb27SDimitry Andric 
11406c3fb27SDimitry Andric static cl::opt<std::string> MemProfImportSummary(
11506c3fb27SDimitry Andric     "memprof-import-summary",
11606c3fb27SDimitry Andric     cl::desc("Import summary to use for testing the ThinLTO backend via opt"),
11706c3fb27SDimitry Andric     cl::Hidden);
11806c3fb27SDimitry Andric 
119297eecfbSDimitry Andric static cl::opt<unsigned>
120297eecfbSDimitry Andric     TailCallSearchDepth("memprof-tail-call-search-depth", cl::init(5),
121297eecfbSDimitry Andric                         cl::Hidden,
122297eecfbSDimitry Andric                         cl::desc("Max depth to recursively search for missing "
123297eecfbSDimitry Andric                                  "frames through tail calls."));
124297eecfbSDimitry Andric 
1255f757f3fSDimitry Andric namespace llvm {
126*0fca6ea1SDimitry Andric cl::opt<bool> EnableMemProfContextDisambiguation(
127*0fca6ea1SDimitry Andric     "enable-memprof-context-disambiguation", cl::init(false), cl::Hidden,
128*0fca6ea1SDimitry Andric     cl::ZeroOrMore, cl::desc("Enable MemProf context disambiguation"));
129*0fca6ea1SDimitry Andric 
13006c3fb27SDimitry Andric // Indicate we are linking with an allocator that supports hot/cold operator
13106c3fb27SDimitry Andric // new interfaces.
13206c3fb27SDimitry Andric cl::opt<bool> SupportsHotColdNew(
13306c3fb27SDimitry Andric     "supports-hot-cold-new", cl::init(false), cl::Hidden,
13406c3fb27SDimitry Andric     cl::desc("Linking with hot/cold operator new interfaces"));
1355f757f3fSDimitry Andric } // namespace llvm
13606c3fb27SDimitry Andric 
137*0fca6ea1SDimitry Andric extern cl::opt<bool> MemProfReportHintedSizes;
138*0fca6ea1SDimitry Andric 
13906c3fb27SDimitry Andric namespace {
14006c3fb27SDimitry Andric /// CRTP base for graphs built from either IR or ThinLTO summary index.
14106c3fb27SDimitry Andric ///
14206c3fb27SDimitry Andric /// The graph represents the call contexts in all memprof metadata on allocation
14306c3fb27SDimitry Andric /// calls, with nodes for the allocations themselves, as well as for the calls
14406c3fb27SDimitry Andric /// in each context. The graph is initially built from the allocation memprof
14506c3fb27SDimitry Andric /// metadata (or summary) MIBs. It is then updated to match calls with callsite
14606c3fb27SDimitry Andric /// metadata onto the nodes, updating it to reflect any inlining performed on
14706c3fb27SDimitry Andric /// those calls.
14806c3fb27SDimitry Andric ///
14906c3fb27SDimitry Andric /// Each MIB (representing an allocation's call context with allocation
15006c3fb27SDimitry Andric /// behavior) is assigned a unique context id during the graph build. The edges
15106c3fb27SDimitry Andric /// and nodes in the graph are decorated with the context ids they carry. This
15206c3fb27SDimitry Andric /// is used to correctly update the graph when cloning is performed so that we
15306c3fb27SDimitry Andric /// can uniquify the context for a single (possibly cloned) allocation.
15406c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
15506c3fb27SDimitry Andric class CallsiteContextGraph {
15606c3fb27SDimitry Andric public:
15706c3fb27SDimitry Andric   CallsiteContextGraph() = default;
15806c3fb27SDimitry Andric   CallsiteContextGraph(const CallsiteContextGraph &) = default;
15906c3fb27SDimitry Andric   CallsiteContextGraph(CallsiteContextGraph &&) = default;
16006c3fb27SDimitry Andric 
16106c3fb27SDimitry Andric   /// Main entry point to perform analysis and transformations on graph.
16206c3fb27SDimitry Andric   bool process();
16306c3fb27SDimitry Andric 
16406c3fb27SDimitry Andric   /// Perform cloning on the graph necessary to uniquely identify the allocation
16506c3fb27SDimitry Andric   /// behavior of an allocation based on its context.
16606c3fb27SDimitry Andric   void identifyClones();
16706c3fb27SDimitry Andric 
16806c3fb27SDimitry Andric   /// Assign callsite clones to functions, cloning functions as needed to
16906c3fb27SDimitry Andric   /// accommodate the combinations of their callsite clones reached by callers.
17006c3fb27SDimitry Andric   /// For regular LTO this clones functions and callsites in the IR, but for
17106c3fb27SDimitry Andric   /// ThinLTO the cloning decisions are noted in the summaries and later applied
17206c3fb27SDimitry Andric   /// in applyImport.
17306c3fb27SDimitry Andric   bool assignFunctions();
17406c3fb27SDimitry Andric 
17506c3fb27SDimitry Andric   void dump() const;
17606c3fb27SDimitry Andric   void print(raw_ostream &OS) const;
177*0fca6ea1SDimitry Andric   void printTotalSizes(raw_ostream &OS) const;
17806c3fb27SDimitry Andric 
17906c3fb27SDimitry Andric   friend raw_ostream &operator<<(raw_ostream &OS,
18006c3fb27SDimitry Andric                                  const CallsiteContextGraph &CCG) {
18106c3fb27SDimitry Andric     CCG.print(OS);
18206c3fb27SDimitry Andric     return OS;
18306c3fb27SDimitry Andric   }
18406c3fb27SDimitry Andric 
18506c3fb27SDimitry Andric   friend struct GraphTraits<
18606c3fb27SDimitry Andric       const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>;
18706c3fb27SDimitry Andric   friend struct DOTGraphTraits<
18806c3fb27SDimitry Andric       const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>;
18906c3fb27SDimitry Andric 
19006c3fb27SDimitry Andric   void exportToDot(std::string Label) const;
19106c3fb27SDimitry Andric 
19206c3fb27SDimitry Andric   /// Represents a function clone via FuncTy pointer and clone number pair.
19306c3fb27SDimitry Andric   struct FuncInfo final
19406c3fb27SDimitry Andric       : public std::pair<FuncTy *, unsigned /*Clone number*/> {
19506c3fb27SDimitry Andric     using Base = std::pair<FuncTy *, unsigned>;
19606c3fb27SDimitry Andric     FuncInfo(const Base &B) : Base(B) {}
19706c3fb27SDimitry Andric     FuncInfo(FuncTy *F = nullptr, unsigned CloneNo = 0) : Base(F, CloneNo) {}
19806c3fb27SDimitry Andric     explicit operator bool() const { return this->first != nullptr; }
19906c3fb27SDimitry Andric     FuncTy *func() const { return this->first; }
20006c3fb27SDimitry Andric     unsigned cloneNo() const { return this->second; }
20106c3fb27SDimitry Andric   };
20206c3fb27SDimitry Andric 
20306c3fb27SDimitry Andric   /// Represents a callsite clone via CallTy and clone number pair.
20406c3fb27SDimitry Andric   struct CallInfo final : public std::pair<CallTy, unsigned /*Clone number*/> {
20506c3fb27SDimitry Andric     using Base = std::pair<CallTy, unsigned>;
20606c3fb27SDimitry Andric     CallInfo(const Base &B) : Base(B) {}
20706c3fb27SDimitry Andric     CallInfo(CallTy Call = nullptr, unsigned CloneNo = 0)
20806c3fb27SDimitry Andric         : Base(Call, CloneNo) {}
20906c3fb27SDimitry Andric     explicit operator bool() const { return (bool)this->first; }
21006c3fb27SDimitry Andric     CallTy call() const { return this->first; }
21106c3fb27SDimitry Andric     unsigned cloneNo() const { return this->second; }
21206c3fb27SDimitry Andric     void setCloneNo(unsigned N) { this->second = N; }
21306c3fb27SDimitry Andric     void print(raw_ostream &OS) const {
21406c3fb27SDimitry Andric       if (!operator bool()) {
21506c3fb27SDimitry Andric         assert(!cloneNo());
21606c3fb27SDimitry Andric         OS << "null Call";
21706c3fb27SDimitry Andric         return;
21806c3fb27SDimitry Andric       }
21906c3fb27SDimitry Andric       call()->print(OS);
22006c3fb27SDimitry Andric       OS << "\t(clone " << cloneNo() << ")";
22106c3fb27SDimitry Andric     }
22206c3fb27SDimitry Andric     void dump() const {
22306c3fb27SDimitry Andric       print(dbgs());
22406c3fb27SDimitry Andric       dbgs() << "\n";
22506c3fb27SDimitry Andric     }
22606c3fb27SDimitry Andric     friend raw_ostream &operator<<(raw_ostream &OS, const CallInfo &Call) {
22706c3fb27SDimitry Andric       Call.print(OS);
22806c3fb27SDimitry Andric       return OS;
22906c3fb27SDimitry Andric     }
23006c3fb27SDimitry Andric   };
23106c3fb27SDimitry Andric 
23206c3fb27SDimitry Andric   struct ContextEdge;
23306c3fb27SDimitry Andric 
23406c3fb27SDimitry Andric   /// Node in the Callsite Context Graph
23506c3fb27SDimitry Andric   struct ContextNode {
23606c3fb27SDimitry Andric     // Keep this for now since in the IR case where we have an Instruction* it
23706c3fb27SDimitry Andric     // is not as immediately discoverable. Used for printing richer information
23806c3fb27SDimitry Andric     // when dumping graph.
23906c3fb27SDimitry Andric     bool IsAllocation;
24006c3fb27SDimitry Andric 
24106c3fb27SDimitry Andric     // Keeps track of when the Call was reset to null because there was
24206c3fb27SDimitry Andric     // recursion.
24306c3fb27SDimitry Andric     bool Recursive = false;
24406c3fb27SDimitry Andric 
24506c3fb27SDimitry Andric     // The corresponding allocation or interior call.
24606c3fb27SDimitry Andric     CallInfo Call;
24706c3fb27SDimitry Andric 
24806c3fb27SDimitry Andric     // For alloc nodes this is a unique id assigned when constructed, and for
24906c3fb27SDimitry Andric     // callsite stack nodes it is the original stack id when the node is
25006c3fb27SDimitry Andric     // constructed from the memprof MIB metadata on the alloc nodes. Note that
25106c3fb27SDimitry Andric     // this is only used when matching callsite metadata onto the stack nodes
25206c3fb27SDimitry Andric     // created when processing the allocation memprof MIBs, and for labeling
25306c3fb27SDimitry Andric     // nodes in the dot graph. Therefore we don't bother to assign a value for
25406c3fb27SDimitry Andric     // clones.
25506c3fb27SDimitry Andric     uint64_t OrigStackOrAllocId = 0;
25606c3fb27SDimitry Andric 
25706c3fb27SDimitry Andric     // This will be formed by ORing together the AllocationType enum values
25806c3fb27SDimitry Andric     // for contexts including this node.
25906c3fb27SDimitry Andric     uint8_t AllocTypes = 0;
26006c3fb27SDimitry Andric 
26106c3fb27SDimitry Andric     // Edges to all callees in the profiled call stacks.
26206c3fb27SDimitry Andric     // TODO: Should this be a map (from Callee node) for more efficient lookup?
26306c3fb27SDimitry Andric     std::vector<std::shared_ptr<ContextEdge>> CalleeEdges;
26406c3fb27SDimitry Andric 
26506c3fb27SDimitry Andric     // Edges to all callers in the profiled call stacks.
26606c3fb27SDimitry Andric     // TODO: Should this be a map (from Caller node) for more efficient lookup?
26706c3fb27SDimitry Andric     std::vector<std::shared_ptr<ContextEdge>> CallerEdges;
26806c3fb27SDimitry Andric 
269*0fca6ea1SDimitry Andric     // Get the list of edges from which we can compute allocation information
270*0fca6ea1SDimitry Andric     // such as the context ids and allocation type of this node.
271*0fca6ea1SDimitry Andric     const std::vector<std::shared_ptr<ContextEdge>> *
272*0fca6ea1SDimitry Andric     getEdgesWithAllocInfo() const {
273*0fca6ea1SDimitry Andric       // If node has any callees, compute from those, otherwise compute from
274*0fca6ea1SDimitry Andric       // callers (i.e. if this is the leaf allocation node).
275*0fca6ea1SDimitry Andric       if (!CalleeEdges.empty())
276*0fca6ea1SDimitry Andric         return &CalleeEdges;
277*0fca6ea1SDimitry Andric       if (!CallerEdges.empty()) {
278*0fca6ea1SDimitry Andric         // A node with caller edges but no callee edges must be the allocation
279*0fca6ea1SDimitry Andric         // node.
280*0fca6ea1SDimitry Andric         assert(IsAllocation);
281*0fca6ea1SDimitry Andric         return &CallerEdges;
282*0fca6ea1SDimitry Andric       }
283*0fca6ea1SDimitry Andric       return nullptr;
284*0fca6ea1SDimitry Andric     }
285*0fca6ea1SDimitry Andric 
286*0fca6ea1SDimitry Andric     // Compute the context ids for this node from the union of its edge context
287*0fca6ea1SDimitry Andric     // ids.
288*0fca6ea1SDimitry Andric     DenseSet<uint32_t> getContextIds() const {
28906c3fb27SDimitry Andric       DenseSet<uint32_t> ContextIds;
290*0fca6ea1SDimitry Andric       auto *Edges = getEdgesWithAllocInfo();
291*0fca6ea1SDimitry Andric       if (!Edges)
292*0fca6ea1SDimitry Andric         return {};
293*0fca6ea1SDimitry Andric       unsigned Count = 0;
294*0fca6ea1SDimitry Andric       for (auto &Edge : *Edges)
295*0fca6ea1SDimitry Andric         Count += Edge->getContextIds().size();
296*0fca6ea1SDimitry Andric       ContextIds.reserve(Count);
297*0fca6ea1SDimitry Andric       for (auto &Edge : *Edges)
298*0fca6ea1SDimitry Andric         ContextIds.insert(Edge->getContextIds().begin(),
299*0fca6ea1SDimitry Andric                           Edge->getContextIds().end());
300*0fca6ea1SDimitry Andric       return ContextIds;
301*0fca6ea1SDimitry Andric     }
302*0fca6ea1SDimitry Andric 
303*0fca6ea1SDimitry Andric     // Compute the allocation type for this node from the OR of its edge
304*0fca6ea1SDimitry Andric     // allocation types.
305*0fca6ea1SDimitry Andric     uint8_t computeAllocType() const {
306*0fca6ea1SDimitry Andric       auto *Edges = getEdgesWithAllocInfo();
307*0fca6ea1SDimitry Andric       if (!Edges)
308*0fca6ea1SDimitry Andric         return (uint8_t)AllocationType::None;
309*0fca6ea1SDimitry Andric       uint8_t BothTypes =
310*0fca6ea1SDimitry Andric           (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
311*0fca6ea1SDimitry Andric       uint8_t AllocType = (uint8_t)AllocationType::None;
312*0fca6ea1SDimitry Andric       for (auto &Edge : *Edges) {
313*0fca6ea1SDimitry Andric         AllocType |= Edge->AllocTypes;
314*0fca6ea1SDimitry Andric         // Bail early if alloc type reached both, no further refinement.
315*0fca6ea1SDimitry Andric         if (AllocType == BothTypes)
316*0fca6ea1SDimitry Andric           return AllocType;
317*0fca6ea1SDimitry Andric       }
318*0fca6ea1SDimitry Andric       return AllocType;
319*0fca6ea1SDimitry Andric     }
320*0fca6ea1SDimitry Andric 
321*0fca6ea1SDimitry Andric     // The context ids set for this node is empty if its edge context ids are
322*0fca6ea1SDimitry Andric     // also all empty.
323*0fca6ea1SDimitry Andric     bool emptyContextIds() const {
324*0fca6ea1SDimitry Andric       auto *Edges = getEdgesWithAllocInfo();
325*0fca6ea1SDimitry Andric       if (!Edges)
326*0fca6ea1SDimitry Andric         return true;
327*0fca6ea1SDimitry Andric       for (auto &Edge : *Edges) {
328*0fca6ea1SDimitry Andric         if (!Edge->getContextIds().empty())
329*0fca6ea1SDimitry Andric           return false;
330*0fca6ea1SDimitry Andric       }
331*0fca6ea1SDimitry Andric       return true;
332*0fca6ea1SDimitry Andric     }
33306c3fb27SDimitry Andric 
33406c3fb27SDimitry Andric     // List of clones of this ContextNode, initially empty.
33506c3fb27SDimitry Andric     std::vector<ContextNode *> Clones;
33606c3fb27SDimitry Andric 
33706c3fb27SDimitry Andric     // If a clone, points to the original uncloned node.
33806c3fb27SDimitry Andric     ContextNode *CloneOf = nullptr;
33906c3fb27SDimitry Andric 
34006c3fb27SDimitry Andric     ContextNode(bool IsAllocation) : IsAllocation(IsAllocation), Call() {}
34106c3fb27SDimitry Andric 
34206c3fb27SDimitry Andric     ContextNode(bool IsAllocation, CallInfo C)
34306c3fb27SDimitry Andric         : IsAllocation(IsAllocation), Call(C) {}
34406c3fb27SDimitry Andric 
34506c3fb27SDimitry Andric     void addClone(ContextNode *Clone) {
34606c3fb27SDimitry Andric       if (CloneOf) {
34706c3fb27SDimitry Andric         CloneOf->Clones.push_back(Clone);
34806c3fb27SDimitry Andric         Clone->CloneOf = CloneOf;
34906c3fb27SDimitry Andric       } else {
35006c3fb27SDimitry Andric         Clones.push_back(Clone);
35106c3fb27SDimitry Andric         assert(!Clone->CloneOf);
35206c3fb27SDimitry Andric         Clone->CloneOf = this;
35306c3fb27SDimitry Andric       }
35406c3fb27SDimitry Andric     }
35506c3fb27SDimitry Andric 
35606c3fb27SDimitry Andric     ContextNode *getOrigNode() {
35706c3fb27SDimitry Andric       if (!CloneOf)
35806c3fb27SDimitry Andric         return this;
35906c3fb27SDimitry Andric       return CloneOf;
36006c3fb27SDimitry Andric     }
36106c3fb27SDimitry Andric 
36206c3fb27SDimitry Andric     void addOrUpdateCallerEdge(ContextNode *Caller, AllocationType AllocType,
36306c3fb27SDimitry Andric                                unsigned int ContextId);
36406c3fb27SDimitry Andric 
36506c3fb27SDimitry Andric     ContextEdge *findEdgeFromCallee(const ContextNode *Callee);
36606c3fb27SDimitry Andric     ContextEdge *findEdgeFromCaller(const ContextNode *Caller);
36706c3fb27SDimitry Andric     void eraseCalleeEdge(const ContextEdge *Edge);
36806c3fb27SDimitry Andric     void eraseCallerEdge(const ContextEdge *Edge);
36906c3fb27SDimitry Andric 
37006c3fb27SDimitry Andric     void setCall(CallInfo C) { Call = C; }
37106c3fb27SDimitry Andric 
37206c3fb27SDimitry Andric     bool hasCall() const { return (bool)Call.call(); }
37306c3fb27SDimitry Andric 
37406c3fb27SDimitry Andric     void printCall(raw_ostream &OS) const { Call.print(OS); }
37506c3fb27SDimitry Andric 
37606c3fb27SDimitry Andric     // True if this node was effectively removed from the graph, in which case
377*0fca6ea1SDimitry Andric     // it should have an allocation type of None and empty context ids.
37806c3fb27SDimitry Andric     bool isRemoved() const {
379*0fca6ea1SDimitry Andric       assert((AllocTypes == (uint8_t)AllocationType::None) ==
380*0fca6ea1SDimitry Andric              emptyContextIds());
381*0fca6ea1SDimitry Andric       return AllocTypes == (uint8_t)AllocationType::None;
38206c3fb27SDimitry Andric     }
38306c3fb27SDimitry Andric 
38406c3fb27SDimitry Andric     void dump() const;
38506c3fb27SDimitry Andric     void print(raw_ostream &OS) const;
38606c3fb27SDimitry Andric 
38706c3fb27SDimitry Andric     friend raw_ostream &operator<<(raw_ostream &OS, const ContextNode &Node) {
38806c3fb27SDimitry Andric       Node.print(OS);
38906c3fb27SDimitry Andric       return OS;
39006c3fb27SDimitry Andric     }
39106c3fb27SDimitry Andric   };
39206c3fb27SDimitry Andric 
39306c3fb27SDimitry Andric   /// Edge in the Callsite Context Graph from a ContextNode N to a caller or
39406c3fb27SDimitry Andric   /// callee.
39506c3fb27SDimitry Andric   struct ContextEdge {
39606c3fb27SDimitry Andric     ContextNode *Callee;
39706c3fb27SDimitry Andric     ContextNode *Caller;
39806c3fb27SDimitry Andric 
39906c3fb27SDimitry Andric     // This will be formed by ORing together the AllocationType enum values
40006c3fb27SDimitry Andric     // for contexts including this edge.
40106c3fb27SDimitry Andric     uint8_t AllocTypes = 0;
40206c3fb27SDimitry Andric 
40306c3fb27SDimitry Andric     // The set of IDs for contexts including this edge.
40406c3fb27SDimitry Andric     DenseSet<uint32_t> ContextIds;
40506c3fb27SDimitry Andric 
40606c3fb27SDimitry Andric     ContextEdge(ContextNode *Callee, ContextNode *Caller, uint8_t AllocType,
40706c3fb27SDimitry Andric                 DenseSet<uint32_t> ContextIds)
40806c3fb27SDimitry Andric         : Callee(Callee), Caller(Caller), AllocTypes(AllocType),
409*0fca6ea1SDimitry Andric           ContextIds(std::move(ContextIds)) {}
41006c3fb27SDimitry Andric 
41106c3fb27SDimitry Andric     DenseSet<uint32_t> &getContextIds() { return ContextIds; }
41206c3fb27SDimitry Andric 
41306c3fb27SDimitry Andric     void dump() const;
41406c3fb27SDimitry Andric     void print(raw_ostream &OS) const;
41506c3fb27SDimitry Andric 
41606c3fb27SDimitry Andric     friend raw_ostream &operator<<(raw_ostream &OS, const ContextEdge &Edge) {
41706c3fb27SDimitry Andric       Edge.print(OS);
41806c3fb27SDimitry Andric       return OS;
41906c3fb27SDimitry Andric     }
42006c3fb27SDimitry Andric   };
42106c3fb27SDimitry Andric 
422*0fca6ea1SDimitry Andric   /// Helpers to remove callee edges that have allocation type None (due to not
42306c3fb27SDimitry Andric   /// carrying any context ids) after transformations.
42406c3fb27SDimitry Andric   void removeNoneTypeCalleeEdges(ContextNode *Node);
425*0fca6ea1SDimitry Andric   void
426*0fca6ea1SDimitry Andric   recursivelyRemoveNoneTypeCalleeEdges(ContextNode *Node,
427*0fca6ea1SDimitry Andric                                        DenseSet<const ContextNode *> &Visited);
42806c3fb27SDimitry Andric 
42906c3fb27SDimitry Andric protected:
43006c3fb27SDimitry Andric   /// Get a list of nodes corresponding to the stack ids in the given callsite
43106c3fb27SDimitry Andric   /// context.
43206c3fb27SDimitry Andric   template <class NodeT, class IteratorT>
43306c3fb27SDimitry Andric   std::vector<uint64_t>
43406c3fb27SDimitry Andric   getStackIdsWithContextNodes(CallStack<NodeT, IteratorT> &CallsiteContext);
43506c3fb27SDimitry Andric 
43606c3fb27SDimitry Andric   /// Adds nodes for the given allocation and any stack ids on its memprof MIB
43706c3fb27SDimitry Andric   /// metadata (or summary).
43806c3fb27SDimitry Andric   ContextNode *addAllocNode(CallInfo Call, const FuncTy *F);
43906c3fb27SDimitry Andric 
44006c3fb27SDimitry Andric   /// Adds nodes for the given MIB stack ids.
44106c3fb27SDimitry Andric   template <class NodeT, class IteratorT>
44206c3fb27SDimitry Andric   void addStackNodesForMIB(ContextNode *AllocNode,
44306c3fb27SDimitry Andric                            CallStack<NodeT, IteratorT> &StackContext,
44406c3fb27SDimitry Andric                            CallStack<NodeT, IteratorT> &CallsiteContext,
445*0fca6ea1SDimitry Andric                            AllocationType AllocType, uint64_t TotalSize);
44606c3fb27SDimitry Andric 
44706c3fb27SDimitry Andric   /// Matches all callsite metadata (or summary) to the nodes created for
44806c3fb27SDimitry Andric   /// allocation memprof MIB metadata, synthesizing new nodes to reflect any
44906c3fb27SDimitry Andric   /// inlining performed on those callsite instructions.
45006c3fb27SDimitry Andric   void updateStackNodes();
45106c3fb27SDimitry Andric 
45206c3fb27SDimitry Andric   /// Update graph to conservatively handle any callsite stack nodes that target
45306c3fb27SDimitry Andric   /// multiple different callee target functions.
45406c3fb27SDimitry Andric   void handleCallsitesWithMultipleTargets();
45506c3fb27SDimitry Andric 
45606c3fb27SDimitry Andric   /// Save lists of calls with MemProf metadata in each function, for faster
45706c3fb27SDimitry Andric   /// iteration.
458297eecfbSDimitry Andric   MapVector<FuncTy *, std::vector<CallInfo>> FuncToCallsWithMetadata;
45906c3fb27SDimitry Andric 
46006c3fb27SDimitry Andric   /// Map from callsite node to the enclosing caller function.
46106c3fb27SDimitry Andric   std::map<const ContextNode *, const FuncTy *> NodeToCallingFunc;
46206c3fb27SDimitry Andric 
46306c3fb27SDimitry Andric private:
46406c3fb27SDimitry Andric   using EdgeIter = typename std::vector<std::shared_ptr<ContextEdge>>::iterator;
46506c3fb27SDimitry Andric 
46606c3fb27SDimitry Andric   using CallContextInfo = std::tuple<CallTy, std::vector<uint64_t>,
46706c3fb27SDimitry Andric                                      const FuncTy *, DenseSet<uint32_t>>;
46806c3fb27SDimitry Andric 
46906c3fb27SDimitry Andric   /// Assigns the given Node to calls at or inlined into the location with
47006c3fb27SDimitry Andric   /// the Node's stack id, after post order traversing and processing its
47106c3fb27SDimitry Andric   /// caller nodes. Uses the call information recorded in the given
47206c3fb27SDimitry Andric   /// StackIdToMatchingCalls map, and creates new nodes for inlined sequences
47306c3fb27SDimitry Andric   /// as needed. Called by updateStackNodes which sets up the given
47406c3fb27SDimitry Andric   /// StackIdToMatchingCalls map.
47506c3fb27SDimitry Andric   void assignStackNodesPostOrder(
47606c3fb27SDimitry Andric       ContextNode *Node, DenseSet<const ContextNode *> &Visited,
47706c3fb27SDimitry Andric       DenseMap<uint64_t, std::vector<CallContextInfo>> &StackIdToMatchingCalls);
47806c3fb27SDimitry Andric 
47906c3fb27SDimitry Andric   /// Duplicates the given set of context ids, updating the provided
48006c3fb27SDimitry Andric   /// map from each original id with the newly generated context ids,
48106c3fb27SDimitry Andric   /// and returning the new duplicated id set.
48206c3fb27SDimitry Andric   DenseSet<uint32_t> duplicateContextIds(
48306c3fb27SDimitry Andric       const DenseSet<uint32_t> &StackSequenceContextIds,
48406c3fb27SDimitry Andric       DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds);
48506c3fb27SDimitry Andric 
48606c3fb27SDimitry Andric   /// Propagates all duplicated context ids across the graph.
48706c3fb27SDimitry Andric   void propagateDuplicateContextIds(
48806c3fb27SDimitry Andric       const DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds);
48906c3fb27SDimitry Andric 
49006c3fb27SDimitry Andric   /// Connect the NewNode to OrigNode's callees if TowardsCallee is true,
49106c3fb27SDimitry Andric   /// else to its callers. Also updates OrigNode's edges to remove any context
49206c3fb27SDimitry Andric   /// ids moved to the newly created edge.
49306c3fb27SDimitry Andric   void connectNewNode(ContextNode *NewNode, ContextNode *OrigNode,
494*0fca6ea1SDimitry Andric                       bool TowardsCallee,
495*0fca6ea1SDimitry Andric                       DenseSet<uint32_t> RemainingContextIds);
49606c3fb27SDimitry Andric 
49706c3fb27SDimitry Andric   /// Get the stack id corresponding to the given Id or Index (for IR this will
49806c3fb27SDimitry Andric   /// return itself, for a summary index this will return the id recorded in the
49906c3fb27SDimitry Andric   /// index for that stack id index value).
50006c3fb27SDimitry Andric   uint64_t getStackId(uint64_t IdOrIndex) const {
50106c3fb27SDimitry Andric     return static_cast<const DerivedCCG *>(this)->getStackId(IdOrIndex);
50206c3fb27SDimitry Andric   }
50306c3fb27SDimitry Andric 
504297eecfbSDimitry Andric   /// Returns true if the given call targets the callee of the given edge, or if
505297eecfbSDimitry Andric   /// we were able to identify the call chain through intermediate tail calls.
506297eecfbSDimitry Andric   /// In the latter case new context nodes are added to the graph for the
507297eecfbSDimitry Andric   /// identified tail calls, and their synthesized nodes are added to
508*0fca6ea1SDimitry Andric   /// TailCallToContextNodeMap. The EdgeIter is updated in the latter case for
509*0fca6ea1SDimitry Andric   /// the updated edges and to prepare it for an increment in the caller.
510297eecfbSDimitry Andric   bool
511297eecfbSDimitry Andric   calleesMatch(CallTy Call, EdgeIter &EI,
512297eecfbSDimitry Andric                MapVector<CallInfo, ContextNode *> &TailCallToContextNodeMap);
513297eecfbSDimitry Andric 
514297eecfbSDimitry Andric   /// Returns true if the given call targets the given function, or if we were
515297eecfbSDimitry Andric   /// able to identify the call chain through intermediate tail calls (in which
516297eecfbSDimitry Andric   /// case FoundCalleeChain will be populated).
517297eecfbSDimitry Andric   bool calleeMatchesFunc(
518297eecfbSDimitry Andric       CallTy Call, const FuncTy *Func, const FuncTy *CallerFunc,
519297eecfbSDimitry Andric       std::vector<std::pair<CallTy, FuncTy *>> &FoundCalleeChain) {
520297eecfbSDimitry Andric     return static_cast<DerivedCCG *>(this)->calleeMatchesFunc(
521297eecfbSDimitry Andric         Call, Func, CallerFunc, FoundCalleeChain);
52206c3fb27SDimitry Andric   }
52306c3fb27SDimitry Andric 
52406c3fb27SDimitry Andric   /// Get a list of nodes corresponding to the stack ids in the given
52506c3fb27SDimitry Andric   /// callsite's context.
52606c3fb27SDimitry Andric   std::vector<uint64_t> getStackIdsWithContextNodesForCall(CallTy Call) {
52706c3fb27SDimitry Andric     return static_cast<DerivedCCG *>(this)->getStackIdsWithContextNodesForCall(
52806c3fb27SDimitry Andric         Call);
52906c3fb27SDimitry Andric   }
53006c3fb27SDimitry Andric 
53106c3fb27SDimitry Andric   /// Get the last stack id in the context for callsite.
53206c3fb27SDimitry Andric   uint64_t getLastStackId(CallTy Call) {
53306c3fb27SDimitry Andric     return static_cast<DerivedCCG *>(this)->getLastStackId(Call);
53406c3fb27SDimitry Andric   }
53506c3fb27SDimitry Andric 
53606c3fb27SDimitry Andric   /// Update the allocation call to record type of allocated memory.
53706c3fb27SDimitry Andric   void updateAllocationCall(CallInfo &Call, AllocationType AllocType) {
53806c3fb27SDimitry Andric     AllocType == AllocationType::Cold ? AllocTypeCold++ : AllocTypeNotCold++;
53906c3fb27SDimitry Andric     static_cast<DerivedCCG *>(this)->updateAllocationCall(Call, AllocType);
54006c3fb27SDimitry Andric   }
54106c3fb27SDimitry Andric 
54206c3fb27SDimitry Andric   /// Update non-allocation call to invoke (possibly cloned) function
54306c3fb27SDimitry Andric   /// CalleeFunc.
54406c3fb27SDimitry Andric   void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc) {
54506c3fb27SDimitry Andric     static_cast<DerivedCCG *>(this)->updateCall(CallerCall, CalleeFunc);
54606c3fb27SDimitry Andric   }
54706c3fb27SDimitry Andric 
54806c3fb27SDimitry Andric   /// Clone the given function for the given callsite, recording mapping of all
54906c3fb27SDimitry Andric   /// of the functions tracked calls to their new versions in the CallMap.
55006c3fb27SDimitry Andric   /// Assigns new clones to clone number CloneNo.
55106c3fb27SDimitry Andric   FuncInfo cloneFunctionForCallsite(
55206c3fb27SDimitry Andric       FuncInfo &Func, CallInfo &Call, std::map<CallInfo, CallInfo> &CallMap,
55306c3fb27SDimitry Andric       std::vector<CallInfo> &CallsWithMetadataInFunc, unsigned CloneNo) {
55406c3fb27SDimitry Andric     return static_cast<DerivedCCG *>(this)->cloneFunctionForCallsite(
55506c3fb27SDimitry Andric         Func, Call, CallMap, CallsWithMetadataInFunc, CloneNo);
55606c3fb27SDimitry Andric   }
55706c3fb27SDimitry Andric 
55806c3fb27SDimitry Andric   /// Gets a label to use in the dot graph for the given call clone in the given
55906c3fb27SDimitry Andric   /// function.
56006c3fb27SDimitry Andric   std::string getLabel(const FuncTy *Func, const CallTy Call,
56106c3fb27SDimitry Andric                        unsigned CloneNo) const {
56206c3fb27SDimitry Andric     return static_cast<const DerivedCCG *>(this)->getLabel(Func, Call, CloneNo);
56306c3fb27SDimitry Andric   }
56406c3fb27SDimitry Andric 
56506c3fb27SDimitry Andric   /// Helpers to find the node corresponding to the given call or stackid.
56606c3fb27SDimitry Andric   ContextNode *getNodeForInst(const CallInfo &C);
56706c3fb27SDimitry Andric   ContextNode *getNodeForAlloc(const CallInfo &C);
56806c3fb27SDimitry Andric   ContextNode *getNodeForStackId(uint64_t StackId);
56906c3fb27SDimitry Andric 
57006c3fb27SDimitry Andric   /// Computes the alloc type corresponding to the given context ids, by
57106c3fb27SDimitry Andric   /// unioning their recorded alloc types.
57206c3fb27SDimitry Andric   uint8_t computeAllocType(DenseSet<uint32_t> &ContextIds);
57306c3fb27SDimitry Andric 
574*0fca6ea1SDimitry Andric   /// Returns the allocation type of the intersection of the contexts of two
57506c3fb27SDimitry Andric   /// nodes (based on their provided context id sets), optimized for the case
57606c3fb27SDimitry Andric   /// when Node1Ids is smaller than Node2Ids.
57706c3fb27SDimitry Andric   uint8_t intersectAllocTypesImpl(const DenseSet<uint32_t> &Node1Ids,
57806c3fb27SDimitry Andric                                   const DenseSet<uint32_t> &Node2Ids);
57906c3fb27SDimitry Andric 
580*0fca6ea1SDimitry Andric   /// Returns the allocation type of the intersection of the contexts of two
58106c3fb27SDimitry Andric   /// nodes (based on their provided context id sets).
58206c3fb27SDimitry Andric   uint8_t intersectAllocTypes(const DenseSet<uint32_t> &Node1Ids,
58306c3fb27SDimitry Andric                               const DenseSet<uint32_t> &Node2Ids);
58406c3fb27SDimitry Andric 
58506c3fb27SDimitry Andric   /// Create a clone of Edge's callee and move Edge to that new callee node,
58606c3fb27SDimitry Andric   /// performing the necessary context id and allocation type updates.
58706c3fb27SDimitry Andric   /// If callee's caller edge iterator is supplied, it is updated when removing
588*0fca6ea1SDimitry Andric   /// the edge from that list. If ContextIdsToMove is non-empty, only that
589*0fca6ea1SDimitry Andric   /// subset of Edge's ids are moved to an edge to the new callee.
59006c3fb27SDimitry Andric   ContextNode *
59106c3fb27SDimitry Andric   moveEdgeToNewCalleeClone(const std::shared_ptr<ContextEdge> &Edge,
592*0fca6ea1SDimitry Andric                            EdgeIter *CallerEdgeI = nullptr,
593*0fca6ea1SDimitry Andric                            DenseSet<uint32_t> ContextIdsToMove = {});
59406c3fb27SDimitry Andric 
59506c3fb27SDimitry Andric   /// Change the callee of Edge to existing callee clone NewCallee, performing
59606c3fb27SDimitry Andric   /// the necessary context id and allocation type updates.
59706c3fb27SDimitry Andric   /// If callee's caller edge iterator is supplied, it is updated when removing
598*0fca6ea1SDimitry Andric   /// the edge from that list. If ContextIdsToMove is non-empty, only that
599*0fca6ea1SDimitry Andric   /// subset of Edge's ids are moved to an edge to the new callee.
60006c3fb27SDimitry Andric   void moveEdgeToExistingCalleeClone(const std::shared_ptr<ContextEdge> &Edge,
60106c3fb27SDimitry Andric                                      ContextNode *NewCallee,
60206c3fb27SDimitry Andric                                      EdgeIter *CallerEdgeI = nullptr,
603*0fca6ea1SDimitry Andric                                      bool NewClone = false,
604*0fca6ea1SDimitry Andric                                      DenseSet<uint32_t> ContextIdsToMove = {});
60506c3fb27SDimitry Andric 
60606c3fb27SDimitry Andric   /// Recursively perform cloning on the graph for the given Node and its
60706c3fb27SDimitry Andric   /// callers, in order to uniquely identify the allocation behavior of an
608*0fca6ea1SDimitry Andric   /// allocation given its context. The context ids of the allocation being
609*0fca6ea1SDimitry Andric   /// processed are given in AllocContextIds.
610*0fca6ea1SDimitry Andric   void identifyClones(ContextNode *Node, DenseSet<const ContextNode *> &Visited,
611*0fca6ea1SDimitry Andric                       const DenseSet<uint32_t> &AllocContextIds);
61206c3fb27SDimitry Andric 
61306c3fb27SDimitry Andric   /// Map from each context ID to the AllocationType assigned to that context.
614*0fca6ea1SDimitry Andric   DenseMap<uint32_t, AllocationType> ContextIdToAllocationType;
615*0fca6ea1SDimitry Andric 
616*0fca6ea1SDimitry Andric   /// Map from each contextID to the profiled aggregate allocation size,
617*0fca6ea1SDimitry Andric   /// optionally populated when requested (via MemProfReportHintedSizes).
618*0fca6ea1SDimitry Andric   DenseMap<uint32_t, uint64_t> ContextIdToTotalSize;
61906c3fb27SDimitry Andric 
62006c3fb27SDimitry Andric   /// Identifies the context node created for a stack id when adding the MIB
62106c3fb27SDimitry Andric   /// contexts to the graph. This is used to locate the context nodes when
62206c3fb27SDimitry Andric   /// trying to assign the corresponding callsites with those stack ids to these
62306c3fb27SDimitry Andric   /// nodes.
624*0fca6ea1SDimitry Andric   DenseMap<uint64_t, ContextNode *> StackEntryIdToContextNodeMap;
62506c3fb27SDimitry Andric 
62606c3fb27SDimitry Andric   /// Maps to track the calls to their corresponding nodes in the graph.
62706c3fb27SDimitry Andric   MapVector<CallInfo, ContextNode *> AllocationCallToContextNodeMap;
62806c3fb27SDimitry Andric   MapVector<CallInfo, ContextNode *> NonAllocationCallToContextNodeMap;
62906c3fb27SDimitry Andric 
63006c3fb27SDimitry Andric   /// Owner of all ContextNode unique_ptrs.
63106c3fb27SDimitry Andric   std::vector<std::unique_ptr<ContextNode>> NodeOwner;
63206c3fb27SDimitry Andric 
63306c3fb27SDimitry Andric   /// Perform sanity checks on graph when requested.
63406c3fb27SDimitry Andric   void check() const;
63506c3fb27SDimitry Andric 
63606c3fb27SDimitry Andric   /// Keeps track of the last unique context id assigned.
63706c3fb27SDimitry Andric   unsigned int LastContextId = 0;
63806c3fb27SDimitry Andric };
63906c3fb27SDimitry Andric 
64006c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
64106c3fb27SDimitry Andric using ContextNode =
64206c3fb27SDimitry Andric     typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode;
64306c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
64406c3fb27SDimitry Andric using ContextEdge =
64506c3fb27SDimitry Andric     typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge;
64606c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
64706c3fb27SDimitry Andric using FuncInfo =
64806c3fb27SDimitry Andric     typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::FuncInfo;
64906c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
65006c3fb27SDimitry Andric using CallInfo =
65106c3fb27SDimitry Andric     typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::CallInfo;
65206c3fb27SDimitry Andric 
65306c3fb27SDimitry Andric /// CRTP derived class for graphs built from IR (regular LTO).
65406c3fb27SDimitry Andric class ModuleCallsiteContextGraph
65506c3fb27SDimitry Andric     : public CallsiteContextGraph<ModuleCallsiteContextGraph, Function,
65606c3fb27SDimitry Andric                                   Instruction *> {
65706c3fb27SDimitry Andric public:
65806c3fb27SDimitry Andric   ModuleCallsiteContextGraph(
65906c3fb27SDimitry Andric       Module &M,
6607a6dacacSDimitry Andric       llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter);
66106c3fb27SDimitry Andric 
66206c3fb27SDimitry Andric private:
66306c3fb27SDimitry Andric   friend CallsiteContextGraph<ModuleCallsiteContextGraph, Function,
66406c3fb27SDimitry Andric                               Instruction *>;
66506c3fb27SDimitry Andric 
66606c3fb27SDimitry Andric   uint64_t getStackId(uint64_t IdOrIndex) const;
667297eecfbSDimitry Andric   bool calleeMatchesFunc(
668297eecfbSDimitry Andric       Instruction *Call, const Function *Func, const Function *CallerFunc,
669297eecfbSDimitry Andric       std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain);
670297eecfbSDimitry Andric   bool findProfiledCalleeThroughTailCalls(
671297eecfbSDimitry Andric       const Function *ProfiledCallee, Value *CurCallee, unsigned Depth,
672297eecfbSDimitry Andric       std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain,
673297eecfbSDimitry Andric       bool &FoundMultipleCalleeChains);
67406c3fb27SDimitry Andric   uint64_t getLastStackId(Instruction *Call);
67506c3fb27SDimitry Andric   std::vector<uint64_t> getStackIdsWithContextNodesForCall(Instruction *Call);
67606c3fb27SDimitry Andric   void updateAllocationCall(CallInfo &Call, AllocationType AllocType);
67706c3fb27SDimitry Andric   void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc);
67806c3fb27SDimitry Andric   CallsiteContextGraph<ModuleCallsiteContextGraph, Function,
67906c3fb27SDimitry Andric                        Instruction *>::FuncInfo
68006c3fb27SDimitry Andric   cloneFunctionForCallsite(FuncInfo &Func, CallInfo &Call,
68106c3fb27SDimitry Andric                            std::map<CallInfo, CallInfo> &CallMap,
68206c3fb27SDimitry Andric                            std::vector<CallInfo> &CallsWithMetadataInFunc,
68306c3fb27SDimitry Andric                            unsigned CloneNo);
68406c3fb27SDimitry Andric   std::string getLabel(const Function *Func, const Instruction *Call,
68506c3fb27SDimitry Andric                        unsigned CloneNo) const;
68606c3fb27SDimitry Andric 
68706c3fb27SDimitry Andric   const Module &Mod;
6887a6dacacSDimitry Andric   llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter;
68906c3fb27SDimitry Andric };
69006c3fb27SDimitry Andric 
69106c3fb27SDimitry Andric /// Represents a call in the summary index graph, which can either be an
69206c3fb27SDimitry Andric /// allocation or an interior callsite node in an allocation's context.
69306c3fb27SDimitry Andric /// Holds a pointer to the corresponding data structure in the index.
69406c3fb27SDimitry Andric struct IndexCall : public PointerUnion<CallsiteInfo *, AllocInfo *> {
69506c3fb27SDimitry Andric   IndexCall() : PointerUnion() {}
69606c3fb27SDimitry Andric   IndexCall(std::nullptr_t) : IndexCall() {}
69706c3fb27SDimitry Andric   IndexCall(CallsiteInfo *StackNode) : PointerUnion(StackNode) {}
69806c3fb27SDimitry Andric   IndexCall(AllocInfo *AllocNode) : PointerUnion(AllocNode) {}
69906c3fb27SDimitry Andric   IndexCall(PointerUnion PT) : PointerUnion(PT) {}
70006c3fb27SDimitry Andric 
70106c3fb27SDimitry Andric   IndexCall *operator->() { return this; }
70206c3fb27SDimitry Andric 
70306c3fb27SDimitry Andric   PointerUnion<CallsiteInfo *, AllocInfo *> getBase() const { return *this; }
70406c3fb27SDimitry Andric 
70506c3fb27SDimitry Andric   void print(raw_ostream &OS) const {
70606c3fb27SDimitry Andric     if (auto *AI = llvm::dyn_cast_if_present<AllocInfo *>(getBase())) {
70706c3fb27SDimitry Andric       OS << *AI;
70806c3fb27SDimitry Andric     } else {
70906c3fb27SDimitry Andric       auto *CI = llvm::dyn_cast_if_present<CallsiteInfo *>(getBase());
71006c3fb27SDimitry Andric       assert(CI);
71106c3fb27SDimitry Andric       OS << *CI;
71206c3fb27SDimitry Andric     }
71306c3fb27SDimitry Andric   }
71406c3fb27SDimitry Andric };
71506c3fb27SDimitry Andric 
71606c3fb27SDimitry Andric /// CRTP derived class for graphs built from summary index (ThinLTO).
71706c3fb27SDimitry Andric class IndexCallsiteContextGraph
71806c3fb27SDimitry Andric     : public CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
71906c3fb27SDimitry Andric                                   IndexCall> {
72006c3fb27SDimitry Andric public:
72106c3fb27SDimitry Andric   IndexCallsiteContextGraph(
72206c3fb27SDimitry Andric       ModuleSummaryIndex &Index,
7237a6dacacSDimitry Andric       llvm::function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
72406c3fb27SDimitry Andric           isPrevailing);
72506c3fb27SDimitry Andric 
726297eecfbSDimitry Andric   ~IndexCallsiteContextGraph() {
727297eecfbSDimitry Andric     // Now that we are done with the graph it is safe to add the new
728297eecfbSDimitry Andric     // CallsiteInfo structs to the function summary vectors. The graph nodes
729297eecfbSDimitry Andric     // point into locations within these vectors, so we don't want to add them
730297eecfbSDimitry Andric     // any earlier.
731297eecfbSDimitry Andric     for (auto &I : FunctionCalleesToSynthesizedCallsiteInfos) {
732297eecfbSDimitry Andric       auto *FS = I.first;
733297eecfbSDimitry Andric       for (auto &Callsite : I.second)
734297eecfbSDimitry Andric         FS->addCallsite(*Callsite.second);
735297eecfbSDimitry Andric     }
736297eecfbSDimitry Andric   }
737297eecfbSDimitry Andric 
73806c3fb27SDimitry Andric private:
73906c3fb27SDimitry Andric   friend CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
74006c3fb27SDimitry Andric                               IndexCall>;
74106c3fb27SDimitry Andric 
74206c3fb27SDimitry Andric   uint64_t getStackId(uint64_t IdOrIndex) const;
743297eecfbSDimitry Andric   bool calleeMatchesFunc(
744297eecfbSDimitry Andric       IndexCall &Call, const FunctionSummary *Func,
745297eecfbSDimitry Andric       const FunctionSummary *CallerFunc,
746297eecfbSDimitry Andric       std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain);
747297eecfbSDimitry Andric   bool findProfiledCalleeThroughTailCalls(
748297eecfbSDimitry Andric       ValueInfo ProfiledCallee, ValueInfo CurCallee, unsigned Depth,
749297eecfbSDimitry Andric       std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain,
750297eecfbSDimitry Andric       bool &FoundMultipleCalleeChains);
75106c3fb27SDimitry Andric   uint64_t getLastStackId(IndexCall &Call);
75206c3fb27SDimitry Andric   std::vector<uint64_t> getStackIdsWithContextNodesForCall(IndexCall &Call);
75306c3fb27SDimitry Andric   void updateAllocationCall(CallInfo &Call, AllocationType AllocType);
75406c3fb27SDimitry Andric   void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc);
75506c3fb27SDimitry Andric   CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
75606c3fb27SDimitry Andric                        IndexCall>::FuncInfo
75706c3fb27SDimitry Andric   cloneFunctionForCallsite(FuncInfo &Func, CallInfo &Call,
75806c3fb27SDimitry Andric                            std::map<CallInfo, CallInfo> &CallMap,
75906c3fb27SDimitry Andric                            std::vector<CallInfo> &CallsWithMetadataInFunc,
76006c3fb27SDimitry Andric                            unsigned CloneNo);
76106c3fb27SDimitry Andric   std::string getLabel(const FunctionSummary *Func, const IndexCall &Call,
76206c3fb27SDimitry Andric                        unsigned CloneNo) const;
76306c3fb27SDimitry Andric 
76406c3fb27SDimitry Andric   // Saves mapping from function summaries containing memprof records back to
76506c3fb27SDimitry Andric   // its VI, for use in checking and debugging.
76606c3fb27SDimitry Andric   std::map<const FunctionSummary *, ValueInfo> FSToVIMap;
76706c3fb27SDimitry Andric 
76806c3fb27SDimitry Andric   const ModuleSummaryIndex &Index;
7697a6dacacSDimitry Andric   llvm::function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
770297eecfbSDimitry Andric       isPrevailing;
771297eecfbSDimitry Andric 
772297eecfbSDimitry Andric   // Saves/owns the callsite info structures synthesized for missing tail call
773297eecfbSDimitry Andric   // frames that we discover while building the graph.
774297eecfbSDimitry Andric   // It maps from the summary of the function making the tail call, to a map
775297eecfbSDimitry Andric   // of callee ValueInfo to corresponding synthesized callsite info.
776297eecfbSDimitry Andric   std::unordered_map<FunctionSummary *,
777297eecfbSDimitry Andric                      std::map<ValueInfo, std::unique_ptr<CallsiteInfo>>>
778297eecfbSDimitry Andric       FunctionCalleesToSynthesizedCallsiteInfos;
77906c3fb27SDimitry Andric };
78006c3fb27SDimitry Andric } // namespace
78106c3fb27SDimitry Andric 
78206c3fb27SDimitry Andric namespace llvm {
78306c3fb27SDimitry Andric template <>
78406c3fb27SDimitry Andric struct DenseMapInfo<typename CallsiteContextGraph<
78506c3fb27SDimitry Andric     ModuleCallsiteContextGraph, Function, Instruction *>::CallInfo>
78606c3fb27SDimitry Andric     : public DenseMapInfo<std::pair<Instruction *, unsigned>> {};
78706c3fb27SDimitry Andric template <>
78806c3fb27SDimitry Andric struct DenseMapInfo<typename CallsiteContextGraph<
78906c3fb27SDimitry Andric     IndexCallsiteContextGraph, FunctionSummary, IndexCall>::CallInfo>
79006c3fb27SDimitry Andric     : public DenseMapInfo<std::pair<IndexCall, unsigned>> {};
79106c3fb27SDimitry Andric template <>
79206c3fb27SDimitry Andric struct DenseMapInfo<IndexCall>
79306c3fb27SDimitry Andric     : public DenseMapInfo<PointerUnion<CallsiteInfo *, AllocInfo *>> {};
79406c3fb27SDimitry Andric } // end namespace llvm
79506c3fb27SDimitry Andric 
79606c3fb27SDimitry Andric namespace {
79706c3fb27SDimitry Andric 
79806c3fb27SDimitry Andric struct FieldSeparator {
79906c3fb27SDimitry Andric   bool Skip = true;
80006c3fb27SDimitry Andric   const char *Sep;
80106c3fb27SDimitry Andric 
80206c3fb27SDimitry Andric   FieldSeparator(const char *Sep = ", ") : Sep(Sep) {}
80306c3fb27SDimitry Andric };
80406c3fb27SDimitry Andric 
80506c3fb27SDimitry Andric raw_ostream &operator<<(raw_ostream &OS, FieldSeparator &FS) {
80606c3fb27SDimitry Andric   if (FS.Skip) {
80706c3fb27SDimitry Andric     FS.Skip = false;
80806c3fb27SDimitry Andric     return OS;
80906c3fb27SDimitry Andric   }
81006c3fb27SDimitry Andric   return OS << FS.Sep;
81106c3fb27SDimitry Andric }
81206c3fb27SDimitry Andric 
81306c3fb27SDimitry Andric // Map the uint8_t alloc types (which may contain NotCold|Cold) to the alloc
81406c3fb27SDimitry Andric // type we should actually use on the corresponding allocation.
81506c3fb27SDimitry Andric // If we can't clone a node that has NotCold+Cold alloc type, we will fall
81606c3fb27SDimitry Andric // back to using NotCold. So don't bother cloning to distinguish NotCold+Cold
81706c3fb27SDimitry Andric // from NotCold.
81806c3fb27SDimitry Andric AllocationType allocTypeToUse(uint8_t AllocTypes) {
81906c3fb27SDimitry Andric   assert(AllocTypes != (uint8_t)AllocationType::None);
82006c3fb27SDimitry Andric   if (AllocTypes ==
82106c3fb27SDimitry Andric       ((uint8_t)AllocationType::NotCold | (uint8_t)AllocationType::Cold))
82206c3fb27SDimitry Andric     return AllocationType::NotCold;
82306c3fb27SDimitry Andric   else
82406c3fb27SDimitry Andric     return (AllocationType)AllocTypes;
82506c3fb27SDimitry Andric }
82606c3fb27SDimitry Andric 
82706c3fb27SDimitry Andric // Helper to check if the alloc types for all edges recorded in the
82806c3fb27SDimitry Andric // InAllocTypes vector match the alloc types for all edges in the Edges
82906c3fb27SDimitry Andric // vector.
83006c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
83106c3fb27SDimitry Andric bool allocTypesMatch(
83206c3fb27SDimitry Andric     const std::vector<uint8_t> &InAllocTypes,
83306c3fb27SDimitry Andric     const std::vector<std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>>
83406c3fb27SDimitry Andric         &Edges) {
83506c3fb27SDimitry Andric   return std::equal(
83606c3fb27SDimitry Andric       InAllocTypes.begin(), InAllocTypes.end(), Edges.begin(),
83706c3fb27SDimitry Andric       [](const uint8_t &l,
83806c3fb27SDimitry Andric          const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &r) {
83906c3fb27SDimitry Andric         // Can share if one of the edges is None type - don't
84006c3fb27SDimitry Andric         // care about the type along that edge as it doesn't
84106c3fb27SDimitry Andric         // exist for those context ids.
84206c3fb27SDimitry Andric         if (l == (uint8_t)AllocationType::None ||
84306c3fb27SDimitry Andric             r->AllocTypes == (uint8_t)AllocationType::None)
84406c3fb27SDimitry Andric           return true;
84506c3fb27SDimitry Andric         return allocTypeToUse(l) == allocTypeToUse(r->AllocTypes);
84606c3fb27SDimitry Andric       });
84706c3fb27SDimitry Andric }
84806c3fb27SDimitry Andric 
84906c3fb27SDimitry Andric } // end anonymous namespace
85006c3fb27SDimitry Andric 
85106c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
85206c3fb27SDimitry Andric typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
85306c3fb27SDimitry Andric CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForInst(
85406c3fb27SDimitry Andric     const CallInfo &C) {
85506c3fb27SDimitry Andric   ContextNode *Node = getNodeForAlloc(C);
85606c3fb27SDimitry Andric   if (Node)
85706c3fb27SDimitry Andric     return Node;
85806c3fb27SDimitry Andric 
85906c3fb27SDimitry Andric   return NonAllocationCallToContextNodeMap.lookup(C);
86006c3fb27SDimitry Andric }
86106c3fb27SDimitry Andric 
86206c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
86306c3fb27SDimitry Andric typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
86406c3fb27SDimitry Andric CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForAlloc(
86506c3fb27SDimitry Andric     const CallInfo &C) {
86606c3fb27SDimitry Andric   return AllocationCallToContextNodeMap.lookup(C);
86706c3fb27SDimitry Andric }
86806c3fb27SDimitry Andric 
86906c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
87006c3fb27SDimitry Andric typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
87106c3fb27SDimitry Andric CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForStackId(
87206c3fb27SDimitry Andric     uint64_t StackId) {
87306c3fb27SDimitry Andric   auto StackEntryNode = StackEntryIdToContextNodeMap.find(StackId);
87406c3fb27SDimitry Andric   if (StackEntryNode != StackEntryIdToContextNodeMap.end())
87506c3fb27SDimitry Andric     return StackEntryNode->second;
87606c3fb27SDimitry Andric   return nullptr;
87706c3fb27SDimitry Andric }
87806c3fb27SDimitry Andric 
87906c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
88006c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
88106c3fb27SDimitry Andric     addOrUpdateCallerEdge(ContextNode *Caller, AllocationType AllocType,
88206c3fb27SDimitry Andric                           unsigned int ContextId) {
88306c3fb27SDimitry Andric   for (auto &Edge : CallerEdges) {
88406c3fb27SDimitry Andric     if (Edge->Caller == Caller) {
88506c3fb27SDimitry Andric       Edge->AllocTypes |= (uint8_t)AllocType;
88606c3fb27SDimitry Andric       Edge->getContextIds().insert(ContextId);
88706c3fb27SDimitry Andric       return;
88806c3fb27SDimitry Andric     }
88906c3fb27SDimitry Andric   }
89006c3fb27SDimitry Andric   std::shared_ptr<ContextEdge> Edge = std::make_shared<ContextEdge>(
89106c3fb27SDimitry Andric       this, Caller, (uint8_t)AllocType, DenseSet<uint32_t>({ContextId}));
89206c3fb27SDimitry Andric   CallerEdges.push_back(Edge);
89306c3fb27SDimitry Andric   Caller->CalleeEdges.push_back(Edge);
89406c3fb27SDimitry Andric }
89506c3fb27SDimitry Andric 
89606c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
89706c3fb27SDimitry Andric void CallsiteContextGraph<
89806c3fb27SDimitry Andric     DerivedCCG, FuncTy, CallTy>::removeNoneTypeCalleeEdges(ContextNode *Node) {
89906c3fb27SDimitry Andric   for (auto EI = Node->CalleeEdges.begin(); EI != Node->CalleeEdges.end();) {
90006c3fb27SDimitry Andric     auto Edge = *EI;
90106c3fb27SDimitry Andric     if (Edge->AllocTypes == (uint8_t)AllocationType::None) {
90206c3fb27SDimitry Andric       assert(Edge->ContextIds.empty());
90306c3fb27SDimitry Andric       Edge->Callee->eraseCallerEdge(Edge.get());
90406c3fb27SDimitry Andric       EI = Node->CalleeEdges.erase(EI);
90506c3fb27SDimitry Andric     } else
90606c3fb27SDimitry Andric       ++EI;
90706c3fb27SDimitry Andric   }
90806c3fb27SDimitry Andric }
90906c3fb27SDimitry Andric 
91006c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
91106c3fb27SDimitry Andric typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge *
91206c3fb27SDimitry Andric CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
91306c3fb27SDimitry Andric     findEdgeFromCallee(const ContextNode *Callee) {
91406c3fb27SDimitry Andric   for (const auto &Edge : CalleeEdges)
91506c3fb27SDimitry Andric     if (Edge->Callee == Callee)
91606c3fb27SDimitry Andric       return Edge.get();
91706c3fb27SDimitry Andric   return nullptr;
91806c3fb27SDimitry Andric }
91906c3fb27SDimitry Andric 
92006c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
92106c3fb27SDimitry Andric typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge *
92206c3fb27SDimitry Andric CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
92306c3fb27SDimitry Andric     findEdgeFromCaller(const ContextNode *Caller) {
92406c3fb27SDimitry Andric   for (const auto &Edge : CallerEdges)
92506c3fb27SDimitry Andric     if (Edge->Caller == Caller)
92606c3fb27SDimitry Andric       return Edge.get();
92706c3fb27SDimitry Andric   return nullptr;
92806c3fb27SDimitry Andric }
92906c3fb27SDimitry Andric 
93006c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
93106c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
93206c3fb27SDimitry Andric     eraseCalleeEdge(const ContextEdge *Edge) {
9335f757f3fSDimitry Andric   auto EI = llvm::find_if(
9345f757f3fSDimitry Andric       CalleeEdges, [Edge](const std::shared_ptr<ContextEdge> &CalleeEdge) {
93506c3fb27SDimitry Andric         return CalleeEdge.get() == Edge;
93606c3fb27SDimitry Andric       });
93706c3fb27SDimitry Andric   assert(EI != CalleeEdges.end());
93806c3fb27SDimitry Andric   CalleeEdges.erase(EI);
93906c3fb27SDimitry Andric }
94006c3fb27SDimitry Andric 
94106c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
94206c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
94306c3fb27SDimitry Andric     eraseCallerEdge(const ContextEdge *Edge) {
9445f757f3fSDimitry Andric   auto EI = llvm::find_if(
9455f757f3fSDimitry Andric       CallerEdges, [Edge](const std::shared_ptr<ContextEdge> &CallerEdge) {
94606c3fb27SDimitry Andric         return CallerEdge.get() == Edge;
94706c3fb27SDimitry Andric       });
94806c3fb27SDimitry Andric   assert(EI != CallerEdges.end());
94906c3fb27SDimitry Andric   CallerEdges.erase(EI);
95006c3fb27SDimitry Andric }
95106c3fb27SDimitry Andric 
95206c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
95306c3fb27SDimitry Andric uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::computeAllocType(
95406c3fb27SDimitry Andric     DenseSet<uint32_t> &ContextIds) {
95506c3fb27SDimitry Andric   uint8_t BothTypes =
95606c3fb27SDimitry Andric       (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
95706c3fb27SDimitry Andric   uint8_t AllocType = (uint8_t)AllocationType::None;
95806c3fb27SDimitry Andric   for (auto Id : ContextIds) {
95906c3fb27SDimitry Andric     AllocType |= (uint8_t)ContextIdToAllocationType[Id];
96006c3fb27SDimitry Andric     // Bail early if alloc type reached both, no further refinement.
96106c3fb27SDimitry Andric     if (AllocType == BothTypes)
96206c3fb27SDimitry Andric       return AllocType;
96306c3fb27SDimitry Andric   }
96406c3fb27SDimitry Andric   return AllocType;
96506c3fb27SDimitry Andric }
96606c3fb27SDimitry Andric 
96706c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
96806c3fb27SDimitry Andric uint8_t
96906c3fb27SDimitry Andric CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypesImpl(
97006c3fb27SDimitry Andric     const DenseSet<uint32_t> &Node1Ids, const DenseSet<uint32_t> &Node2Ids) {
97106c3fb27SDimitry Andric   uint8_t BothTypes =
97206c3fb27SDimitry Andric       (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
97306c3fb27SDimitry Andric   uint8_t AllocType = (uint8_t)AllocationType::None;
97406c3fb27SDimitry Andric   for (auto Id : Node1Ids) {
97506c3fb27SDimitry Andric     if (!Node2Ids.count(Id))
97606c3fb27SDimitry Andric       continue;
97706c3fb27SDimitry Andric     AllocType |= (uint8_t)ContextIdToAllocationType[Id];
97806c3fb27SDimitry Andric     // Bail early if alloc type reached both, no further refinement.
97906c3fb27SDimitry Andric     if (AllocType == BothTypes)
98006c3fb27SDimitry Andric       return AllocType;
98106c3fb27SDimitry Andric   }
98206c3fb27SDimitry Andric   return AllocType;
98306c3fb27SDimitry Andric }
98406c3fb27SDimitry Andric 
98506c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
98606c3fb27SDimitry Andric uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypes(
98706c3fb27SDimitry Andric     const DenseSet<uint32_t> &Node1Ids, const DenseSet<uint32_t> &Node2Ids) {
98806c3fb27SDimitry Andric   if (Node1Ids.size() < Node2Ids.size())
98906c3fb27SDimitry Andric     return intersectAllocTypesImpl(Node1Ids, Node2Ids);
99006c3fb27SDimitry Andric   else
99106c3fb27SDimitry Andric     return intersectAllocTypesImpl(Node2Ids, Node1Ids);
99206c3fb27SDimitry Andric }
99306c3fb27SDimitry Andric 
99406c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
99506c3fb27SDimitry Andric typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
99606c3fb27SDimitry Andric CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addAllocNode(
99706c3fb27SDimitry Andric     CallInfo Call, const FuncTy *F) {
99806c3fb27SDimitry Andric   assert(!getNodeForAlloc(Call));
99906c3fb27SDimitry Andric   NodeOwner.push_back(
100006c3fb27SDimitry Andric       std::make_unique<ContextNode>(/*IsAllocation=*/true, Call));
100106c3fb27SDimitry Andric   ContextNode *AllocNode = NodeOwner.back().get();
100206c3fb27SDimitry Andric   AllocationCallToContextNodeMap[Call] = AllocNode;
100306c3fb27SDimitry Andric   NodeToCallingFunc[AllocNode] = F;
100406c3fb27SDimitry Andric   // Use LastContextId as a uniq id for MIB allocation nodes.
100506c3fb27SDimitry Andric   AllocNode->OrigStackOrAllocId = LastContextId;
100606c3fb27SDimitry Andric   // Alloc type should be updated as we add in the MIBs. We should assert
100706c3fb27SDimitry Andric   // afterwards that it is not still None.
100806c3fb27SDimitry Andric   AllocNode->AllocTypes = (uint8_t)AllocationType::None;
100906c3fb27SDimitry Andric 
101006c3fb27SDimitry Andric   return AllocNode;
101106c3fb27SDimitry Andric }
101206c3fb27SDimitry Andric 
1013*0fca6ea1SDimitry Andric static std::string getAllocTypeString(uint8_t AllocTypes) {
1014*0fca6ea1SDimitry Andric   if (!AllocTypes)
1015*0fca6ea1SDimitry Andric     return "None";
1016*0fca6ea1SDimitry Andric   std::string Str;
1017*0fca6ea1SDimitry Andric   if (AllocTypes & (uint8_t)AllocationType::NotCold)
1018*0fca6ea1SDimitry Andric     Str += "NotCold";
1019*0fca6ea1SDimitry Andric   if (AllocTypes & (uint8_t)AllocationType::Cold)
1020*0fca6ea1SDimitry Andric     Str += "Cold";
1021*0fca6ea1SDimitry Andric   return Str;
1022*0fca6ea1SDimitry Andric }
1023*0fca6ea1SDimitry Andric 
102406c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
102506c3fb27SDimitry Andric template <class NodeT, class IteratorT>
102606c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addStackNodesForMIB(
102706c3fb27SDimitry Andric     ContextNode *AllocNode, CallStack<NodeT, IteratorT> &StackContext,
1028*0fca6ea1SDimitry Andric     CallStack<NodeT, IteratorT> &CallsiteContext, AllocationType AllocType,
1029*0fca6ea1SDimitry Andric     uint64_t TotalSize) {
1030*0fca6ea1SDimitry Andric   assert(!MemProfReportHintedSizes || TotalSize > 0);
103106c3fb27SDimitry Andric   // Treating the hot alloc type as NotCold before the disambiguation for "hot"
103206c3fb27SDimitry Andric   // is done.
103306c3fb27SDimitry Andric   if (AllocType == AllocationType::Hot)
103406c3fb27SDimitry Andric     AllocType = AllocationType::NotCold;
103506c3fb27SDimitry Andric 
103606c3fb27SDimitry Andric   ContextIdToAllocationType[++LastContextId] = AllocType;
103706c3fb27SDimitry Andric 
1038*0fca6ea1SDimitry Andric   if (MemProfReportHintedSizes) {
1039*0fca6ea1SDimitry Andric     assert(TotalSize);
1040*0fca6ea1SDimitry Andric     ContextIdToTotalSize[LastContextId] = TotalSize;
1041*0fca6ea1SDimitry Andric   }
1042*0fca6ea1SDimitry Andric 
104306c3fb27SDimitry Andric   // Update alloc type and context ids for this MIB.
104406c3fb27SDimitry Andric   AllocNode->AllocTypes |= (uint8_t)AllocType;
104506c3fb27SDimitry Andric 
104606c3fb27SDimitry Andric   // Now add or update nodes for each stack id in alloc's context.
104706c3fb27SDimitry Andric   // Later when processing the stack ids on non-alloc callsites we will adjust
104806c3fb27SDimitry Andric   // for any inlining in the context.
104906c3fb27SDimitry Andric   ContextNode *PrevNode = AllocNode;
105006c3fb27SDimitry Andric   // Look for recursion (direct recursion should have been collapsed by
105106c3fb27SDimitry Andric   // module summary analysis, here we should just be detecting mutual
105206c3fb27SDimitry Andric   // recursion). Mark these nodes so we don't try to clone.
105306c3fb27SDimitry Andric   SmallSet<uint64_t, 8> StackIdSet;
105406c3fb27SDimitry Andric   // Skip any on the allocation call (inlining).
105506c3fb27SDimitry Andric   for (auto ContextIter = StackContext.beginAfterSharedPrefix(CallsiteContext);
105606c3fb27SDimitry Andric        ContextIter != StackContext.end(); ++ContextIter) {
105706c3fb27SDimitry Andric     auto StackId = getStackId(*ContextIter);
105806c3fb27SDimitry Andric     ContextNode *StackNode = getNodeForStackId(StackId);
105906c3fb27SDimitry Andric     if (!StackNode) {
106006c3fb27SDimitry Andric       NodeOwner.push_back(
106106c3fb27SDimitry Andric           std::make_unique<ContextNode>(/*IsAllocation=*/false));
106206c3fb27SDimitry Andric       StackNode = NodeOwner.back().get();
106306c3fb27SDimitry Andric       StackEntryIdToContextNodeMap[StackId] = StackNode;
106406c3fb27SDimitry Andric       StackNode->OrigStackOrAllocId = StackId;
106506c3fb27SDimitry Andric     }
106606c3fb27SDimitry Andric     auto Ins = StackIdSet.insert(StackId);
106706c3fb27SDimitry Andric     if (!Ins.second)
106806c3fb27SDimitry Andric       StackNode->Recursive = true;
106906c3fb27SDimitry Andric     StackNode->AllocTypes |= (uint8_t)AllocType;
107006c3fb27SDimitry Andric     PrevNode->addOrUpdateCallerEdge(StackNode, AllocType, LastContextId);
107106c3fb27SDimitry Andric     PrevNode = StackNode;
107206c3fb27SDimitry Andric   }
107306c3fb27SDimitry Andric }
107406c3fb27SDimitry Andric 
107506c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
107606c3fb27SDimitry Andric DenseSet<uint32_t>
107706c3fb27SDimitry Andric CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::duplicateContextIds(
107806c3fb27SDimitry Andric     const DenseSet<uint32_t> &StackSequenceContextIds,
107906c3fb27SDimitry Andric     DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds) {
108006c3fb27SDimitry Andric   DenseSet<uint32_t> NewContextIds;
108106c3fb27SDimitry Andric   for (auto OldId : StackSequenceContextIds) {
108206c3fb27SDimitry Andric     NewContextIds.insert(++LastContextId);
108306c3fb27SDimitry Andric     OldToNewContextIds[OldId].insert(LastContextId);
108406c3fb27SDimitry Andric     assert(ContextIdToAllocationType.count(OldId));
108506c3fb27SDimitry Andric     // The new context has the same allocation type as original.
108606c3fb27SDimitry Andric     ContextIdToAllocationType[LastContextId] = ContextIdToAllocationType[OldId];
1087*0fca6ea1SDimitry Andric     // For now set this to 0 so we don't duplicate sizes. Not clear how to divvy
1088*0fca6ea1SDimitry Andric     // up the size. Assume that if we are able to duplicate context ids that we
1089*0fca6ea1SDimitry Andric     // will be able to disambiguate all copies.
1090*0fca6ea1SDimitry Andric     ContextIdToTotalSize[LastContextId] = 0;
109106c3fb27SDimitry Andric   }
109206c3fb27SDimitry Andric   return NewContextIds;
109306c3fb27SDimitry Andric }
109406c3fb27SDimitry Andric 
109506c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
109606c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
109706c3fb27SDimitry Andric     propagateDuplicateContextIds(
109806c3fb27SDimitry Andric         const DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds) {
109906c3fb27SDimitry Andric   // Build a set of duplicated context ids corresponding to the input id set.
110006c3fb27SDimitry Andric   auto GetNewIds = [&OldToNewContextIds](const DenseSet<uint32_t> &ContextIds) {
110106c3fb27SDimitry Andric     DenseSet<uint32_t> NewIds;
110206c3fb27SDimitry Andric     for (auto Id : ContextIds)
110306c3fb27SDimitry Andric       if (auto NewId = OldToNewContextIds.find(Id);
110406c3fb27SDimitry Andric           NewId != OldToNewContextIds.end())
110506c3fb27SDimitry Andric         NewIds.insert(NewId->second.begin(), NewId->second.end());
110606c3fb27SDimitry Andric     return NewIds;
110706c3fb27SDimitry Andric   };
110806c3fb27SDimitry Andric 
110906c3fb27SDimitry Andric   // Recursively update context ids sets along caller edges.
111006c3fb27SDimitry Andric   auto UpdateCallers = [&](ContextNode *Node,
111106c3fb27SDimitry Andric                            DenseSet<const ContextEdge *> &Visited,
111206c3fb27SDimitry Andric                            auto &&UpdateCallers) -> void {
111306c3fb27SDimitry Andric     for (const auto &Edge : Node->CallerEdges) {
111406c3fb27SDimitry Andric       auto Inserted = Visited.insert(Edge.get());
111506c3fb27SDimitry Andric       if (!Inserted.second)
111606c3fb27SDimitry Andric         continue;
111706c3fb27SDimitry Andric       ContextNode *NextNode = Edge->Caller;
111806c3fb27SDimitry Andric       DenseSet<uint32_t> NewIdsToAdd = GetNewIds(Edge->getContextIds());
111906c3fb27SDimitry Andric       // Only need to recursively iterate to NextNode via this caller edge if
112006c3fb27SDimitry Andric       // it resulted in any added ids to NextNode.
112106c3fb27SDimitry Andric       if (!NewIdsToAdd.empty()) {
112206c3fb27SDimitry Andric         Edge->getContextIds().insert(NewIdsToAdd.begin(), NewIdsToAdd.end());
112306c3fb27SDimitry Andric         UpdateCallers(NextNode, Visited, UpdateCallers);
112406c3fb27SDimitry Andric       }
112506c3fb27SDimitry Andric     }
112606c3fb27SDimitry Andric   };
112706c3fb27SDimitry Andric 
112806c3fb27SDimitry Andric   DenseSet<const ContextEdge *> Visited;
112906c3fb27SDimitry Andric   for (auto &Entry : AllocationCallToContextNodeMap) {
113006c3fb27SDimitry Andric     auto *Node = Entry.second;
113106c3fb27SDimitry Andric     UpdateCallers(Node, Visited, UpdateCallers);
113206c3fb27SDimitry Andric   }
113306c3fb27SDimitry Andric }
113406c3fb27SDimitry Andric 
113506c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
113606c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::connectNewNode(
1137*0fca6ea1SDimitry Andric     ContextNode *NewNode, ContextNode *OrigNode, bool TowardsCallee,
1138*0fca6ea1SDimitry Andric     // This must be passed by value to make a copy since it will be adjusted
1139*0fca6ea1SDimitry Andric     // as ids are moved.
1140*0fca6ea1SDimitry Andric     DenseSet<uint32_t> RemainingContextIds) {
114106c3fb27SDimitry Andric   auto &OrigEdges =
114206c3fb27SDimitry Andric       TowardsCallee ? OrigNode->CalleeEdges : OrigNode->CallerEdges;
114306c3fb27SDimitry Andric   // Increment iterator in loop so that we can remove edges as needed.
114406c3fb27SDimitry Andric   for (auto EI = OrigEdges.begin(); EI != OrigEdges.end();) {
114506c3fb27SDimitry Andric     auto Edge = *EI;
114606c3fb27SDimitry Andric     // Remove any matching context ids from Edge, return set that were found and
114706c3fb27SDimitry Andric     // removed, these are the new edge's context ids. Also update the remaining
114806c3fb27SDimitry Andric     // (not found ids).
114906c3fb27SDimitry Andric     DenseSet<uint32_t> NewEdgeContextIds, NotFoundContextIds;
115006c3fb27SDimitry Andric     set_subtract(Edge->getContextIds(), RemainingContextIds, NewEdgeContextIds,
115106c3fb27SDimitry Andric                  NotFoundContextIds);
115206c3fb27SDimitry Andric     RemainingContextIds.swap(NotFoundContextIds);
115306c3fb27SDimitry Andric     // If no matching context ids for this edge, skip it.
115406c3fb27SDimitry Andric     if (NewEdgeContextIds.empty()) {
115506c3fb27SDimitry Andric       ++EI;
115606c3fb27SDimitry Andric       continue;
115706c3fb27SDimitry Andric     }
115806c3fb27SDimitry Andric     if (TowardsCallee) {
1159*0fca6ea1SDimitry Andric       uint8_t NewAllocType = computeAllocType(NewEdgeContextIds);
116006c3fb27SDimitry Andric       auto NewEdge = std::make_shared<ContextEdge>(
1161*0fca6ea1SDimitry Andric           Edge->Callee, NewNode, NewAllocType, std::move(NewEdgeContextIds));
116206c3fb27SDimitry Andric       NewNode->CalleeEdges.push_back(NewEdge);
116306c3fb27SDimitry Andric       NewEdge->Callee->CallerEdges.push_back(NewEdge);
116406c3fb27SDimitry Andric     } else {
1165*0fca6ea1SDimitry Andric       uint8_t NewAllocType = computeAllocType(NewEdgeContextIds);
116606c3fb27SDimitry Andric       auto NewEdge = std::make_shared<ContextEdge>(
1167*0fca6ea1SDimitry Andric           NewNode, Edge->Caller, NewAllocType, std::move(NewEdgeContextIds));
116806c3fb27SDimitry Andric       NewNode->CallerEdges.push_back(NewEdge);
116906c3fb27SDimitry Andric       NewEdge->Caller->CalleeEdges.push_back(NewEdge);
117006c3fb27SDimitry Andric     }
117106c3fb27SDimitry Andric     // Remove old edge if context ids empty.
117206c3fb27SDimitry Andric     if (Edge->getContextIds().empty()) {
117306c3fb27SDimitry Andric       if (TowardsCallee) {
117406c3fb27SDimitry Andric         Edge->Callee->eraseCallerEdge(Edge.get());
117506c3fb27SDimitry Andric         EI = OrigNode->CalleeEdges.erase(EI);
117606c3fb27SDimitry Andric       } else {
117706c3fb27SDimitry Andric         Edge->Caller->eraseCalleeEdge(Edge.get());
117806c3fb27SDimitry Andric         EI = OrigNode->CallerEdges.erase(EI);
117906c3fb27SDimitry Andric       }
118006c3fb27SDimitry Andric       continue;
118106c3fb27SDimitry Andric     }
118206c3fb27SDimitry Andric     ++EI;
118306c3fb27SDimitry Andric   }
118406c3fb27SDimitry Andric }
118506c3fb27SDimitry Andric 
118606c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
1187*0fca6ea1SDimitry Andric static void checkEdge(
1188*0fca6ea1SDimitry Andric     const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &Edge) {
1189*0fca6ea1SDimitry Andric   // Confirm that alloc type is not None and that we have at least one context
1190*0fca6ea1SDimitry Andric   // id.
1191*0fca6ea1SDimitry Andric   assert(Edge->AllocTypes != (uint8_t)AllocationType::None);
1192*0fca6ea1SDimitry Andric   assert(!Edge->ContextIds.empty());
1193*0fca6ea1SDimitry Andric }
1194*0fca6ea1SDimitry Andric 
1195*0fca6ea1SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
1196*0fca6ea1SDimitry Andric static void checkNode(const ContextNode<DerivedCCG, FuncTy, CallTy> *Node,
1197*0fca6ea1SDimitry Andric                       bool CheckEdges = true) {
1198*0fca6ea1SDimitry Andric   if (Node->isRemoved())
1199*0fca6ea1SDimitry Andric     return;
1200*0fca6ea1SDimitry Andric #ifndef NDEBUG
1201*0fca6ea1SDimitry Andric   // Compute node's context ids once for use in asserts.
1202*0fca6ea1SDimitry Andric   auto NodeContextIds = Node->getContextIds();
1203*0fca6ea1SDimitry Andric #endif
1204*0fca6ea1SDimitry Andric   // Node's context ids should be the union of both its callee and caller edge
1205*0fca6ea1SDimitry Andric   // context ids.
1206*0fca6ea1SDimitry Andric   if (Node->CallerEdges.size()) {
1207*0fca6ea1SDimitry Andric     DenseSet<uint32_t> CallerEdgeContextIds(
1208*0fca6ea1SDimitry Andric         Node->CallerEdges.front()->ContextIds);
1209*0fca6ea1SDimitry Andric     for (const auto &Edge : llvm::drop_begin(Node->CallerEdges)) {
1210*0fca6ea1SDimitry Andric       if (CheckEdges)
1211*0fca6ea1SDimitry Andric         checkEdge<DerivedCCG, FuncTy, CallTy>(Edge);
1212*0fca6ea1SDimitry Andric       set_union(CallerEdgeContextIds, Edge->ContextIds);
1213*0fca6ea1SDimitry Andric     }
1214*0fca6ea1SDimitry Andric     // Node can have more context ids than callers if some contexts terminate at
1215*0fca6ea1SDimitry Andric     // node and some are longer.
1216*0fca6ea1SDimitry Andric     assert(NodeContextIds == CallerEdgeContextIds ||
1217*0fca6ea1SDimitry Andric            set_is_subset(CallerEdgeContextIds, NodeContextIds));
1218*0fca6ea1SDimitry Andric   }
1219*0fca6ea1SDimitry Andric   if (Node->CalleeEdges.size()) {
1220*0fca6ea1SDimitry Andric     DenseSet<uint32_t> CalleeEdgeContextIds(
1221*0fca6ea1SDimitry Andric         Node->CalleeEdges.front()->ContextIds);
1222*0fca6ea1SDimitry Andric     for (const auto &Edge : llvm::drop_begin(Node->CalleeEdges)) {
1223*0fca6ea1SDimitry Andric       if (CheckEdges)
1224*0fca6ea1SDimitry Andric         checkEdge<DerivedCCG, FuncTy, CallTy>(Edge);
1225*0fca6ea1SDimitry Andric       set_union(CalleeEdgeContextIds, Edge->getContextIds());
1226*0fca6ea1SDimitry Andric     }
1227*0fca6ea1SDimitry Andric     assert(NodeContextIds == CalleeEdgeContextIds);
1228*0fca6ea1SDimitry Andric   }
1229*0fca6ea1SDimitry Andric }
1230*0fca6ea1SDimitry Andric 
1231*0fca6ea1SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
123206c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
123306c3fb27SDimitry Andric     assignStackNodesPostOrder(ContextNode *Node,
123406c3fb27SDimitry Andric                               DenseSet<const ContextNode *> &Visited,
123506c3fb27SDimitry Andric                               DenseMap<uint64_t, std::vector<CallContextInfo>>
123606c3fb27SDimitry Andric                                   &StackIdToMatchingCalls) {
123706c3fb27SDimitry Andric   auto Inserted = Visited.insert(Node);
123806c3fb27SDimitry Andric   if (!Inserted.second)
123906c3fb27SDimitry Andric     return;
124006c3fb27SDimitry Andric   // Post order traversal. Iterate over a copy since we may add nodes and
124106c3fb27SDimitry Andric   // therefore new callers during the recursive call, invalidating any
124206c3fb27SDimitry Andric   // iterator over the original edge vector. We don't need to process these
124306c3fb27SDimitry Andric   // new nodes as they were already processed on creation.
124406c3fb27SDimitry Andric   auto CallerEdges = Node->CallerEdges;
124506c3fb27SDimitry Andric   for (auto &Edge : CallerEdges) {
124606c3fb27SDimitry Andric     // Skip any that have been removed during the recursion.
124706c3fb27SDimitry Andric     if (!Edge)
124806c3fb27SDimitry Andric       continue;
124906c3fb27SDimitry Andric     assignStackNodesPostOrder(Edge->Caller, Visited, StackIdToMatchingCalls);
125006c3fb27SDimitry Andric   }
125106c3fb27SDimitry Andric 
125206c3fb27SDimitry Andric   // If this node's stack id is in the map, update the graph to contain new
125306c3fb27SDimitry Andric   // nodes representing any inlining at interior callsites. Note we move the
125406c3fb27SDimitry Andric   // associated context ids over to the new nodes.
125506c3fb27SDimitry Andric 
125606c3fb27SDimitry Andric   // Ignore this node if it is for an allocation or we didn't record any
125706c3fb27SDimitry Andric   // stack id lists ending at it.
125806c3fb27SDimitry Andric   if (Node->IsAllocation ||
125906c3fb27SDimitry Andric       !StackIdToMatchingCalls.count(Node->OrigStackOrAllocId))
126006c3fb27SDimitry Andric     return;
126106c3fb27SDimitry Andric 
126206c3fb27SDimitry Andric   auto &Calls = StackIdToMatchingCalls[Node->OrigStackOrAllocId];
126306c3fb27SDimitry Andric   // Handle the simple case first. A single call with a single stack id.
126406c3fb27SDimitry Andric   // In this case there is no need to create any new context nodes, simply
126506c3fb27SDimitry Andric   // assign the context node for stack id to this Call.
126606c3fb27SDimitry Andric   if (Calls.size() == 1) {
126706c3fb27SDimitry Andric     auto &[Call, Ids, Func, SavedContextIds] = Calls[0];
126806c3fb27SDimitry Andric     if (Ids.size() == 1) {
126906c3fb27SDimitry Andric       assert(SavedContextIds.empty());
127006c3fb27SDimitry Andric       // It should be this Node
127106c3fb27SDimitry Andric       assert(Node == getNodeForStackId(Ids[0]));
127206c3fb27SDimitry Andric       if (Node->Recursive)
127306c3fb27SDimitry Andric         return;
127406c3fb27SDimitry Andric       Node->setCall(Call);
127506c3fb27SDimitry Andric       NonAllocationCallToContextNodeMap[Call] = Node;
127606c3fb27SDimitry Andric       NodeToCallingFunc[Node] = Func;
127706c3fb27SDimitry Andric       return;
127806c3fb27SDimitry Andric     }
127906c3fb27SDimitry Andric   }
128006c3fb27SDimitry Andric 
128106c3fb27SDimitry Andric   // Find the node for the last stack id, which should be the same
128206c3fb27SDimitry Andric   // across all calls recorded for this id, and is this node's id.
128306c3fb27SDimitry Andric   uint64_t LastId = Node->OrigStackOrAllocId;
128406c3fb27SDimitry Andric   ContextNode *LastNode = getNodeForStackId(LastId);
128506c3fb27SDimitry Andric   // We should only have kept stack ids that had nodes.
128606c3fb27SDimitry Andric   assert(LastNode);
128706c3fb27SDimitry Andric 
128806c3fb27SDimitry Andric   for (unsigned I = 0; I < Calls.size(); I++) {
128906c3fb27SDimitry Andric     auto &[Call, Ids, Func, SavedContextIds] = Calls[I];
129006c3fb27SDimitry Andric     // Skip any for which we didn't assign any ids, these don't get a node in
129106c3fb27SDimitry Andric     // the graph.
129206c3fb27SDimitry Andric     if (SavedContextIds.empty())
129306c3fb27SDimitry Andric       continue;
129406c3fb27SDimitry Andric 
129506c3fb27SDimitry Andric     assert(LastId == Ids.back());
129606c3fb27SDimitry Andric 
129706c3fb27SDimitry Andric     ContextNode *FirstNode = getNodeForStackId(Ids[0]);
129806c3fb27SDimitry Andric     assert(FirstNode);
129906c3fb27SDimitry Andric 
130006c3fb27SDimitry Andric     // Recompute the context ids for this stack id sequence (the
130106c3fb27SDimitry Andric     // intersection of the context ids of the corresponding nodes).
130206c3fb27SDimitry Andric     // Start with the ids we saved in the map for this call, which could be
130306c3fb27SDimitry Andric     // duplicated context ids. We have to recompute as we might have overlap
130406c3fb27SDimitry Andric     // overlap between the saved context ids for different last nodes, and
130506c3fb27SDimitry Andric     // removed them already during the post order traversal.
1306*0fca6ea1SDimitry Andric     set_intersect(SavedContextIds, FirstNode->getContextIds());
130706c3fb27SDimitry Andric     ContextNode *PrevNode = nullptr;
130806c3fb27SDimitry Andric     for (auto Id : Ids) {
130906c3fb27SDimitry Andric       ContextNode *CurNode = getNodeForStackId(Id);
131006c3fb27SDimitry Andric       // We should only have kept stack ids that had nodes and weren't
131106c3fb27SDimitry Andric       // recursive.
131206c3fb27SDimitry Andric       assert(CurNode);
131306c3fb27SDimitry Andric       assert(!CurNode->Recursive);
131406c3fb27SDimitry Andric       if (!PrevNode) {
131506c3fb27SDimitry Andric         PrevNode = CurNode;
131606c3fb27SDimitry Andric         continue;
131706c3fb27SDimitry Andric       }
131806c3fb27SDimitry Andric       auto *Edge = CurNode->findEdgeFromCallee(PrevNode);
131906c3fb27SDimitry Andric       if (!Edge) {
132006c3fb27SDimitry Andric         SavedContextIds.clear();
132106c3fb27SDimitry Andric         break;
132206c3fb27SDimitry Andric       }
132306c3fb27SDimitry Andric       PrevNode = CurNode;
132406c3fb27SDimitry Andric       set_intersect(SavedContextIds, Edge->getContextIds());
132506c3fb27SDimitry Andric 
132606c3fb27SDimitry Andric       // If we now have no context ids for clone, skip this call.
132706c3fb27SDimitry Andric       if (SavedContextIds.empty())
132806c3fb27SDimitry Andric         break;
132906c3fb27SDimitry Andric     }
133006c3fb27SDimitry Andric     if (SavedContextIds.empty())
133106c3fb27SDimitry Andric       continue;
133206c3fb27SDimitry Andric 
133306c3fb27SDimitry Andric     // Create new context node.
133406c3fb27SDimitry Andric     NodeOwner.push_back(
133506c3fb27SDimitry Andric         std::make_unique<ContextNode>(/*IsAllocation=*/false, Call));
133606c3fb27SDimitry Andric     ContextNode *NewNode = NodeOwner.back().get();
133706c3fb27SDimitry Andric     NodeToCallingFunc[NewNode] = Func;
133806c3fb27SDimitry Andric     NonAllocationCallToContextNodeMap[Call] = NewNode;
1339*0fca6ea1SDimitry Andric     NewNode->AllocTypes = computeAllocType(SavedContextIds);
134006c3fb27SDimitry Andric 
134106c3fb27SDimitry Andric     // Connect to callees of innermost stack frame in inlined call chain.
134206c3fb27SDimitry Andric     // This updates context ids for FirstNode's callee's to reflect those
134306c3fb27SDimitry Andric     // moved to NewNode.
1344*0fca6ea1SDimitry Andric     connectNewNode(NewNode, FirstNode, /*TowardsCallee=*/true, SavedContextIds);
134506c3fb27SDimitry Andric 
134606c3fb27SDimitry Andric     // Connect to callers of outermost stack frame in inlined call chain.
134706c3fb27SDimitry Andric     // This updates context ids for FirstNode's caller's to reflect those
134806c3fb27SDimitry Andric     // moved to NewNode.
1349*0fca6ea1SDimitry Andric     connectNewNode(NewNode, LastNode, /*TowardsCallee=*/false, SavedContextIds);
135006c3fb27SDimitry Andric 
135106c3fb27SDimitry Andric     // Now we need to remove context ids from edges/nodes between First and
135206c3fb27SDimitry Andric     // Last Node.
135306c3fb27SDimitry Andric     PrevNode = nullptr;
135406c3fb27SDimitry Andric     for (auto Id : Ids) {
135506c3fb27SDimitry Andric       ContextNode *CurNode = getNodeForStackId(Id);
135606c3fb27SDimitry Andric       // We should only have kept stack ids that had nodes.
135706c3fb27SDimitry Andric       assert(CurNode);
135806c3fb27SDimitry Andric 
135906c3fb27SDimitry Andric       // Remove the context ids moved to NewNode from CurNode, and the
136006c3fb27SDimitry Andric       // edge from the prior node.
136106c3fb27SDimitry Andric       if (PrevNode) {
136206c3fb27SDimitry Andric         auto *PrevEdge = CurNode->findEdgeFromCallee(PrevNode);
136306c3fb27SDimitry Andric         assert(PrevEdge);
1364*0fca6ea1SDimitry Andric         set_subtract(PrevEdge->getContextIds(), SavedContextIds);
136506c3fb27SDimitry Andric         if (PrevEdge->getContextIds().empty()) {
136606c3fb27SDimitry Andric           PrevNode->eraseCallerEdge(PrevEdge);
136706c3fb27SDimitry Andric           CurNode->eraseCalleeEdge(PrevEdge);
136806c3fb27SDimitry Andric         }
136906c3fb27SDimitry Andric       }
1370*0fca6ea1SDimitry Andric       // Since we update the edges from leaf to tail, only look at the callee
1371*0fca6ea1SDimitry Andric       // edges. This isn't an alloc node, so if there are no callee edges, the
1372*0fca6ea1SDimitry Andric       // alloc type is None.
1373*0fca6ea1SDimitry Andric       CurNode->AllocTypes = CurNode->CalleeEdges.empty()
1374*0fca6ea1SDimitry Andric                                 ? (uint8_t)AllocationType::None
1375*0fca6ea1SDimitry Andric                                 : CurNode->computeAllocType();
137606c3fb27SDimitry Andric       PrevNode = CurNode;
137706c3fb27SDimitry Andric     }
1378*0fca6ea1SDimitry Andric     if (VerifyNodes) {
1379*0fca6ea1SDimitry Andric       checkNode<DerivedCCG, FuncTy, CallTy>(NewNode, /*CheckEdges=*/true);
1380*0fca6ea1SDimitry Andric       for (auto Id : Ids) {
1381*0fca6ea1SDimitry Andric         ContextNode *CurNode = getNodeForStackId(Id);
1382*0fca6ea1SDimitry Andric         // We should only have kept stack ids that had nodes.
1383*0fca6ea1SDimitry Andric         assert(CurNode);
1384*0fca6ea1SDimitry Andric         checkNode<DerivedCCG, FuncTy, CallTy>(CurNode, /*CheckEdges=*/true);
1385*0fca6ea1SDimitry Andric       }
1386*0fca6ea1SDimitry Andric     }
138706c3fb27SDimitry Andric   }
138806c3fb27SDimitry Andric }
138906c3fb27SDimitry Andric 
139006c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
139106c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
139206c3fb27SDimitry Andric   // Map of stack id to all calls with that as the last (outermost caller)
139306c3fb27SDimitry Andric   // callsite id that has a context node (some might not due to pruning
139406c3fb27SDimitry Andric   // performed during matching of the allocation profile contexts).
139506c3fb27SDimitry Andric   // The CallContextInfo contains the Call and a list of its stack ids with
139606c3fb27SDimitry Andric   // ContextNodes, the function containing Call, and the set of context ids
139706c3fb27SDimitry Andric   // the analysis will eventually identify for use in any new node created
139806c3fb27SDimitry Andric   // for that callsite.
139906c3fb27SDimitry Andric   DenseMap<uint64_t, std::vector<CallContextInfo>> StackIdToMatchingCalls;
140006c3fb27SDimitry Andric   for (auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) {
140106c3fb27SDimitry Andric     for (auto &Call : CallsWithMetadata) {
140206c3fb27SDimitry Andric       // Ignore allocations, already handled.
140306c3fb27SDimitry Andric       if (AllocationCallToContextNodeMap.count(Call))
140406c3fb27SDimitry Andric         continue;
140506c3fb27SDimitry Andric       auto StackIdsWithContextNodes =
140606c3fb27SDimitry Andric           getStackIdsWithContextNodesForCall(Call.call());
140706c3fb27SDimitry Andric       // If there were no nodes created for MIBs on allocs (maybe this was in
140806c3fb27SDimitry Andric       // the unambiguous part of the MIB stack that was pruned), ignore.
140906c3fb27SDimitry Andric       if (StackIdsWithContextNodes.empty())
141006c3fb27SDimitry Andric         continue;
141106c3fb27SDimitry Andric       // Otherwise, record this Call along with the list of ids for the last
141206c3fb27SDimitry Andric       // (outermost caller) stack id with a node.
141306c3fb27SDimitry Andric       StackIdToMatchingCalls[StackIdsWithContextNodes.back()].push_back(
141406c3fb27SDimitry Andric           {Call.call(), StackIdsWithContextNodes, Func, {}});
141506c3fb27SDimitry Andric     }
141606c3fb27SDimitry Andric   }
141706c3fb27SDimitry Andric 
141806c3fb27SDimitry Andric   // First make a pass through all stack ids that correspond to a call,
141906c3fb27SDimitry Andric   // as identified in the above loop. Compute the context ids corresponding to
142006c3fb27SDimitry Andric   // each of these calls when they correspond to multiple stack ids due to
142106c3fb27SDimitry Andric   // due to inlining. Perform any duplication of context ids required when
142206c3fb27SDimitry Andric   // there is more than one call with the same stack ids. Their (possibly newly
142306c3fb27SDimitry Andric   // duplicated) context ids are saved in the StackIdToMatchingCalls map.
142406c3fb27SDimitry Andric   DenseMap<uint32_t, DenseSet<uint32_t>> OldToNewContextIds;
142506c3fb27SDimitry Andric   for (auto &It : StackIdToMatchingCalls) {
142606c3fb27SDimitry Andric     auto &Calls = It.getSecond();
142706c3fb27SDimitry Andric     // Skip single calls with a single stack id. These don't need a new node.
142806c3fb27SDimitry Andric     if (Calls.size() == 1) {
142906c3fb27SDimitry Andric       auto &Ids = std::get<1>(Calls[0]);
143006c3fb27SDimitry Andric       if (Ids.size() == 1)
143106c3fb27SDimitry Andric         continue;
143206c3fb27SDimitry Andric     }
143306c3fb27SDimitry Andric     // In order to do the best and maximal matching of inlined calls to context
143406c3fb27SDimitry Andric     // node sequences we will sort the vectors of stack ids in descending order
143506c3fb27SDimitry Andric     // of length, and within each length, lexicographically by stack id. The
143606c3fb27SDimitry Andric     // latter is so that we can specially handle calls that have identical stack
143706c3fb27SDimitry Andric     // id sequences (either due to cloning or artificially because of the MIB
143806c3fb27SDimitry Andric     // context pruning).
143906c3fb27SDimitry Andric     std::stable_sort(Calls.begin(), Calls.end(),
144006c3fb27SDimitry Andric                      [](const CallContextInfo &A, const CallContextInfo &B) {
144106c3fb27SDimitry Andric                        auto &IdsA = std::get<1>(A);
144206c3fb27SDimitry Andric                        auto &IdsB = std::get<1>(B);
144306c3fb27SDimitry Andric                        return IdsA.size() > IdsB.size() ||
144406c3fb27SDimitry Andric                               (IdsA.size() == IdsB.size() && IdsA < IdsB);
144506c3fb27SDimitry Andric                      });
144606c3fb27SDimitry Andric 
144706c3fb27SDimitry Andric     // Find the node for the last stack id, which should be the same
144806c3fb27SDimitry Andric     // across all calls recorded for this id, and is the id for this
144906c3fb27SDimitry Andric     // entry in the StackIdToMatchingCalls map.
145006c3fb27SDimitry Andric     uint64_t LastId = It.getFirst();
145106c3fb27SDimitry Andric     ContextNode *LastNode = getNodeForStackId(LastId);
145206c3fb27SDimitry Andric     // We should only have kept stack ids that had nodes.
145306c3fb27SDimitry Andric     assert(LastNode);
145406c3fb27SDimitry Andric 
145506c3fb27SDimitry Andric     if (LastNode->Recursive)
145606c3fb27SDimitry Andric       continue;
145706c3fb27SDimitry Andric 
145806c3fb27SDimitry Andric     // Initialize the context ids with the last node's. We will subsequently
145906c3fb27SDimitry Andric     // refine the context ids by computing the intersection along all edges.
1460*0fca6ea1SDimitry Andric     DenseSet<uint32_t> LastNodeContextIds = LastNode->getContextIds();
146106c3fb27SDimitry Andric     assert(!LastNodeContextIds.empty());
146206c3fb27SDimitry Andric 
146306c3fb27SDimitry Andric     for (unsigned I = 0; I < Calls.size(); I++) {
146406c3fb27SDimitry Andric       auto &[Call, Ids, Func, SavedContextIds] = Calls[I];
146506c3fb27SDimitry Andric       assert(SavedContextIds.empty());
146606c3fb27SDimitry Andric       assert(LastId == Ids.back());
146706c3fb27SDimitry Andric 
146806c3fb27SDimitry Andric       // First compute the context ids for this stack id sequence (the
146906c3fb27SDimitry Andric       // intersection of the context ids of the corresponding nodes).
147006c3fb27SDimitry Andric       // Start with the remaining saved ids for the last node.
147106c3fb27SDimitry Andric       assert(!LastNodeContextIds.empty());
147206c3fb27SDimitry Andric       DenseSet<uint32_t> StackSequenceContextIds = LastNodeContextIds;
147306c3fb27SDimitry Andric 
147406c3fb27SDimitry Andric       ContextNode *PrevNode = LastNode;
147506c3fb27SDimitry Andric       ContextNode *CurNode = LastNode;
147606c3fb27SDimitry Andric       bool Skip = false;
147706c3fb27SDimitry Andric 
147806c3fb27SDimitry Andric       // Iterate backwards through the stack Ids, starting after the last Id
147906c3fb27SDimitry Andric       // in the list, which was handled once outside for all Calls.
148006c3fb27SDimitry Andric       for (auto IdIter = Ids.rbegin() + 1; IdIter != Ids.rend(); IdIter++) {
148106c3fb27SDimitry Andric         auto Id = *IdIter;
148206c3fb27SDimitry Andric         CurNode = getNodeForStackId(Id);
148306c3fb27SDimitry Andric         // We should only have kept stack ids that had nodes.
148406c3fb27SDimitry Andric         assert(CurNode);
148506c3fb27SDimitry Andric 
148606c3fb27SDimitry Andric         if (CurNode->Recursive) {
148706c3fb27SDimitry Andric           Skip = true;
148806c3fb27SDimitry Andric           break;
148906c3fb27SDimitry Andric         }
149006c3fb27SDimitry Andric 
149106c3fb27SDimitry Andric         auto *Edge = CurNode->findEdgeFromCaller(PrevNode);
149206c3fb27SDimitry Andric         // If there is no edge then the nodes belong to different MIB contexts,
149306c3fb27SDimitry Andric         // and we should skip this inlined context sequence. For example, this
149406c3fb27SDimitry Andric         // particular inlined context may include stack ids A->B, and we may
149506c3fb27SDimitry Andric         // indeed have nodes for both A and B, but it is possible that they were
149606c3fb27SDimitry Andric         // never profiled in sequence in a single MIB for any allocation (i.e.
149706c3fb27SDimitry Andric         // we might have profiled an allocation that involves the callsite A,
149806c3fb27SDimitry Andric         // but through a different one of its callee callsites, and we might
149906c3fb27SDimitry Andric         // have profiled an allocation that involves callsite B, but reached
150006c3fb27SDimitry Andric         // from a different caller callsite).
150106c3fb27SDimitry Andric         if (!Edge) {
150206c3fb27SDimitry Andric           Skip = true;
150306c3fb27SDimitry Andric           break;
150406c3fb27SDimitry Andric         }
150506c3fb27SDimitry Andric         PrevNode = CurNode;
150606c3fb27SDimitry Andric 
150706c3fb27SDimitry Andric         // Update the context ids, which is the intersection of the ids along
150806c3fb27SDimitry Andric         // all edges in the sequence.
150906c3fb27SDimitry Andric         set_intersect(StackSequenceContextIds, Edge->getContextIds());
151006c3fb27SDimitry Andric 
151106c3fb27SDimitry Andric         // If we now have no context ids for clone, skip this call.
151206c3fb27SDimitry Andric         if (StackSequenceContextIds.empty()) {
151306c3fb27SDimitry Andric           Skip = true;
151406c3fb27SDimitry Andric           break;
151506c3fb27SDimitry Andric         }
151606c3fb27SDimitry Andric       }
151706c3fb27SDimitry Andric       if (Skip)
151806c3fb27SDimitry Andric         continue;
151906c3fb27SDimitry Andric 
152006c3fb27SDimitry Andric       // If some of this call's stack ids did not have corresponding nodes (due
152106c3fb27SDimitry Andric       // to pruning), don't include any context ids for contexts that extend
152206c3fb27SDimitry Andric       // beyond these nodes. Otherwise we would be matching part of unrelated /
152306c3fb27SDimitry Andric       // not fully matching stack contexts. To do this, subtract any context ids
152406c3fb27SDimitry Andric       // found in caller nodes of the last node found above.
152506c3fb27SDimitry Andric       if (Ids.back() != getLastStackId(Call)) {
1526*0fca6ea1SDimitry Andric         for (const auto &PE : LastNode->CallerEdges) {
152706c3fb27SDimitry Andric           set_subtract(StackSequenceContextIds, PE->getContextIds());
152806c3fb27SDimitry Andric           if (StackSequenceContextIds.empty())
152906c3fb27SDimitry Andric             break;
153006c3fb27SDimitry Andric         }
153106c3fb27SDimitry Andric         // If we now have no context ids for clone, skip this call.
153206c3fb27SDimitry Andric         if (StackSequenceContextIds.empty())
153306c3fb27SDimitry Andric           continue;
153406c3fb27SDimitry Andric       }
153506c3fb27SDimitry Andric 
153606c3fb27SDimitry Andric       // Check if the next set of stack ids is the same (since the Calls vector
153706c3fb27SDimitry Andric       // of tuples is sorted by the stack ids we can just look at the next one).
153806c3fb27SDimitry Andric       bool DuplicateContextIds = false;
153906c3fb27SDimitry Andric       if (I + 1 < Calls.size()) {
154006c3fb27SDimitry Andric         auto NextIds = std::get<1>(Calls[I + 1]);
154106c3fb27SDimitry Andric         DuplicateContextIds = Ids == NextIds;
154206c3fb27SDimitry Andric       }
154306c3fb27SDimitry Andric 
154406c3fb27SDimitry Andric       // If we don't have duplicate context ids, then we can assign all the
154506c3fb27SDimitry Andric       // context ids computed for the original node sequence to this call.
154606c3fb27SDimitry Andric       // If there are duplicate calls with the same stack ids then we synthesize
154706c3fb27SDimitry Andric       // new context ids that are duplicates of the originals. These are
154806c3fb27SDimitry Andric       // assigned to SavedContextIds, which is a reference into the map entry
154906c3fb27SDimitry Andric       // for this call, allowing us to access these ids later on.
155006c3fb27SDimitry Andric       OldToNewContextIds.reserve(OldToNewContextIds.size() +
155106c3fb27SDimitry Andric                                  StackSequenceContextIds.size());
155206c3fb27SDimitry Andric       SavedContextIds =
155306c3fb27SDimitry Andric           DuplicateContextIds
155406c3fb27SDimitry Andric               ? duplicateContextIds(StackSequenceContextIds, OldToNewContextIds)
155506c3fb27SDimitry Andric               : StackSequenceContextIds;
155606c3fb27SDimitry Andric       assert(!SavedContextIds.empty());
155706c3fb27SDimitry Andric 
155806c3fb27SDimitry Andric       if (!DuplicateContextIds) {
155906c3fb27SDimitry Andric         // Update saved last node's context ids to remove those that are
156006c3fb27SDimitry Andric         // assigned to other calls, so that it is ready for the next call at
156106c3fb27SDimitry Andric         // this stack id.
156206c3fb27SDimitry Andric         set_subtract(LastNodeContextIds, StackSequenceContextIds);
156306c3fb27SDimitry Andric         if (LastNodeContextIds.empty())
156406c3fb27SDimitry Andric           break;
156506c3fb27SDimitry Andric       }
156606c3fb27SDimitry Andric     }
156706c3fb27SDimitry Andric   }
156806c3fb27SDimitry Andric 
156906c3fb27SDimitry Andric   // Propagate the duplicate context ids over the graph.
157006c3fb27SDimitry Andric   propagateDuplicateContextIds(OldToNewContextIds);
157106c3fb27SDimitry Andric 
157206c3fb27SDimitry Andric   if (VerifyCCG)
157306c3fb27SDimitry Andric     check();
157406c3fb27SDimitry Andric 
157506c3fb27SDimitry Andric   // Now perform a post-order traversal over the graph, starting with the
157606c3fb27SDimitry Andric   // allocation nodes, essentially processing nodes from callers to callees.
157706c3fb27SDimitry Andric   // For any that contains an id in the map, update the graph to contain new
157806c3fb27SDimitry Andric   // nodes representing any inlining at interior callsites. Note we move the
157906c3fb27SDimitry Andric   // associated context ids over to the new nodes.
158006c3fb27SDimitry Andric   DenseSet<const ContextNode *> Visited;
158106c3fb27SDimitry Andric   for (auto &Entry : AllocationCallToContextNodeMap)
158206c3fb27SDimitry Andric     assignStackNodesPostOrder(Entry.second, Visited, StackIdToMatchingCalls);
1583*0fca6ea1SDimitry Andric   if (VerifyCCG)
1584*0fca6ea1SDimitry Andric     check();
158506c3fb27SDimitry Andric }
158606c3fb27SDimitry Andric 
158706c3fb27SDimitry Andric uint64_t ModuleCallsiteContextGraph::getLastStackId(Instruction *Call) {
158806c3fb27SDimitry Andric   CallStack<MDNode, MDNode::op_iterator> CallsiteContext(
158906c3fb27SDimitry Andric       Call->getMetadata(LLVMContext::MD_callsite));
159006c3fb27SDimitry Andric   return CallsiteContext.back();
159106c3fb27SDimitry Andric }
159206c3fb27SDimitry Andric 
159306c3fb27SDimitry Andric uint64_t IndexCallsiteContextGraph::getLastStackId(IndexCall &Call) {
159406c3fb27SDimitry Andric   assert(isa<CallsiteInfo *>(Call.getBase()));
159506c3fb27SDimitry Andric   CallStack<CallsiteInfo, SmallVector<unsigned>::const_iterator>
159606c3fb27SDimitry Andric       CallsiteContext(dyn_cast_if_present<CallsiteInfo *>(Call.getBase()));
159706c3fb27SDimitry Andric   // Need to convert index into stack id.
159806c3fb27SDimitry Andric   return Index.getStackIdAtIndex(CallsiteContext.back());
159906c3fb27SDimitry Andric }
160006c3fb27SDimitry Andric 
160106c3fb27SDimitry Andric static const std::string MemProfCloneSuffix = ".memprof.";
160206c3fb27SDimitry Andric 
160306c3fb27SDimitry Andric static std::string getMemProfFuncName(Twine Base, unsigned CloneNo) {
160406c3fb27SDimitry Andric   // We use CloneNo == 0 to refer to the original version, which doesn't get
160506c3fb27SDimitry Andric   // renamed with a suffix.
160606c3fb27SDimitry Andric   if (!CloneNo)
160706c3fb27SDimitry Andric     return Base.str();
160806c3fb27SDimitry Andric   return (Base + MemProfCloneSuffix + Twine(CloneNo)).str();
160906c3fb27SDimitry Andric }
161006c3fb27SDimitry Andric 
161106c3fb27SDimitry Andric std::string ModuleCallsiteContextGraph::getLabel(const Function *Func,
161206c3fb27SDimitry Andric                                                  const Instruction *Call,
161306c3fb27SDimitry Andric                                                  unsigned CloneNo) const {
161406c3fb27SDimitry Andric   return (Twine(Call->getFunction()->getName()) + " -> " +
161506c3fb27SDimitry Andric           cast<CallBase>(Call)->getCalledFunction()->getName())
161606c3fb27SDimitry Andric       .str();
161706c3fb27SDimitry Andric }
161806c3fb27SDimitry Andric 
161906c3fb27SDimitry Andric std::string IndexCallsiteContextGraph::getLabel(const FunctionSummary *Func,
162006c3fb27SDimitry Andric                                                 const IndexCall &Call,
162106c3fb27SDimitry Andric                                                 unsigned CloneNo) const {
162206c3fb27SDimitry Andric   auto VI = FSToVIMap.find(Func);
162306c3fb27SDimitry Andric   assert(VI != FSToVIMap.end());
162406c3fb27SDimitry Andric   if (isa<AllocInfo *>(Call.getBase()))
162506c3fb27SDimitry Andric     return (VI->second.name() + " -> alloc").str();
162606c3fb27SDimitry Andric   else {
162706c3fb27SDimitry Andric     auto *Callsite = dyn_cast_if_present<CallsiteInfo *>(Call.getBase());
162806c3fb27SDimitry Andric     return (VI->second.name() + " -> " +
162906c3fb27SDimitry Andric             getMemProfFuncName(Callsite->Callee.name(),
163006c3fb27SDimitry Andric                                Callsite->Clones[CloneNo]))
163106c3fb27SDimitry Andric         .str();
163206c3fb27SDimitry Andric   }
163306c3fb27SDimitry Andric }
163406c3fb27SDimitry Andric 
163506c3fb27SDimitry Andric std::vector<uint64_t>
163606c3fb27SDimitry Andric ModuleCallsiteContextGraph::getStackIdsWithContextNodesForCall(
163706c3fb27SDimitry Andric     Instruction *Call) {
163806c3fb27SDimitry Andric   CallStack<MDNode, MDNode::op_iterator> CallsiteContext(
163906c3fb27SDimitry Andric       Call->getMetadata(LLVMContext::MD_callsite));
164006c3fb27SDimitry Andric   return getStackIdsWithContextNodes<MDNode, MDNode::op_iterator>(
164106c3fb27SDimitry Andric       CallsiteContext);
164206c3fb27SDimitry Andric }
164306c3fb27SDimitry Andric 
164406c3fb27SDimitry Andric std::vector<uint64_t>
164506c3fb27SDimitry Andric IndexCallsiteContextGraph::getStackIdsWithContextNodesForCall(IndexCall &Call) {
164606c3fb27SDimitry Andric   assert(isa<CallsiteInfo *>(Call.getBase()));
164706c3fb27SDimitry Andric   CallStack<CallsiteInfo, SmallVector<unsigned>::const_iterator>
164806c3fb27SDimitry Andric       CallsiteContext(dyn_cast_if_present<CallsiteInfo *>(Call.getBase()));
164906c3fb27SDimitry Andric   return getStackIdsWithContextNodes<CallsiteInfo,
165006c3fb27SDimitry Andric                                      SmallVector<unsigned>::const_iterator>(
165106c3fb27SDimitry Andric       CallsiteContext);
165206c3fb27SDimitry Andric }
165306c3fb27SDimitry Andric 
165406c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
165506c3fb27SDimitry Andric template <class NodeT, class IteratorT>
165606c3fb27SDimitry Andric std::vector<uint64_t>
165706c3fb27SDimitry Andric CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getStackIdsWithContextNodes(
165806c3fb27SDimitry Andric     CallStack<NodeT, IteratorT> &CallsiteContext) {
165906c3fb27SDimitry Andric   std::vector<uint64_t> StackIds;
166006c3fb27SDimitry Andric   for (auto IdOrIndex : CallsiteContext) {
166106c3fb27SDimitry Andric     auto StackId = getStackId(IdOrIndex);
166206c3fb27SDimitry Andric     ContextNode *Node = getNodeForStackId(StackId);
166306c3fb27SDimitry Andric     if (!Node)
166406c3fb27SDimitry Andric       break;
166506c3fb27SDimitry Andric     StackIds.push_back(StackId);
166606c3fb27SDimitry Andric   }
166706c3fb27SDimitry Andric   return StackIds;
166806c3fb27SDimitry Andric }
166906c3fb27SDimitry Andric 
167006c3fb27SDimitry Andric ModuleCallsiteContextGraph::ModuleCallsiteContextGraph(
16717a6dacacSDimitry Andric     Module &M,
16727a6dacacSDimitry Andric     llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter)
167306c3fb27SDimitry Andric     : Mod(M), OREGetter(OREGetter) {
167406c3fb27SDimitry Andric   for (auto &F : M) {
167506c3fb27SDimitry Andric     std::vector<CallInfo> CallsWithMetadata;
167606c3fb27SDimitry Andric     for (auto &BB : F) {
167706c3fb27SDimitry Andric       for (auto &I : BB) {
167806c3fb27SDimitry Andric         if (!isa<CallBase>(I))
167906c3fb27SDimitry Andric           continue;
168006c3fb27SDimitry Andric         if (auto *MemProfMD = I.getMetadata(LLVMContext::MD_memprof)) {
168106c3fb27SDimitry Andric           CallsWithMetadata.push_back(&I);
168206c3fb27SDimitry Andric           auto *AllocNode = addAllocNode(&I, &F);
168306c3fb27SDimitry Andric           auto *CallsiteMD = I.getMetadata(LLVMContext::MD_callsite);
168406c3fb27SDimitry Andric           assert(CallsiteMD);
168506c3fb27SDimitry Andric           CallStack<MDNode, MDNode::op_iterator> CallsiteContext(CallsiteMD);
168606c3fb27SDimitry Andric           // Add all of the MIBs and their stack nodes.
168706c3fb27SDimitry Andric           for (auto &MDOp : MemProfMD->operands()) {
168806c3fb27SDimitry Andric             auto *MIBMD = cast<const MDNode>(MDOp);
168906c3fb27SDimitry Andric             MDNode *StackNode = getMIBStackNode(MIBMD);
169006c3fb27SDimitry Andric             assert(StackNode);
169106c3fb27SDimitry Andric             CallStack<MDNode, MDNode::op_iterator> StackContext(StackNode);
169206c3fb27SDimitry Andric             addStackNodesForMIB<MDNode, MDNode::op_iterator>(
169306c3fb27SDimitry Andric                 AllocNode, StackContext, CallsiteContext,
1694*0fca6ea1SDimitry Andric                 getMIBAllocType(MIBMD), getMIBTotalSize(MIBMD));
169506c3fb27SDimitry Andric           }
169606c3fb27SDimitry Andric           assert(AllocNode->AllocTypes != (uint8_t)AllocationType::None);
169706c3fb27SDimitry Andric           // Memprof and callsite metadata on memory allocations no longer
169806c3fb27SDimitry Andric           // needed.
169906c3fb27SDimitry Andric           I.setMetadata(LLVMContext::MD_memprof, nullptr);
170006c3fb27SDimitry Andric           I.setMetadata(LLVMContext::MD_callsite, nullptr);
170106c3fb27SDimitry Andric         }
170206c3fb27SDimitry Andric         // For callsite metadata, add to list for this function for later use.
170306c3fb27SDimitry Andric         else if (I.getMetadata(LLVMContext::MD_callsite))
170406c3fb27SDimitry Andric           CallsWithMetadata.push_back(&I);
170506c3fb27SDimitry Andric       }
170606c3fb27SDimitry Andric     }
170706c3fb27SDimitry Andric     if (!CallsWithMetadata.empty())
1708297eecfbSDimitry Andric       FuncToCallsWithMetadata[&F] = CallsWithMetadata;
170906c3fb27SDimitry Andric   }
171006c3fb27SDimitry Andric 
171106c3fb27SDimitry Andric   if (DumpCCG) {
171206c3fb27SDimitry Andric     dbgs() << "CCG before updating call stack chains:\n";
171306c3fb27SDimitry Andric     dbgs() << *this;
171406c3fb27SDimitry Andric   }
171506c3fb27SDimitry Andric 
171606c3fb27SDimitry Andric   if (ExportToDot)
171706c3fb27SDimitry Andric     exportToDot("prestackupdate");
171806c3fb27SDimitry Andric 
171906c3fb27SDimitry Andric   updateStackNodes();
172006c3fb27SDimitry Andric 
172106c3fb27SDimitry Andric   handleCallsitesWithMultipleTargets();
172206c3fb27SDimitry Andric 
172306c3fb27SDimitry Andric   // Strip off remaining callsite metadata, no longer needed.
172406c3fb27SDimitry Andric   for (auto &FuncEntry : FuncToCallsWithMetadata)
172506c3fb27SDimitry Andric     for (auto &Call : FuncEntry.second)
172606c3fb27SDimitry Andric       Call.call()->setMetadata(LLVMContext::MD_callsite, nullptr);
172706c3fb27SDimitry Andric }
172806c3fb27SDimitry Andric 
172906c3fb27SDimitry Andric IndexCallsiteContextGraph::IndexCallsiteContextGraph(
173006c3fb27SDimitry Andric     ModuleSummaryIndex &Index,
17317a6dacacSDimitry Andric     llvm::function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
173206c3fb27SDimitry Andric         isPrevailing)
1733297eecfbSDimitry Andric     : Index(Index), isPrevailing(isPrevailing) {
173406c3fb27SDimitry Andric   for (auto &I : Index) {
173506c3fb27SDimitry Andric     auto VI = Index.getValueInfo(I);
173606c3fb27SDimitry Andric     for (auto &S : VI.getSummaryList()) {
173706c3fb27SDimitry Andric       // We should only add the prevailing nodes. Otherwise we may try to clone
173806c3fb27SDimitry Andric       // in a weak copy that won't be linked (and may be different than the
173906c3fb27SDimitry Andric       // prevailing version).
174006c3fb27SDimitry Andric       // We only keep the memprof summary on the prevailing copy now when
174106c3fb27SDimitry Andric       // building the combined index, as a space optimization, however don't
174206c3fb27SDimitry Andric       // rely on this optimization. The linker doesn't resolve local linkage
174306c3fb27SDimitry Andric       // values so don't check whether those are prevailing.
174406c3fb27SDimitry Andric       if (!GlobalValue::isLocalLinkage(S->linkage()) &&
174506c3fb27SDimitry Andric           !isPrevailing(VI.getGUID(), S.get()))
174606c3fb27SDimitry Andric         continue;
174706c3fb27SDimitry Andric       auto *FS = dyn_cast<FunctionSummary>(S.get());
174806c3fb27SDimitry Andric       if (!FS)
174906c3fb27SDimitry Andric         continue;
175006c3fb27SDimitry Andric       std::vector<CallInfo> CallsWithMetadata;
175106c3fb27SDimitry Andric       if (!FS->allocs().empty()) {
175206c3fb27SDimitry Andric         for (auto &AN : FS->mutableAllocs()) {
175306c3fb27SDimitry Andric           // This can happen because of recursion elimination handling that
175406c3fb27SDimitry Andric           // currently exists in ModuleSummaryAnalysis. Skip these for now.
175506c3fb27SDimitry Andric           // We still added them to the summary because we need to be able to
175606c3fb27SDimitry Andric           // correlate properly in applyImport in the backends.
175706c3fb27SDimitry Andric           if (AN.MIBs.empty())
175806c3fb27SDimitry Andric             continue;
175906c3fb27SDimitry Andric           CallsWithMetadata.push_back({&AN});
176006c3fb27SDimitry Andric           auto *AllocNode = addAllocNode({&AN}, FS);
176106c3fb27SDimitry Andric           // Pass an empty CallStack to the CallsiteContext (second)
176206c3fb27SDimitry Andric           // parameter, since for ThinLTO we already collapsed out the inlined
176306c3fb27SDimitry Andric           // stack ids on the allocation call during ModuleSummaryAnalysis.
176406c3fb27SDimitry Andric           CallStack<MIBInfo, SmallVector<unsigned>::const_iterator>
176506c3fb27SDimitry Andric               EmptyContext;
1766*0fca6ea1SDimitry Andric           unsigned I = 0;
1767*0fca6ea1SDimitry Andric           assert(!MemProfReportHintedSizes ||
1768*0fca6ea1SDimitry Andric                  AN.TotalSizes.size() == AN.MIBs.size());
176906c3fb27SDimitry Andric           // Now add all of the MIBs and their stack nodes.
177006c3fb27SDimitry Andric           for (auto &MIB : AN.MIBs) {
177106c3fb27SDimitry Andric             CallStack<MIBInfo, SmallVector<unsigned>::const_iterator>
177206c3fb27SDimitry Andric                 StackContext(&MIB);
1773*0fca6ea1SDimitry Andric             uint64_t TotalSize = 0;
1774*0fca6ea1SDimitry Andric             if (MemProfReportHintedSizes)
1775*0fca6ea1SDimitry Andric               TotalSize = AN.TotalSizes[I];
177606c3fb27SDimitry Andric             addStackNodesForMIB<MIBInfo, SmallVector<unsigned>::const_iterator>(
1777*0fca6ea1SDimitry Andric                 AllocNode, StackContext, EmptyContext, MIB.AllocType,
1778*0fca6ea1SDimitry Andric                 TotalSize);
1779*0fca6ea1SDimitry Andric             I++;
178006c3fb27SDimitry Andric           }
178106c3fb27SDimitry Andric           assert(AllocNode->AllocTypes != (uint8_t)AllocationType::None);
178206c3fb27SDimitry Andric           // Initialize version 0 on the summary alloc node to the current alloc
178306c3fb27SDimitry Andric           // type, unless it has both types in which case make it default, so
178406c3fb27SDimitry Andric           // that in the case where we aren't able to clone the original version
178506c3fb27SDimitry Andric           // always ends up with the default allocation behavior.
178606c3fb27SDimitry Andric           AN.Versions[0] = (uint8_t)allocTypeToUse(AllocNode->AllocTypes);
178706c3fb27SDimitry Andric         }
178806c3fb27SDimitry Andric       }
178906c3fb27SDimitry Andric       // For callsite metadata, add to list for this function for later use.
179006c3fb27SDimitry Andric       if (!FS->callsites().empty())
179106c3fb27SDimitry Andric         for (auto &SN : FS->mutableCallsites())
179206c3fb27SDimitry Andric           CallsWithMetadata.push_back({&SN});
179306c3fb27SDimitry Andric 
179406c3fb27SDimitry Andric       if (!CallsWithMetadata.empty())
1795297eecfbSDimitry Andric         FuncToCallsWithMetadata[FS] = CallsWithMetadata;
179606c3fb27SDimitry Andric 
179706c3fb27SDimitry Andric       if (!FS->allocs().empty() || !FS->callsites().empty())
179806c3fb27SDimitry Andric         FSToVIMap[FS] = VI;
179906c3fb27SDimitry Andric     }
180006c3fb27SDimitry Andric   }
180106c3fb27SDimitry Andric 
180206c3fb27SDimitry Andric   if (DumpCCG) {
180306c3fb27SDimitry Andric     dbgs() << "CCG before updating call stack chains:\n";
180406c3fb27SDimitry Andric     dbgs() << *this;
180506c3fb27SDimitry Andric   }
180606c3fb27SDimitry Andric 
180706c3fb27SDimitry Andric   if (ExportToDot)
180806c3fb27SDimitry Andric     exportToDot("prestackupdate");
180906c3fb27SDimitry Andric 
181006c3fb27SDimitry Andric   updateStackNodes();
181106c3fb27SDimitry Andric 
181206c3fb27SDimitry Andric   handleCallsitesWithMultipleTargets();
181306c3fb27SDimitry Andric }
181406c3fb27SDimitry Andric 
181506c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
181606c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy,
181706c3fb27SDimitry Andric                           CallTy>::handleCallsitesWithMultipleTargets() {
181806c3fb27SDimitry Andric   // Look for and workaround callsites that call multiple functions.
181906c3fb27SDimitry Andric   // This can happen for indirect calls, which needs better handling, and in
182006c3fb27SDimitry Andric   // more rare cases (e.g. macro expansion).
182106c3fb27SDimitry Andric   // TODO: To fix this for indirect calls we will want to perform speculative
182206c3fb27SDimitry Andric   // devirtualization using either the normal PGO info with ICP, or using the
182306c3fb27SDimitry Andric   // information in the profiled MemProf contexts. We can do this prior to
182406c3fb27SDimitry Andric   // this transformation for regular LTO, and for ThinLTO we can simulate that
182506c3fb27SDimitry Andric   // effect in the summary and perform the actual speculative devirtualization
182606c3fb27SDimitry Andric   // while cloning in the ThinLTO backend.
1827297eecfbSDimitry Andric 
1828297eecfbSDimitry Andric   // Keep track of the new nodes synthesized for discovered tail calls missing
1829297eecfbSDimitry Andric   // from the profiled contexts.
1830297eecfbSDimitry Andric   MapVector<CallInfo, ContextNode *> TailCallToContextNodeMap;
1831297eecfbSDimitry Andric 
1832*0fca6ea1SDimitry Andric   for (auto &Entry : NonAllocationCallToContextNodeMap) {
1833*0fca6ea1SDimitry Andric     auto *Node = Entry.second;
183406c3fb27SDimitry Andric     assert(Node->Clones.empty());
183506c3fb27SDimitry Andric     // Check all node callees and see if in the same function.
183606c3fb27SDimitry Andric     auto Call = Node->Call.call();
1837*0fca6ea1SDimitry Andric     for (auto EI = Node->CalleeEdges.begin(); EI != Node->CalleeEdges.end();
1838*0fca6ea1SDimitry Andric          ++EI) {
1839297eecfbSDimitry Andric       auto Edge = *EI;
1840*0fca6ea1SDimitry Andric       if (!Edge->Callee->hasCall())
184106c3fb27SDimitry Andric         continue;
184206c3fb27SDimitry Andric       assert(NodeToCallingFunc.count(Edge->Callee));
184306c3fb27SDimitry Andric       // Check if the called function matches that of the callee node.
1844297eecfbSDimitry Andric       if (calleesMatch(Call, EI, TailCallToContextNodeMap))
184506c3fb27SDimitry Andric         continue;
1846297eecfbSDimitry Andric       RemovedEdgesWithMismatchedCallees++;
184706c3fb27SDimitry Andric       // Work around by setting Node to have a null call, so it gets
184806c3fb27SDimitry Andric       // skipped during cloning. Otherwise assignFunctions will assert
184906c3fb27SDimitry Andric       // because its data structures are not designed to handle this case.
185006c3fb27SDimitry Andric       Node->setCall(CallInfo());
185106c3fb27SDimitry Andric       break;
185206c3fb27SDimitry Andric     }
185306c3fb27SDimitry Andric   }
1854297eecfbSDimitry Andric 
1855*0fca6ea1SDimitry Andric   // Remove all mismatched nodes identified in the above loop from the node map
1856*0fca6ea1SDimitry Andric   // (checking whether they have a null call which is set above). For a
1857*0fca6ea1SDimitry Andric   // MapVector like NonAllocationCallToContextNodeMap it is much more efficient
1858*0fca6ea1SDimitry Andric   // to do the removal via remove_if than by individually erasing entries above.
1859*0fca6ea1SDimitry Andric   NonAllocationCallToContextNodeMap.remove_if(
1860*0fca6ea1SDimitry Andric       [](const auto &it) { return !it.second->hasCall(); });
1861*0fca6ea1SDimitry Andric 
1862297eecfbSDimitry Andric   // Add the new nodes after the above loop so that the iteration is not
1863297eecfbSDimitry Andric   // invalidated.
1864297eecfbSDimitry Andric   for (auto &[Call, Node] : TailCallToContextNodeMap)
1865297eecfbSDimitry Andric     NonAllocationCallToContextNodeMap[Call] = Node;
186606c3fb27SDimitry Andric }
186706c3fb27SDimitry Andric 
186806c3fb27SDimitry Andric uint64_t ModuleCallsiteContextGraph::getStackId(uint64_t IdOrIndex) const {
186906c3fb27SDimitry Andric   // In the Module (IR) case this is already the Id.
187006c3fb27SDimitry Andric   return IdOrIndex;
187106c3fb27SDimitry Andric }
187206c3fb27SDimitry Andric 
187306c3fb27SDimitry Andric uint64_t IndexCallsiteContextGraph::getStackId(uint64_t IdOrIndex) const {
187406c3fb27SDimitry Andric   // In the Index case this is an index into the stack id list in the summary
187506c3fb27SDimitry Andric   // index, convert it to an Id.
187606c3fb27SDimitry Andric   return Index.getStackIdAtIndex(IdOrIndex);
187706c3fb27SDimitry Andric }
187806c3fb27SDimitry Andric 
1879297eecfbSDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
1880297eecfbSDimitry Andric bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::calleesMatch(
1881297eecfbSDimitry Andric     CallTy Call, EdgeIter &EI,
1882297eecfbSDimitry Andric     MapVector<CallInfo, ContextNode *> &TailCallToContextNodeMap) {
1883297eecfbSDimitry Andric   auto Edge = *EI;
1884297eecfbSDimitry Andric   const FuncTy *ProfiledCalleeFunc = NodeToCallingFunc[Edge->Callee];
1885297eecfbSDimitry Andric   const FuncTy *CallerFunc = NodeToCallingFunc[Edge->Caller];
1886297eecfbSDimitry Andric   // Will be populated in order of callee to caller if we find a chain of tail
1887297eecfbSDimitry Andric   // calls between the profiled caller and callee.
1888297eecfbSDimitry Andric   std::vector<std::pair<CallTy, FuncTy *>> FoundCalleeChain;
1889297eecfbSDimitry Andric   if (!calleeMatchesFunc(Call, ProfiledCalleeFunc, CallerFunc,
1890*0fca6ea1SDimitry Andric                          FoundCalleeChain))
1891297eecfbSDimitry Andric     return false;
1892297eecfbSDimitry Andric 
1893297eecfbSDimitry Andric   // The usual case where the profiled callee matches that of the IR/summary.
1894*0fca6ea1SDimitry Andric   if (FoundCalleeChain.empty())
1895297eecfbSDimitry Andric     return true;
1896297eecfbSDimitry Andric 
1897297eecfbSDimitry Andric   auto AddEdge = [Edge, &EI](ContextNode *Caller, ContextNode *Callee) {
1898297eecfbSDimitry Andric     auto *CurEdge = Callee->findEdgeFromCaller(Caller);
1899297eecfbSDimitry Andric     // If there is already an edge between these nodes, simply update it and
1900297eecfbSDimitry Andric     // return.
1901297eecfbSDimitry Andric     if (CurEdge) {
1902297eecfbSDimitry Andric       CurEdge->ContextIds.insert(Edge->ContextIds.begin(),
1903297eecfbSDimitry Andric                                  Edge->ContextIds.end());
1904297eecfbSDimitry Andric       CurEdge->AllocTypes |= Edge->AllocTypes;
1905297eecfbSDimitry Andric       return;
1906297eecfbSDimitry Andric     }
1907297eecfbSDimitry Andric     // Otherwise, create a new edge and insert it into the caller and callee
1908297eecfbSDimitry Andric     // lists.
1909297eecfbSDimitry Andric     auto NewEdge = std::make_shared<ContextEdge>(
1910297eecfbSDimitry Andric         Callee, Caller, Edge->AllocTypes, Edge->ContextIds);
1911297eecfbSDimitry Andric     Callee->CallerEdges.push_back(NewEdge);
1912297eecfbSDimitry Andric     if (Caller == Edge->Caller) {
1913297eecfbSDimitry Andric       // If we are inserting the new edge into the current edge's caller, insert
1914297eecfbSDimitry Andric       // the new edge before the current iterator position, and then increment
1915297eecfbSDimitry Andric       // back to the current edge.
1916297eecfbSDimitry Andric       EI = Caller->CalleeEdges.insert(EI, NewEdge);
1917297eecfbSDimitry Andric       ++EI;
1918297eecfbSDimitry Andric       assert(*EI == Edge &&
1919297eecfbSDimitry Andric              "Iterator position not restored after insert and increment");
1920297eecfbSDimitry Andric     } else
1921297eecfbSDimitry Andric       Caller->CalleeEdges.push_back(NewEdge);
1922297eecfbSDimitry Andric   };
1923297eecfbSDimitry Andric 
1924297eecfbSDimitry Andric   // Create new nodes for each found callee and connect in between the profiled
1925297eecfbSDimitry Andric   // caller and callee.
1926297eecfbSDimitry Andric   auto *CurCalleeNode = Edge->Callee;
1927297eecfbSDimitry Andric   for (auto &[NewCall, Func] : FoundCalleeChain) {
1928297eecfbSDimitry Andric     ContextNode *NewNode = nullptr;
1929297eecfbSDimitry Andric     // First check if we have already synthesized a node for this tail call.
1930297eecfbSDimitry Andric     if (TailCallToContextNodeMap.count(NewCall)) {
1931297eecfbSDimitry Andric       NewNode = TailCallToContextNodeMap[NewCall];
1932297eecfbSDimitry Andric       NewNode->AllocTypes |= Edge->AllocTypes;
1933297eecfbSDimitry Andric     } else {
1934297eecfbSDimitry Andric       FuncToCallsWithMetadata[Func].push_back({NewCall});
1935297eecfbSDimitry Andric       // Create Node and record node info.
1936297eecfbSDimitry Andric       NodeOwner.push_back(
1937297eecfbSDimitry Andric           std::make_unique<ContextNode>(/*IsAllocation=*/false, NewCall));
1938297eecfbSDimitry Andric       NewNode = NodeOwner.back().get();
1939297eecfbSDimitry Andric       NodeToCallingFunc[NewNode] = Func;
1940297eecfbSDimitry Andric       TailCallToContextNodeMap[NewCall] = NewNode;
1941297eecfbSDimitry Andric       NewNode->AllocTypes = Edge->AllocTypes;
1942297eecfbSDimitry Andric     }
1943297eecfbSDimitry Andric 
1944297eecfbSDimitry Andric     // Hook up node to its callee node
1945297eecfbSDimitry Andric     AddEdge(NewNode, CurCalleeNode);
1946297eecfbSDimitry Andric 
1947297eecfbSDimitry Andric     CurCalleeNode = NewNode;
1948297eecfbSDimitry Andric   }
1949297eecfbSDimitry Andric 
1950297eecfbSDimitry Andric   // Hook up edge's original caller to new callee node.
1951297eecfbSDimitry Andric   AddEdge(Edge->Caller, CurCalleeNode);
1952297eecfbSDimitry Andric 
1953297eecfbSDimitry Andric   // Remove old edge
1954297eecfbSDimitry Andric   Edge->Callee->eraseCallerEdge(Edge.get());
1955297eecfbSDimitry Andric   EI = Edge->Caller->CalleeEdges.erase(EI);
1956297eecfbSDimitry Andric 
1957*0fca6ea1SDimitry Andric   // To simplify the increment of EI in the caller, subtract one from EI.
1958*0fca6ea1SDimitry Andric   // In the final AddEdge call we would have either added a new callee edge,
1959*0fca6ea1SDimitry Andric   // to Edge->Caller, or found an existing one. Either way we are guaranteed
1960*0fca6ea1SDimitry Andric   // that there is at least one callee edge.
1961*0fca6ea1SDimitry Andric   assert(!Edge->Caller->CalleeEdges.empty());
1962*0fca6ea1SDimitry Andric   --EI;
1963*0fca6ea1SDimitry Andric 
1964297eecfbSDimitry Andric   return true;
1965297eecfbSDimitry Andric }
1966297eecfbSDimitry Andric 
1967297eecfbSDimitry Andric bool ModuleCallsiteContextGraph::findProfiledCalleeThroughTailCalls(
1968297eecfbSDimitry Andric     const Function *ProfiledCallee, Value *CurCallee, unsigned Depth,
1969297eecfbSDimitry Andric     std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain,
1970297eecfbSDimitry Andric     bool &FoundMultipleCalleeChains) {
1971297eecfbSDimitry Andric   // Stop recursive search if we have already explored the maximum specified
1972297eecfbSDimitry Andric   // depth.
1973297eecfbSDimitry Andric   if (Depth > TailCallSearchDepth)
1974297eecfbSDimitry Andric     return false;
1975297eecfbSDimitry Andric 
1976297eecfbSDimitry Andric   auto SaveCallsiteInfo = [&](Instruction *Callsite, Function *F) {
1977297eecfbSDimitry Andric     FoundCalleeChain.push_back({Callsite, F});
1978297eecfbSDimitry Andric   };
1979297eecfbSDimitry Andric 
1980297eecfbSDimitry Andric   auto *CalleeFunc = dyn_cast<Function>(CurCallee);
1981297eecfbSDimitry Andric   if (!CalleeFunc) {
1982297eecfbSDimitry Andric     auto *Alias = dyn_cast<GlobalAlias>(CurCallee);
1983297eecfbSDimitry Andric     assert(Alias);
1984297eecfbSDimitry Andric     CalleeFunc = dyn_cast<Function>(Alias->getAliasee());
1985297eecfbSDimitry Andric     assert(CalleeFunc);
1986297eecfbSDimitry Andric   }
1987297eecfbSDimitry Andric 
1988297eecfbSDimitry Andric   // Look for tail calls in this function, and check if they either call the
1989297eecfbSDimitry Andric   // profiled callee directly, or indirectly (via a recursive search).
1990297eecfbSDimitry Andric   // Only succeed if there is a single unique tail call chain found between the
1991297eecfbSDimitry Andric   // profiled caller and callee, otherwise we could perform incorrect cloning.
1992297eecfbSDimitry Andric   bool FoundSingleCalleeChain = false;
1993297eecfbSDimitry Andric   for (auto &BB : *CalleeFunc) {
1994297eecfbSDimitry Andric     for (auto &I : BB) {
1995297eecfbSDimitry Andric       auto *CB = dyn_cast<CallBase>(&I);
1996297eecfbSDimitry Andric       if (!CB || !CB->isTailCall())
1997297eecfbSDimitry Andric         continue;
1998297eecfbSDimitry Andric       auto *CalledValue = CB->getCalledOperand();
1999297eecfbSDimitry Andric       auto *CalledFunction = CB->getCalledFunction();
2000297eecfbSDimitry Andric       if (CalledValue && !CalledFunction) {
2001297eecfbSDimitry Andric         CalledValue = CalledValue->stripPointerCasts();
2002297eecfbSDimitry Andric         // Stripping pointer casts can reveal a called function.
2003297eecfbSDimitry Andric         CalledFunction = dyn_cast<Function>(CalledValue);
2004297eecfbSDimitry Andric       }
2005297eecfbSDimitry Andric       // Check if this is an alias to a function. If so, get the
2006297eecfbSDimitry Andric       // called aliasee for the checks below.
2007297eecfbSDimitry Andric       if (auto *GA = dyn_cast<GlobalAlias>(CalledValue)) {
2008297eecfbSDimitry Andric         assert(!CalledFunction &&
2009297eecfbSDimitry Andric                "Expected null called function in callsite for alias");
2010297eecfbSDimitry Andric         CalledFunction = dyn_cast<Function>(GA->getAliaseeObject());
2011297eecfbSDimitry Andric       }
2012297eecfbSDimitry Andric       if (!CalledFunction)
2013297eecfbSDimitry Andric         continue;
2014297eecfbSDimitry Andric       if (CalledFunction == ProfiledCallee) {
2015297eecfbSDimitry Andric         if (FoundSingleCalleeChain) {
2016297eecfbSDimitry Andric           FoundMultipleCalleeChains = true;
2017297eecfbSDimitry Andric           return false;
2018297eecfbSDimitry Andric         }
2019297eecfbSDimitry Andric         FoundSingleCalleeChain = true;
2020297eecfbSDimitry Andric         FoundProfiledCalleeCount++;
2021297eecfbSDimitry Andric         FoundProfiledCalleeDepth += Depth;
2022297eecfbSDimitry Andric         if (Depth > FoundProfiledCalleeMaxDepth)
2023297eecfbSDimitry Andric           FoundProfiledCalleeMaxDepth = Depth;
2024297eecfbSDimitry Andric         SaveCallsiteInfo(&I, CalleeFunc);
2025297eecfbSDimitry Andric       } else if (findProfiledCalleeThroughTailCalls(
2026297eecfbSDimitry Andric                      ProfiledCallee, CalledFunction, Depth + 1,
2027297eecfbSDimitry Andric                      FoundCalleeChain, FoundMultipleCalleeChains)) {
2028*0fca6ea1SDimitry Andric         // findProfiledCalleeThroughTailCalls should not have returned
2029*0fca6ea1SDimitry Andric         // true if FoundMultipleCalleeChains.
2030*0fca6ea1SDimitry Andric         assert(!FoundMultipleCalleeChains);
2031297eecfbSDimitry Andric         if (FoundSingleCalleeChain) {
2032297eecfbSDimitry Andric           FoundMultipleCalleeChains = true;
2033297eecfbSDimitry Andric           return false;
2034297eecfbSDimitry Andric         }
2035297eecfbSDimitry Andric         FoundSingleCalleeChain = true;
2036297eecfbSDimitry Andric         SaveCallsiteInfo(&I, CalleeFunc);
2037*0fca6ea1SDimitry Andric       } else if (FoundMultipleCalleeChains)
2038*0fca6ea1SDimitry Andric         return false;
2039297eecfbSDimitry Andric     }
2040297eecfbSDimitry Andric   }
2041297eecfbSDimitry Andric 
2042297eecfbSDimitry Andric   return FoundSingleCalleeChain;
2043297eecfbSDimitry Andric }
2044297eecfbSDimitry Andric 
2045297eecfbSDimitry Andric bool ModuleCallsiteContextGraph::calleeMatchesFunc(
2046297eecfbSDimitry Andric     Instruction *Call, const Function *Func, const Function *CallerFunc,
2047297eecfbSDimitry Andric     std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain) {
204806c3fb27SDimitry Andric   auto *CB = dyn_cast<CallBase>(Call);
204906c3fb27SDimitry Andric   if (!CB->getCalledOperand())
205006c3fb27SDimitry Andric     return false;
205106c3fb27SDimitry Andric   auto *CalleeVal = CB->getCalledOperand()->stripPointerCasts();
205206c3fb27SDimitry Andric   auto *CalleeFunc = dyn_cast<Function>(CalleeVal);
205306c3fb27SDimitry Andric   if (CalleeFunc == Func)
205406c3fb27SDimitry Andric     return true;
205506c3fb27SDimitry Andric   auto *Alias = dyn_cast<GlobalAlias>(CalleeVal);
2056297eecfbSDimitry Andric   if (Alias && Alias->getAliasee() == Func)
2057297eecfbSDimitry Andric     return true;
2058297eecfbSDimitry Andric 
2059297eecfbSDimitry Andric   // Recursively search for the profiled callee through tail calls starting with
2060297eecfbSDimitry Andric   // the actual Callee. The discovered tail call chain is saved in
2061297eecfbSDimitry Andric   // FoundCalleeChain, and we will fixup the graph to include these callsites
2062297eecfbSDimitry Andric   // after returning.
2063297eecfbSDimitry Andric   // FIXME: We will currently redo the same recursive walk if we find the same
2064297eecfbSDimitry Andric   // mismatched callee from another callsite. We can improve this with more
2065297eecfbSDimitry Andric   // bookkeeping of the created chain of new nodes for each mismatch.
2066297eecfbSDimitry Andric   unsigned Depth = 1;
2067297eecfbSDimitry Andric   bool FoundMultipleCalleeChains = false;
2068297eecfbSDimitry Andric   if (!findProfiledCalleeThroughTailCalls(Func, CalleeVal, Depth,
2069297eecfbSDimitry Andric                                           FoundCalleeChain,
2070297eecfbSDimitry Andric                                           FoundMultipleCalleeChains)) {
2071297eecfbSDimitry Andric     LLVM_DEBUG(dbgs() << "Not found through unique tail call chain: "
2072297eecfbSDimitry Andric                       << Func->getName() << " from " << CallerFunc->getName()
2073297eecfbSDimitry Andric                       << " that actually called " << CalleeVal->getName()
2074297eecfbSDimitry Andric                       << (FoundMultipleCalleeChains
2075297eecfbSDimitry Andric                               ? " (found multiple possible chains)"
2076297eecfbSDimitry Andric                               : "")
2077297eecfbSDimitry Andric                       << "\n");
2078297eecfbSDimitry Andric     if (FoundMultipleCalleeChains)
2079297eecfbSDimitry Andric       FoundProfiledCalleeNonUniquelyCount++;
2080297eecfbSDimitry Andric     return false;
208106c3fb27SDimitry Andric   }
208206c3fb27SDimitry Andric 
2083297eecfbSDimitry Andric   return true;
2084297eecfbSDimitry Andric }
2085297eecfbSDimitry Andric 
2086297eecfbSDimitry Andric bool IndexCallsiteContextGraph::findProfiledCalleeThroughTailCalls(
2087297eecfbSDimitry Andric     ValueInfo ProfiledCallee, ValueInfo CurCallee, unsigned Depth,
2088297eecfbSDimitry Andric     std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain,
2089297eecfbSDimitry Andric     bool &FoundMultipleCalleeChains) {
2090297eecfbSDimitry Andric   // Stop recursive search if we have already explored the maximum specified
2091297eecfbSDimitry Andric   // depth.
2092297eecfbSDimitry Andric   if (Depth > TailCallSearchDepth)
2093297eecfbSDimitry Andric     return false;
2094297eecfbSDimitry Andric 
2095297eecfbSDimitry Andric   auto CreateAndSaveCallsiteInfo = [&](ValueInfo Callee, FunctionSummary *FS) {
2096297eecfbSDimitry Andric     // Make a CallsiteInfo for each discovered callee, if one hasn't already
2097297eecfbSDimitry Andric     // been synthesized.
2098297eecfbSDimitry Andric     if (!FunctionCalleesToSynthesizedCallsiteInfos.count(FS) ||
2099297eecfbSDimitry Andric         !FunctionCalleesToSynthesizedCallsiteInfos[FS].count(Callee))
2100297eecfbSDimitry Andric       // StackIds is empty (we don't have debug info available in the index for
2101297eecfbSDimitry Andric       // these callsites)
2102297eecfbSDimitry Andric       FunctionCalleesToSynthesizedCallsiteInfos[FS][Callee] =
2103297eecfbSDimitry Andric           std::make_unique<CallsiteInfo>(Callee, SmallVector<unsigned>());
2104297eecfbSDimitry Andric     CallsiteInfo *NewCallsiteInfo =
2105297eecfbSDimitry Andric         FunctionCalleesToSynthesizedCallsiteInfos[FS][Callee].get();
2106297eecfbSDimitry Andric     FoundCalleeChain.push_back({NewCallsiteInfo, FS});
2107297eecfbSDimitry Andric   };
2108297eecfbSDimitry Andric 
2109297eecfbSDimitry Andric   // Look for tail calls in this function, and check if they either call the
2110297eecfbSDimitry Andric   // profiled callee directly, or indirectly (via a recursive search).
2111297eecfbSDimitry Andric   // Only succeed if there is a single unique tail call chain found between the
2112297eecfbSDimitry Andric   // profiled caller and callee, otherwise we could perform incorrect cloning.
2113297eecfbSDimitry Andric   bool FoundSingleCalleeChain = false;
2114297eecfbSDimitry Andric   for (auto &S : CurCallee.getSummaryList()) {
2115297eecfbSDimitry Andric     if (!GlobalValue::isLocalLinkage(S->linkage()) &&
2116297eecfbSDimitry Andric         !isPrevailing(CurCallee.getGUID(), S.get()))
2117297eecfbSDimitry Andric       continue;
2118297eecfbSDimitry Andric     auto *FS = dyn_cast<FunctionSummary>(S->getBaseObject());
2119297eecfbSDimitry Andric     if (!FS)
2120297eecfbSDimitry Andric       continue;
2121297eecfbSDimitry Andric     auto FSVI = CurCallee;
2122297eecfbSDimitry Andric     auto *AS = dyn_cast<AliasSummary>(S.get());
2123297eecfbSDimitry Andric     if (AS)
2124297eecfbSDimitry Andric       FSVI = AS->getAliaseeVI();
2125297eecfbSDimitry Andric     for (auto &CallEdge : FS->calls()) {
2126297eecfbSDimitry Andric       if (!CallEdge.second.hasTailCall())
2127297eecfbSDimitry Andric         continue;
2128297eecfbSDimitry Andric       if (CallEdge.first == ProfiledCallee) {
2129297eecfbSDimitry Andric         if (FoundSingleCalleeChain) {
2130297eecfbSDimitry Andric           FoundMultipleCalleeChains = true;
2131297eecfbSDimitry Andric           return false;
2132297eecfbSDimitry Andric         }
2133297eecfbSDimitry Andric         FoundSingleCalleeChain = true;
2134297eecfbSDimitry Andric         FoundProfiledCalleeCount++;
2135297eecfbSDimitry Andric         FoundProfiledCalleeDepth += Depth;
2136297eecfbSDimitry Andric         if (Depth > FoundProfiledCalleeMaxDepth)
2137297eecfbSDimitry Andric           FoundProfiledCalleeMaxDepth = Depth;
2138297eecfbSDimitry Andric         CreateAndSaveCallsiteInfo(CallEdge.first, FS);
2139297eecfbSDimitry Andric         // Add FS to FSToVIMap  in case it isn't already there.
2140297eecfbSDimitry Andric         assert(!FSToVIMap.count(FS) || FSToVIMap[FS] == FSVI);
2141297eecfbSDimitry Andric         FSToVIMap[FS] = FSVI;
2142297eecfbSDimitry Andric       } else if (findProfiledCalleeThroughTailCalls(
2143297eecfbSDimitry Andric                      ProfiledCallee, CallEdge.first, Depth + 1,
2144297eecfbSDimitry Andric                      FoundCalleeChain, FoundMultipleCalleeChains)) {
2145*0fca6ea1SDimitry Andric         // findProfiledCalleeThroughTailCalls should not have returned
2146*0fca6ea1SDimitry Andric         // true if FoundMultipleCalleeChains.
2147*0fca6ea1SDimitry Andric         assert(!FoundMultipleCalleeChains);
2148297eecfbSDimitry Andric         if (FoundSingleCalleeChain) {
2149297eecfbSDimitry Andric           FoundMultipleCalleeChains = true;
2150297eecfbSDimitry Andric           return false;
2151297eecfbSDimitry Andric         }
2152297eecfbSDimitry Andric         FoundSingleCalleeChain = true;
2153297eecfbSDimitry Andric         CreateAndSaveCallsiteInfo(CallEdge.first, FS);
2154297eecfbSDimitry Andric         // Add FS to FSToVIMap  in case it isn't already there.
2155297eecfbSDimitry Andric         assert(!FSToVIMap.count(FS) || FSToVIMap[FS] == FSVI);
2156297eecfbSDimitry Andric         FSToVIMap[FS] = FSVI;
2157*0fca6ea1SDimitry Andric       } else if (FoundMultipleCalleeChains)
2158*0fca6ea1SDimitry Andric         return false;
2159297eecfbSDimitry Andric     }
2160297eecfbSDimitry Andric   }
2161297eecfbSDimitry Andric 
2162297eecfbSDimitry Andric   return FoundSingleCalleeChain;
2163297eecfbSDimitry Andric }
2164297eecfbSDimitry Andric 
2165297eecfbSDimitry Andric bool IndexCallsiteContextGraph::calleeMatchesFunc(
2166297eecfbSDimitry Andric     IndexCall &Call, const FunctionSummary *Func,
2167297eecfbSDimitry Andric     const FunctionSummary *CallerFunc,
2168297eecfbSDimitry Andric     std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain) {
216906c3fb27SDimitry Andric   ValueInfo Callee =
217006c3fb27SDimitry Andric       dyn_cast_if_present<CallsiteInfo *>(Call.getBase())->Callee;
217106c3fb27SDimitry Andric   // If there is no summary list then this is a call to an externally defined
217206c3fb27SDimitry Andric   // symbol.
217306c3fb27SDimitry Andric   AliasSummary *Alias =
217406c3fb27SDimitry Andric       Callee.getSummaryList().empty()
217506c3fb27SDimitry Andric           ? nullptr
217606c3fb27SDimitry Andric           : dyn_cast<AliasSummary>(Callee.getSummaryList()[0].get());
217706c3fb27SDimitry Andric   assert(FSToVIMap.count(Func));
2178297eecfbSDimitry Andric   auto FuncVI = FSToVIMap[Func];
2179297eecfbSDimitry Andric   if (Callee == FuncVI ||
218006c3fb27SDimitry Andric       // If callee is an alias, check the aliasee, since only function
218106c3fb27SDimitry Andric       // summary base objects will contain the stack node summaries and thus
218206c3fb27SDimitry Andric       // get a context node.
2183297eecfbSDimitry Andric       (Alias && Alias->getAliaseeVI() == FuncVI))
2184297eecfbSDimitry Andric     return true;
2185297eecfbSDimitry Andric 
2186297eecfbSDimitry Andric   // Recursively search for the profiled callee through tail calls starting with
2187297eecfbSDimitry Andric   // the actual Callee. The discovered tail call chain is saved in
2188297eecfbSDimitry Andric   // FoundCalleeChain, and we will fixup the graph to include these callsites
2189297eecfbSDimitry Andric   // after returning.
2190297eecfbSDimitry Andric   // FIXME: We will currently redo the same recursive walk if we find the same
2191297eecfbSDimitry Andric   // mismatched callee from another callsite. We can improve this with more
2192297eecfbSDimitry Andric   // bookkeeping of the created chain of new nodes for each mismatch.
2193297eecfbSDimitry Andric   unsigned Depth = 1;
2194297eecfbSDimitry Andric   bool FoundMultipleCalleeChains = false;
2195297eecfbSDimitry Andric   if (!findProfiledCalleeThroughTailCalls(
2196297eecfbSDimitry Andric           FuncVI, Callee, Depth, FoundCalleeChain, FoundMultipleCalleeChains)) {
2197297eecfbSDimitry Andric     LLVM_DEBUG(dbgs() << "Not found through unique tail call chain: " << FuncVI
2198297eecfbSDimitry Andric                       << " from " << FSToVIMap[CallerFunc]
2199297eecfbSDimitry Andric                       << " that actually called " << Callee
2200297eecfbSDimitry Andric                       << (FoundMultipleCalleeChains
2201297eecfbSDimitry Andric                               ? " (found multiple possible chains)"
2202297eecfbSDimitry Andric                               : "")
2203297eecfbSDimitry Andric                       << "\n");
2204297eecfbSDimitry Andric     if (FoundMultipleCalleeChains)
2205297eecfbSDimitry Andric       FoundProfiledCalleeNonUniquelyCount++;
2206297eecfbSDimitry Andric     return false;
2207297eecfbSDimitry Andric   }
2208297eecfbSDimitry Andric 
2209297eecfbSDimitry Andric   return true;
221006c3fb27SDimitry Andric }
221106c3fb27SDimitry Andric 
221206c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
221306c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::dump()
221406c3fb27SDimitry Andric     const {
221506c3fb27SDimitry Andric   print(dbgs());
221606c3fb27SDimitry Andric   dbgs() << "\n";
221706c3fb27SDimitry Andric }
221806c3fb27SDimitry Andric 
221906c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
222006c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::print(
222106c3fb27SDimitry Andric     raw_ostream &OS) const {
222206c3fb27SDimitry Andric   OS << "Node " << this << "\n";
222306c3fb27SDimitry Andric   OS << "\t";
222406c3fb27SDimitry Andric   printCall(OS);
222506c3fb27SDimitry Andric   if (Recursive)
222606c3fb27SDimitry Andric     OS << " (recursive)";
222706c3fb27SDimitry Andric   OS << "\n";
222806c3fb27SDimitry Andric   OS << "\tAllocTypes: " << getAllocTypeString(AllocTypes) << "\n";
222906c3fb27SDimitry Andric   OS << "\tContextIds:";
2230*0fca6ea1SDimitry Andric   // Make a copy of the computed context ids that we can sort for stability.
2231*0fca6ea1SDimitry Andric   auto ContextIds = getContextIds();
223206c3fb27SDimitry Andric   std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
223306c3fb27SDimitry Andric   std::sort(SortedIds.begin(), SortedIds.end());
223406c3fb27SDimitry Andric   for (auto Id : SortedIds)
223506c3fb27SDimitry Andric     OS << " " << Id;
223606c3fb27SDimitry Andric   OS << "\n";
223706c3fb27SDimitry Andric   OS << "\tCalleeEdges:\n";
223806c3fb27SDimitry Andric   for (auto &Edge : CalleeEdges)
223906c3fb27SDimitry Andric     OS << "\t\t" << *Edge << "\n";
224006c3fb27SDimitry Andric   OS << "\tCallerEdges:\n";
224106c3fb27SDimitry Andric   for (auto &Edge : CallerEdges)
224206c3fb27SDimitry Andric     OS << "\t\t" << *Edge << "\n";
224306c3fb27SDimitry Andric   if (!Clones.empty()) {
224406c3fb27SDimitry Andric     OS << "\tClones: ";
224506c3fb27SDimitry Andric     FieldSeparator FS;
224606c3fb27SDimitry Andric     for (auto *Clone : Clones)
224706c3fb27SDimitry Andric       OS << FS << Clone;
224806c3fb27SDimitry Andric     OS << "\n";
224906c3fb27SDimitry Andric   } else if (CloneOf) {
225006c3fb27SDimitry Andric     OS << "\tClone of " << CloneOf << "\n";
225106c3fb27SDimitry Andric   }
225206c3fb27SDimitry Andric }
225306c3fb27SDimitry Andric 
225406c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
225506c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::dump()
225606c3fb27SDimitry Andric     const {
225706c3fb27SDimitry Andric   print(dbgs());
225806c3fb27SDimitry Andric   dbgs() << "\n";
225906c3fb27SDimitry Andric }
226006c3fb27SDimitry Andric 
226106c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
226206c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::print(
226306c3fb27SDimitry Andric     raw_ostream &OS) const {
226406c3fb27SDimitry Andric   OS << "Edge from Callee " << Callee << " to Caller: " << Caller
226506c3fb27SDimitry Andric      << " AllocTypes: " << getAllocTypeString(AllocTypes);
226606c3fb27SDimitry Andric   OS << " ContextIds:";
226706c3fb27SDimitry Andric   std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
226806c3fb27SDimitry Andric   std::sort(SortedIds.begin(), SortedIds.end());
226906c3fb27SDimitry Andric   for (auto Id : SortedIds)
227006c3fb27SDimitry Andric     OS << " " << Id;
227106c3fb27SDimitry Andric }
227206c3fb27SDimitry Andric 
227306c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
227406c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::dump() const {
227506c3fb27SDimitry Andric   print(dbgs());
227606c3fb27SDimitry Andric }
227706c3fb27SDimitry Andric 
227806c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
227906c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::print(
228006c3fb27SDimitry Andric     raw_ostream &OS) const {
228106c3fb27SDimitry Andric   OS << "Callsite Context Graph:\n";
228206c3fb27SDimitry Andric   using GraphType = const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
228306c3fb27SDimitry Andric   for (const auto Node : nodes<GraphType>(this)) {
228406c3fb27SDimitry Andric     if (Node->isRemoved())
228506c3fb27SDimitry Andric       continue;
228606c3fb27SDimitry Andric     Node->print(OS);
228706c3fb27SDimitry Andric     OS << "\n";
228806c3fb27SDimitry Andric   }
228906c3fb27SDimitry Andric }
229006c3fb27SDimitry Andric 
229106c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
2292*0fca6ea1SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::printTotalSizes(
2293*0fca6ea1SDimitry Andric     raw_ostream &OS) const {
2294*0fca6ea1SDimitry Andric   using GraphType = const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
2295*0fca6ea1SDimitry Andric   for (const auto Node : nodes<GraphType>(this)) {
229606c3fb27SDimitry Andric     if (Node->isRemoved())
2297*0fca6ea1SDimitry Andric       continue;
2298*0fca6ea1SDimitry Andric     if (!Node->IsAllocation)
2299*0fca6ea1SDimitry Andric       continue;
2300*0fca6ea1SDimitry Andric     DenseSet<uint32_t> ContextIds = Node->getContextIds();
2301*0fca6ea1SDimitry Andric     std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
2302*0fca6ea1SDimitry Andric     std::sort(SortedIds.begin(), SortedIds.end());
2303*0fca6ea1SDimitry Andric     for (auto Id : SortedIds) {
2304*0fca6ea1SDimitry Andric       auto SizeI = ContextIdToTotalSize.find(Id);
2305*0fca6ea1SDimitry Andric       assert(SizeI != ContextIdToTotalSize.end());
2306*0fca6ea1SDimitry Andric       auto TypeI = ContextIdToAllocationType.find(Id);
2307*0fca6ea1SDimitry Andric       assert(TypeI != ContextIdToAllocationType.end());
2308*0fca6ea1SDimitry Andric       OS << getAllocTypeString((uint8_t)TypeI->second) << " context " << Id
2309*0fca6ea1SDimitry Andric          << " with total size " << SizeI->second << " is "
2310*0fca6ea1SDimitry Andric          << getAllocTypeString(Node->AllocTypes) << " after cloning\n";
231106c3fb27SDimitry Andric     }
231206c3fb27SDimitry Andric   }
231306c3fb27SDimitry Andric }
231406c3fb27SDimitry Andric 
231506c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
231606c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::check() const {
231706c3fb27SDimitry Andric   using GraphType = const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
231806c3fb27SDimitry Andric   for (const auto Node : nodes<GraphType>(this)) {
231906c3fb27SDimitry Andric     checkNode<DerivedCCG, FuncTy, CallTy>(Node, /*CheckEdges=*/false);
232006c3fb27SDimitry Andric     for (auto &Edge : Node->CallerEdges)
232106c3fb27SDimitry Andric       checkEdge<DerivedCCG, FuncTy, CallTy>(Edge);
232206c3fb27SDimitry Andric   }
232306c3fb27SDimitry Andric }
232406c3fb27SDimitry Andric 
232506c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
232606c3fb27SDimitry Andric struct GraphTraits<const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *> {
232706c3fb27SDimitry Andric   using GraphType = const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
232806c3fb27SDimitry Andric   using NodeRef = const ContextNode<DerivedCCG, FuncTy, CallTy> *;
232906c3fb27SDimitry Andric 
233006c3fb27SDimitry Andric   using NodePtrTy = std::unique_ptr<ContextNode<DerivedCCG, FuncTy, CallTy>>;
233106c3fb27SDimitry Andric   static NodeRef getNode(const NodePtrTy &P) { return P.get(); }
233206c3fb27SDimitry Andric 
233306c3fb27SDimitry Andric   using nodes_iterator =
233406c3fb27SDimitry Andric       mapped_iterator<typename std::vector<NodePtrTy>::const_iterator,
233506c3fb27SDimitry Andric                       decltype(&getNode)>;
233606c3fb27SDimitry Andric 
233706c3fb27SDimitry Andric   static nodes_iterator nodes_begin(GraphType G) {
233806c3fb27SDimitry Andric     return nodes_iterator(G->NodeOwner.begin(), &getNode);
233906c3fb27SDimitry Andric   }
234006c3fb27SDimitry Andric 
234106c3fb27SDimitry Andric   static nodes_iterator nodes_end(GraphType G) {
234206c3fb27SDimitry Andric     return nodes_iterator(G->NodeOwner.end(), &getNode);
234306c3fb27SDimitry Andric   }
234406c3fb27SDimitry Andric 
234506c3fb27SDimitry Andric   static NodeRef getEntryNode(GraphType G) {
234606c3fb27SDimitry Andric     return G->NodeOwner.begin()->get();
234706c3fb27SDimitry Andric   }
234806c3fb27SDimitry Andric 
234906c3fb27SDimitry Andric   using EdgePtrTy = std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>;
235006c3fb27SDimitry Andric   static const ContextNode<DerivedCCG, FuncTy, CallTy> *
235106c3fb27SDimitry Andric   GetCallee(const EdgePtrTy &P) {
235206c3fb27SDimitry Andric     return P->Callee;
235306c3fb27SDimitry Andric   }
235406c3fb27SDimitry Andric 
235506c3fb27SDimitry Andric   using ChildIteratorType =
235606c3fb27SDimitry Andric       mapped_iterator<typename std::vector<std::shared_ptr<ContextEdge<
235706c3fb27SDimitry Andric                           DerivedCCG, FuncTy, CallTy>>>::const_iterator,
235806c3fb27SDimitry Andric                       decltype(&GetCallee)>;
235906c3fb27SDimitry Andric 
236006c3fb27SDimitry Andric   static ChildIteratorType child_begin(NodeRef N) {
236106c3fb27SDimitry Andric     return ChildIteratorType(N->CalleeEdges.begin(), &GetCallee);
236206c3fb27SDimitry Andric   }
236306c3fb27SDimitry Andric 
236406c3fb27SDimitry Andric   static ChildIteratorType child_end(NodeRef N) {
236506c3fb27SDimitry Andric     return ChildIteratorType(N->CalleeEdges.end(), &GetCallee);
236606c3fb27SDimitry Andric   }
236706c3fb27SDimitry Andric };
236806c3fb27SDimitry Andric 
236906c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
237006c3fb27SDimitry Andric struct DOTGraphTraits<const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>
237106c3fb27SDimitry Andric     : public DefaultDOTGraphTraits {
237206c3fb27SDimitry Andric   DOTGraphTraits(bool IsSimple = false) : DefaultDOTGraphTraits(IsSimple) {}
237306c3fb27SDimitry Andric 
237406c3fb27SDimitry Andric   using GraphType = const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
237506c3fb27SDimitry Andric   using GTraits = GraphTraits<GraphType>;
237606c3fb27SDimitry Andric   using NodeRef = typename GTraits::NodeRef;
237706c3fb27SDimitry Andric   using ChildIteratorType = typename GTraits::ChildIteratorType;
237806c3fb27SDimitry Andric 
237906c3fb27SDimitry Andric   static std::string getNodeLabel(NodeRef Node, GraphType G) {
238006c3fb27SDimitry Andric     std::string LabelString =
238106c3fb27SDimitry Andric         (Twine("OrigId: ") + (Node->IsAllocation ? "Alloc" : "") +
238206c3fb27SDimitry Andric          Twine(Node->OrigStackOrAllocId))
238306c3fb27SDimitry Andric             .str();
238406c3fb27SDimitry Andric     LabelString += "\n";
238506c3fb27SDimitry Andric     if (Node->hasCall()) {
238606c3fb27SDimitry Andric       auto Func = G->NodeToCallingFunc.find(Node);
238706c3fb27SDimitry Andric       assert(Func != G->NodeToCallingFunc.end());
238806c3fb27SDimitry Andric       LabelString +=
238906c3fb27SDimitry Andric           G->getLabel(Func->second, Node->Call.call(), Node->Call.cloneNo());
239006c3fb27SDimitry Andric     } else {
239106c3fb27SDimitry Andric       LabelString += "null call";
239206c3fb27SDimitry Andric       if (Node->Recursive)
239306c3fb27SDimitry Andric         LabelString += " (recursive)";
239406c3fb27SDimitry Andric       else
239506c3fb27SDimitry Andric         LabelString += " (external)";
239606c3fb27SDimitry Andric     }
239706c3fb27SDimitry Andric     return LabelString;
239806c3fb27SDimitry Andric   }
239906c3fb27SDimitry Andric 
240006c3fb27SDimitry Andric   static std::string getNodeAttributes(NodeRef Node, GraphType) {
240106c3fb27SDimitry Andric     std::string AttributeString = (Twine("tooltip=\"") + getNodeId(Node) + " " +
2402*0fca6ea1SDimitry Andric                                    getContextIds(Node->getContextIds()) + "\"")
240306c3fb27SDimitry Andric                                       .str();
240406c3fb27SDimitry Andric     AttributeString +=
240506c3fb27SDimitry Andric         (Twine(",fillcolor=\"") + getColor(Node->AllocTypes) + "\"").str();
240606c3fb27SDimitry Andric     AttributeString += ",style=\"filled\"";
240706c3fb27SDimitry Andric     if (Node->CloneOf) {
240806c3fb27SDimitry Andric       AttributeString += ",color=\"blue\"";
240906c3fb27SDimitry Andric       AttributeString += ",style=\"filled,bold,dashed\"";
241006c3fb27SDimitry Andric     } else
241106c3fb27SDimitry Andric       AttributeString += ",style=\"filled\"";
241206c3fb27SDimitry Andric     return AttributeString;
241306c3fb27SDimitry Andric   }
241406c3fb27SDimitry Andric 
241506c3fb27SDimitry Andric   static std::string getEdgeAttributes(NodeRef, ChildIteratorType ChildIter,
241606c3fb27SDimitry Andric                                        GraphType) {
241706c3fb27SDimitry Andric     auto &Edge = *(ChildIter.getCurrent());
241806c3fb27SDimitry Andric     return (Twine("tooltip=\"") + getContextIds(Edge->ContextIds) + "\"" +
241906c3fb27SDimitry Andric             Twine(",fillcolor=\"") + getColor(Edge->AllocTypes) + "\"")
242006c3fb27SDimitry Andric         .str();
242106c3fb27SDimitry Andric   }
242206c3fb27SDimitry Andric 
242306c3fb27SDimitry Andric   // Since the NodeOwners list includes nodes that are no longer connected to
242406c3fb27SDimitry Andric   // the graph, skip them here.
242506c3fb27SDimitry Andric   static bool isNodeHidden(NodeRef Node, GraphType) {
242606c3fb27SDimitry Andric     return Node->isRemoved();
242706c3fb27SDimitry Andric   }
242806c3fb27SDimitry Andric 
242906c3fb27SDimitry Andric private:
243006c3fb27SDimitry Andric   static std::string getContextIds(const DenseSet<uint32_t> &ContextIds) {
243106c3fb27SDimitry Andric     std::string IdString = "ContextIds:";
243206c3fb27SDimitry Andric     if (ContextIds.size() < 100) {
243306c3fb27SDimitry Andric       std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
243406c3fb27SDimitry Andric       std::sort(SortedIds.begin(), SortedIds.end());
243506c3fb27SDimitry Andric       for (auto Id : SortedIds)
243606c3fb27SDimitry Andric         IdString += (" " + Twine(Id)).str();
243706c3fb27SDimitry Andric     } else {
243806c3fb27SDimitry Andric       IdString += (" (" + Twine(ContextIds.size()) + " ids)").str();
243906c3fb27SDimitry Andric     }
244006c3fb27SDimitry Andric     return IdString;
244106c3fb27SDimitry Andric   }
244206c3fb27SDimitry Andric 
244306c3fb27SDimitry Andric   static std::string getColor(uint8_t AllocTypes) {
244406c3fb27SDimitry Andric     if (AllocTypes == (uint8_t)AllocationType::NotCold)
244506c3fb27SDimitry Andric       // Color "brown1" actually looks like a lighter red.
244606c3fb27SDimitry Andric       return "brown1";
244706c3fb27SDimitry Andric     if (AllocTypes == (uint8_t)AllocationType::Cold)
244806c3fb27SDimitry Andric       return "cyan";
244906c3fb27SDimitry Andric     if (AllocTypes ==
245006c3fb27SDimitry Andric         ((uint8_t)AllocationType::NotCold | (uint8_t)AllocationType::Cold))
245106c3fb27SDimitry Andric       // Lighter purple.
245206c3fb27SDimitry Andric       return "mediumorchid1";
245306c3fb27SDimitry Andric     return "gray";
245406c3fb27SDimitry Andric   }
245506c3fb27SDimitry Andric 
245606c3fb27SDimitry Andric   static std::string getNodeId(NodeRef Node) {
245706c3fb27SDimitry Andric     std::stringstream SStream;
245806c3fb27SDimitry Andric     SStream << std::hex << "N0x" << (unsigned long long)Node;
245906c3fb27SDimitry Andric     std::string Result = SStream.str();
246006c3fb27SDimitry Andric     return Result;
246106c3fb27SDimitry Andric   }
246206c3fb27SDimitry Andric };
246306c3fb27SDimitry Andric 
246406c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
246506c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::exportToDot(
246606c3fb27SDimitry Andric     std::string Label) const {
246706c3fb27SDimitry Andric   WriteGraph(this, "", false, Label,
246806c3fb27SDimitry Andric              DotFilePathPrefix + "ccg." + Label + ".dot");
246906c3fb27SDimitry Andric }
247006c3fb27SDimitry Andric 
247106c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
247206c3fb27SDimitry Andric typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
247306c3fb27SDimitry Andric CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::moveEdgeToNewCalleeClone(
2474*0fca6ea1SDimitry Andric     const std::shared_ptr<ContextEdge> &Edge, EdgeIter *CallerEdgeI,
2475*0fca6ea1SDimitry Andric     DenseSet<uint32_t> ContextIdsToMove) {
247606c3fb27SDimitry Andric   ContextNode *Node = Edge->Callee;
247706c3fb27SDimitry Andric   NodeOwner.push_back(
247806c3fb27SDimitry Andric       std::make_unique<ContextNode>(Node->IsAllocation, Node->Call));
247906c3fb27SDimitry Andric   ContextNode *Clone = NodeOwner.back().get();
248006c3fb27SDimitry Andric   Node->addClone(Clone);
248106c3fb27SDimitry Andric   assert(NodeToCallingFunc.count(Node));
248206c3fb27SDimitry Andric   NodeToCallingFunc[Clone] = NodeToCallingFunc[Node];
2483*0fca6ea1SDimitry Andric   moveEdgeToExistingCalleeClone(Edge, Clone, CallerEdgeI, /*NewClone=*/true,
2484*0fca6ea1SDimitry Andric                                 ContextIdsToMove);
248506c3fb27SDimitry Andric   return Clone;
248606c3fb27SDimitry Andric }
248706c3fb27SDimitry Andric 
248806c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
248906c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
249006c3fb27SDimitry Andric     moveEdgeToExistingCalleeClone(const std::shared_ptr<ContextEdge> &Edge,
249106c3fb27SDimitry Andric                                   ContextNode *NewCallee, EdgeIter *CallerEdgeI,
2492*0fca6ea1SDimitry Andric                                   bool NewClone,
2493*0fca6ea1SDimitry Andric                                   DenseSet<uint32_t> ContextIdsToMove) {
249406c3fb27SDimitry Andric   // NewCallee and Edge's current callee must be clones of the same original
249506c3fb27SDimitry Andric   // node (Edge's current callee may be the original node too).
249606c3fb27SDimitry Andric   assert(NewCallee->getOrigNode() == Edge->Callee->getOrigNode());
2497*0fca6ea1SDimitry Andric 
249806c3fb27SDimitry Andric   ContextNode *OldCallee = Edge->Callee;
2499*0fca6ea1SDimitry Andric 
2500*0fca6ea1SDimitry Andric   // We might already have an edge to the new callee from earlier cloning for a
2501*0fca6ea1SDimitry Andric   // different allocation. If one exists we will reuse it.
2502*0fca6ea1SDimitry Andric   auto ExistingEdgeToNewCallee = NewCallee->findEdgeFromCaller(Edge->Caller);
2503*0fca6ea1SDimitry Andric 
2504*0fca6ea1SDimitry Andric   // Callers will pass an empty ContextIdsToMove set when they want to move the
2505*0fca6ea1SDimitry Andric   // edge. Copy in Edge's ids for simplicity.
2506*0fca6ea1SDimitry Andric   if (ContextIdsToMove.empty())
2507*0fca6ea1SDimitry Andric     ContextIdsToMove = Edge->getContextIds();
2508*0fca6ea1SDimitry Andric 
2509*0fca6ea1SDimitry Andric   // If we are moving all of Edge's ids, then just move the whole Edge.
2510*0fca6ea1SDimitry Andric   // Otherwise only move the specified subset, to a new edge if needed.
2511*0fca6ea1SDimitry Andric   if (Edge->getContextIds().size() == ContextIdsToMove.size()) {
2512*0fca6ea1SDimitry Andric     // Moving the whole Edge.
251306c3fb27SDimitry Andric     if (CallerEdgeI)
251406c3fb27SDimitry Andric       *CallerEdgeI = OldCallee->CallerEdges.erase(*CallerEdgeI);
251506c3fb27SDimitry Andric     else
251606c3fb27SDimitry Andric       OldCallee->eraseCallerEdge(Edge.get());
2517*0fca6ea1SDimitry Andric     if (ExistingEdgeToNewCallee) {
2518*0fca6ea1SDimitry Andric       // Since we already have an edge to NewCallee, simply move the ids
2519*0fca6ea1SDimitry Andric       // onto it, and remove the existing Edge.
2520*0fca6ea1SDimitry Andric       ExistingEdgeToNewCallee->getContextIds().insert(ContextIdsToMove.begin(),
2521*0fca6ea1SDimitry Andric                                                       ContextIdsToMove.end());
2522*0fca6ea1SDimitry Andric       ExistingEdgeToNewCallee->AllocTypes |= Edge->AllocTypes;
2523*0fca6ea1SDimitry Andric       assert(Edge->ContextIds == ContextIdsToMove);
2524*0fca6ea1SDimitry Andric       Edge->ContextIds.clear();
2525*0fca6ea1SDimitry Andric       Edge->AllocTypes = (uint8_t)AllocationType::None;
2526*0fca6ea1SDimitry Andric       Edge->Caller->eraseCalleeEdge(Edge.get());
2527*0fca6ea1SDimitry Andric     } else {
2528*0fca6ea1SDimitry Andric       // Otherwise just reconnect Edge to NewCallee.
252906c3fb27SDimitry Andric       Edge->Callee = NewCallee;
253006c3fb27SDimitry Andric       NewCallee->CallerEdges.push_back(Edge);
2531*0fca6ea1SDimitry Andric       // Don't need to update Edge's context ids since we are simply
2532*0fca6ea1SDimitry Andric       // reconnecting it.
2533*0fca6ea1SDimitry Andric     }
2534*0fca6ea1SDimitry Andric     // In either case, need to update the alloc types on New Callee.
253506c3fb27SDimitry Andric     NewCallee->AllocTypes |= Edge->AllocTypes;
2536*0fca6ea1SDimitry Andric   } else {
2537*0fca6ea1SDimitry Andric     // Only moving a subset of Edge's ids.
2538*0fca6ea1SDimitry Andric     if (CallerEdgeI)
2539*0fca6ea1SDimitry Andric       ++CallerEdgeI;
2540*0fca6ea1SDimitry Andric     // Compute the alloc type of the subset of ids being moved.
2541*0fca6ea1SDimitry Andric     auto CallerEdgeAllocType = computeAllocType(ContextIdsToMove);
2542*0fca6ea1SDimitry Andric     if (ExistingEdgeToNewCallee) {
2543*0fca6ea1SDimitry Andric       // Since we already have an edge to NewCallee, simply move the ids
2544*0fca6ea1SDimitry Andric       // onto it.
2545*0fca6ea1SDimitry Andric       ExistingEdgeToNewCallee->getContextIds().insert(ContextIdsToMove.begin(),
2546*0fca6ea1SDimitry Andric                                                       ContextIdsToMove.end());
2547*0fca6ea1SDimitry Andric       ExistingEdgeToNewCallee->AllocTypes |= CallerEdgeAllocType;
2548*0fca6ea1SDimitry Andric     } else {
2549*0fca6ea1SDimitry Andric       // Otherwise, create a new edge to NewCallee for the ids being moved.
2550*0fca6ea1SDimitry Andric       auto NewEdge = std::make_shared<ContextEdge>(
2551*0fca6ea1SDimitry Andric           NewCallee, Edge->Caller, CallerEdgeAllocType, ContextIdsToMove);
2552*0fca6ea1SDimitry Andric       Edge->Caller->CalleeEdges.push_back(NewEdge);
2553*0fca6ea1SDimitry Andric       NewCallee->CallerEdges.push_back(NewEdge);
2554*0fca6ea1SDimitry Andric     }
2555*0fca6ea1SDimitry Andric     // In either case, need to update the alloc types on NewCallee, and remove
2556*0fca6ea1SDimitry Andric     // those ids and update the alloc type on the original Edge.
2557*0fca6ea1SDimitry Andric     NewCallee->AllocTypes |= CallerEdgeAllocType;
2558*0fca6ea1SDimitry Andric     set_subtract(Edge->ContextIds, ContextIdsToMove);
2559*0fca6ea1SDimitry Andric     Edge->AllocTypes = computeAllocType(Edge->ContextIds);
2560*0fca6ea1SDimitry Andric   }
256106c3fb27SDimitry Andric   // Now walk the old callee node's callee edges and move Edge's context ids
256206c3fb27SDimitry Andric   // over to the corresponding edge into the clone (which is created here if
256306c3fb27SDimitry Andric   // this is a newly created clone).
256406c3fb27SDimitry Andric   for (auto &OldCalleeEdge : OldCallee->CalleeEdges) {
256506c3fb27SDimitry Andric     // The context ids moving to the new callee are the subset of this edge's
256606c3fb27SDimitry Andric     // context ids and the context ids on the caller edge being moved.
256706c3fb27SDimitry Andric     DenseSet<uint32_t> EdgeContextIdsToMove =
2568*0fca6ea1SDimitry Andric         set_intersection(OldCalleeEdge->getContextIds(), ContextIdsToMove);
256906c3fb27SDimitry Andric     set_subtract(OldCalleeEdge->getContextIds(), EdgeContextIdsToMove);
257006c3fb27SDimitry Andric     OldCalleeEdge->AllocTypes =
257106c3fb27SDimitry Andric         computeAllocType(OldCalleeEdge->getContextIds());
257206c3fb27SDimitry Andric     if (!NewClone) {
257306c3fb27SDimitry Andric       // Update context ids / alloc type on corresponding edge to NewCallee.
257406c3fb27SDimitry Andric       // There is a chance this may not exist if we are reusing an existing
257506c3fb27SDimitry Andric       // clone, specifically during function assignment, where we would have
257606c3fb27SDimitry Andric       // removed none type edges after creating the clone. If we can't find
257706c3fb27SDimitry Andric       // a corresponding edge there, fall through to the cloning below.
257806c3fb27SDimitry Andric       if (auto *NewCalleeEdge =
257906c3fb27SDimitry Andric               NewCallee->findEdgeFromCallee(OldCalleeEdge->Callee)) {
258006c3fb27SDimitry Andric         NewCalleeEdge->getContextIds().insert(EdgeContextIdsToMove.begin(),
258106c3fb27SDimitry Andric                                               EdgeContextIdsToMove.end());
258206c3fb27SDimitry Andric         NewCalleeEdge->AllocTypes |= computeAllocType(EdgeContextIdsToMove);
258306c3fb27SDimitry Andric         continue;
258406c3fb27SDimitry Andric       }
258506c3fb27SDimitry Andric     }
258606c3fb27SDimitry Andric     auto NewEdge = std::make_shared<ContextEdge>(
258706c3fb27SDimitry Andric         OldCalleeEdge->Callee, NewCallee,
258806c3fb27SDimitry Andric         computeAllocType(EdgeContextIdsToMove), EdgeContextIdsToMove);
258906c3fb27SDimitry Andric     NewCallee->CalleeEdges.push_back(NewEdge);
259006c3fb27SDimitry Andric     NewEdge->Callee->CallerEdges.push_back(NewEdge);
259106c3fb27SDimitry Andric   }
2592*0fca6ea1SDimitry Andric   // Recompute the node alloc type now that its callee edges have been
2593*0fca6ea1SDimitry Andric   // updated (since we will compute from those edges).
2594*0fca6ea1SDimitry Andric   OldCallee->AllocTypes = OldCallee->computeAllocType();
2595*0fca6ea1SDimitry Andric   // OldCallee alloc type should be None iff its context id set is now empty.
2596*0fca6ea1SDimitry Andric   assert((OldCallee->AllocTypes == (uint8_t)AllocationType::None) ==
2597*0fca6ea1SDimitry Andric          OldCallee->emptyContextIds());
259806c3fb27SDimitry Andric   if (VerifyCCG) {
259906c3fb27SDimitry Andric     checkNode<DerivedCCG, FuncTy, CallTy>(OldCallee, /*CheckEdges=*/false);
260006c3fb27SDimitry Andric     checkNode<DerivedCCG, FuncTy, CallTy>(NewCallee, /*CheckEdges=*/false);
260106c3fb27SDimitry Andric     for (const auto &OldCalleeEdge : OldCallee->CalleeEdges)
260206c3fb27SDimitry Andric       checkNode<DerivedCCG, FuncTy, CallTy>(OldCalleeEdge->Callee,
260306c3fb27SDimitry Andric                                             /*CheckEdges=*/false);
260406c3fb27SDimitry Andric     for (const auto &NewCalleeEdge : NewCallee->CalleeEdges)
260506c3fb27SDimitry Andric       checkNode<DerivedCCG, FuncTy, CallTy>(NewCalleeEdge->Callee,
260606c3fb27SDimitry Andric                                             /*CheckEdges=*/false);
260706c3fb27SDimitry Andric   }
260806c3fb27SDimitry Andric }
260906c3fb27SDimitry Andric 
261006c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
2611*0fca6ea1SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
2612*0fca6ea1SDimitry Andric     recursivelyRemoveNoneTypeCalleeEdges(
2613*0fca6ea1SDimitry Andric         ContextNode *Node, DenseSet<const ContextNode *> &Visited) {
2614*0fca6ea1SDimitry Andric   auto Inserted = Visited.insert(Node);
2615*0fca6ea1SDimitry Andric   if (!Inserted.second)
2616*0fca6ea1SDimitry Andric     return;
2617*0fca6ea1SDimitry Andric 
2618*0fca6ea1SDimitry Andric   removeNoneTypeCalleeEdges(Node);
2619*0fca6ea1SDimitry Andric 
2620*0fca6ea1SDimitry Andric   for (auto *Clone : Node->Clones)
2621*0fca6ea1SDimitry Andric     recursivelyRemoveNoneTypeCalleeEdges(Clone, Visited);
2622*0fca6ea1SDimitry Andric 
2623*0fca6ea1SDimitry Andric   // The recursive call may remove some of this Node's caller edges.
2624*0fca6ea1SDimitry Andric   // Iterate over a copy and skip any that were removed.
2625*0fca6ea1SDimitry Andric   auto CallerEdges = Node->CallerEdges;
2626*0fca6ea1SDimitry Andric   for (auto &Edge : CallerEdges) {
2627*0fca6ea1SDimitry Andric     // Skip any that have been removed by an earlier recursive call.
2628*0fca6ea1SDimitry Andric     if (Edge->Callee == nullptr && Edge->Caller == nullptr) {
2629*0fca6ea1SDimitry Andric       assert(!is_contained(Node->CallerEdges, Edge));
2630*0fca6ea1SDimitry Andric       continue;
2631*0fca6ea1SDimitry Andric     }
2632*0fca6ea1SDimitry Andric     recursivelyRemoveNoneTypeCalleeEdges(Edge->Caller, Visited);
2633*0fca6ea1SDimitry Andric   }
2634*0fca6ea1SDimitry Andric }
2635*0fca6ea1SDimitry Andric 
2636*0fca6ea1SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
263706c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones() {
263806c3fb27SDimitry Andric   DenseSet<const ContextNode *> Visited;
2639*0fca6ea1SDimitry Andric   for (auto &Entry : AllocationCallToContextNodeMap) {
2640*0fca6ea1SDimitry Andric     Visited.clear();
2641*0fca6ea1SDimitry Andric     identifyClones(Entry.second, Visited, Entry.second->getContextIds());
2642*0fca6ea1SDimitry Andric   }
2643*0fca6ea1SDimitry Andric   Visited.clear();
264406c3fb27SDimitry Andric   for (auto &Entry : AllocationCallToContextNodeMap)
2645*0fca6ea1SDimitry Andric     recursivelyRemoveNoneTypeCalleeEdges(Entry.second, Visited);
2646*0fca6ea1SDimitry Andric   if (VerifyCCG)
2647*0fca6ea1SDimitry Andric     check();
264806c3fb27SDimitry Andric }
264906c3fb27SDimitry Andric 
265006c3fb27SDimitry Andric // helper function to check an AllocType is cold or notcold or both.
265106c3fb27SDimitry Andric bool checkColdOrNotCold(uint8_t AllocType) {
265206c3fb27SDimitry Andric   return (AllocType == (uint8_t)AllocationType::Cold) ||
265306c3fb27SDimitry Andric          (AllocType == (uint8_t)AllocationType::NotCold) ||
265406c3fb27SDimitry Andric          (AllocType ==
265506c3fb27SDimitry Andric           ((uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold));
265606c3fb27SDimitry Andric }
265706c3fb27SDimitry Andric 
265806c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
265906c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones(
2660*0fca6ea1SDimitry Andric     ContextNode *Node, DenseSet<const ContextNode *> &Visited,
2661*0fca6ea1SDimitry Andric     const DenseSet<uint32_t> &AllocContextIds) {
266206c3fb27SDimitry Andric   if (VerifyNodes)
2663*0fca6ea1SDimitry Andric     checkNode<DerivedCCG, FuncTy, CallTy>(Node, /*CheckEdges=*/false);
266406c3fb27SDimitry Andric   assert(!Node->CloneOf);
266506c3fb27SDimitry Andric 
266606c3fb27SDimitry Andric   // If Node as a null call, then either it wasn't found in the module (regular
266706c3fb27SDimitry Andric   // LTO) or summary index (ThinLTO), or there were other conditions blocking
266806c3fb27SDimitry Andric   // cloning (e.g. recursion, calls multiple targets, etc).
266906c3fb27SDimitry Andric   // Do this here so that we don't try to recursively clone callers below, which
267006c3fb27SDimitry Andric   // isn't useful at least for this node.
267106c3fb27SDimitry Andric   if (!Node->hasCall())
267206c3fb27SDimitry Andric     return;
267306c3fb27SDimitry Andric 
267406c3fb27SDimitry Andric #ifndef NDEBUG
267506c3fb27SDimitry Andric   auto Insert =
267606c3fb27SDimitry Andric #endif
267706c3fb27SDimitry Andric       Visited.insert(Node);
267806c3fb27SDimitry Andric   // We should not have visited this node yet.
267906c3fb27SDimitry Andric   assert(Insert.second);
268006c3fb27SDimitry Andric   // The recursive call to identifyClones may delete the current edge from the
268106c3fb27SDimitry Andric   // CallerEdges vector. Make a copy and iterate on that, simpler than passing
268206c3fb27SDimitry Andric   // in an iterator and having recursive call erase from it. Other edges may
268306c3fb27SDimitry Andric   // also get removed during the recursion, which will have null Callee and
268406c3fb27SDimitry Andric   // Caller pointers (and are deleted later), so we skip those below.
268506c3fb27SDimitry Andric   {
268606c3fb27SDimitry Andric     auto CallerEdges = Node->CallerEdges;
268706c3fb27SDimitry Andric     for (auto &Edge : CallerEdges) {
268806c3fb27SDimitry Andric       // Skip any that have been removed by an earlier recursive call.
268906c3fb27SDimitry Andric       if (Edge->Callee == nullptr && Edge->Caller == nullptr) {
26905f757f3fSDimitry Andric         assert(!llvm::count(Node->CallerEdges, Edge));
269106c3fb27SDimitry Andric         continue;
269206c3fb27SDimitry Andric       }
269306c3fb27SDimitry Andric       // Ignore any caller we previously visited via another edge.
269406c3fb27SDimitry Andric       if (!Visited.count(Edge->Caller) && !Edge->Caller->CloneOf) {
2695*0fca6ea1SDimitry Andric         identifyClones(Edge->Caller, Visited, AllocContextIds);
269606c3fb27SDimitry Andric       }
269706c3fb27SDimitry Andric     }
269806c3fb27SDimitry Andric   }
269906c3fb27SDimitry Andric 
270006c3fb27SDimitry Andric   // Check if we reached an unambiguous call or have have only a single caller.
270106c3fb27SDimitry Andric   if (hasSingleAllocType(Node->AllocTypes) || Node->CallerEdges.size() <= 1)
270206c3fb27SDimitry Andric     return;
270306c3fb27SDimitry Andric 
270406c3fb27SDimitry Andric   // We need to clone.
270506c3fb27SDimitry Andric 
270606c3fb27SDimitry Andric   // Try to keep the original version as alloc type NotCold. This will make
270706c3fb27SDimitry Andric   // cases with indirect calls or any other situation with an unknown call to
270806c3fb27SDimitry Andric   // the original function get the default behavior. We do this by sorting the
270906c3fb27SDimitry Andric   // CallerEdges of the Node we will clone by alloc type.
271006c3fb27SDimitry Andric   //
271106c3fb27SDimitry Andric   // Give NotCold edge the lowest sort priority so those edges are at the end of
271206c3fb27SDimitry Andric   // the caller edges vector, and stay on the original version (since the below
271306c3fb27SDimitry Andric   // code clones greedily until it finds all remaining edges have the same type
271406c3fb27SDimitry Andric   // and leaves the remaining ones on the original Node).
271506c3fb27SDimitry Andric   //
271606c3fb27SDimitry Andric   // We shouldn't actually have any None type edges, so the sorting priority for
271706c3fb27SDimitry Andric   // that is arbitrary, and we assert in that case below.
271806c3fb27SDimitry Andric   const unsigned AllocTypeCloningPriority[] = {/*None*/ 3, /*NotCold*/ 4,
271906c3fb27SDimitry Andric                                                /*Cold*/ 1,
272006c3fb27SDimitry Andric                                                /*NotColdCold*/ 2};
272106c3fb27SDimitry Andric   std::stable_sort(Node->CallerEdges.begin(), Node->CallerEdges.end(),
272206c3fb27SDimitry Andric                    [&](const std::shared_ptr<ContextEdge> &A,
272306c3fb27SDimitry Andric                        const std::shared_ptr<ContextEdge> &B) {
2724*0fca6ea1SDimitry Andric                      // Nodes with non-empty context ids should be sorted before
2725*0fca6ea1SDimitry Andric                      // those with empty context ids.
2726*0fca6ea1SDimitry Andric                      if (A->ContextIds.empty())
2727*0fca6ea1SDimitry Andric                        // Either B ContextIds are non-empty (in which case we
2728*0fca6ea1SDimitry Andric                        // should return false because B < A), or B ContextIds
2729*0fca6ea1SDimitry Andric                        // are empty, in which case they are equal, and we should
2730*0fca6ea1SDimitry Andric                        // maintain the original relative ordering.
2731*0fca6ea1SDimitry Andric                        return false;
2732*0fca6ea1SDimitry Andric                      if (B->ContextIds.empty())
2733*0fca6ea1SDimitry Andric                        return true;
273406c3fb27SDimitry Andric 
273506c3fb27SDimitry Andric                      if (A->AllocTypes == B->AllocTypes)
273606c3fb27SDimitry Andric                        // Use the first context id for each edge as a
273706c3fb27SDimitry Andric                        // tie-breaker.
273806c3fb27SDimitry Andric                        return *A->ContextIds.begin() < *B->ContextIds.begin();
273906c3fb27SDimitry Andric                      return AllocTypeCloningPriority[A->AllocTypes] <
274006c3fb27SDimitry Andric                             AllocTypeCloningPriority[B->AllocTypes];
274106c3fb27SDimitry Andric                    });
274206c3fb27SDimitry Andric 
274306c3fb27SDimitry Andric   assert(Node->AllocTypes != (uint8_t)AllocationType::None);
274406c3fb27SDimitry Andric 
274506c3fb27SDimitry Andric   // Iterate until we find no more opportunities for disambiguating the alloc
274606c3fb27SDimitry Andric   // types via cloning. In most cases this loop will terminate once the Node
274706c3fb27SDimitry Andric   // has a single allocation type, in which case no more cloning is needed.
274806c3fb27SDimitry Andric   // We need to be able to remove Edge from CallerEdges, so need to adjust
274906c3fb27SDimitry Andric   // iterator inside the loop.
275006c3fb27SDimitry Andric   for (auto EI = Node->CallerEdges.begin(); EI != Node->CallerEdges.end();) {
275106c3fb27SDimitry Andric     auto CallerEdge = *EI;
275206c3fb27SDimitry Andric 
275306c3fb27SDimitry Andric     // See if cloning the prior caller edge left this node with a single alloc
275406c3fb27SDimitry Andric     // type or a single caller. In that case no more cloning of Node is needed.
275506c3fb27SDimitry Andric     if (hasSingleAllocType(Node->AllocTypes) || Node->CallerEdges.size() <= 1)
275606c3fb27SDimitry Andric       break;
275706c3fb27SDimitry Andric 
2758*0fca6ea1SDimitry Andric     // Only need to process the ids along this edge pertaining to the given
2759*0fca6ea1SDimitry Andric     // allocation.
2760*0fca6ea1SDimitry Andric     auto CallerEdgeContextsForAlloc =
2761*0fca6ea1SDimitry Andric         set_intersection(CallerEdge->getContextIds(), AllocContextIds);
2762*0fca6ea1SDimitry Andric     if (CallerEdgeContextsForAlloc.empty()) {
2763*0fca6ea1SDimitry Andric       ++EI;
2764*0fca6ea1SDimitry Andric       continue;
2765*0fca6ea1SDimitry Andric     }
2766*0fca6ea1SDimitry Andric     auto CallerAllocTypeForAlloc = computeAllocType(CallerEdgeContextsForAlloc);
2767*0fca6ea1SDimitry Andric 
276806c3fb27SDimitry Andric     // Compute the node callee edge alloc types corresponding to the context ids
276906c3fb27SDimitry Andric     // for this caller edge.
277006c3fb27SDimitry Andric     std::vector<uint8_t> CalleeEdgeAllocTypesForCallerEdge;
277106c3fb27SDimitry Andric     CalleeEdgeAllocTypesForCallerEdge.reserve(Node->CalleeEdges.size());
277206c3fb27SDimitry Andric     for (auto &CalleeEdge : Node->CalleeEdges)
277306c3fb27SDimitry Andric       CalleeEdgeAllocTypesForCallerEdge.push_back(intersectAllocTypes(
2774*0fca6ea1SDimitry Andric           CalleeEdge->getContextIds(), CallerEdgeContextsForAlloc));
277506c3fb27SDimitry Andric 
277606c3fb27SDimitry Andric     // Don't clone if doing so will not disambiguate any alloc types amongst
277706c3fb27SDimitry Andric     // caller edges (including the callee edges that would be cloned).
277806c3fb27SDimitry Andric     // Otherwise we will simply move all edges to the clone.
277906c3fb27SDimitry Andric     //
278006c3fb27SDimitry Andric     // First check if by cloning we will disambiguate the caller allocation
278106c3fb27SDimitry Andric     // type from node's allocation type. Query allocTypeToUse so that we don't
278206c3fb27SDimitry Andric     // bother cloning to distinguish NotCold+Cold from NotCold. Note that
278306c3fb27SDimitry Andric     // neither of these should be None type.
278406c3fb27SDimitry Andric     //
278506c3fb27SDimitry Andric     // Then check if by cloning node at least one of the callee edges will be
278606c3fb27SDimitry Andric     // disambiguated by splitting out different context ids.
278706c3fb27SDimitry Andric     assert(CallerEdge->AllocTypes != (uint8_t)AllocationType::None);
278806c3fb27SDimitry Andric     assert(Node->AllocTypes != (uint8_t)AllocationType::None);
2789*0fca6ea1SDimitry Andric     if (allocTypeToUse(CallerAllocTypeForAlloc) ==
279006c3fb27SDimitry Andric             allocTypeToUse(Node->AllocTypes) &&
279106c3fb27SDimitry Andric         allocTypesMatch<DerivedCCG, FuncTy, CallTy>(
279206c3fb27SDimitry Andric             CalleeEdgeAllocTypesForCallerEdge, Node->CalleeEdges)) {
279306c3fb27SDimitry Andric       ++EI;
279406c3fb27SDimitry Andric       continue;
279506c3fb27SDimitry Andric     }
279606c3fb27SDimitry Andric 
279706c3fb27SDimitry Andric     // First see if we can use an existing clone. Check each clone and its
279806c3fb27SDimitry Andric     // callee edges for matching alloc types.
279906c3fb27SDimitry Andric     ContextNode *Clone = nullptr;
280006c3fb27SDimitry Andric     for (auto *CurClone : Node->Clones) {
280106c3fb27SDimitry Andric       if (allocTypeToUse(CurClone->AllocTypes) !=
2802*0fca6ea1SDimitry Andric           allocTypeToUse(CallerAllocTypeForAlloc))
280306c3fb27SDimitry Andric         continue;
280406c3fb27SDimitry Andric 
280506c3fb27SDimitry Andric       if (!allocTypesMatch<DerivedCCG, FuncTy, CallTy>(
280606c3fb27SDimitry Andric               CalleeEdgeAllocTypesForCallerEdge, CurClone->CalleeEdges))
280706c3fb27SDimitry Andric         continue;
280806c3fb27SDimitry Andric       Clone = CurClone;
280906c3fb27SDimitry Andric       break;
281006c3fb27SDimitry Andric     }
281106c3fb27SDimitry Andric 
281206c3fb27SDimitry Andric     // The edge iterator is adjusted when we move the CallerEdge to the clone.
281306c3fb27SDimitry Andric     if (Clone)
2814*0fca6ea1SDimitry Andric       moveEdgeToExistingCalleeClone(CallerEdge, Clone, &EI, /*NewClone=*/false,
2815*0fca6ea1SDimitry Andric                                     CallerEdgeContextsForAlloc);
281606c3fb27SDimitry Andric     else
2817*0fca6ea1SDimitry Andric       Clone =
2818*0fca6ea1SDimitry Andric           moveEdgeToNewCalleeClone(CallerEdge, &EI, CallerEdgeContextsForAlloc);
281906c3fb27SDimitry Andric 
282006c3fb27SDimitry Andric     assert(EI == Node->CallerEdges.end() ||
282106c3fb27SDimitry Andric            Node->AllocTypes != (uint8_t)AllocationType::None);
282206c3fb27SDimitry Andric     // Sanity check that no alloc types on clone or its edges are None.
282306c3fb27SDimitry Andric     assert(Clone->AllocTypes != (uint8_t)AllocationType::None);
282406c3fb27SDimitry Andric   }
282506c3fb27SDimitry Andric 
282606c3fb27SDimitry Andric   // We should still have some context ids on the original Node.
2827*0fca6ea1SDimitry Andric   assert(!Node->emptyContextIds());
282806c3fb27SDimitry Andric 
282906c3fb27SDimitry Andric   // Sanity check that no alloc types on node or edges are None.
283006c3fb27SDimitry Andric   assert(Node->AllocTypes != (uint8_t)AllocationType::None);
283106c3fb27SDimitry Andric 
283206c3fb27SDimitry Andric   if (VerifyNodes)
2833*0fca6ea1SDimitry Andric     checkNode<DerivedCCG, FuncTy, CallTy>(Node, /*CheckEdges=*/false);
283406c3fb27SDimitry Andric }
283506c3fb27SDimitry Andric 
283606c3fb27SDimitry Andric void ModuleCallsiteContextGraph::updateAllocationCall(
283706c3fb27SDimitry Andric     CallInfo &Call, AllocationType AllocType) {
283806c3fb27SDimitry Andric   std::string AllocTypeString = getAllocTypeAttributeString(AllocType);
283906c3fb27SDimitry Andric   auto A = llvm::Attribute::get(Call.call()->getFunction()->getContext(),
284006c3fb27SDimitry Andric                                 "memprof", AllocTypeString);
284106c3fb27SDimitry Andric   cast<CallBase>(Call.call())->addFnAttr(A);
284206c3fb27SDimitry Andric   OREGetter(Call.call()->getFunction())
284306c3fb27SDimitry Andric       .emit(OptimizationRemark(DEBUG_TYPE, "MemprofAttribute", Call.call())
284406c3fb27SDimitry Andric             << ore::NV("AllocationCall", Call.call()) << " in clone "
284506c3fb27SDimitry Andric             << ore::NV("Caller", Call.call()->getFunction())
284606c3fb27SDimitry Andric             << " marked with memprof allocation attribute "
284706c3fb27SDimitry Andric             << ore::NV("Attribute", AllocTypeString));
284806c3fb27SDimitry Andric }
284906c3fb27SDimitry Andric 
285006c3fb27SDimitry Andric void IndexCallsiteContextGraph::updateAllocationCall(CallInfo &Call,
285106c3fb27SDimitry Andric                                                      AllocationType AllocType) {
285206c3fb27SDimitry Andric   auto *AI = Call.call().dyn_cast<AllocInfo *>();
285306c3fb27SDimitry Andric   assert(AI);
285406c3fb27SDimitry Andric   assert(AI->Versions.size() > Call.cloneNo());
285506c3fb27SDimitry Andric   AI->Versions[Call.cloneNo()] = (uint8_t)AllocType;
285606c3fb27SDimitry Andric }
285706c3fb27SDimitry Andric 
285806c3fb27SDimitry Andric void ModuleCallsiteContextGraph::updateCall(CallInfo &CallerCall,
285906c3fb27SDimitry Andric                                             FuncInfo CalleeFunc) {
286006c3fb27SDimitry Andric   if (CalleeFunc.cloneNo() > 0)
286106c3fb27SDimitry Andric     cast<CallBase>(CallerCall.call())->setCalledFunction(CalleeFunc.func());
286206c3fb27SDimitry Andric   OREGetter(CallerCall.call()->getFunction())
286306c3fb27SDimitry Andric       .emit(OptimizationRemark(DEBUG_TYPE, "MemprofCall", CallerCall.call())
286406c3fb27SDimitry Andric             << ore::NV("Call", CallerCall.call()) << " in clone "
286506c3fb27SDimitry Andric             << ore::NV("Caller", CallerCall.call()->getFunction())
286606c3fb27SDimitry Andric             << " assigned to call function clone "
286706c3fb27SDimitry Andric             << ore::NV("Callee", CalleeFunc.func()));
286806c3fb27SDimitry Andric }
286906c3fb27SDimitry Andric 
287006c3fb27SDimitry Andric void IndexCallsiteContextGraph::updateCall(CallInfo &CallerCall,
287106c3fb27SDimitry Andric                                            FuncInfo CalleeFunc) {
287206c3fb27SDimitry Andric   auto *CI = CallerCall.call().dyn_cast<CallsiteInfo *>();
287306c3fb27SDimitry Andric   assert(CI &&
287406c3fb27SDimitry Andric          "Caller cannot be an allocation which should not have profiled calls");
287506c3fb27SDimitry Andric   assert(CI->Clones.size() > CallerCall.cloneNo());
287606c3fb27SDimitry Andric   CI->Clones[CallerCall.cloneNo()] = CalleeFunc.cloneNo();
287706c3fb27SDimitry Andric }
287806c3fb27SDimitry Andric 
287906c3fb27SDimitry Andric CallsiteContextGraph<ModuleCallsiteContextGraph, Function,
288006c3fb27SDimitry Andric                      Instruction *>::FuncInfo
288106c3fb27SDimitry Andric ModuleCallsiteContextGraph::cloneFunctionForCallsite(
288206c3fb27SDimitry Andric     FuncInfo &Func, CallInfo &Call, std::map<CallInfo, CallInfo> &CallMap,
288306c3fb27SDimitry Andric     std::vector<CallInfo> &CallsWithMetadataInFunc, unsigned CloneNo) {
288406c3fb27SDimitry Andric   // Use existing LLVM facilities for cloning and obtaining Call in clone
288506c3fb27SDimitry Andric   ValueToValueMapTy VMap;
288606c3fb27SDimitry Andric   auto *NewFunc = CloneFunction(Func.func(), VMap);
288706c3fb27SDimitry Andric   std::string Name = getMemProfFuncName(Func.func()->getName(), CloneNo);
288806c3fb27SDimitry Andric   assert(!Func.func()->getParent()->getFunction(Name));
288906c3fb27SDimitry Andric   NewFunc->setName(Name);
289006c3fb27SDimitry Andric   for (auto &Inst : CallsWithMetadataInFunc) {
289106c3fb27SDimitry Andric     // This map always has the initial version in it.
289206c3fb27SDimitry Andric     assert(Inst.cloneNo() == 0);
289306c3fb27SDimitry Andric     CallMap[Inst] = {cast<Instruction>(VMap[Inst.call()]), CloneNo};
289406c3fb27SDimitry Andric   }
289506c3fb27SDimitry Andric   OREGetter(Func.func())
289606c3fb27SDimitry Andric       .emit(OptimizationRemark(DEBUG_TYPE, "MemprofClone", Func.func())
289706c3fb27SDimitry Andric             << "created clone " << ore::NV("NewFunction", NewFunc));
289806c3fb27SDimitry Andric   return {NewFunc, CloneNo};
289906c3fb27SDimitry Andric }
290006c3fb27SDimitry Andric 
290106c3fb27SDimitry Andric CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
290206c3fb27SDimitry Andric                      IndexCall>::FuncInfo
290306c3fb27SDimitry Andric IndexCallsiteContextGraph::cloneFunctionForCallsite(
290406c3fb27SDimitry Andric     FuncInfo &Func, CallInfo &Call, std::map<CallInfo, CallInfo> &CallMap,
290506c3fb27SDimitry Andric     std::vector<CallInfo> &CallsWithMetadataInFunc, unsigned CloneNo) {
290606c3fb27SDimitry Andric   // Check how many clones we have of Call (and therefore function).
290706c3fb27SDimitry Andric   // The next clone number is the current size of versions array.
290806c3fb27SDimitry Andric   // Confirm this matches the CloneNo provided by the caller, which is based on
290906c3fb27SDimitry Andric   // the number of function clones we have.
291006c3fb27SDimitry Andric   assert(CloneNo ==
291106c3fb27SDimitry Andric          (Call.call().is<AllocInfo *>()
291206c3fb27SDimitry Andric               ? Call.call().dyn_cast<AllocInfo *>()->Versions.size()
291306c3fb27SDimitry Andric               : Call.call().dyn_cast<CallsiteInfo *>()->Clones.size()));
291406c3fb27SDimitry Andric   // Walk all the instructions in this function. Create a new version for
291506c3fb27SDimitry Andric   // each (by adding an entry to the Versions/Clones summary array), and copy
291606c3fb27SDimitry Andric   // over the version being called for the function clone being cloned here.
291706c3fb27SDimitry Andric   // Additionally, add an entry to the CallMap for the new function clone,
291806c3fb27SDimitry Andric   // mapping the original call (clone 0, what is in CallsWithMetadataInFunc)
291906c3fb27SDimitry Andric   // to the new call clone.
292006c3fb27SDimitry Andric   for (auto &Inst : CallsWithMetadataInFunc) {
292106c3fb27SDimitry Andric     // This map always has the initial version in it.
292206c3fb27SDimitry Andric     assert(Inst.cloneNo() == 0);
292306c3fb27SDimitry Andric     if (auto *AI = Inst.call().dyn_cast<AllocInfo *>()) {
292406c3fb27SDimitry Andric       assert(AI->Versions.size() == CloneNo);
292506c3fb27SDimitry Andric       // We assign the allocation type later (in updateAllocationCall), just add
292606c3fb27SDimitry Andric       // an entry for it here.
292706c3fb27SDimitry Andric       AI->Versions.push_back(0);
292806c3fb27SDimitry Andric     } else {
292906c3fb27SDimitry Andric       auto *CI = Inst.call().dyn_cast<CallsiteInfo *>();
293006c3fb27SDimitry Andric       assert(CI && CI->Clones.size() == CloneNo);
293106c3fb27SDimitry Andric       // We assign the clone number later (in updateCall), just add an entry for
293206c3fb27SDimitry Andric       // it here.
293306c3fb27SDimitry Andric       CI->Clones.push_back(0);
293406c3fb27SDimitry Andric     }
293506c3fb27SDimitry Andric     CallMap[Inst] = {Inst.call(), CloneNo};
293606c3fb27SDimitry Andric   }
293706c3fb27SDimitry Andric   return {Func.func(), CloneNo};
293806c3fb27SDimitry Andric }
293906c3fb27SDimitry Andric 
294006c3fb27SDimitry Andric // This method assigns cloned callsites to functions, cloning the functions as
294106c3fb27SDimitry Andric // needed. The assignment is greedy and proceeds roughly as follows:
294206c3fb27SDimitry Andric //
294306c3fb27SDimitry Andric // For each function Func:
294406c3fb27SDimitry Andric //   For each call with graph Node having clones:
294506c3fb27SDimitry Andric //     Initialize ClonesWorklist to Node and its clones
294606c3fb27SDimitry Andric //     Initialize NodeCloneCount to 0
294706c3fb27SDimitry Andric //     While ClonesWorklist is not empty:
294806c3fb27SDimitry Andric //        Clone = pop front ClonesWorklist
294906c3fb27SDimitry Andric //        NodeCloneCount++
295006c3fb27SDimitry Andric //        If Func has been cloned less than NodeCloneCount times:
295106c3fb27SDimitry Andric //           If NodeCloneCount is 1:
295206c3fb27SDimitry Andric //             Assign Clone to original Func
295306c3fb27SDimitry Andric //             Continue
295406c3fb27SDimitry Andric //           Create a new function clone
295506c3fb27SDimitry Andric //           If other callers not assigned to call a function clone yet:
295606c3fb27SDimitry Andric //              Assign them to call new function clone
295706c3fb27SDimitry Andric //              Continue
295806c3fb27SDimitry Andric //           Assign any other caller calling the cloned version to new clone
295906c3fb27SDimitry Andric //
296006c3fb27SDimitry Andric //        For each caller of Clone:
296106c3fb27SDimitry Andric //           If caller is assigned to call a specific function clone:
296206c3fb27SDimitry Andric //             If we cannot assign Clone to that function clone:
296306c3fb27SDimitry Andric //               Create new callsite Clone NewClone
296406c3fb27SDimitry Andric //               Add NewClone to ClonesWorklist
296506c3fb27SDimitry Andric //               Continue
296606c3fb27SDimitry Andric //             Assign Clone to existing caller's called function clone
296706c3fb27SDimitry Andric //           Else:
296806c3fb27SDimitry Andric //             If Clone not already assigned to a function clone:
296906c3fb27SDimitry Andric //                Assign to first function clone without assignment
297006c3fb27SDimitry Andric //             Assign caller to selected function clone
297106c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
297206c3fb27SDimitry Andric bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
297306c3fb27SDimitry Andric   bool Changed = false;
297406c3fb27SDimitry Andric 
297506c3fb27SDimitry Andric   // Keep track of the assignment of nodes (callsites) to function clones they
297606c3fb27SDimitry Andric   // call.
297706c3fb27SDimitry Andric   DenseMap<ContextNode *, FuncInfo> CallsiteToCalleeFuncCloneMap;
297806c3fb27SDimitry Andric 
297906c3fb27SDimitry Andric   // Update caller node to call function version CalleeFunc, by recording the
298006c3fb27SDimitry Andric   // assignment in CallsiteToCalleeFuncCloneMap.
298106c3fb27SDimitry Andric   auto RecordCalleeFuncOfCallsite = [&](ContextNode *Caller,
298206c3fb27SDimitry Andric                                         const FuncInfo &CalleeFunc) {
298306c3fb27SDimitry Andric     assert(Caller->hasCall());
298406c3fb27SDimitry Andric     CallsiteToCalleeFuncCloneMap[Caller] = CalleeFunc;
298506c3fb27SDimitry Andric   };
298606c3fb27SDimitry Andric 
298706c3fb27SDimitry Andric   // Walk all functions for which we saw calls with memprof metadata, and handle
298806c3fb27SDimitry Andric   // cloning for each of its calls.
298906c3fb27SDimitry Andric   for (auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) {
299006c3fb27SDimitry Andric     FuncInfo OrigFunc(Func);
299106c3fb27SDimitry Andric     // Map from each clone of OrigFunc to a map of remappings of each call of
299206c3fb27SDimitry Andric     // interest (from original uncloned call to the corresponding cloned call in
299306c3fb27SDimitry Andric     // that function clone).
299406c3fb27SDimitry Andric     std::map<FuncInfo, std::map<CallInfo, CallInfo>> FuncClonesToCallMap;
299506c3fb27SDimitry Andric     for (auto &Call : CallsWithMetadata) {
299606c3fb27SDimitry Andric       ContextNode *Node = getNodeForInst(Call);
299706c3fb27SDimitry Andric       // Skip call if we do not have a node for it (all uses of its stack ids
299806c3fb27SDimitry Andric       // were either on inlined chains or pruned from the MIBs), or if we did
299906c3fb27SDimitry Andric       // not create any clones for it.
300006c3fb27SDimitry Andric       if (!Node || Node->Clones.empty())
300106c3fb27SDimitry Andric         continue;
300206c3fb27SDimitry Andric       assert(Node->hasCall() &&
300306c3fb27SDimitry Andric              "Not having a call should have prevented cloning");
300406c3fb27SDimitry Andric 
300506c3fb27SDimitry Andric       // Track the assignment of function clones to clones of the current
300606c3fb27SDimitry Andric       // callsite Node being handled.
300706c3fb27SDimitry Andric       std::map<FuncInfo, ContextNode *> FuncCloneToCurNodeCloneMap;
300806c3fb27SDimitry Andric 
300906c3fb27SDimitry Andric       // Assign callsite version CallsiteClone to function version FuncClone,
301006c3fb27SDimitry Andric       // and also assign (possibly cloned) Call to CallsiteClone.
301106c3fb27SDimitry Andric       auto AssignCallsiteCloneToFuncClone = [&](const FuncInfo &FuncClone,
301206c3fb27SDimitry Andric                                                 CallInfo &Call,
301306c3fb27SDimitry Andric                                                 ContextNode *CallsiteClone,
301406c3fb27SDimitry Andric                                                 bool IsAlloc) {
301506c3fb27SDimitry Andric         // Record the clone of callsite node assigned to this function clone.
301606c3fb27SDimitry Andric         FuncCloneToCurNodeCloneMap[FuncClone] = CallsiteClone;
301706c3fb27SDimitry Andric 
301806c3fb27SDimitry Andric         assert(FuncClonesToCallMap.count(FuncClone));
301906c3fb27SDimitry Andric         std::map<CallInfo, CallInfo> &CallMap = FuncClonesToCallMap[FuncClone];
302006c3fb27SDimitry Andric         CallInfo CallClone(Call);
302106c3fb27SDimitry Andric         if (CallMap.count(Call))
302206c3fb27SDimitry Andric           CallClone = CallMap[Call];
302306c3fb27SDimitry Andric         CallsiteClone->setCall(CallClone);
302406c3fb27SDimitry Andric       };
302506c3fb27SDimitry Andric 
302606c3fb27SDimitry Andric       // Keep track of the clones of callsite Node that need to be assigned to
302706c3fb27SDimitry Andric       // function clones. This list may be expanded in the loop body below if we
302806c3fb27SDimitry Andric       // find additional cloning is required.
302906c3fb27SDimitry Andric       std::deque<ContextNode *> ClonesWorklist;
303006c3fb27SDimitry Andric       // Ignore original Node if we moved all of its contexts to clones.
3031*0fca6ea1SDimitry Andric       if (!Node->emptyContextIds())
303206c3fb27SDimitry Andric         ClonesWorklist.push_back(Node);
303306c3fb27SDimitry Andric       ClonesWorklist.insert(ClonesWorklist.end(), Node->Clones.begin(),
303406c3fb27SDimitry Andric                             Node->Clones.end());
303506c3fb27SDimitry Andric 
303606c3fb27SDimitry Andric       // Now walk through all of the clones of this callsite Node that we need,
303706c3fb27SDimitry Andric       // and determine the assignment to a corresponding clone of the current
303806c3fb27SDimitry Andric       // function (creating new function clones as needed).
303906c3fb27SDimitry Andric       unsigned NodeCloneCount = 0;
304006c3fb27SDimitry Andric       while (!ClonesWorklist.empty()) {
304106c3fb27SDimitry Andric         ContextNode *Clone = ClonesWorklist.front();
304206c3fb27SDimitry Andric         ClonesWorklist.pop_front();
304306c3fb27SDimitry Andric         NodeCloneCount++;
304406c3fb27SDimitry Andric         if (VerifyNodes)
304506c3fb27SDimitry Andric           checkNode<DerivedCCG, FuncTy, CallTy>(Clone);
304606c3fb27SDimitry Andric 
304706c3fb27SDimitry Andric         // Need to create a new function clone if we have more callsite clones
304806c3fb27SDimitry Andric         // than existing function clones, which would have been assigned to an
304906c3fb27SDimitry Andric         // earlier clone in the list (we assign callsite clones to function
305006c3fb27SDimitry Andric         // clones greedily).
305106c3fb27SDimitry Andric         if (FuncClonesToCallMap.size() < NodeCloneCount) {
305206c3fb27SDimitry Andric           // If this is the first callsite copy, assign to original function.
305306c3fb27SDimitry Andric           if (NodeCloneCount == 1) {
305406c3fb27SDimitry Andric             // Since FuncClonesToCallMap is empty in this case, no clones have
305506c3fb27SDimitry Andric             // been created for this function yet, and no callers should have
305606c3fb27SDimitry Andric             // been assigned a function clone for this callee node yet.
305706c3fb27SDimitry Andric             assert(llvm::none_of(
305806c3fb27SDimitry Andric                 Clone->CallerEdges, [&](const std::shared_ptr<ContextEdge> &E) {
305906c3fb27SDimitry Andric                   return CallsiteToCalleeFuncCloneMap.count(E->Caller);
306006c3fb27SDimitry Andric                 }));
306106c3fb27SDimitry Andric             // Initialize with empty call map, assign Clone to original function
306206c3fb27SDimitry Andric             // and its callers, and skip to the next clone.
306306c3fb27SDimitry Andric             FuncClonesToCallMap[OrigFunc] = {};
306406c3fb27SDimitry Andric             AssignCallsiteCloneToFuncClone(
306506c3fb27SDimitry Andric                 OrigFunc, Call, Clone,
306606c3fb27SDimitry Andric                 AllocationCallToContextNodeMap.count(Call));
306706c3fb27SDimitry Andric             for (auto &CE : Clone->CallerEdges) {
306806c3fb27SDimitry Andric               // Ignore any caller that does not have a recorded callsite Call.
306906c3fb27SDimitry Andric               if (!CE->Caller->hasCall())
307006c3fb27SDimitry Andric                 continue;
307106c3fb27SDimitry Andric               RecordCalleeFuncOfCallsite(CE->Caller, OrigFunc);
307206c3fb27SDimitry Andric             }
307306c3fb27SDimitry Andric             continue;
307406c3fb27SDimitry Andric           }
307506c3fb27SDimitry Andric 
307606c3fb27SDimitry Andric           // First locate which copy of OrigFunc to clone again. If a caller
307706c3fb27SDimitry Andric           // of this callsite clone was already assigned to call a particular
307806c3fb27SDimitry Andric           // function clone, we need to redirect all of those callers to the
307906c3fb27SDimitry Andric           // new function clone, and update their other callees within this
308006c3fb27SDimitry Andric           // function.
308106c3fb27SDimitry Andric           FuncInfo PreviousAssignedFuncClone;
308206c3fb27SDimitry Andric           auto EI = llvm::find_if(
308306c3fb27SDimitry Andric               Clone->CallerEdges, [&](const std::shared_ptr<ContextEdge> &E) {
308406c3fb27SDimitry Andric                 return CallsiteToCalleeFuncCloneMap.count(E->Caller);
308506c3fb27SDimitry Andric               });
308606c3fb27SDimitry Andric           bool CallerAssignedToCloneOfFunc = false;
308706c3fb27SDimitry Andric           if (EI != Clone->CallerEdges.end()) {
308806c3fb27SDimitry Andric             const std::shared_ptr<ContextEdge> &Edge = *EI;
308906c3fb27SDimitry Andric             PreviousAssignedFuncClone =
309006c3fb27SDimitry Andric                 CallsiteToCalleeFuncCloneMap[Edge->Caller];
309106c3fb27SDimitry Andric             CallerAssignedToCloneOfFunc = true;
309206c3fb27SDimitry Andric           }
309306c3fb27SDimitry Andric 
309406c3fb27SDimitry Andric           // Clone function and save it along with the CallInfo map created
309506c3fb27SDimitry Andric           // during cloning in the FuncClonesToCallMap.
309606c3fb27SDimitry Andric           std::map<CallInfo, CallInfo> NewCallMap;
309706c3fb27SDimitry Andric           unsigned CloneNo = FuncClonesToCallMap.size();
309806c3fb27SDimitry Andric           assert(CloneNo > 0 && "Clone 0 is the original function, which "
309906c3fb27SDimitry Andric                                 "should already exist in the map");
310006c3fb27SDimitry Andric           FuncInfo NewFuncClone = cloneFunctionForCallsite(
310106c3fb27SDimitry Andric               OrigFunc, Call, NewCallMap, CallsWithMetadata, CloneNo);
310206c3fb27SDimitry Andric           FuncClonesToCallMap.emplace(NewFuncClone, std::move(NewCallMap));
310306c3fb27SDimitry Andric           FunctionClonesAnalysis++;
310406c3fb27SDimitry Andric           Changed = true;
310506c3fb27SDimitry Andric 
310606c3fb27SDimitry Andric           // If no caller callsites were already assigned to a clone of this
310706c3fb27SDimitry Andric           // function, we can simply assign this clone to the new func clone
310806c3fb27SDimitry Andric           // and update all callers to it, then skip to the next clone.
310906c3fb27SDimitry Andric           if (!CallerAssignedToCloneOfFunc) {
311006c3fb27SDimitry Andric             AssignCallsiteCloneToFuncClone(
311106c3fb27SDimitry Andric                 NewFuncClone, Call, Clone,
311206c3fb27SDimitry Andric                 AllocationCallToContextNodeMap.count(Call));
311306c3fb27SDimitry Andric             for (auto &CE : Clone->CallerEdges) {
311406c3fb27SDimitry Andric               // Ignore any caller that does not have a recorded callsite Call.
311506c3fb27SDimitry Andric               if (!CE->Caller->hasCall())
311606c3fb27SDimitry Andric                 continue;
311706c3fb27SDimitry Andric               RecordCalleeFuncOfCallsite(CE->Caller, NewFuncClone);
311806c3fb27SDimitry Andric             }
311906c3fb27SDimitry Andric             continue;
312006c3fb27SDimitry Andric           }
312106c3fb27SDimitry Andric 
312206c3fb27SDimitry Andric           // We may need to do additional node cloning in this case.
312306c3fb27SDimitry Andric           // Reset the CallsiteToCalleeFuncCloneMap entry for any callers
312406c3fb27SDimitry Andric           // that were previously assigned to call PreviousAssignedFuncClone,
312506c3fb27SDimitry Andric           // to record that they now call NewFuncClone.
312606c3fb27SDimitry Andric           for (auto CE : Clone->CallerEdges) {
3127297eecfbSDimitry Andric             // Skip any that have been removed on an earlier iteration.
3128297eecfbSDimitry Andric             if (!CE)
3129297eecfbSDimitry Andric               continue;
313006c3fb27SDimitry Andric             // Ignore any caller that does not have a recorded callsite Call.
313106c3fb27SDimitry Andric             if (!CE->Caller->hasCall())
313206c3fb27SDimitry Andric               continue;
313306c3fb27SDimitry Andric 
313406c3fb27SDimitry Andric             if (!CallsiteToCalleeFuncCloneMap.count(CE->Caller) ||
313506c3fb27SDimitry Andric                 // We subsequently fall through to later handling that
313606c3fb27SDimitry Andric                 // will perform any additional cloning required for
313706c3fb27SDimitry Andric                 // callers that were calling other function clones.
313806c3fb27SDimitry Andric                 CallsiteToCalleeFuncCloneMap[CE->Caller] !=
313906c3fb27SDimitry Andric                     PreviousAssignedFuncClone)
314006c3fb27SDimitry Andric               continue;
314106c3fb27SDimitry Andric 
314206c3fb27SDimitry Andric             RecordCalleeFuncOfCallsite(CE->Caller, NewFuncClone);
314306c3fb27SDimitry Andric 
314406c3fb27SDimitry Andric             // If we are cloning a function that was already assigned to some
314506c3fb27SDimitry Andric             // callers, then essentially we are creating new callsite clones
314606c3fb27SDimitry Andric             // of the other callsites in that function that are reached by those
314706c3fb27SDimitry Andric             // callers. Clone the other callees of the current callsite's caller
314806c3fb27SDimitry Andric             // that were already assigned to PreviousAssignedFuncClone
314906c3fb27SDimitry Andric             // accordingly. This is important since we subsequently update the
315006c3fb27SDimitry Andric             // calls from the nodes in the graph and their assignments to callee
315106c3fb27SDimitry Andric             // functions recorded in CallsiteToCalleeFuncCloneMap.
315206c3fb27SDimitry Andric             for (auto CalleeEdge : CE->Caller->CalleeEdges) {
315306c3fb27SDimitry Andric               // Skip any that have been removed on an earlier iteration when
315406c3fb27SDimitry Andric               // cleaning up newly None type callee edges.
315506c3fb27SDimitry Andric               if (!CalleeEdge)
315606c3fb27SDimitry Andric                 continue;
315706c3fb27SDimitry Andric               ContextNode *Callee = CalleeEdge->Callee;
315806c3fb27SDimitry Andric               // Skip the current callsite, we are looking for other
315906c3fb27SDimitry Andric               // callsites Caller calls, as well as any that does not have a
316006c3fb27SDimitry Andric               // recorded callsite Call.
316106c3fb27SDimitry Andric               if (Callee == Clone || !Callee->hasCall())
316206c3fb27SDimitry Andric                 continue;
316306c3fb27SDimitry Andric               ContextNode *NewClone = moveEdgeToNewCalleeClone(CalleeEdge);
316406c3fb27SDimitry Andric               removeNoneTypeCalleeEdges(NewClone);
316506c3fb27SDimitry Andric               // Moving the edge may have resulted in some none type
316606c3fb27SDimitry Andric               // callee edges on the original Callee.
316706c3fb27SDimitry Andric               removeNoneTypeCalleeEdges(Callee);
316806c3fb27SDimitry Andric               assert(NewClone->AllocTypes != (uint8_t)AllocationType::None);
316906c3fb27SDimitry Andric               // If the Callee node was already assigned to call a specific
317006c3fb27SDimitry Andric               // function version, make sure its new clone is assigned to call
317106c3fb27SDimitry Andric               // that same function clone.
317206c3fb27SDimitry Andric               if (CallsiteToCalleeFuncCloneMap.count(Callee))
317306c3fb27SDimitry Andric                 RecordCalleeFuncOfCallsite(
317406c3fb27SDimitry Andric                     NewClone, CallsiteToCalleeFuncCloneMap[Callee]);
317506c3fb27SDimitry Andric               // Update NewClone with the new Call clone of this callsite's Call
317606c3fb27SDimitry Andric               // created for the new function clone created earlier.
317706c3fb27SDimitry Andric               // Recall that we have already ensured when building the graph
317806c3fb27SDimitry Andric               // that each caller can only call callsites within the same
317906c3fb27SDimitry Andric               // function, so we are guaranteed that Callee Call is in the
318006c3fb27SDimitry Andric               // current OrigFunc.
318106c3fb27SDimitry Andric               // CallMap is set up as indexed by original Call at clone 0.
318206c3fb27SDimitry Andric               CallInfo OrigCall(Callee->getOrigNode()->Call);
318306c3fb27SDimitry Andric               OrigCall.setCloneNo(0);
318406c3fb27SDimitry Andric               std::map<CallInfo, CallInfo> &CallMap =
318506c3fb27SDimitry Andric                   FuncClonesToCallMap[NewFuncClone];
318606c3fb27SDimitry Andric               assert(CallMap.count(OrigCall));
318706c3fb27SDimitry Andric               CallInfo NewCall(CallMap[OrigCall]);
318806c3fb27SDimitry Andric               assert(NewCall);
318906c3fb27SDimitry Andric               NewClone->setCall(NewCall);
319006c3fb27SDimitry Andric             }
319106c3fb27SDimitry Andric           }
319206c3fb27SDimitry Andric           // Fall through to handling below to perform the recording of the
319306c3fb27SDimitry Andric           // function for this callsite clone. This enables handling of cases
319406c3fb27SDimitry Andric           // where the callers were assigned to different clones of a function.
319506c3fb27SDimitry Andric         }
319606c3fb27SDimitry Andric 
319706c3fb27SDimitry Andric         // See if we can use existing function clone. Walk through
319806c3fb27SDimitry Andric         // all caller edges to see if any have already been assigned to
319906c3fb27SDimitry Andric         // a clone of this callsite's function. If we can use it, do so. If not,
320006c3fb27SDimitry Andric         // because that function clone is already assigned to a different clone
320106c3fb27SDimitry Andric         // of this callsite, then we need to clone again.
320206c3fb27SDimitry Andric         // Basically, this checking is needed to handle the case where different
320306c3fb27SDimitry Andric         // caller functions/callsites may need versions of this function
320406c3fb27SDimitry Andric         // containing different mixes of callsite clones across the different
320506c3fb27SDimitry Andric         // callsites within the function. If that happens, we need to create
320606c3fb27SDimitry Andric         // additional function clones to handle the various combinations.
320706c3fb27SDimitry Andric         //
320806c3fb27SDimitry Andric         // Keep track of any new clones of this callsite created by the
320906c3fb27SDimitry Andric         // following loop, as well as any existing clone that we decided to
321006c3fb27SDimitry Andric         // assign this clone to.
321106c3fb27SDimitry Andric         std::map<FuncInfo, ContextNode *> FuncCloneToNewCallsiteCloneMap;
321206c3fb27SDimitry Andric         FuncInfo FuncCloneAssignedToCurCallsiteClone;
321306c3fb27SDimitry Andric         // We need to be able to remove Edge from CallerEdges, so need to adjust
321406c3fb27SDimitry Andric         // iterator in the loop.
321506c3fb27SDimitry Andric         for (auto EI = Clone->CallerEdges.begin();
321606c3fb27SDimitry Andric              EI != Clone->CallerEdges.end();) {
321706c3fb27SDimitry Andric           auto Edge = *EI;
321806c3fb27SDimitry Andric           // Ignore any caller that does not have a recorded callsite Call.
321906c3fb27SDimitry Andric           if (!Edge->Caller->hasCall()) {
322006c3fb27SDimitry Andric             EI++;
322106c3fb27SDimitry Andric             continue;
322206c3fb27SDimitry Andric           }
322306c3fb27SDimitry Andric           // If this caller already assigned to call a version of OrigFunc, need
322406c3fb27SDimitry Andric           // to ensure we can assign this callsite clone to that function clone.
322506c3fb27SDimitry Andric           if (CallsiteToCalleeFuncCloneMap.count(Edge->Caller)) {
322606c3fb27SDimitry Andric             FuncInfo FuncCloneCalledByCaller =
322706c3fb27SDimitry Andric                 CallsiteToCalleeFuncCloneMap[Edge->Caller];
322806c3fb27SDimitry Andric             // First we need to confirm that this function clone is available
322906c3fb27SDimitry Andric             // for use by this callsite node clone.
323006c3fb27SDimitry Andric             //
323106c3fb27SDimitry Andric             // While FuncCloneToCurNodeCloneMap is built only for this Node and
323206c3fb27SDimitry Andric             // its callsite clones, one of those callsite clones X could have
323306c3fb27SDimitry Andric             // been assigned to the same function clone called by Edge's caller
323406c3fb27SDimitry Andric             // - if Edge's caller calls another callsite within Node's original
323506c3fb27SDimitry Andric             // function, and that callsite has another caller reaching clone X.
323606c3fb27SDimitry Andric             // We need to clone Node again in this case.
323706c3fb27SDimitry Andric             if ((FuncCloneToCurNodeCloneMap.count(FuncCloneCalledByCaller) &&
323806c3fb27SDimitry Andric                  FuncCloneToCurNodeCloneMap[FuncCloneCalledByCaller] !=
323906c3fb27SDimitry Andric                      Clone) ||
324006c3fb27SDimitry Andric                 // Detect when we have multiple callers of this callsite that
324106c3fb27SDimitry Andric                 // have already been assigned to specific, and different, clones
324206c3fb27SDimitry Andric                 // of OrigFunc (due to other unrelated callsites in Func they
324306c3fb27SDimitry Andric                 // reach via call contexts). Is this Clone of callsite Node
324406c3fb27SDimitry Andric                 // assigned to a different clone of OrigFunc? If so, clone Node
324506c3fb27SDimitry Andric                 // again.
324606c3fb27SDimitry Andric                 (FuncCloneAssignedToCurCallsiteClone &&
324706c3fb27SDimitry Andric                  FuncCloneAssignedToCurCallsiteClone !=
324806c3fb27SDimitry Andric                      FuncCloneCalledByCaller)) {
324906c3fb27SDimitry Andric               // We need to use a different newly created callsite clone, in
325006c3fb27SDimitry Andric               // order to assign it to another new function clone on a
325106c3fb27SDimitry Andric               // subsequent iteration over the Clones array (adjusted below).
325206c3fb27SDimitry Andric               // Note we specifically do not reset the
325306c3fb27SDimitry Andric               // CallsiteToCalleeFuncCloneMap entry for this caller, so that
325406c3fb27SDimitry Andric               // when this new clone is processed later we know which version of
325506c3fb27SDimitry Andric               // the function to copy (so that other callsite clones we have
325606c3fb27SDimitry Andric               // assigned to that function clone are properly cloned over). See
325706c3fb27SDimitry Andric               // comments in the function cloning handling earlier.
325806c3fb27SDimitry Andric 
325906c3fb27SDimitry Andric               // Check if we already have cloned this callsite again while
326006c3fb27SDimitry Andric               // walking through caller edges, for a caller calling the same
326106c3fb27SDimitry Andric               // function clone. If so, we can move this edge to that new clone
326206c3fb27SDimitry Andric               // rather than creating yet another new clone.
326306c3fb27SDimitry Andric               if (FuncCloneToNewCallsiteCloneMap.count(
326406c3fb27SDimitry Andric                       FuncCloneCalledByCaller)) {
326506c3fb27SDimitry Andric                 ContextNode *NewClone =
326606c3fb27SDimitry Andric                     FuncCloneToNewCallsiteCloneMap[FuncCloneCalledByCaller];
326706c3fb27SDimitry Andric                 moveEdgeToExistingCalleeClone(Edge, NewClone, &EI);
326806c3fb27SDimitry Andric                 // Cleanup any none type edges cloned over.
326906c3fb27SDimitry Andric                 removeNoneTypeCalleeEdges(NewClone);
327006c3fb27SDimitry Andric               } else {
327106c3fb27SDimitry Andric                 // Create a new callsite clone.
327206c3fb27SDimitry Andric                 ContextNode *NewClone = moveEdgeToNewCalleeClone(Edge, &EI);
327306c3fb27SDimitry Andric                 removeNoneTypeCalleeEdges(NewClone);
327406c3fb27SDimitry Andric                 FuncCloneToNewCallsiteCloneMap[FuncCloneCalledByCaller] =
327506c3fb27SDimitry Andric                     NewClone;
327606c3fb27SDimitry Andric                 // Add to list of clones and process later.
327706c3fb27SDimitry Andric                 ClonesWorklist.push_back(NewClone);
327806c3fb27SDimitry Andric                 assert(EI == Clone->CallerEdges.end() ||
327906c3fb27SDimitry Andric                        Clone->AllocTypes != (uint8_t)AllocationType::None);
328006c3fb27SDimitry Andric                 assert(NewClone->AllocTypes != (uint8_t)AllocationType::None);
328106c3fb27SDimitry Andric               }
328206c3fb27SDimitry Andric               // Moving the caller edge may have resulted in some none type
328306c3fb27SDimitry Andric               // callee edges.
328406c3fb27SDimitry Andric               removeNoneTypeCalleeEdges(Clone);
328506c3fb27SDimitry Andric               // We will handle the newly created callsite clone in a subsequent
328606c3fb27SDimitry Andric               // iteration over this Node's Clones. Continue here since we
328706c3fb27SDimitry Andric               // already adjusted iterator EI while moving the edge.
328806c3fb27SDimitry Andric               continue;
328906c3fb27SDimitry Andric             }
329006c3fb27SDimitry Andric 
329106c3fb27SDimitry Andric             // Otherwise, we can use the function clone already assigned to this
329206c3fb27SDimitry Andric             // caller.
329306c3fb27SDimitry Andric             if (!FuncCloneAssignedToCurCallsiteClone) {
329406c3fb27SDimitry Andric               FuncCloneAssignedToCurCallsiteClone = FuncCloneCalledByCaller;
329506c3fb27SDimitry Andric               // Assign Clone to FuncCloneCalledByCaller
329606c3fb27SDimitry Andric               AssignCallsiteCloneToFuncClone(
329706c3fb27SDimitry Andric                   FuncCloneCalledByCaller, Call, Clone,
329806c3fb27SDimitry Andric                   AllocationCallToContextNodeMap.count(Call));
329906c3fb27SDimitry Andric             } else
330006c3fb27SDimitry Andric               // Don't need to do anything - callsite is already calling this
330106c3fb27SDimitry Andric               // function clone.
330206c3fb27SDimitry Andric               assert(FuncCloneAssignedToCurCallsiteClone ==
330306c3fb27SDimitry Andric                      FuncCloneCalledByCaller);
330406c3fb27SDimitry Andric 
330506c3fb27SDimitry Andric           } else {
330606c3fb27SDimitry Andric             // We have not already assigned this caller to a version of
330706c3fb27SDimitry Andric             // OrigFunc. Do the assignment now.
330806c3fb27SDimitry Andric 
330906c3fb27SDimitry Andric             // First check if we have already assigned this callsite clone to a
331006c3fb27SDimitry Andric             // clone of OrigFunc for another caller during this iteration over
331106c3fb27SDimitry Andric             // its caller edges.
331206c3fb27SDimitry Andric             if (!FuncCloneAssignedToCurCallsiteClone) {
331306c3fb27SDimitry Andric               // Find first function in FuncClonesToCallMap without an assigned
331406c3fb27SDimitry Andric               // clone of this callsite Node. We should always have one
331506c3fb27SDimitry Andric               // available at this point due to the earlier cloning when the
331606c3fb27SDimitry Andric               // FuncClonesToCallMap size was smaller than the clone number.
331706c3fb27SDimitry Andric               for (auto &CF : FuncClonesToCallMap) {
331806c3fb27SDimitry Andric                 if (!FuncCloneToCurNodeCloneMap.count(CF.first)) {
331906c3fb27SDimitry Andric                   FuncCloneAssignedToCurCallsiteClone = CF.first;
332006c3fb27SDimitry Andric                   break;
332106c3fb27SDimitry Andric                 }
332206c3fb27SDimitry Andric               }
332306c3fb27SDimitry Andric               assert(FuncCloneAssignedToCurCallsiteClone);
332406c3fb27SDimitry Andric               // Assign Clone to FuncCloneAssignedToCurCallsiteClone
332506c3fb27SDimitry Andric               AssignCallsiteCloneToFuncClone(
332606c3fb27SDimitry Andric                   FuncCloneAssignedToCurCallsiteClone, Call, Clone,
332706c3fb27SDimitry Andric                   AllocationCallToContextNodeMap.count(Call));
332806c3fb27SDimitry Andric             } else
332906c3fb27SDimitry Andric               assert(FuncCloneToCurNodeCloneMap
333006c3fb27SDimitry Andric                          [FuncCloneAssignedToCurCallsiteClone] == Clone);
333106c3fb27SDimitry Andric             // Update callers to record function version called.
333206c3fb27SDimitry Andric             RecordCalleeFuncOfCallsite(Edge->Caller,
333306c3fb27SDimitry Andric                                        FuncCloneAssignedToCurCallsiteClone);
333406c3fb27SDimitry Andric           }
333506c3fb27SDimitry Andric 
333606c3fb27SDimitry Andric           EI++;
333706c3fb27SDimitry Andric         }
333806c3fb27SDimitry Andric       }
333906c3fb27SDimitry Andric       if (VerifyCCG) {
334006c3fb27SDimitry Andric         checkNode<DerivedCCG, FuncTy, CallTy>(Node);
334106c3fb27SDimitry Andric         for (const auto &PE : Node->CalleeEdges)
334206c3fb27SDimitry Andric           checkNode<DerivedCCG, FuncTy, CallTy>(PE->Callee);
334306c3fb27SDimitry Andric         for (const auto &CE : Node->CallerEdges)
334406c3fb27SDimitry Andric           checkNode<DerivedCCG, FuncTy, CallTy>(CE->Caller);
334506c3fb27SDimitry Andric         for (auto *Clone : Node->Clones) {
334606c3fb27SDimitry Andric           checkNode<DerivedCCG, FuncTy, CallTy>(Clone);
334706c3fb27SDimitry Andric           for (const auto &PE : Clone->CalleeEdges)
334806c3fb27SDimitry Andric             checkNode<DerivedCCG, FuncTy, CallTy>(PE->Callee);
334906c3fb27SDimitry Andric           for (const auto &CE : Clone->CallerEdges)
335006c3fb27SDimitry Andric             checkNode<DerivedCCG, FuncTy, CallTy>(CE->Caller);
335106c3fb27SDimitry Andric         }
335206c3fb27SDimitry Andric       }
335306c3fb27SDimitry Andric     }
335406c3fb27SDimitry Andric   }
335506c3fb27SDimitry Andric 
335606c3fb27SDimitry Andric   auto UpdateCalls = [&](ContextNode *Node,
335706c3fb27SDimitry Andric                          DenseSet<const ContextNode *> &Visited,
335806c3fb27SDimitry Andric                          auto &&UpdateCalls) {
335906c3fb27SDimitry Andric     auto Inserted = Visited.insert(Node);
336006c3fb27SDimitry Andric     if (!Inserted.second)
336106c3fb27SDimitry Andric       return;
336206c3fb27SDimitry Andric 
336306c3fb27SDimitry Andric     for (auto *Clone : Node->Clones)
336406c3fb27SDimitry Andric       UpdateCalls(Clone, Visited, UpdateCalls);
336506c3fb27SDimitry Andric 
336606c3fb27SDimitry Andric     for (auto &Edge : Node->CallerEdges)
336706c3fb27SDimitry Andric       UpdateCalls(Edge->Caller, Visited, UpdateCalls);
336806c3fb27SDimitry Andric 
336906c3fb27SDimitry Andric     // Skip if either no call to update, or if we ended up with no context ids
337006c3fb27SDimitry Andric     // (we moved all edges onto other clones).
3371*0fca6ea1SDimitry Andric     if (!Node->hasCall() || Node->emptyContextIds())
337206c3fb27SDimitry Andric       return;
337306c3fb27SDimitry Andric 
337406c3fb27SDimitry Andric     if (Node->IsAllocation) {
337506c3fb27SDimitry Andric       updateAllocationCall(Node->Call, allocTypeToUse(Node->AllocTypes));
337606c3fb27SDimitry Andric       return;
337706c3fb27SDimitry Andric     }
337806c3fb27SDimitry Andric 
337906c3fb27SDimitry Andric     if (!CallsiteToCalleeFuncCloneMap.count(Node))
338006c3fb27SDimitry Andric       return;
338106c3fb27SDimitry Andric 
338206c3fb27SDimitry Andric     auto CalleeFunc = CallsiteToCalleeFuncCloneMap[Node];
338306c3fb27SDimitry Andric     updateCall(Node->Call, CalleeFunc);
338406c3fb27SDimitry Andric   };
338506c3fb27SDimitry Andric 
338606c3fb27SDimitry Andric   // Performs DFS traversal starting from allocation nodes to update calls to
338706c3fb27SDimitry Andric   // reflect cloning decisions recorded earlier. For regular LTO this will
338806c3fb27SDimitry Andric   // update the actual calls in the IR to call the appropriate function clone
338906c3fb27SDimitry Andric   // (and add attributes to allocation calls), whereas for ThinLTO the decisions
339006c3fb27SDimitry Andric   // are recorded in the summary entries.
339106c3fb27SDimitry Andric   DenseSet<const ContextNode *> Visited;
339206c3fb27SDimitry Andric   for (auto &Entry : AllocationCallToContextNodeMap)
339306c3fb27SDimitry Andric     UpdateCalls(Entry.second, Visited, UpdateCalls);
339406c3fb27SDimitry Andric 
339506c3fb27SDimitry Andric   return Changed;
339606c3fb27SDimitry Andric }
339706c3fb27SDimitry Andric 
339806c3fb27SDimitry Andric static SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> createFunctionClones(
339906c3fb27SDimitry Andric     Function &F, unsigned NumClones, Module &M, OptimizationRemarkEmitter &ORE,
340006c3fb27SDimitry Andric     std::map<const Function *, SmallPtrSet<const GlobalAlias *, 1>>
340106c3fb27SDimitry Andric         &FuncToAliasMap) {
340206c3fb27SDimitry Andric   // The first "clone" is the original copy, we should only call this if we
340306c3fb27SDimitry Andric   // needed to create new clones.
340406c3fb27SDimitry Andric   assert(NumClones > 1);
340506c3fb27SDimitry Andric   SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> VMaps;
340606c3fb27SDimitry Andric   VMaps.reserve(NumClones - 1);
340706c3fb27SDimitry Andric   FunctionsClonedThinBackend++;
340806c3fb27SDimitry Andric   for (unsigned I = 1; I < NumClones; I++) {
340906c3fb27SDimitry Andric     VMaps.emplace_back(std::make_unique<ValueToValueMapTy>());
341006c3fb27SDimitry Andric     auto *NewF = CloneFunction(&F, *VMaps.back());
341106c3fb27SDimitry Andric     FunctionClonesThinBackend++;
341206c3fb27SDimitry Andric     // Strip memprof and callsite metadata from clone as they are no longer
341306c3fb27SDimitry Andric     // needed.
341406c3fb27SDimitry Andric     for (auto &BB : *NewF) {
341506c3fb27SDimitry Andric       for (auto &Inst : BB) {
341606c3fb27SDimitry Andric         Inst.setMetadata(LLVMContext::MD_memprof, nullptr);
341706c3fb27SDimitry Andric         Inst.setMetadata(LLVMContext::MD_callsite, nullptr);
341806c3fb27SDimitry Andric       }
341906c3fb27SDimitry Andric     }
342006c3fb27SDimitry Andric     std::string Name = getMemProfFuncName(F.getName(), I);
342106c3fb27SDimitry Andric     auto *PrevF = M.getFunction(Name);
342206c3fb27SDimitry Andric     if (PrevF) {
342306c3fb27SDimitry Andric       // We might have created this when adjusting callsite in another
342406c3fb27SDimitry Andric       // function. It should be a declaration.
342506c3fb27SDimitry Andric       assert(PrevF->isDeclaration());
342606c3fb27SDimitry Andric       NewF->takeName(PrevF);
342706c3fb27SDimitry Andric       PrevF->replaceAllUsesWith(NewF);
342806c3fb27SDimitry Andric       PrevF->eraseFromParent();
342906c3fb27SDimitry Andric     } else
343006c3fb27SDimitry Andric       NewF->setName(Name);
343106c3fb27SDimitry Andric     ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofClone", &F)
343206c3fb27SDimitry Andric              << "created clone " << ore::NV("NewFunction", NewF));
343306c3fb27SDimitry Andric 
343406c3fb27SDimitry Andric     // Now handle aliases to this function, and clone those as well.
343506c3fb27SDimitry Andric     if (!FuncToAliasMap.count(&F))
343606c3fb27SDimitry Andric       continue;
343706c3fb27SDimitry Andric     for (auto *A : FuncToAliasMap[&F]) {
343806c3fb27SDimitry Andric       std::string Name = getMemProfFuncName(A->getName(), I);
343906c3fb27SDimitry Andric       auto *PrevA = M.getNamedAlias(Name);
344006c3fb27SDimitry Andric       auto *NewA = GlobalAlias::create(A->getValueType(),
344106c3fb27SDimitry Andric                                        A->getType()->getPointerAddressSpace(),
344206c3fb27SDimitry Andric                                        A->getLinkage(), Name, NewF);
344306c3fb27SDimitry Andric       NewA->copyAttributesFrom(A);
344406c3fb27SDimitry Andric       if (PrevA) {
344506c3fb27SDimitry Andric         // We might have created this when adjusting callsite in another
344606c3fb27SDimitry Andric         // function. It should be a declaration.
344706c3fb27SDimitry Andric         assert(PrevA->isDeclaration());
344806c3fb27SDimitry Andric         NewA->takeName(PrevA);
344906c3fb27SDimitry Andric         PrevA->replaceAllUsesWith(NewA);
345006c3fb27SDimitry Andric         PrevA->eraseFromParent();
345106c3fb27SDimitry Andric       }
345206c3fb27SDimitry Andric     }
345306c3fb27SDimitry Andric   }
345406c3fb27SDimitry Andric   return VMaps;
345506c3fb27SDimitry Andric }
345606c3fb27SDimitry Andric 
345706c3fb27SDimitry Andric // Locate the summary for F. This is complicated by the fact that it might
345806c3fb27SDimitry Andric // have been internalized or promoted.
345906c3fb27SDimitry Andric static ValueInfo findValueInfoForFunc(const Function &F, const Module &M,
346006c3fb27SDimitry Andric                                       const ModuleSummaryIndex *ImportSummary) {
346106c3fb27SDimitry Andric   // FIXME: Ideally we would retain the original GUID in some fashion on the
346206c3fb27SDimitry Andric   // function (e.g. as metadata), but for now do our best to locate the
346306c3fb27SDimitry Andric   // summary without that information.
346406c3fb27SDimitry Andric   ValueInfo TheFnVI = ImportSummary->getValueInfo(F.getGUID());
346506c3fb27SDimitry Andric   if (!TheFnVI)
346606c3fb27SDimitry Andric     // See if theFn was internalized, by checking index directly with
346706c3fb27SDimitry Andric     // original name (this avoids the name adjustment done by getGUID() for
346806c3fb27SDimitry Andric     // internal symbols).
346906c3fb27SDimitry Andric     TheFnVI = ImportSummary->getValueInfo(GlobalValue::getGUID(F.getName()));
347006c3fb27SDimitry Andric   if (TheFnVI)
347106c3fb27SDimitry Andric     return TheFnVI;
347206c3fb27SDimitry Andric   // Now query with the original name before any promotion was performed.
347306c3fb27SDimitry Andric   StringRef OrigName =
347406c3fb27SDimitry Andric       ModuleSummaryIndex::getOriginalNameBeforePromote(F.getName());
347506c3fb27SDimitry Andric   std::string OrigId = GlobalValue::getGlobalIdentifier(
347606c3fb27SDimitry Andric       OrigName, GlobalValue::InternalLinkage, M.getSourceFileName());
347706c3fb27SDimitry Andric   TheFnVI = ImportSummary->getValueInfo(GlobalValue::getGUID(OrigId));
347806c3fb27SDimitry Andric   if (TheFnVI)
347906c3fb27SDimitry Andric     return TheFnVI;
348006c3fb27SDimitry Andric   // Could be a promoted local imported from another module. We need to pass
348106c3fb27SDimitry Andric   // down more info here to find the original module id. For now, try with
348206c3fb27SDimitry Andric   // the OrigName which might have been stored in the OidGuidMap in the
348306c3fb27SDimitry Andric   // index. This would not work if there were same-named locals in multiple
348406c3fb27SDimitry Andric   // modules, however.
348506c3fb27SDimitry Andric   auto OrigGUID =
348606c3fb27SDimitry Andric       ImportSummary->getGUIDFromOriginalID(GlobalValue::getGUID(OrigName));
348706c3fb27SDimitry Andric   if (OrigGUID)
348806c3fb27SDimitry Andric     TheFnVI = ImportSummary->getValueInfo(OrigGUID);
348906c3fb27SDimitry Andric   return TheFnVI;
349006c3fb27SDimitry Andric }
349106c3fb27SDimitry Andric 
349206c3fb27SDimitry Andric bool MemProfContextDisambiguation::applyImport(Module &M) {
349306c3fb27SDimitry Andric   assert(ImportSummary);
349406c3fb27SDimitry Andric   bool Changed = false;
349506c3fb27SDimitry Andric 
349606c3fb27SDimitry Andric   auto IsMemProfClone = [](const Function &F) {
349706c3fb27SDimitry Andric     return F.getName().contains(MemProfCloneSuffix);
349806c3fb27SDimitry Andric   };
349906c3fb27SDimitry Andric 
350006c3fb27SDimitry Andric   // We also need to clone any aliases that reference cloned functions, because
350106c3fb27SDimitry Andric   // the modified callsites may invoke via the alias. Keep track of the aliases
350206c3fb27SDimitry Andric   // for each function.
350306c3fb27SDimitry Andric   std::map<const Function *, SmallPtrSet<const GlobalAlias *, 1>>
350406c3fb27SDimitry Andric       FuncToAliasMap;
350506c3fb27SDimitry Andric   for (auto &A : M.aliases()) {
350606c3fb27SDimitry Andric     auto *Aliasee = A.getAliaseeObject();
350706c3fb27SDimitry Andric     if (auto *F = dyn_cast<Function>(Aliasee))
350806c3fb27SDimitry Andric       FuncToAliasMap[F].insert(&A);
350906c3fb27SDimitry Andric   }
351006c3fb27SDimitry Andric 
351106c3fb27SDimitry Andric   for (auto &F : M) {
351206c3fb27SDimitry Andric     if (F.isDeclaration() || IsMemProfClone(F))
351306c3fb27SDimitry Andric       continue;
351406c3fb27SDimitry Andric 
351506c3fb27SDimitry Andric     OptimizationRemarkEmitter ORE(&F);
351606c3fb27SDimitry Andric 
351706c3fb27SDimitry Andric     SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> VMaps;
351806c3fb27SDimitry Andric     bool ClonesCreated = false;
351906c3fb27SDimitry Andric     unsigned NumClonesCreated = 0;
352006c3fb27SDimitry Andric     auto CloneFuncIfNeeded = [&](unsigned NumClones) {
352106c3fb27SDimitry Andric       // We should at least have version 0 which is the original copy.
352206c3fb27SDimitry Andric       assert(NumClones > 0);
352306c3fb27SDimitry Andric       // If only one copy needed use original.
352406c3fb27SDimitry Andric       if (NumClones == 1)
352506c3fb27SDimitry Andric         return;
352606c3fb27SDimitry Andric       // If we already performed cloning of this function, confirm that the
352706c3fb27SDimitry Andric       // requested number of clones matches (the thin link should ensure the
352806c3fb27SDimitry Andric       // number of clones for each constituent callsite is consistent within
352906c3fb27SDimitry Andric       // each function), before returning.
353006c3fb27SDimitry Andric       if (ClonesCreated) {
353106c3fb27SDimitry Andric         assert(NumClonesCreated == NumClones);
353206c3fb27SDimitry Andric         return;
353306c3fb27SDimitry Andric       }
353406c3fb27SDimitry Andric       VMaps = createFunctionClones(F, NumClones, M, ORE, FuncToAliasMap);
353506c3fb27SDimitry Andric       // The first "clone" is the original copy, which doesn't have a VMap.
353606c3fb27SDimitry Andric       assert(VMaps.size() == NumClones - 1);
353706c3fb27SDimitry Andric       Changed = true;
353806c3fb27SDimitry Andric       ClonesCreated = true;
353906c3fb27SDimitry Andric       NumClonesCreated = NumClones;
354006c3fb27SDimitry Andric     };
354106c3fb27SDimitry Andric 
3542297eecfbSDimitry Andric     auto CloneCallsite = [&](const CallsiteInfo &StackNode, CallBase *CB,
3543297eecfbSDimitry Andric                              Function *CalledFunction) {
3544297eecfbSDimitry Andric       // Perform cloning if not yet done.
3545297eecfbSDimitry Andric       CloneFuncIfNeeded(/*NumClones=*/StackNode.Clones.size());
3546297eecfbSDimitry Andric 
3547297eecfbSDimitry Andric       // Should have skipped indirect calls via mayHaveMemprofSummary.
3548297eecfbSDimitry Andric       assert(CalledFunction);
3549297eecfbSDimitry Andric       assert(!IsMemProfClone(*CalledFunction));
3550297eecfbSDimitry Andric 
3551297eecfbSDimitry Andric       // Update the calls per the summary info.
3552297eecfbSDimitry Andric       // Save orig name since it gets updated in the first iteration
3553297eecfbSDimitry Andric       // below.
3554297eecfbSDimitry Andric       auto CalleeOrigName = CalledFunction->getName();
3555297eecfbSDimitry Andric       for (unsigned J = 0; J < StackNode.Clones.size(); J++) {
3556297eecfbSDimitry Andric         // Do nothing if this version calls the original version of its
3557297eecfbSDimitry Andric         // callee.
3558297eecfbSDimitry Andric         if (!StackNode.Clones[J])
3559297eecfbSDimitry Andric           continue;
3560297eecfbSDimitry Andric         auto NewF = M.getOrInsertFunction(
3561297eecfbSDimitry Andric             getMemProfFuncName(CalleeOrigName, StackNode.Clones[J]),
3562297eecfbSDimitry Andric             CalledFunction->getFunctionType());
3563297eecfbSDimitry Andric         CallBase *CBClone;
3564297eecfbSDimitry Andric         // Copy 0 is the original function.
3565297eecfbSDimitry Andric         if (!J)
3566297eecfbSDimitry Andric           CBClone = CB;
3567297eecfbSDimitry Andric         else
3568297eecfbSDimitry Andric           CBClone = cast<CallBase>((*VMaps[J - 1])[CB]);
3569297eecfbSDimitry Andric         CBClone->setCalledFunction(NewF);
3570297eecfbSDimitry Andric         ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofCall", CBClone)
3571297eecfbSDimitry Andric                  << ore::NV("Call", CBClone) << " in clone "
3572297eecfbSDimitry Andric                  << ore::NV("Caller", CBClone->getFunction())
3573297eecfbSDimitry Andric                  << " assigned to call function clone "
3574297eecfbSDimitry Andric                  << ore::NV("Callee", NewF.getCallee()));
3575297eecfbSDimitry Andric       }
3576297eecfbSDimitry Andric     };
3577297eecfbSDimitry Andric 
357806c3fb27SDimitry Andric     // Locate the summary for F.
357906c3fb27SDimitry Andric     ValueInfo TheFnVI = findValueInfoForFunc(F, M, ImportSummary);
358006c3fb27SDimitry Andric     // If not found, this could be an imported local (see comment in
358106c3fb27SDimitry Andric     // findValueInfoForFunc). Skip for now as it will be cloned in its original
358206c3fb27SDimitry Andric     // module (where it would have been promoted to global scope so should
358306c3fb27SDimitry Andric     // satisfy any reference in this module).
358406c3fb27SDimitry Andric     if (!TheFnVI)
358506c3fb27SDimitry Andric       continue;
358606c3fb27SDimitry Andric 
358706c3fb27SDimitry Andric     auto *GVSummary =
358806c3fb27SDimitry Andric         ImportSummary->findSummaryInModule(TheFnVI, M.getModuleIdentifier());
3589*0fca6ea1SDimitry Andric     if (!GVSummary) {
3590*0fca6ea1SDimitry Andric       // Must have been imported, use the summary which matches the definition。
3591*0fca6ea1SDimitry Andric       // (might be multiple if this was a linkonce_odr).
3592*0fca6ea1SDimitry Andric       auto SrcModuleMD = F.getMetadata("thinlto_src_module");
3593*0fca6ea1SDimitry Andric       assert(SrcModuleMD &&
3594*0fca6ea1SDimitry Andric              "enable-import-metadata is needed to emit thinlto_src_module");
3595*0fca6ea1SDimitry Andric       StringRef SrcModule =
3596*0fca6ea1SDimitry Andric           dyn_cast<MDString>(SrcModuleMD->getOperand(0))->getString();
3597*0fca6ea1SDimitry Andric       for (auto &GVS : TheFnVI.getSummaryList()) {
3598*0fca6ea1SDimitry Andric         if (GVS->modulePath() == SrcModule) {
3599*0fca6ea1SDimitry Andric           GVSummary = GVS.get();
3600*0fca6ea1SDimitry Andric           break;
3601*0fca6ea1SDimitry Andric         }
3602*0fca6ea1SDimitry Andric       }
3603*0fca6ea1SDimitry Andric       assert(GVSummary && GVSummary->modulePath() == SrcModule);
3604*0fca6ea1SDimitry Andric     }
360506c3fb27SDimitry Andric 
360606c3fb27SDimitry Andric     // If this was an imported alias skip it as we won't have the function
360706c3fb27SDimitry Andric     // summary, and it should be cloned in the original module.
360806c3fb27SDimitry Andric     if (isa<AliasSummary>(GVSummary))
360906c3fb27SDimitry Andric       continue;
361006c3fb27SDimitry Andric 
361106c3fb27SDimitry Andric     auto *FS = cast<FunctionSummary>(GVSummary->getBaseObject());
361206c3fb27SDimitry Andric 
361306c3fb27SDimitry Andric     if (FS->allocs().empty() && FS->callsites().empty())
361406c3fb27SDimitry Andric       continue;
361506c3fb27SDimitry Andric 
361606c3fb27SDimitry Andric     auto SI = FS->callsites().begin();
361706c3fb27SDimitry Andric     auto AI = FS->allocs().begin();
361806c3fb27SDimitry Andric 
3619297eecfbSDimitry Andric     // To handle callsite infos synthesized for tail calls which have missing
3620297eecfbSDimitry Andric     // frames in the profiled context, map callee VI to the synthesized callsite
3621297eecfbSDimitry Andric     // info.
3622297eecfbSDimitry Andric     DenseMap<ValueInfo, CallsiteInfo> MapTailCallCalleeVIToCallsite;
3623297eecfbSDimitry Andric     // Iterate the callsites for this function in reverse, since we place all
3624297eecfbSDimitry Andric     // those synthesized for tail calls at the end.
3625297eecfbSDimitry Andric     for (auto CallsiteIt = FS->callsites().rbegin();
3626297eecfbSDimitry Andric          CallsiteIt != FS->callsites().rend(); CallsiteIt++) {
3627297eecfbSDimitry Andric       auto &Callsite = *CallsiteIt;
3628297eecfbSDimitry Andric       // Stop as soon as we see a non-synthesized callsite info (see comment
3629297eecfbSDimitry Andric       // above loop). All the entries added for discovered tail calls have empty
3630297eecfbSDimitry Andric       // stack ids.
3631297eecfbSDimitry Andric       if (!Callsite.StackIdIndices.empty())
3632297eecfbSDimitry Andric         break;
3633297eecfbSDimitry Andric       MapTailCallCalleeVIToCallsite.insert({Callsite.Callee, Callsite});
3634297eecfbSDimitry Andric     }
3635297eecfbSDimitry Andric 
363606c3fb27SDimitry Andric     // Assume for now that the instructions are in the exact same order
363706c3fb27SDimitry Andric     // as when the summary was created, but confirm this is correct by
363806c3fb27SDimitry Andric     // matching the stack ids.
363906c3fb27SDimitry Andric     for (auto &BB : F) {
364006c3fb27SDimitry Andric       for (auto &I : BB) {
364106c3fb27SDimitry Andric         auto *CB = dyn_cast<CallBase>(&I);
364206c3fb27SDimitry Andric         // Same handling as when creating module summary.
364306c3fb27SDimitry Andric         if (!mayHaveMemprofSummary(CB))
364406c3fb27SDimitry Andric           continue;
364506c3fb27SDimitry Andric 
36465f757f3fSDimitry Andric         auto *CalledValue = CB->getCalledOperand();
36475f757f3fSDimitry Andric         auto *CalledFunction = CB->getCalledFunction();
36485f757f3fSDimitry Andric         if (CalledValue && !CalledFunction) {
36495f757f3fSDimitry Andric           CalledValue = CalledValue->stripPointerCasts();
36505f757f3fSDimitry Andric           // Stripping pointer casts can reveal a called function.
36515f757f3fSDimitry Andric           CalledFunction = dyn_cast<Function>(CalledValue);
36525f757f3fSDimitry Andric         }
36535f757f3fSDimitry Andric         // Check if this is an alias to a function. If so, get the
36545f757f3fSDimitry Andric         // called aliasee for the checks below.
36555f757f3fSDimitry Andric         if (auto *GA = dyn_cast<GlobalAlias>(CalledValue)) {
36565f757f3fSDimitry Andric           assert(!CalledFunction &&
36575f757f3fSDimitry Andric                  "Expected null called function in callsite for alias");
36585f757f3fSDimitry Andric           CalledFunction = dyn_cast<Function>(GA->getAliaseeObject());
36595f757f3fSDimitry Andric         }
36605f757f3fSDimitry Andric 
366106c3fb27SDimitry Andric         CallStack<MDNode, MDNode::op_iterator> CallsiteContext(
366206c3fb27SDimitry Andric             I.getMetadata(LLVMContext::MD_callsite));
366306c3fb27SDimitry Andric         auto *MemProfMD = I.getMetadata(LLVMContext::MD_memprof);
366406c3fb27SDimitry Andric 
366506c3fb27SDimitry Andric         // Include allocs that were already assigned a memprof function
366606c3fb27SDimitry Andric         // attribute in the statistics.
366706c3fb27SDimitry Andric         if (CB->getAttributes().hasFnAttr("memprof")) {
366806c3fb27SDimitry Andric           assert(!MemProfMD);
366906c3fb27SDimitry Andric           CB->getAttributes().getFnAttr("memprof").getValueAsString() == "cold"
367006c3fb27SDimitry Andric               ? AllocTypeColdThinBackend++
367106c3fb27SDimitry Andric               : AllocTypeNotColdThinBackend++;
367206c3fb27SDimitry Andric           OrigAllocsThinBackend++;
367306c3fb27SDimitry Andric           AllocVersionsThinBackend++;
367406c3fb27SDimitry Andric           if (!MaxAllocVersionsThinBackend)
367506c3fb27SDimitry Andric             MaxAllocVersionsThinBackend = 1;
367606c3fb27SDimitry Andric           // Remove any remaining callsite metadata and we can skip the rest of
367706c3fb27SDimitry Andric           // the handling for this instruction, since no cloning needed.
367806c3fb27SDimitry Andric           I.setMetadata(LLVMContext::MD_callsite, nullptr);
367906c3fb27SDimitry Andric           continue;
368006c3fb27SDimitry Andric         }
368106c3fb27SDimitry Andric 
368206c3fb27SDimitry Andric         if (MemProfMD) {
368306c3fb27SDimitry Andric           // Consult the next alloc node.
368406c3fb27SDimitry Andric           assert(AI != FS->allocs().end());
368506c3fb27SDimitry Andric           auto &AllocNode = *(AI++);
368606c3fb27SDimitry Andric 
368706c3fb27SDimitry Andric           // Sanity check that the MIB stack ids match between the summary and
368806c3fb27SDimitry Andric           // instruction metadata.
368906c3fb27SDimitry Andric           auto MIBIter = AllocNode.MIBs.begin();
369006c3fb27SDimitry Andric           for (auto &MDOp : MemProfMD->operands()) {
369106c3fb27SDimitry Andric             assert(MIBIter != AllocNode.MIBs.end());
369206c3fb27SDimitry Andric             LLVM_ATTRIBUTE_UNUSED auto StackIdIndexIter =
369306c3fb27SDimitry Andric                 MIBIter->StackIdIndices.begin();
369406c3fb27SDimitry Andric             auto *MIBMD = cast<const MDNode>(MDOp);
369506c3fb27SDimitry Andric             MDNode *StackMDNode = getMIBStackNode(MIBMD);
369606c3fb27SDimitry Andric             assert(StackMDNode);
369706c3fb27SDimitry Andric             CallStack<MDNode, MDNode::op_iterator> StackContext(StackMDNode);
3698*0fca6ea1SDimitry Andric             auto ContextIterBegin =
369906c3fb27SDimitry Andric                 StackContext.beginAfterSharedPrefix(CallsiteContext);
3700*0fca6ea1SDimitry Andric             // Skip the checking on the first iteration.
3701*0fca6ea1SDimitry Andric             uint64_t LastStackContextId =
3702*0fca6ea1SDimitry Andric                 (ContextIterBegin != StackContext.end() &&
3703*0fca6ea1SDimitry Andric                  *ContextIterBegin == 0)
3704*0fca6ea1SDimitry Andric                     ? 1
3705*0fca6ea1SDimitry Andric                     : 0;
3706*0fca6ea1SDimitry Andric             for (auto ContextIter = ContextIterBegin;
370706c3fb27SDimitry Andric                  ContextIter != StackContext.end(); ++ContextIter) {
370806c3fb27SDimitry Andric               // If this is a direct recursion, simply skip the duplicate
370906c3fb27SDimitry Andric               // entries, to be consistent with how the summary ids were
371006c3fb27SDimitry Andric               // generated during ModuleSummaryAnalysis.
3711*0fca6ea1SDimitry Andric               if (LastStackContextId == *ContextIter)
371206c3fb27SDimitry Andric                 continue;
3713*0fca6ea1SDimitry Andric               LastStackContextId = *ContextIter;
371406c3fb27SDimitry Andric               assert(StackIdIndexIter != MIBIter->StackIdIndices.end());
371506c3fb27SDimitry Andric               assert(ImportSummary->getStackIdAtIndex(*StackIdIndexIter) ==
371606c3fb27SDimitry Andric                      *ContextIter);
371706c3fb27SDimitry Andric               StackIdIndexIter++;
371806c3fb27SDimitry Andric             }
371906c3fb27SDimitry Andric             MIBIter++;
372006c3fb27SDimitry Andric           }
372106c3fb27SDimitry Andric 
372206c3fb27SDimitry Andric           // Perform cloning if not yet done.
372306c3fb27SDimitry Andric           CloneFuncIfNeeded(/*NumClones=*/AllocNode.Versions.size());
372406c3fb27SDimitry Andric 
372506c3fb27SDimitry Andric           OrigAllocsThinBackend++;
372606c3fb27SDimitry Andric           AllocVersionsThinBackend += AllocNode.Versions.size();
372706c3fb27SDimitry Andric           if (MaxAllocVersionsThinBackend < AllocNode.Versions.size())
372806c3fb27SDimitry Andric             MaxAllocVersionsThinBackend = AllocNode.Versions.size();
372906c3fb27SDimitry Andric 
373006c3fb27SDimitry Andric           // If there is only one version that means we didn't end up
373106c3fb27SDimitry Andric           // considering this function for cloning, and in that case the alloc
373206c3fb27SDimitry Andric           // will still be none type or should have gotten the default NotCold.
373306c3fb27SDimitry Andric           // Skip that after calling clone helper since that does some sanity
373406c3fb27SDimitry Andric           // checks that confirm we haven't decided yet that we need cloning.
373506c3fb27SDimitry Andric           if (AllocNode.Versions.size() == 1) {
373606c3fb27SDimitry Andric             assert((AllocationType)AllocNode.Versions[0] ==
373706c3fb27SDimitry Andric                        AllocationType::NotCold ||
373806c3fb27SDimitry Andric                    (AllocationType)AllocNode.Versions[0] ==
373906c3fb27SDimitry Andric                        AllocationType::None);
374006c3fb27SDimitry Andric             UnclonableAllocsThinBackend++;
374106c3fb27SDimitry Andric             continue;
374206c3fb27SDimitry Andric           }
374306c3fb27SDimitry Andric 
374406c3fb27SDimitry Andric           // All versions should have a singular allocation type.
374506c3fb27SDimitry Andric           assert(llvm::none_of(AllocNode.Versions, [](uint8_t Type) {
374606c3fb27SDimitry Andric             return Type == ((uint8_t)AllocationType::NotCold |
374706c3fb27SDimitry Andric                             (uint8_t)AllocationType::Cold);
374806c3fb27SDimitry Andric           }));
374906c3fb27SDimitry Andric 
375006c3fb27SDimitry Andric           // Update the allocation types per the summary info.
375106c3fb27SDimitry Andric           for (unsigned J = 0; J < AllocNode.Versions.size(); J++) {
375206c3fb27SDimitry Andric             // Ignore any that didn't get an assigned allocation type.
375306c3fb27SDimitry Andric             if (AllocNode.Versions[J] == (uint8_t)AllocationType::None)
375406c3fb27SDimitry Andric               continue;
375506c3fb27SDimitry Andric             AllocationType AllocTy = (AllocationType)AllocNode.Versions[J];
375606c3fb27SDimitry Andric             AllocTy == AllocationType::Cold ? AllocTypeColdThinBackend++
375706c3fb27SDimitry Andric                                             : AllocTypeNotColdThinBackend++;
375806c3fb27SDimitry Andric             std::string AllocTypeString = getAllocTypeAttributeString(AllocTy);
375906c3fb27SDimitry Andric             auto A = llvm::Attribute::get(F.getContext(), "memprof",
376006c3fb27SDimitry Andric                                           AllocTypeString);
376106c3fb27SDimitry Andric             CallBase *CBClone;
376206c3fb27SDimitry Andric             // Copy 0 is the original function.
376306c3fb27SDimitry Andric             if (!J)
376406c3fb27SDimitry Andric               CBClone = CB;
376506c3fb27SDimitry Andric             else
376606c3fb27SDimitry Andric               // Since VMaps are only created for new clones, we index with
376706c3fb27SDimitry Andric               // clone J-1 (J==0 is the original clone and does not have a VMaps
376806c3fb27SDimitry Andric               // entry).
376906c3fb27SDimitry Andric               CBClone = cast<CallBase>((*VMaps[J - 1])[CB]);
377006c3fb27SDimitry Andric             CBClone->addFnAttr(A);
377106c3fb27SDimitry Andric             ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofAttribute", CBClone)
377206c3fb27SDimitry Andric                      << ore::NV("AllocationCall", CBClone) << " in clone "
377306c3fb27SDimitry Andric                      << ore::NV("Caller", CBClone->getFunction())
377406c3fb27SDimitry Andric                      << " marked with memprof allocation attribute "
377506c3fb27SDimitry Andric                      << ore::NV("Attribute", AllocTypeString));
377606c3fb27SDimitry Andric           }
377706c3fb27SDimitry Andric         } else if (!CallsiteContext.empty()) {
377806c3fb27SDimitry Andric           // Consult the next callsite node.
377906c3fb27SDimitry Andric           assert(SI != FS->callsites().end());
378006c3fb27SDimitry Andric           auto &StackNode = *(SI++);
378106c3fb27SDimitry Andric 
378206c3fb27SDimitry Andric #ifndef NDEBUG
378306c3fb27SDimitry Andric           // Sanity check that the stack ids match between the summary and
378406c3fb27SDimitry Andric           // instruction metadata.
378506c3fb27SDimitry Andric           auto StackIdIndexIter = StackNode.StackIdIndices.begin();
378606c3fb27SDimitry Andric           for (auto StackId : CallsiteContext) {
378706c3fb27SDimitry Andric             assert(StackIdIndexIter != StackNode.StackIdIndices.end());
378806c3fb27SDimitry Andric             assert(ImportSummary->getStackIdAtIndex(*StackIdIndexIter) ==
378906c3fb27SDimitry Andric                    StackId);
379006c3fb27SDimitry Andric             StackIdIndexIter++;
379106c3fb27SDimitry Andric           }
379206c3fb27SDimitry Andric #endif
379306c3fb27SDimitry Andric 
3794297eecfbSDimitry Andric           CloneCallsite(StackNode, CB, CalledFunction);
3795297eecfbSDimitry Andric         } else if (CB->isTailCall()) {
3796297eecfbSDimitry Andric           // Locate the synthesized callsite info for the callee VI, if any was
3797297eecfbSDimitry Andric           // created, and use that for cloning.
3798297eecfbSDimitry Andric           ValueInfo CalleeVI =
3799297eecfbSDimitry Andric               findValueInfoForFunc(*CalledFunction, M, ImportSummary);
3800297eecfbSDimitry Andric           if (CalleeVI && MapTailCallCalleeVIToCallsite.count(CalleeVI)) {
3801297eecfbSDimitry Andric             auto Callsite = MapTailCallCalleeVIToCallsite.find(CalleeVI);
3802297eecfbSDimitry Andric             assert(Callsite != MapTailCallCalleeVIToCallsite.end());
3803297eecfbSDimitry Andric             CloneCallsite(Callsite->second, CB, CalledFunction);
380406c3fb27SDimitry Andric           }
380506c3fb27SDimitry Andric         }
380606c3fb27SDimitry Andric         // Memprof and callsite metadata on memory allocations no longer needed.
380706c3fb27SDimitry Andric         I.setMetadata(LLVMContext::MD_memprof, nullptr);
380806c3fb27SDimitry Andric         I.setMetadata(LLVMContext::MD_callsite, nullptr);
380906c3fb27SDimitry Andric       }
381006c3fb27SDimitry Andric     }
381106c3fb27SDimitry Andric   }
381206c3fb27SDimitry Andric 
381306c3fb27SDimitry Andric   return Changed;
381406c3fb27SDimitry Andric }
381506c3fb27SDimitry Andric 
381606c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy>
381706c3fb27SDimitry Andric bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::process() {
381806c3fb27SDimitry Andric   if (DumpCCG) {
381906c3fb27SDimitry Andric     dbgs() << "CCG before cloning:\n";
382006c3fb27SDimitry Andric     dbgs() << *this;
382106c3fb27SDimitry Andric   }
382206c3fb27SDimitry Andric   if (ExportToDot)
382306c3fb27SDimitry Andric     exportToDot("postbuild");
382406c3fb27SDimitry Andric 
382506c3fb27SDimitry Andric   if (VerifyCCG) {
382606c3fb27SDimitry Andric     check();
382706c3fb27SDimitry Andric   }
382806c3fb27SDimitry Andric 
382906c3fb27SDimitry Andric   identifyClones();
383006c3fb27SDimitry Andric 
383106c3fb27SDimitry Andric   if (VerifyCCG) {
383206c3fb27SDimitry Andric     check();
383306c3fb27SDimitry Andric   }
383406c3fb27SDimitry Andric 
383506c3fb27SDimitry Andric   if (DumpCCG) {
383606c3fb27SDimitry Andric     dbgs() << "CCG after cloning:\n";
383706c3fb27SDimitry Andric     dbgs() << *this;
383806c3fb27SDimitry Andric   }
383906c3fb27SDimitry Andric   if (ExportToDot)
384006c3fb27SDimitry Andric     exportToDot("cloned");
384106c3fb27SDimitry Andric 
384206c3fb27SDimitry Andric   bool Changed = assignFunctions();
384306c3fb27SDimitry Andric 
384406c3fb27SDimitry Andric   if (DumpCCG) {
384506c3fb27SDimitry Andric     dbgs() << "CCG after assigning function clones:\n";
384606c3fb27SDimitry Andric     dbgs() << *this;
384706c3fb27SDimitry Andric   }
384806c3fb27SDimitry Andric   if (ExportToDot)
384906c3fb27SDimitry Andric     exportToDot("clonefuncassign");
385006c3fb27SDimitry Andric 
3851*0fca6ea1SDimitry Andric   if (MemProfReportHintedSizes)
3852*0fca6ea1SDimitry Andric     printTotalSizes(errs());
3853*0fca6ea1SDimitry Andric 
385406c3fb27SDimitry Andric   return Changed;
385506c3fb27SDimitry Andric }
385606c3fb27SDimitry Andric 
385706c3fb27SDimitry Andric bool MemProfContextDisambiguation::processModule(
385806c3fb27SDimitry Andric     Module &M,
38597a6dacacSDimitry Andric     llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter) {
386006c3fb27SDimitry Andric 
386106c3fb27SDimitry Andric   // If we have an import summary, then the cloning decisions were made during
386206c3fb27SDimitry Andric   // the thin link on the index. Apply them and return.
386306c3fb27SDimitry Andric   if (ImportSummary)
386406c3fb27SDimitry Andric     return applyImport(M);
386506c3fb27SDimitry Andric 
386606c3fb27SDimitry Andric   // TODO: If/when other types of memprof cloning are enabled beyond just for
386706c3fb27SDimitry Andric   // hot and cold, we will need to change this to individually control the
386806c3fb27SDimitry Andric   // AllocationType passed to addStackNodesForMIB during CCG construction.
386906c3fb27SDimitry Andric   // Note that we specifically check this after applying imports above, so that
387006c3fb27SDimitry Andric   // the option isn't needed to be passed to distributed ThinLTO backend
387106c3fb27SDimitry Andric   // clang processes, which won't necessarily have visibility into the linker
387206c3fb27SDimitry Andric   // dependences. Instead the information is communicated from the LTO link to
387306c3fb27SDimitry Andric   // the backends via the combined summary index.
387406c3fb27SDimitry Andric   if (!SupportsHotColdNew)
387506c3fb27SDimitry Andric     return false;
387606c3fb27SDimitry Andric 
387706c3fb27SDimitry Andric   ModuleCallsiteContextGraph CCG(M, OREGetter);
387806c3fb27SDimitry Andric   return CCG.process();
387906c3fb27SDimitry Andric }
388006c3fb27SDimitry Andric 
388106c3fb27SDimitry Andric MemProfContextDisambiguation::MemProfContextDisambiguation(
388206c3fb27SDimitry Andric     const ModuleSummaryIndex *Summary)
388306c3fb27SDimitry Andric     : ImportSummary(Summary) {
388406c3fb27SDimitry Andric   if (ImportSummary) {
388506c3fb27SDimitry Andric     // The MemProfImportSummary should only be used for testing ThinLTO
388606c3fb27SDimitry Andric     // distributed backend handling via opt, in which case we don't have a
388706c3fb27SDimitry Andric     // summary from the pass pipeline.
388806c3fb27SDimitry Andric     assert(MemProfImportSummary.empty());
388906c3fb27SDimitry Andric     return;
389006c3fb27SDimitry Andric   }
389106c3fb27SDimitry Andric   if (MemProfImportSummary.empty())
389206c3fb27SDimitry Andric     return;
389306c3fb27SDimitry Andric 
389406c3fb27SDimitry Andric   auto ReadSummaryFile =
389506c3fb27SDimitry Andric       errorOrToExpected(MemoryBuffer::getFile(MemProfImportSummary));
389606c3fb27SDimitry Andric   if (!ReadSummaryFile) {
389706c3fb27SDimitry Andric     logAllUnhandledErrors(ReadSummaryFile.takeError(), errs(),
389806c3fb27SDimitry Andric                           "Error loading file '" + MemProfImportSummary +
389906c3fb27SDimitry Andric                               "': ");
390006c3fb27SDimitry Andric     return;
390106c3fb27SDimitry Andric   }
390206c3fb27SDimitry Andric   auto ImportSummaryForTestingOrErr = getModuleSummaryIndex(**ReadSummaryFile);
390306c3fb27SDimitry Andric   if (!ImportSummaryForTestingOrErr) {
390406c3fb27SDimitry Andric     logAllUnhandledErrors(ImportSummaryForTestingOrErr.takeError(), errs(),
390506c3fb27SDimitry Andric                           "Error parsing file '" + MemProfImportSummary +
390606c3fb27SDimitry Andric                               "': ");
390706c3fb27SDimitry Andric     return;
390806c3fb27SDimitry Andric   }
390906c3fb27SDimitry Andric   ImportSummaryForTesting = std::move(*ImportSummaryForTestingOrErr);
391006c3fb27SDimitry Andric   ImportSummary = ImportSummaryForTesting.get();
391106c3fb27SDimitry Andric }
391206c3fb27SDimitry Andric 
391306c3fb27SDimitry Andric PreservedAnalyses MemProfContextDisambiguation::run(Module &M,
391406c3fb27SDimitry Andric                                                     ModuleAnalysisManager &AM) {
391506c3fb27SDimitry Andric   auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
391606c3fb27SDimitry Andric   auto OREGetter = [&](Function *F) -> OptimizationRemarkEmitter & {
391706c3fb27SDimitry Andric     return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*F);
391806c3fb27SDimitry Andric   };
391906c3fb27SDimitry Andric   if (!processModule(M, OREGetter))
392006c3fb27SDimitry Andric     return PreservedAnalyses::all();
392106c3fb27SDimitry Andric   return PreservedAnalyses::none();
392206c3fb27SDimitry Andric }
392306c3fb27SDimitry Andric 
392406c3fb27SDimitry Andric void MemProfContextDisambiguation::run(
392506c3fb27SDimitry Andric     ModuleSummaryIndex &Index,
39267a6dacacSDimitry Andric     llvm::function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
392706c3fb27SDimitry Andric         isPrevailing) {
392806c3fb27SDimitry Andric   // TODO: If/when other types of memprof cloning are enabled beyond just for
392906c3fb27SDimitry Andric   // hot and cold, we will need to change this to individually control the
393006c3fb27SDimitry Andric   // AllocationType passed to addStackNodesForMIB during CCG construction.
393106c3fb27SDimitry Andric   // The index was set from the option, so these should be in sync.
393206c3fb27SDimitry Andric   assert(Index.withSupportsHotColdNew() == SupportsHotColdNew);
393306c3fb27SDimitry Andric   if (!SupportsHotColdNew)
393406c3fb27SDimitry Andric     return;
393506c3fb27SDimitry Andric 
393606c3fb27SDimitry Andric   IndexCallsiteContextGraph CCG(Index, isPrevailing);
393706c3fb27SDimitry Andric   CCG.process();
393806c3fb27SDimitry Andric }
3939