106c3fb27SDimitry Andric //==-- MemProfContextDisambiguation.cpp - Disambiguate contexts -------------=// 206c3fb27SDimitry Andric // 306c3fb27SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 406c3fb27SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 506c3fb27SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 606c3fb27SDimitry Andric // 706c3fb27SDimitry Andric //===----------------------------------------------------------------------===// 806c3fb27SDimitry Andric // 906c3fb27SDimitry Andric // This file implements support for context disambiguation of allocation 1006c3fb27SDimitry Andric // calls for profile guided heap optimization. Specifically, it uses Memprof 1106c3fb27SDimitry Andric // profiles which indicate context specific allocation behavior (currently 1206c3fb27SDimitry Andric // distinguishing cold vs hot memory allocations). Cloning is performed to 1306c3fb27SDimitry Andric // expose the cold allocation call contexts, and the allocation calls are 1406c3fb27SDimitry Andric // subsequently annotated with an attribute for later transformation. 1506c3fb27SDimitry Andric // 1606c3fb27SDimitry Andric // The transformations can be performed either directly on IR (regular LTO), or 1706c3fb27SDimitry Andric // on a ThinLTO index (and later applied to the IR during the ThinLTO backend). 1806c3fb27SDimitry Andric // Both types of LTO operate on a the same base graph representation, which 1906c3fb27SDimitry Andric // uses CRTP to support either IR or Index formats. 2006c3fb27SDimitry Andric // 2106c3fb27SDimitry Andric //===----------------------------------------------------------------------===// 2206c3fb27SDimitry Andric 2306c3fb27SDimitry Andric #include "llvm/Transforms/IPO/MemProfContextDisambiguation.h" 2406c3fb27SDimitry Andric #include "llvm/ADT/DenseMap.h" 2506c3fb27SDimitry Andric #include "llvm/ADT/DenseSet.h" 2606c3fb27SDimitry Andric #include "llvm/ADT/MapVector.h" 2706c3fb27SDimitry Andric #include "llvm/ADT/SetOperations.h" 2806c3fb27SDimitry Andric #include "llvm/ADT/SmallPtrSet.h" 2906c3fb27SDimitry Andric #include "llvm/ADT/SmallSet.h" 3006c3fb27SDimitry Andric #include "llvm/ADT/SmallVector.h" 3106c3fb27SDimitry Andric #include "llvm/ADT/Statistic.h" 3206c3fb27SDimitry Andric #include "llvm/Analysis/MemoryProfileInfo.h" 3306c3fb27SDimitry Andric #include "llvm/Analysis/ModuleSummaryAnalysis.h" 3406c3fb27SDimitry Andric #include "llvm/Analysis/OptimizationRemarkEmitter.h" 3506c3fb27SDimitry Andric #include "llvm/Bitcode/BitcodeReader.h" 3606c3fb27SDimitry Andric #include "llvm/IR/Constants.h" 3706c3fb27SDimitry Andric #include "llvm/IR/Instructions.h" 3806c3fb27SDimitry Andric #include "llvm/IR/Module.h" 3906c3fb27SDimitry Andric #include "llvm/IR/ModuleSummaryIndex.h" 4006c3fb27SDimitry Andric #include "llvm/Pass.h" 4106c3fb27SDimitry Andric #include "llvm/Support/CommandLine.h" 4206c3fb27SDimitry Andric #include "llvm/Support/FileSystem.h" 4306c3fb27SDimitry Andric #include "llvm/Support/GraphWriter.h" 4406c3fb27SDimitry Andric #include "llvm/Support/raw_ostream.h" 4506c3fb27SDimitry Andric #include "llvm/Transforms/IPO.h" 4606c3fb27SDimitry Andric #include "llvm/Transforms/Utils/Cloning.h" 4706c3fb27SDimitry Andric #include <sstream> 4806c3fb27SDimitry Andric #include <vector> 4906c3fb27SDimitry Andric using namespace llvm; 5006c3fb27SDimitry Andric using namespace llvm::memprof; 5106c3fb27SDimitry Andric 5206c3fb27SDimitry Andric #define DEBUG_TYPE "memprof-context-disambiguation" 5306c3fb27SDimitry Andric 5406c3fb27SDimitry Andric STATISTIC(FunctionClonesAnalysis, 5506c3fb27SDimitry Andric "Number of function clones created during whole program analysis"); 5606c3fb27SDimitry Andric STATISTIC(FunctionClonesThinBackend, 5706c3fb27SDimitry Andric "Number of function clones created during ThinLTO backend"); 5806c3fb27SDimitry Andric STATISTIC(FunctionsClonedThinBackend, 5906c3fb27SDimitry Andric "Number of functions that had clones created during ThinLTO backend"); 6006c3fb27SDimitry Andric STATISTIC(AllocTypeNotCold, "Number of not cold static allocations (possibly " 6106c3fb27SDimitry Andric "cloned) during whole program analysis"); 6206c3fb27SDimitry Andric STATISTIC(AllocTypeCold, "Number of cold static allocations (possibly cloned) " 6306c3fb27SDimitry Andric "during whole program analysis"); 6406c3fb27SDimitry Andric STATISTIC(AllocTypeNotColdThinBackend, 6506c3fb27SDimitry Andric "Number of not cold static allocations (possibly cloned) during " 6606c3fb27SDimitry Andric "ThinLTO backend"); 6706c3fb27SDimitry Andric STATISTIC(AllocTypeColdThinBackend, "Number of cold static allocations " 6806c3fb27SDimitry Andric "(possibly cloned) during ThinLTO backend"); 6906c3fb27SDimitry Andric STATISTIC(OrigAllocsThinBackend, 7006c3fb27SDimitry Andric "Number of original (not cloned) allocations with memprof profiles " 7106c3fb27SDimitry Andric "during ThinLTO backend"); 7206c3fb27SDimitry Andric STATISTIC( 7306c3fb27SDimitry Andric AllocVersionsThinBackend, 7406c3fb27SDimitry Andric "Number of allocation versions (including clones) during ThinLTO backend"); 7506c3fb27SDimitry Andric STATISTIC(MaxAllocVersionsThinBackend, 7606c3fb27SDimitry Andric "Maximum number of allocation versions created for an original " 7706c3fb27SDimitry Andric "allocation during ThinLTO backend"); 7806c3fb27SDimitry Andric STATISTIC(UnclonableAllocsThinBackend, 7906c3fb27SDimitry Andric "Number of unclonable ambigous allocations during ThinLTO backend"); 80*297eecfbSDimitry Andric STATISTIC(RemovedEdgesWithMismatchedCallees, 81*297eecfbSDimitry Andric "Number of edges removed due to mismatched callees (profiled vs IR)"); 82*297eecfbSDimitry Andric STATISTIC(FoundProfiledCalleeCount, 83*297eecfbSDimitry Andric "Number of profiled callees found via tail calls"); 84*297eecfbSDimitry Andric STATISTIC(FoundProfiledCalleeDepth, 85*297eecfbSDimitry Andric "Aggregate depth of profiled callees found via tail calls"); 86*297eecfbSDimitry Andric STATISTIC(FoundProfiledCalleeMaxDepth, 87*297eecfbSDimitry Andric "Maximum depth of profiled callees found via tail calls"); 88*297eecfbSDimitry Andric STATISTIC(FoundProfiledCalleeNonUniquelyCount, 89*297eecfbSDimitry Andric "Number of profiled callees found via multiple tail call chains"); 9006c3fb27SDimitry Andric 9106c3fb27SDimitry Andric static cl::opt<std::string> DotFilePathPrefix( 9206c3fb27SDimitry Andric "memprof-dot-file-path-prefix", cl::init(""), cl::Hidden, 9306c3fb27SDimitry Andric cl::value_desc("filename"), 9406c3fb27SDimitry Andric cl::desc("Specify the path prefix of the MemProf dot files.")); 9506c3fb27SDimitry Andric 9606c3fb27SDimitry Andric static cl::opt<bool> ExportToDot("memprof-export-to-dot", cl::init(false), 9706c3fb27SDimitry Andric cl::Hidden, 9806c3fb27SDimitry Andric cl::desc("Export graph to dot files.")); 9906c3fb27SDimitry Andric 10006c3fb27SDimitry Andric static cl::opt<bool> 10106c3fb27SDimitry Andric DumpCCG("memprof-dump-ccg", cl::init(false), cl::Hidden, 10206c3fb27SDimitry Andric cl::desc("Dump CallingContextGraph to stdout after each stage.")); 10306c3fb27SDimitry Andric 10406c3fb27SDimitry Andric static cl::opt<bool> 10506c3fb27SDimitry Andric VerifyCCG("memprof-verify-ccg", cl::init(false), cl::Hidden, 10606c3fb27SDimitry Andric cl::desc("Perform verification checks on CallingContextGraph.")); 10706c3fb27SDimitry Andric 10806c3fb27SDimitry Andric static cl::opt<bool> 10906c3fb27SDimitry Andric VerifyNodes("memprof-verify-nodes", cl::init(false), cl::Hidden, 11006c3fb27SDimitry Andric cl::desc("Perform frequent verification checks on nodes.")); 11106c3fb27SDimitry Andric 11206c3fb27SDimitry Andric static cl::opt<std::string> MemProfImportSummary( 11306c3fb27SDimitry Andric "memprof-import-summary", 11406c3fb27SDimitry Andric cl::desc("Import summary to use for testing the ThinLTO backend via opt"), 11506c3fb27SDimitry Andric cl::Hidden); 11606c3fb27SDimitry Andric 117*297eecfbSDimitry Andric static cl::opt<unsigned> 118*297eecfbSDimitry Andric TailCallSearchDepth("memprof-tail-call-search-depth", cl::init(5), 119*297eecfbSDimitry Andric cl::Hidden, 120*297eecfbSDimitry Andric cl::desc("Max depth to recursively search for missing " 121*297eecfbSDimitry Andric "frames through tail calls.")); 122*297eecfbSDimitry Andric 1235f757f3fSDimitry Andric namespace llvm { 12406c3fb27SDimitry Andric // Indicate we are linking with an allocator that supports hot/cold operator 12506c3fb27SDimitry Andric // new interfaces. 12606c3fb27SDimitry Andric cl::opt<bool> SupportsHotColdNew( 12706c3fb27SDimitry Andric "supports-hot-cold-new", cl::init(false), cl::Hidden, 12806c3fb27SDimitry Andric cl::desc("Linking with hot/cold operator new interfaces")); 1295f757f3fSDimitry Andric } // namespace llvm 13006c3fb27SDimitry Andric 13106c3fb27SDimitry Andric namespace { 13206c3fb27SDimitry Andric /// CRTP base for graphs built from either IR or ThinLTO summary index. 13306c3fb27SDimitry Andric /// 13406c3fb27SDimitry Andric /// The graph represents the call contexts in all memprof metadata on allocation 13506c3fb27SDimitry Andric /// calls, with nodes for the allocations themselves, as well as for the calls 13606c3fb27SDimitry Andric /// in each context. The graph is initially built from the allocation memprof 13706c3fb27SDimitry Andric /// metadata (or summary) MIBs. It is then updated to match calls with callsite 13806c3fb27SDimitry Andric /// metadata onto the nodes, updating it to reflect any inlining performed on 13906c3fb27SDimitry Andric /// those calls. 14006c3fb27SDimitry Andric /// 14106c3fb27SDimitry Andric /// Each MIB (representing an allocation's call context with allocation 14206c3fb27SDimitry Andric /// behavior) is assigned a unique context id during the graph build. The edges 14306c3fb27SDimitry Andric /// and nodes in the graph are decorated with the context ids they carry. This 14406c3fb27SDimitry Andric /// is used to correctly update the graph when cloning is performed so that we 14506c3fb27SDimitry Andric /// can uniquify the context for a single (possibly cloned) allocation. 14606c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 14706c3fb27SDimitry Andric class CallsiteContextGraph { 14806c3fb27SDimitry Andric public: 14906c3fb27SDimitry Andric CallsiteContextGraph() = default; 15006c3fb27SDimitry Andric CallsiteContextGraph(const CallsiteContextGraph &) = default; 15106c3fb27SDimitry Andric CallsiteContextGraph(CallsiteContextGraph &&) = default; 15206c3fb27SDimitry Andric 15306c3fb27SDimitry Andric /// Main entry point to perform analysis and transformations on graph. 15406c3fb27SDimitry Andric bool process(); 15506c3fb27SDimitry Andric 15606c3fb27SDimitry Andric /// Perform cloning on the graph necessary to uniquely identify the allocation 15706c3fb27SDimitry Andric /// behavior of an allocation based on its context. 15806c3fb27SDimitry Andric void identifyClones(); 15906c3fb27SDimitry Andric 16006c3fb27SDimitry Andric /// Assign callsite clones to functions, cloning functions as needed to 16106c3fb27SDimitry Andric /// accommodate the combinations of their callsite clones reached by callers. 16206c3fb27SDimitry Andric /// For regular LTO this clones functions and callsites in the IR, but for 16306c3fb27SDimitry Andric /// ThinLTO the cloning decisions are noted in the summaries and later applied 16406c3fb27SDimitry Andric /// in applyImport. 16506c3fb27SDimitry Andric bool assignFunctions(); 16606c3fb27SDimitry Andric 16706c3fb27SDimitry Andric void dump() const; 16806c3fb27SDimitry Andric void print(raw_ostream &OS) const; 16906c3fb27SDimitry Andric 17006c3fb27SDimitry Andric friend raw_ostream &operator<<(raw_ostream &OS, 17106c3fb27SDimitry Andric const CallsiteContextGraph &CCG) { 17206c3fb27SDimitry Andric CCG.print(OS); 17306c3fb27SDimitry Andric return OS; 17406c3fb27SDimitry Andric } 17506c3fb27SDimitry Andric 17606c3fb27SDimitry Andric friend struct GraphTraits< 17706c3fb27SDimitry Andric const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>; 17806c3fb27SDimitry Andric friend struct DOTGraphTraits< 17906c3fb27SDimitry Andric const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>; 18006c3fb27SDimitry Andric 18106c3fb27SDimitry Andric void exportToDot(std::string Label) const; 18206c3fb27SDimitry Andric 18306c3fb27SDimitry Andric /// Represents a function clone via FuncTy pointer and clone number pair. 18406c3fb27SDimitry Andric struct FuncInfo final 18506c3fb27SDimitry Andric : public std::pair<FuncTy *, unsigned /*Clone number*/> { 18606c3fb27SDimitry Andric using Base = std::pair<FuncTy *, unsigned>; 18706c3fb27SDimitry Andric FuncInfo(const Base &B) : Base(B) {} 18806c3fb27SDimitry Andric FuncInfo(FuncTy *F = nullptr, unsigned CloneNo = 0) : Base(F, CloneNo) {} 18906c3fb27SDimitry Andric explicit operator bool() const { return this->first != nullptr; } 19006c3fb27SDimitry Andric FuncTy *func() const { return this->first; } 19106c3fb27SDimitry Andric unsigned cloneNo() const { return this->second; } 19206c3fb27SDimitry Andric }; 19306c3fb27SDimitry Andric 19406c3fb27SDimitry Andric /// Represents a callsite clone via CallTy and clone number pair. 19506c3fb27SDimitry Andric struct CallInfo final : public std::pair<CallTy, unsigned /*Clone number*/> { 19606c3fb27SDimitry Andric using Base = std::pair<CallTy, unsigned>; 19706c3fb27SDimitry Andric CallInfo(const Base &B) : Base(B) {} 19806c3fb27SDimitry Andric CallInfo(CallTy Call = nullptr, unsigned CloneNo = 0) 19906c3fb27SDimitry Andric : Base(Call, CloneNo) {} 20006c3fb27SDimitry Andric explicit operator bool() const { return (bool)this->first; } 20106c3fb27SDimitry Andric CallTy call() const { return this->first; } 20206c3fb27SDimitry Andric unsigned cloneNo() const { return this->second; } 20306c3fb27SDimitry Andric void setCloneNo(unsigned N) { this->second = N; } 20406c3fb27SDimitry Andric void print(raw_ostream &OS) const { 20506c3fb27SDimitry Andric if (!operator bool()) { 20606c3fb27SDimitry Andric assert(!cloneNo()); 20706c3fb27SDimitry Andric OS << "null Call"; 20806c3fb27SDimitry Andric return; 20906c3fb27SDimitry Andric } 21006c3fb27SDimitry Andric call()->print(OS); 21106c3fb27SDimitry Andric OS << "\t(clone " << cloneNo() << ")"; 21206c3fb27SDimitry Andric } 21306c3fb27SDimitry Andric void dump() const { 21406c3fb27SDimitry Andric print(dbgs()); 21506c3fb27SDimitry Andric dbgs() << "\n"; 21606c3fb27SDimitry Andric } 21706c3fb27SDimitry Andric friend raw_ostream &operator<<(raw_ostream &OS, const CallInfo &Call) { 21806c3fb27SDimitry Andric Call.print(OS); 21906c3fb27SDimitry Andric return OS; 22006c3fb27SDimitry Andric } 22106c3fb27SDimitry Andric }; 22206c3fb27SDimitry Andric 22306c3fb27SDimitry Andric struct ContextEdge; 22406c3fb27SDimitry Andric 22506c3fb27SDimitry Andric /// Node in the Callsite Context Graph 22606c3fb27SDimitry Andric struct ContextNode { 22706c3fb27SDimitry Andric // Keep this for now since in the IR case where we have an Instruction* it 22806c3fb27SDimitry Andric // is not as immediately discoverable. Used for printing richer information 22906c3fb27SDimitry Andric // when dumping graph. 23006c3fb27SDimitry Andric bool IsAllocation; 23106c3fb27SDimitry Andric 23206c3fb27SDimitry Andric // Keeps track of when the Call was reset to null because there was 23306c3fb27SDimitry Andric // recursion. 23406c3fb27SDimitry Andric bool Recursive = false; 23506c3fb27SDimitry Andric 23606c3fb27SDimitry Andric // The corresponding allocation or interior call. 23706c3fb27SDimitry Andric CallInfo Call; 23806c3fb27SDimitry Andric 23906c3fb27SDimitry Andric // For alloc nodes this is a unique id assigned when constructed, and for 24006c3fb27SDimitry Andric // callsite stack nodes it is the original stack id when the node is 24106c3fb27SDimitry Andric // constructed from the memprof MIB metadata on the alloc nodes. Note that 24206c3fb27SDimitry Andric // this is only used when matching callsite metadata onto the stack nodes 24306c3fb27SDimitry Andric // created when processing the allocation memprof MIBs, and for labeling 24406c3fb27SDimitry Andric // nodes in the dot graph. Therefore we don't bother to assign a value for 24506c3fb27SDimitry Andric // clones. 24606c3fb27SDimitry Andric uint64_t OrigStackOrAllocId = 0; 24706c3fb27SDimitry Andric 24806c3fb27SDimitry Andric // This will be formed by ORing together the AllocationType enum values 24906c3fb27SDimitry Andric // for contexts including this node. 25006c3fb27SDimitry Andric uint8_t AllocTypes = 0; 25106c3fb27SDimitry Andric 25206c3fb27SDimitry Andric // Edges to all callees in the profiled call stacks. 25306c3fb27SDimitry Andric // TODO: Should this be a map (from Callee node) for more efficient lookup? 25406c3fb27SDimitry Andric std::vector<std::shared_ptr<ContextEdge>> CalleeEdges; 25506c3fb27SDimitry Andric 25606c3fb27SDimitry Andric // Edges to all callers in the profiled call stacks. 25706c3fb27SDimitry Andric // TODO: Should this be a map (from Caller node) for more efficient lookup? 25806c3fb27SDimitry Andric std::vector<std::shared_ptr<ContextEdge>> CallerEdges; 25906c3fb27SDimitry Andric 26006c3fb27SDimitry Andric // The set of IDs for contexts including this node. 26106c3fb27SDimitry Andric DenseSet<uint32_t> ContextIds; 26206c3fb27SDimitry Andric 26306c3fb27SDimitry Andric // List of clones of this ContextNode, initially empty. 26406c3fb27SDimitry Andric std::vector<ContextNode *> Clones; 26506c3fb27SDimitry Andric 26606c3fb27SDimitry Andric // If a clone, points to the original uncloned node. 26706c3fb27SDimitry Andric ContextNode *CloneOf = nullptr; 26806c3fb27SDimitry Andric 26906c3fb27SDimitry Andric ContextNode(bool IsAllocation) : IsAllocation(IsAllocation), Call() {} 27006c3fb27SDimitry Andric 27106c3fb27SDimitry Andric ContextNode(bool IsAllocation, CallInfo C) 27206c3fb27SDimitry Andric : IsAllocation(IsAllocation), Call(C) {} 27306c3fb27SDimitry Andric 27406c3fb27SDimitry Andric void addClone(ContextNode *Clone) { 27506c3fb27SDimitry Andric if (CloneOf) { 27606c3fb27SDimitry Andric CloneOf->Clones.push_back(Clone); 27706c3fb27SDimitry Andric Clone->CloneOf = CloneOf; 27806c3fb27SDimitry Andric } else { 27906c3fb27SDimitry Andric Clones.push_back(Clone); 28006c3fb27SDimitry Andric assert(!Clone->CloneOf); 28106c3fb27SDimitry Andric Clone->CloneOf = this; 28206c3fb27SDimitry Andric } 28306c3fb27SDimitry Andric } 28406c3fb27SDimitry Andric 28506c3fb27SDimitry Andric ContextNode *getOrigNode() { 28606c3fb27SDimitry Andric if (!CloneOf) 28706c3fb27SDimitry Andric return this; 28806c3fb27SDimitry Andric return CloneOf; 28906c3fb27SDimitry Andric } 29006c3fb27SDimitry Andric 29106c3fb27SDimitry Andric void addOrUpdateCallerEdge(ContextNode *Caller, AllocationType AllocType, 29206c3fb27SDimitry Andric unsigned int ContextId); 29306c3fb27SDimitry Andric 29406c3fb27SDimitry Andric ContextEdge *findEdgeFromCallee(const ContextNode *Callee); 29506c3fb27SDimitry Andric ContextEdge *findEdgeFromCaller(const ContextNode *Caller); 29606c3fb27SDimitry Andric void eraseCalleeEdge(const ContextEdge *Edge); 29706c3fb27SDimitry Andric void eraseCallerEdge(const ContextEdge *Edge); 29806c3fb27SDimitry Andric 29906c3fb27SDimitry Andric void setCall(CallInfo C) { Call = C; } 30006c3fb27SDimitry Andric 30106c3fb27SDimitry Andric bool hasCall() const { return (bool)Call.call(); } 30206c3fb27SDimitry Andric 30306c3fb27SDimitry Andric void printCall(raw_ostream &OS) const { Call.print(OS); } 30406c3fb27SDimitry Andric 30506c3fb27SDimitry Andric // True if this node was effectively removed from the graph, in which case 30606c3fb27SDimitry Andric // its context id set, caller edges, and callee edges should all be empty. 30706c3fb27SDimitry Andric bool isRemoved() const { 30806c3fb27SDimitry Andric assert(ContextIds.empty() == 30906c3fb27SDimitry Andric (CalleeEdges.empty() && CallerEdges.empty())); 31006c3fb27SDimitry Andric return ContextIds.empty(); 31106c3fb27SDimitry Andric } 31206c3fb27SDimitry Andric 31306c3fb27SDimitry Andric void dump() const; 31406c3fb27SDimitry Andric void print(raw_ostream &OS) const; 31506c3fb27SDimitry Andric 31606c3fb27SDimitry Andric friend raw_ostream &operator<<(raw_ostream &OS, const ContextNode &Node) { 31706c3fb27SDimitry Andric Node.print(OS); 31806c3fb27SDimitry Andric return OS; 31906c3fb27SDimitry Andric } 32006c3fb27SDimitry Andric }; 32106c3fb27SDimitry Andric 32206c3fb27SDimitry Andric /// Edge in the Callsite Context Graph from a ContextNode N to a caller or 32306c3fb27SDimitry Andric /// callee. 32406c3fb27SDimitry Andric struct ContextEdge { 32506c3fb27SDimitry Andric ContextNode *Callee; 32606c3fb27SDimitry Andric ContextNode *Caller; 32706c3fb27SDimitry Andric 32806c3fb27SDimitry Andric // This will be formed by ORing together the AllocationType enum values 32906c3fb27SDimitry Andric // for contexts including this edge. 33006c3fb27SDimitry Andric uint8_t AllocTypes = 0; 33106c3fb27SDimitry Andric 33206c3fb27SDimitry Andric // The set of IDs for contexts including this edge. 33306c3fb27SDimitry Andric DenseSet<uint32_t> ContextIds; 33406c3fb27SDimitry Andric 33506c3fb27SDimitry Andric ContextEdge(ContextNode *Callee, ContextNode *Caller, uint8_t AllocType, 33606c3fb27SDimitry Andric DenseSet<uint32_t> ContextIds) 33706c3fb27SDimitry Andric : Callee(Callee), Caller(Caller), AllocTypes(AllocType), 33806c3fb27SDimitry Andric ContextIds(ContextIds) {} 33906c3fb27SDimitry Andric 34006c3fb27SDimitry Andric DenseSet<uint32_t> &getContextIds() { return ContextIds; } 34106c3fb27SDimitry Andric 34206c3fb27SDimitry Andric void dump() const; 34306c3fb27SDimitry Andric void print(raw_ostream &OS) const; 34406c3fb27SDimitry Andric 34506c3fb27SDimitry Andric friend raw_ostream &operator<<(raw_ostream &OS, const ContextEdge &Edge) { 34606c3fb27SDimitry Andric Edge.print(OS); 34706c3fb27SDimitry Andric return OS; 34806c3fb27SDimitry Andric } 34906c3fb27SDimitry Andric }; 35006c3fb27SDimitry Andric 35106c3fb27SDimitry Andric /// Helper to remove callee edges that have allocation type None (due to not 35206c3fb27SDimitry Andric /// carrying any context ids) after transformations. 35306c3fb27SDimitry Andric void removeNoneTypeCalleeEdges(ContextNode *Node); 35406c3fb27SDimitry Andric 35506c3fb27SDimitry Andric protected: 35606c3fb27SDimitry Andric /// Get a list of nodes corresponding to the stack ids in the given callsite 35706c3fb27SDimitry Andric /// context. 35806c3fb27SDimitry Andric template <class NodeT, class IteratorT> 35906c3fb27SDimitry Andric std::vector<uint64_t> 36006c3fb27SDimitry Andric getStackIdsWithContextNodes(CallStack<NodeT, IteratorT> &CallsiteContext); 36106c3fb27SDimitry Andric 36206c3fb27SDimitry Andric /// Adds nodes for the given allocation and any stack ids on its memprof MIB 36306c3fb27SDimitry Andric /// metadata (or summary). 36406c3fb27SDimitry Andric ContextNode *addAllocNode(CallInfo Call, const FuncTy *F); 36506c3fb27SDimitry Andric 36606c3fb27SDimitry Andric /// Adds nodes for the given MIB stack ids. 36706c3fb27SDimitry Andric template <class NodeT, class IteratorT> 36806c3fb27SDimitry Andric void addStackNodesForMIB(ContextNode *AllocNode, 36906c3fb27SDimitry Andric CallStack<NodeT, IteratorT> &StackContext, 37006c3fb27SDimitry Andric CallStack<NodeT, IteratorT> &CallsiteContext, 37106c3fb27SDimitry Andric AllocationType AllocType); 37206c3fb27SDimitry Andric 37306c3fb27SDimitry Andric /// Matches all callsite metadata (or summary) to the nodes created for 37406c3fb27SDimitry Andric /// allocation memprof MIB metadata, synthesizing new nodes to reflect any 37506c3fb27SDimitry Andric /// inlining performed on those callsite instructions. 37606c3fb27SDimitry Andric void updateStackNodes(); 37706c3fb27SDimitry Andric 37806c3fb27SDimitry Andric /// Update graph to conservatively handle any callsite stack nodes that target 37906c3fb27SDimitry Andric /// multiple different callee target functions. 38006c3fb27SDimitry Andric void handleCallsitesWithMultipleTargets(); 38106c3fb27SDimitry Andric 38206c3fb27SDimitry Andric /// Save lists of calls with MemProf metadata in each function, for faster 38306c3fb27SDimitry Andric /// iteration. 384*297eecfbSDimitry Andric MapVector<FuncTy *, std::vector<CallInfo>> FuncToCallsWithMetadata; 38506c3fb27SDimitry Andric 38606c3fb27SDimitry Andric /// Map from callsite node to the enclosing caller function. 38706c3fb27SDimitry Andric std::map<const ContextNode *, const FuncTy *> NodeToCallingFunc; 38806c3fb27SDimitry Andric 38906c3fb27SDimitry Andric private: 39006c3fb27SDimitry Andric using EdgeIter = typename std::vector<std::shared_ptr<ContextEdge>>::iterator; 39106c3fb27SDimitry Andric 39206c3fb27SDimitry Andric using CallContextInfo = std::tuple<CallTy, std::vector<uint64_t>, 39306c3fb27SDimitry Andric const FuncTy *, DenseSet<uint32_t>>; 39406c3fb27SDimitry Andric 39506c3fb27SDimitry Andric /// Assigns the given Node to calls at or inlined into the location with 39606c3fb27SDimitry Andric /// the Node's stack id, after post order traversing and processing its 39706c3fb27SDimitry Andric /// caller nodes. Uses the call information recorded in the given 39806c3fb27SDimitry Andric /// StackIdToMatchingCalls map, and creates new nodes for inlined sequences 39906c3fb27SDimitry Andric /// as needed. Called by updateStackNodes which sets up the given 40006c3fb27SDimitry Andric /// StackIdToMatchingCalls map. 40106c3fb27SDimitry Andric void assignStackNodesPostOrder( 40206c3fb27SDimitry Andric ContextNode *Node, DenseSet<const ContextNode *> &Visited, 40306c3fb27SDimitry Andric DenseMap<uint64_t, std::vector<CallContextInfo>> &StackIdToMatchingCalls); 40406c3fb27SDimitry Andric 40506c3fb27SDimitry Andric /// Duplicates the given set of context ids, updating the provided 40606c3fb27SDimitry Andric /// map from each original id with the newly generated context ids, 40706c3fb27SDimitry Andric /// and returning the new duplicated id set. 40806c3fb27SDimitry Andric DenseSet<uint32_t> duplicateContextIds( 40906c3fb27SDimitry Andric const DenseSet<uint32_t> &StackSequenceContextIds, 41006c3fb27SDimitry Andric DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds); 41106c3fb27SDimitry Andric 41206c3fb27SDimitry Andric /// Propagates all duplicated context ids across the graph. 41306c3fb27SDimitry Andric void propagateDuplicateContextIds( 41406c3fb27SDimitry Andric const DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds); 41506c3fb27SDimitry Andric 41606c3fb27SDimitry Andric /// Connect the NewNode to OrigNode's callees if TowardsCallee is true, 41706c3fb27SDimitry Andric /// else to its callers. Also updates OrigNode's edges to remove any context 41806c3fb27SDimitry Andric /// ids moved to the newly created edge. 41906c3fb27SDimitry Andric void connectNewNode(ContextNode *NewNode, ContextNode *OrigNode, 42006c3fb27SDimitry Andric bool TowardsCallee); 42106c3fb27SDimitry Andric 42206c3fb27SDimitry Andric /// Get the stack id corresponding to the given Id or Index (for IR this will 42306c3fb27SDimitry Andric /// return itself, for a summary index this will return the id recorded in the 42406c3fb27SDimitry Andric /// index for that stack id index value). 42506c3fb27SDimitry Andric uint64_t getStackId(uint64_t IdOrIndex) const { 42606c3fb27SDimitry Andric return static_cast<const DerivedCCG *>(this)->getStackId(IdOrIndex); 42706c3fb27SDimitry Andric } 42806c3fb27SDimitry Andric 429*297eecfbSDimitry Andric /// Returns true if the given call targets the callee of the given edge, or if 430*297eecfbSDimitry Andric /// we were able to identify the call chain through intermediate tail calls. 431*297eecfbSDimitry Andric /// In the latter case new context nodes are added to the graph for the 432*297eecfbSDimitry Andric /// identified tail calls, and their synthesized nodes are added to 433*297eecfbSDimitry Andric /// TailCallToContextNodeMap. The EdgeIter is updated in either case to the 434*297eecfbSDimitry Andric /// next element after the input position (either incremented or updated after 435*297eecfbSDimitry Andric /// removing the old edge). 436*297eecfbSDimitry Andric bool 437*297eecfbSDimitry Andric calleesMatch(CallTy Call, EdgeIter &EI, 438*297eecfbSDimitry Andric MapVector<CallInfo, ContextNode *> &TailCallToContextNodeMap); 439*297eecfbSDimitry Andric 440*297eecfbSDimitry Andric /// Returns true if the given call targets the given function, or if we were 441*297eecfbSDimitry Andric /// able to identify the call chain through intermediate tail calls (in which 442*297eecfbSDimitry Andric /// case FoundCalleeChain will be populated). 443*297eecfbSDimitry Andric bool calleeMatchesFunc( 444*297eecfbSDimitry Andric CallTy Call, const FuncTy *Func, const FuncTy *CallerFunc, 445*297eecfbSDimitry Andric std::vector<std::pair<CallTy, FuncTy *>> &FoundCalleeChain) { 446*297eecfbSDimitry Andric return static_cast<DerivedCCG *>(this)->calleeMatchesFunc( 447*297eecfbSDimitry Andric Call, Func, CallerFunc, FoundCalleeChain); 44806c3fb27SDimitry Andric } 44906c3fb27SDimitry Andric 45006c3fb27SDimitry Andric /// Get a list of nodes corresponding to the stack ids in the given 45106c3fb27SDimitry Andric /// callsite's context. 45206c3fb27SDimitry Andric std::vector<uint64_t> getStackIdsWithContextNodesForCall(CallTy Call) { 45306c3fb27SDimitry Andric return static_cast<DerivedCCG *>(this)->getStackIdsWithContextNodesForCall( 45406c3fb27SDimitry Andric Call); 45506c3fb27SDimitry Andric } 45606c3fb27SDimitry Andric 45706c3fb27SDimitry Andric /// Get the last stack id in the context for callsite. 45806c3fb27SDimitry Andric uint64_t getLastStackId(CallTy Call) { 45906c3fb27SDimitry Andric return static_cast<DerivedCCG *>(this)->getLastStackId(Call); 46006c3fb27SDimitry Andric } 46106c3fb27SDimitry Andric 46206c3fb27SDimitry Andric /// Update the allocation call to record type of allocated memory. 46306c3fb27SDimitry Andric void updateAllocationCall(CallInfo &Call, AllocationType AllocType) { 46406c3fb27SDimitry Andric AllocType == AllocationType::Cold ? AllocTypeCold++ : AllocTypeNotCold++; 46506c3fb27SDimitry Andric static_cast<DerivedCCG *>(this)->updateAllocationCall(Call, AllocType); 46606c3fb27SDimitry Andric } 46706c3fb27SDimitry Andric 46806c3fb27SDimitry Andric /// Update non-allocation call to invoke (possibly cloned) function 46906c3fb27SDimitry Andric /// CalleeFunc. 47006c3fb27SDimitry Andric void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc) { 47106c3fb27SDimitry Andric static_cast<DerivedCCG *>(this)->updateCall(CallerCall, CalleeFunc); 47206c3fb27SDimitry Andric } 47306c3fb27SDimitry Andric 47406c3fb27SDimitry Andric /// Clone the given function for the given callsite, recording mapping of all 47506c3fb27SDimitry Andric /// of the functions tracked calls to their new versions in the CallMap. 47606c3fb27SDimitry Andric /// Assigns new clones to clone number CloneNo. 47706c3fb27SDimitry Andric FuncInfo cloneFunctionForCallsite( 47806c3fb27SDimitry Andric FuncInfo &Func, CallInfo &Call, std::map<CallInfo, CallInfo> &CallMap, 47906c3fb27SDimitry Andric std::vector<CallInfo> &CallsWithMetadataInFunc, unsigned CloneNo) { 48006c3fb27SDimitry Andric return static_cast<DerivedCCG *>(this)->cloneFunctionForCallsite( 48106c3fb27SDimitry Andric Func, Call, CallMap, CallsWithMetadataInFunc, CloneNo); 48206c3fb27SDimitry Andric } 48306c3fb27SDimitry Andric 48406c3fb27SDimitry Andric /// Gets a label to use in the dot graph for the given call clone in the given 48506c3fb27SDimitry Andric /// function. 48606c3fb27SDimitry Andric std::string getLabel(const FuncTy *Func, const CallTy Call, 48706c3fb27SDimitry Andric unsigned CloneNo) const { 48806c3fb27SDimitry Andric return static_cast<const DerivedCCG *>(this)->getLabel(Func, Call, CloneNo); 48906c3fb27SDimitry Andric } 49006c3fb27SDimitry Andric 49106c3fb27SDimitry Andric /// Helpers to find the node corresponding to the given call or stackid. 49206c3fb27SDimitry Andric ContextNode *getNodeForInst(const CallInfo &C); 49306c3fb27SDimitry Andric ContextNode *getNodeForAlloc(const CallInfo &C); 49406c3fb27SDimitry Andric ContextNode *getNodeForStackId(uint64_t StackId); 49506c3fb27SDimitry Andric 49606c3fb27SDimitry Andric /// Removes the node information recorded for the given call. 49706c3fb27SDimitry Andric void unsetNodeForInst(const CallInfo &C); 49806c3fb27SDimitry Andric 49906c3fb27SDimitry Andric /// Computes the alloc type corresponding to the given context ids, by 50006c3fb27SDimitry Andric /// unioning their recorded alloc types. 50106c3fb27SDimitry Andric uint8_t computeAllocType(DenseSet<uint32_t> &ContextIds); 50206c3fb27SDimitry Andric 50306c3fb27SDimitry Andric /// Returns the alloction type of the intersection of the contexts of two 50406c3fb27SDimitry Andric /// nodes (based on their provided context id sets), optimized for the case 50506c3fb27SDimitry Andric /// when Node1Ids is smaller than Node2Ids. 50606c3fb27SDimitry Andric uint8_t intersectAllocTypesImpl(const DenseSet<uint32_t> &Node1Ids, 50706c3fb27SDimitry Andric const DenseSet<uint32_t> &Node2Ids); 50806c3fb27SDimitry Andric 50906c3fb27SDimitry Andric /// Returns the alloction type of the intersection of the contexts of two 51006c3fb27SDimitry Andric /// nodes (based on their provided context id sets). 51106c3fb27SDimitry Andric uint8_t intersectAllocTypes(const DenseSet<uint32_t> &Node1Ids, 51206c3fb27SDimitry Andric const DenseSet<uint32_t> &Node2Ids); 51306c3fb27SDimitry Andric 51406c3fb27SDimitry Andric /// Create a clone of Edge's callee and move Edge to that new callee node, 51506c3fb27SDimitry Andric /// performing the necessary context id and allocation type updates. 51606c3fb27SDimitry Andric /// If callee's caller edge iterator is supplied, it is updated when removing 51706c3fb27SDimitry Andric /// the edge from that list. 51806c3fb27SDimitry Andric ContextNode * 51906c3fb27SDimitry Andric moveEdgeToNewCalleeClone(const std::shared_ptr<ContextEdge> &Edge, 52006c3fb27SDimitry Andric EdgeIter *CallerEdgeI = nullptr); 52106c3fb27SDimitry Andric 52206c3fb27SDimitry Andric /// Change the callee of Edge to existing callee clone NewCallee, performing 52306c3fb27SDimitry Andric /// the necessary context id and allocation type updates. 52406c3fb27SDimitry Andric /// If callee's caller edge iterator is supplied, it is updated when removing 52506c3fb27SDimitry Andric /// the edge from that list. 52606c3fb27SDimitry Andric void moveEdgeToExistingCalleeClone(const std::shared_ptr<ContextEdge> &Edge, 52706c3fb27SDimitry Andric ContextNode *NewCallee, 52806c3fb27SDimitry Andric EdgeIter *CallerEdgeI = nullptr, 52906c3fb27SDimitry Andric bool NewClone = false); 53006c3fb27SDimitry Andric 53106c3fb27SDimitry Andric /// Recursively perform cloning on the graph for the given Node and its 53206c3fb27SDimitry Andric /// callers, in order to uniquely identify the allocation behavior of an 53306c3fb27SDimitry Andric /// allocation given its context. 53406c3fb27SDimitry Andric void identifyClones(ContextNode *Node, 53506c3fb27SDimitry Andric DenseSet<const ContextNode *> &Visited); 53606c3fb27SDimitry Andric 53706c3fb27SDimitry Andric /// Map from each context ID to the AllocationType assigned to that context. 53806c3fb27SDimitry Andric std::map<uint32_t, AllocationType> ContextIdToAllocationType; 53906c3fb27SDimitry Andric 54006c3fb27SDimitry Andric /// Identifies the context node created for a stack id when adding the MIB 54106c3fb27SDimitry Andric /// contexts to the graph. This is used to locate the context nodes when 54206c3fb27SDimitry Andric /// trying to assign the corresponding callsites with those stack ids to these 54306c3fb27SDimitry Andric /// nodes. 54406c3fb27SDimitry Andric std::map<uint64_t, ContextNode *> StackEntryIdToContextNodeMap; 54506c3fb27SDimitry Andric 54606c3fb27SDimitry Andric /// Maps to track the calls to their corresponding nodes in the graph. 54706c3fb27SDimitry Andric MapVector<CallInfo, ContextNode *> AllocationCallToContextNodeMap; 54806c3fb27SDimitry Andric MapVector<CallInfo, ContextNode *> NonAllocationCallToContextNodeMap; 54906c3fb27SDimitry Andric 55006c3fb27SDimitry Andric /// Owner of all ContextNode unique_ptrs. 55106c3fb27SDimitry Andric std::vector<std::unique_ptr<ContextNode>> NodeOwner; 55206c3fb27SDimitry Andric 55306c3fb27SDimitry Andric /// Perform sanity checks on graph when requested. 55406c3fb27SDimitry Andric void check() const; 55506c3fb27SDimitry Andric 55606c3fb27SDimitry Andric /// Keeps track of the last unique context id assigned. 55706c3fb27SDimitry Andric unsigned int LastContextId = 0; 55806c3fb27SDimitry Andric }; 55906c3fb27SDimitry Andric 56006c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 56106c3fb27SDimitry Andric using ContextNode = 56206c3fb27SDimitry Andric typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode; 56306c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 56406c3fb27SDimitry Andric using ContextEdge = 56506c3fb27SDimitry Andric typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge; 56606c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 56706c3fb27SDimitry Andric using FuncInfo = 56806c3fb27SDimitry Andric typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::FuncInfo; 56906c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 57006c3fb27SDimitry Andric using CallInfo = 57106c3fb27SDimitry Andric typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::CallInfo; 57206c3fb27SDimitry Andric 57306c3fb27SDimitry Andric /// CRTP derived class for graphs built from IR (regular LTO). 57406c3fb27SDimitry Andric class ModuleCallsiteContextGraph 57506c3fb27SDimitry Andric : public CallsiteContextGraph<ModuleCallsiteContextGraph, Function, 57606c3fb27SDimitry Andric Instruction *> { 57706c3fb27SDimitry Andric public: 57806c3fb27SDimitry Andric ModuleCallsiteContextGraph( 57906c3fb27SDimitry Andric Module &M, 58006c3fb27SDimitry Andric function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter); 58106c3fb27SDimitry Andric 58206c3fb27SDimitry Andric private: 58306c3fb27SDimitry Andric friend CallsiteContextGraph<ModuleCallsiteContextGraph, Function, 58406c3fb27SDimitry Andric Instruction *>; 58506c3fb27SDimitry Andric 58606c3fb27SDimitry Andric uint64_t getStackId(uint64_t IdOrIndex) const; 587*297eecfbSDimitry Andric bool calleeMatchesFunc( 588*297eecfbSDimitry Andric Instruction *Call, const Function *Func, const Function *CallerFunc, 589*297eecfbSDimitry Andric std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain); 590*297eecfbSDimitry Andric bool findProfiledCalleeThroughTailCalls( 591*297eecfbSDimitry Andric const Function *ProfiledCallee, Value *CurCallee, unsigned Depth, 592*297eecfbSDimitry Andric std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain, 593*297eecfbSDimitry Andric bool &FoundMultipleCalleeChains); 59406c3fb27SDimitry Andric uint64_t getLastStackId(Instruction *Call); 59506c3fb27SDimitry Andric std::vector<uint64_t> getStackIdsWithContextNodesForCall(Instruction *Call); 59606c3fb27SDimitry Andric void updateAllocationCall(CallInfo &Call, AllocationType AllocType); 59706c3fb27SDimitry Andric void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc); 59806c3fb27SDimitry Andric CallsiteContextGraph<ModuleCallsiteContextGraph, Function, 59906c3fb27SDimitry Andric Instruction *>::FuncInfo 60006c3fb27SDimitry Andric cloneFunctionForCallsite(FuncInfo &Func, CallInfo &Call, 60106c3fb27SDimitry Andric std::map<CallInfo, CallInfo> &CallMap, 60206c3fb27SDimitry Andric std::vector<CallInfo> &CallsWithMetadataInFunc, 60306c3fb27SDimitry Andric unsigned CloneNo); 60406c3fb27SDimitry Andric std::string getLabel(const Function *Func, const Instruction *Call, 60506c3fb27SDimitry Andric unsigned CloneNo) const; 60606c3fb27SDimitry Andric 60706c3fb27SDimitry Andric const Module &Mod; 60806c3fb27SDimitry Andric function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter; 60906c3fb27SDimitry Andric }; 61006c3fb27SDimitry Andric 61106c3fb27SDimitry Andric /// Represents a call in the summary index graph, which can either be an 61206c3fb27SDimitry Andric /// allocation or an interior callsite node in an allocation's context. 61306c3fb27SDimitry Andric /// Holds a pointer to the corresponding data structure in the index. 61406c3fb27SDimitry Andric struct IndexCall : public PointerUnion<CallsiteInfo *, AllocInfo *> { 61506c3fb27SDimitry Andric IndexCall() : PointerUnion() {} 61606c3fb27SDimitry Andric IndexCall(std::nullptr_t) : IndexCall() {} 61706c3fb27SDimitry Andric IndexCall(CallsiteInfo *StackNode) : PointerUnion(StackNode) {} 61806c3fb27SDimitry Andric IndexCall(AllocInfo *AllocNode) : PointerUnion(AllocNode) {} 61906c3fb27SDimitry Andric IndexCall(PointerUnion PT) : PointerUnion(PT) {} 62006c3fb27SDimitry Andric 62106c3fb27SDimitry Andric IndexCall *operator->() { return this; } 62206c3fb27SDimitry Andric 62306c3fb27SDimitry Andric PointerUnion<CallsiteInfo *, AllocInfo *> getBase() const { return *this; } 62406c3fb27SDimitry Andric 62506c3fb27SDimitry Andric void print(raw_ostream &OS) const { 62606c3fb27SDimitry Andric if (auto *AI = llvm::dyn_cast_if_present<AllocInfo *>(getBase())) { 62706c3fb27SDimitry Andric OS << *AI; 62806c3fb27SDimitry Andric } else { 62906c3fb27SDimitry Andric auto *CI = llvm::dyn_cast_if_present<CallsiteInfo *>(getBase()); 63006c3fb27SDimitry Andric assert(CI); 63106c3fb27SDimitry Andric OS << *CI; 63206c3fb27SDimitry Andric } 63306c3fb27SDimitry Andric } 63406c3fb27SDimitry Andric }; 63506c3fb27SDimitry Andric 63606c3fb27SDimitry Andric /// CRTP derived class for graphs built from summary index (ThinLTO). 63706c3fb27SDimitry Andric class IndexCallsiteContextGraph 63806c3fb27SDimitry Andric : public CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary, 63906c3fb27SDimitry Andric IndexCall> { 64006c3fb27SDimitry Andric public: 64106c3fb27SDimitry Andric IndexCallsiteContextGraph( 64206c3fb27SDimitry Andric ModuleSummaryIndex &Index, 64306c3fb27SDimitry Andric function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)> 64406c3fb27SDimitry Andric isPrevailing); 64506c3fb27SDimitry Andric 646*297eecfbSDimitry Andric ~IndexCallsiteContextGraph() { 647*297eecfbSDimitry Andric // Now that we are done with the graph it is safe to add the new 648*297eecfbSDimitry Andric // CallsiteInfo structs to the function summary vectors. The graph nodes 649*297eecfbSDimitry Andric // point into locations within these vectors, so we don't want to add them 650*297eecfbSDimitry Andric // any earlier. 651*297eecfbSDimitry Andric for (auto &I : FunctionCalleesToSynthesizedCallsiteInfos) { 652*297eecfbSDimitry Andric auto *FS = I.first; 653*297eecfbSDimitry Andric for (auto &Callsite : I.second) 654*297eecfbSDimitry Andric FS->addCallsite(*Callsite.second); 655*297eecfbSDimitry Andric } 656*297eecfbSDimitry Andric } 657*297eecfbSDimitry Andric 65806c3fb27SDimitry Andric private: 65906c3fb27SDimitry Andric friend CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary, 66006c3fb27SDimitry Andric IndexCall>; 66106c3fb27SDimitry Andric 66206c3fb27SDimitry Andric uint64_t getStackId(uint64_t IdOrIndex) const; 663*297eecfbSDimitry Andric bool calleeMatchesFunc( 664*297eecfbSDimitry Andric IndexCall &Call, const FunctionSummary *Func, 665*297eecfbSDimitry Andric const FunctionSummary *CallerFunc, 666*297eecfbSDimitry Andric std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain); 667*297eecfbSDimitry Andric bool findProfiledCalleeThroughTailCalls( 668*297eecfbSDimitry Andric ValueInfo ProfiledCallee, ValueInfo CurCallee, unsigned Depth, 669*297eecfbSDimitry Andric std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain, 670*297eecfbSDimitry Andric bool &FoundMultipleCalleeChains); 67106c3fb27SDimitry Andric uint64_t getLastStackId(IndexCall &Call); 67206c3fb27SDimitry Andric std::vector<uint64_t> getStackIdsWithContextNodesForCall(IndexCall &Call); 67306c3fb27SDimitry Andric void updateAllocationCall(CallInfo &Call, AllocationType AllocType); 67406c3fb27SDimitry Andric void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc); 67506c3fb27SDimitry Andric CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary, 67606c3fb27SDimitry Andric IndexCall>::FuncInfo 67706c3fb27SDimitry Andric cloneFunctionForCallsite(FuncInfo &Func, CallInfo &Call, 67806c3fb27SDimitry Andric std::map<CallInfo, CallInfo> &CallMap, 67906c3fb27SDimitry Andric std::vector<CallInfo> &CallsWithMetadataInFunc, 68006c3fb27SDimitry Andric unsigned CloneNo); 68106c3fb27SDimitry Andric std::string getLabel(const FunctionSummary *Func, const IndexCall &Call, 68206c3fb27SDimitry Andric unsigned CloneNo) const; 68306c3fb27SDimitry Andric 68406c3fb27SDimitry Andric // Saves mapping from function summaries containing memprof records back to 68506c3fb27SDimitry Andric // its VI, for use in checking and debugging. 68606c3fb27SDimitry Andric std::map<const FunctionSummary *, ValueInfo> FSToVIMap; 68706c3fb27SDimitry Andric 68806c3fb27SDimitry Andric const ModuleSummaryIndex &Index; 689*297eecfbSDimitry Andric function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)> 690*297eecfbSDimitry Andric isPrevailing; 691*297eecfbSDimitry Andric 692*297eecfbSDimitry Andric // Saves/owns the callsite info structures synthesized for missing tail call 693*297eecfbSDimitry Andric // frames that we discover while building the graph. 694*297eecfbSDimitry Andric // It maps from the summary of the function making the tail call, to a map 695*297eecfbSDimitry Andric // of callee ValueInfo to corresponding synthesized callsite info. 696*297eecfbSDimitry Andric std::unordered_map<FunctionSummary *, 697*297eecfbSDimitry Andric std::map<ValueInfo, std::unique_ptr<CallsiteInfo>>> 698*297eecfbSDimitry Andric FunctionCalleesToSynthesizedCallsiteInfos; 69906c3fb27SDimitry Andric }; 70006c3fb27SDimitry Andric } // namespace 70106c3fb27SDimitry Andric 70206c3fb27SDimitry Andric namespace llvm { 70306c3fb27SDimitry Andric template <> 70406c3fb27SDimitry Andric struct DenseMapInfo<typename CallsiteContextGraph< 70506c3fb27SDimitry Andric ModuleCallsiteContextGraph, Function, Instruction *>::CallInfo> 70606c3fb27SDimitry Andric : public DenseMapInfo<std::pair<Instruction *, unsigned>> {}; 70706c3fb27SDimitry Andric template <> 70806c3fb27SDimitry Andric struct DenseMapInfo<typename CallsiteContextGraph< 70906c3fb27SDimitry Andric IndexCallsiteContextGraph, FunctionSummary, IndexCall>::CallInfo> 71006c3fb27SDimitry Andric : public DenseMapInfo<std::pair<IndexCall, unsigned>> {}; 71106c3fb27SDimitry Andric template <> 71206c3fb27SDimitry Andric struct DenseMapInfo<IndexCall> 71306c3fb27SDimitry Andric : public DenseMapInfo<PointerUnion<CallsiteInfo *, AllocInfo *>> {}; 71406c3fb27SDimitry Andric } // end namespace llvm 71506c3fb27SDimitry Andric 71606c3fb27SDimitry Andric namespace { 71706c3fb27SDimitry Andric 71806c3fb27SDimitry Andric struct FieldSeparator { 71906c3fb27SDimitry Andric bool Skip = true; 72006c3fb27SDimitry Andric const char *Sep; 72106c3fb27SDimitry Andric 72206c3fb27SDimitry Andric FieldSeparator(const char *Sep = ", ") : Sep(Sep) {} 72306c3fb27SDimitry Andric }; 72406c3fb27SDimitry Andric 72506c3fb27SDimitry Andric raw_ostream &operator<<(raw_ostream &OS, FieldSeparator &FS) { 72606c3fb27SDimitry Andric if (FS.Skip) { 72706c3fb27SDimitry Andric FS.Skip = false; 72806c3fb27SDimitry Andric return OS; 72906c3fb27SDimitry Andric } 73006c3fb27SDimitry Andric return OS << FS.Sep; 73106c3fb27SDimitry Andric } 73206c3fb27SDimitry Andric 73306c3fb27SDimitry Andric // Map the uint8_t alloc types (which may contain NotCold|Cold) to the alloc 73406c3fb27SDimitry Andric // type we should actually use on the corresponding allocation. 73506c3fb27SDimitry Andric // If we can't clone a node that has NotCold+Cold alloc type, we will fall 73606c3fb27SDimitry Andric // back to using NotCold. So don't bother cloning to distinguish NotCold+Cold 73706c3fb27SDimitry Andric // from NotCold. 73806c3fb27SDimitry Andric AllocationType allocTypeToUse(uint8_t AllocTypes) { 73906c3fb27SDimitry Andric assert(AllocTypes != (uint8_t)AllocationType::None); 74006c3fb27SDimitry Andric if (AllocTypes == 74106c3fb27SDimitry Andric ((uint8_t)AllocationType::NotCold | (uint8_t)AllocationType::Cold)) 74206c3fb27SDimitry Andric return AllocationType::NotCold; 74306c3fb27SDimitry Andric else 74406c3fb27SDimitry Andric return (AllocationType)AllocTypes; 74506c3fb27SDimitry Andric } 74606c3fb27SDimitry Andric 74706c3fb27SDimitry Andric // Helper to check if the alloc types for all edges recorded in the 74806c3fb27SDimitry Andric // InAllocTypes vector match the alloc types for all edges in the Edges 74906c3fb27SDimitry Andric // vector. 75006c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 75106c3fb27SDimitry Andric bool allocTypesMatch( 75206c3fb27SDimitry Andric const std::vector<uint8_t> &InAllocTypes, 75306c3fb27SDimitry Andric const std::vector<std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>> 75406c3fb27SDimitry Andric &Edges) { 75506c3fb27SDimitry Andric return std::equal( 75606c3fb27SDimitry Andric InAllocTypes.begin(), InAllocTypes.end(), Edges.begin(), 75706c3fb27SDimitry Andric [](const uint8_t &l, 75806c3fb27SDimitry Andric const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &r) { 75906c3fb27SDimitry Andric // Can share if one of the edges is None type - don't 76006c3fb27SDimitry Andric // care about the type along that edge as it doesn't 76106c3fb27SDimitry Andric // exist for those context ids. 76206c3fb27SDimitry Andric if (l == (uint8_t)AllocationType::None || 76306c3fb27SDimitry Andric r->AllocTypes == (uint8_t)AllocationType::None) 76406c3fb27SDimitry Andric return true; 76506c3fb27SDimitry Andric return allocTypeToUse(l) == allocTypeToUse(r->AllocTypes); 76606c3fb27SDimitry Andric }); 76706c3fb27SDimitry Andric } 76806c3fb27SDimitry Andric 76906c3fb27SDimitry Andric } // end anonymous namespace 77006c3fb27SDimitry Andric 77106c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 77206c3fb27SDimitry Andric typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode * 77306c3fb27SDimitry Andric CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForInst( 77406c3fb27SDimitry Andric const CallInfo &C) { 77506c3fb27SDimitry Andric ContextNode *Node = getNodeForAlloc(C); 77606c3fb27SDimitry Andric if (Node) 77706c3fb27SDimitry Andric return Node; 77806c3fb27SDimitry Andric 77906c3fb27SDimitry Andric return NonAllocationCallToContextNodeMap.lookup(C); 78006c3fb27SDimitry Andric } 78106c3fb27SDimitry Andric 78206c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 78306c3fb27SDimitry Andric typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode * 78406c3fb27SDimitry Andric CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForAlloc( 78506c3fb27SDimitry Andric const CallInfo &C) { 78606c3fb27SDimitry Andric return AllocationCallToContextNodeMap.lookup(C); 78706c3fb27SDimitry Andric } 78806c3fb27SDimitry Andric 78906c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 79006c3fb27SDimitry Andric typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode * 79106c3fb27SDimitry Andric CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForStackId( 79206c3fb27SDimitry Andric uint64_t StackId) { 79306c3fb27SDimitry Andric auto StackEntryNode = StackEntryIdToContextNodeMap.find(StackId); 79406c3fb27SDimitry Andric if (StackEntryNode != StackEntryIdToContextNodeMap.end()) 79506c3fb27SDimitry Andric return StackEntryNode->second; 79606c3fb27SDimitry Andric return nullptr; 79706c3fb27SDimitry Andric } 79806c3fb27SDimitry Andric 79906c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 80006c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::unsetNodeForInst( 80106c3fb27SDimitry Andric const CallInfo &C) { 80206c3fb27SDimitry Andric AllocationCallToContextNodeMap.erase(C) || 80306c3fb27SDimitry Andric NonAllocationCallToContextNodeMap.erase(C); 80406c3fb27SDimitry Andric assert(!AllocationCallToContextNodeMap.count(C) && 80506c3fb27SDimitry Andric !NonAllocationCallToContextNodeMap.count(C)); 80606c3fb27SDimitry Andric } 80706c3fb27SDimitry Andric 80806c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 80906c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode:: 81006c3fb27SDimitry Andric addOrUpdateCallerEdge(ContextNode *Caller, AllocationType AllocType, 81106c3fb27SDimitry Andric unsigned int ContextId) { 81206c3fb27SDimitry Andric for (auto &Edge : CallerEdges) { 81306c3fb27SDimitry Andric if (Edge->Caller == Caller) { 81406c3fb27SDimitry Andric Edge->AllocTypes |= (uint8_t)AllocType; 81506c3fb27SDimitry Andric Edge->getContextIds().insert(ContextId); 81606c3fb27SDimitry Andric return; 81706c3fb27SDimitry Andric } 81806c3fb27SDimitry Andric } 81906c3fb27SDimitry Andric std::shared_ptr<ContextEdge> Edge = std::make_shared<ContextEdge>( 82006c3fb27SDimitry Andric this, Caller, (uint8_t)AllocType, DenseSet<uint32_t>({ContextId})); 82106c3fb27SDimitry Andric CallerEdges.push_back(Edge); 82206c3fb27SDimitry Andric Caller->CalleeEdges.push_back(Edge); 82306c3fb27SDimitry Andric } 82406c3fb27SDimitry Andric 82506c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 82606c3fb27SDimitry Andric void CallsiteContextGraph< 82706c3fb27SDimitry Andric DerivedCCG, FuncTy, CallTy>::removeNoneTypeCalleeEdges(ContextNode *Node) { 82806c3fb27SDimitry Andric for (auto EI = Node->CalleeEdges.begin(); EI != Node->CalleeEdges.end();) { 82906c3fb27SDimitry Andric auto Edge = *EI; 83006c3fb27SDimitry Andric if (Edge->AllocTypes == (uint8_t)AllocationType::None) { 83106c3fb27SDimitry Andric assert(Edge->ContextIds.empty()); 83206c3fb27SDimitry Andric Edge->Callee->eraseCallerEdge(Edge.get()); 83306c3fb27SDimitry Andric EI = Node->CalleeEdges.erase(EI); 83406c3fb27SDimitry Andric } else 83506c3fb27SDimitry Andric ++EI; 83606c3fb27SDimitry Andric } 83706c3fb27SDimitry Andric } 83806c3fb27SDimitry Andric 83906c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 84006c3fb27SDimitry Andric typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge * 84106c3fb27SDimitry Andric CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode:: 84206c3fb27SDimitry Andric findEdgeFromCallee(const ContextNode *Callee) { 84306c3fb27SDimitry Andric for (const auto &Edge : CalleeEdges) 84406c3fb27SDimitry Andric if (Edge->Callee == Callee) 84506c3fb27SDimitry Andric return Edge.get(); 84606c3fb27SDimitry Andric return nullptr; 84706c3fb27SDimitry Andric } 84806c3fb27SDimitry Andric 84906c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 85006c3fb27SDimitry Andric typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge * 85106c3fb27SDimitry Andric CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode:: 85206c3fb27SDimitry Andric findEdgeFromCaller(const ContextNode *Caller) { 85306c3fb27SDimitry Andric for (const auto &Edge : CallerEdges) 85406c3fb27SDimitry Andric if (Edge->Caller == Caller) 85506c3fb27SDimitry Andric return Edge.get(); 85606c3fb27SDimitry Andric return nullptr; 85706c3fb27SDimitry Andric } 85806c3fb27SDimitry Andric 85906c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 86006c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode:: 86106c3fb27SDimitry Andric eraseCalleeEdge(const ContextEdge *Edge) { 8625f757f3fSDimitry Andric auto EI = llvm::find_if( 8635f757f3fSDimitry Andric CalleeEdges, [Edge](const std::shared_ptr<ContextEdge> &CalleeEdge) { 86406c3fb27SDimitry Andric return CalleeEdge.get() == Edge; 86506c3fb27SDimitry Andric }); 86606c3fb27SDimitry Andric assert(EI != CalleeEdges.end()); 86706c3fb27SDimitry Andric CalleeEdges.erase(EI); 86806c3fb27SDimitry Andric } 86906c3fb27SDimitry Andric 87006c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 87106c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode:: 87206c3fb27SDimitry Andric eraseCallerEdge(const ContextEdge *Edge) { 8735f757f3fSDimitry Andric auto EI = llvm::find_if( 8745f757f3fSDimitry Andric CallerEdges, [Edge](const std::shared_ptr<ContextEdge> &CallerEdge) { 87506c3fb27SDimitry Andric return CallerEdge.get() == Edge; 87606c3fb27SDimitry Andric }); 87706c3fb27SDimitry Andric assert(EI != CallerEdges.end()); 87806c3fb27SDimitry Andric CallerEdges.erase(EI); 87906c3fb27SDimitry Andric } 88006c3fb27SDimitry Andric 88106c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 88206c3fb27SDimitry Andric uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::computeAllocType( 88306c3fb27SDimitry Andric DenseSet<uint32_t> &ContextIds) { 88406c3fb27SDimitry Andric uint8_t BothTypes = 88506c3fb27SDimitry Andric (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold; 88606c3fb27SDimitry Andric uint8_t AllocType = (uint8_t)AllocationType::None; 88706c3fb27SDimitry Andric for (auto Id : ContextIds) { 88806c3fb27SDimitry Andric AllocType |= (uint8_t)ContextIdToAllocationType[Id]; 88906c3fb27SDimitry Andric // Bail early if alloc type reached both, no further refinement. 89006c3fb27SDimitry Andric if (AllocType == BothTypes) 89106c3fb27SDimitry Andric return AllocType; 89206c3fb27SDimitry Andric } 89306c3fb27SDimitry Andric return AllocType; 89406c3fb27SDimitry Andric } 89506c3fb27SDimitry Andric 89606c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 89706c3fb27SDimitry Andric uint8_t 89806c3fb27SDimitry Andric CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypesImpl( 89906c3fb27SDimitry Andric const DenseSet<uint32_t> &Node1Ids, const DenseSet<uint32_t> &Node2Ids) { 90006c3fb27SDimitry Andric uint8_t BothTypes = 90106c3fb27SDimitry Andric (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold; 90206c3fb27SDimitry Andric uint8_t AllocType = (uint8_t)AllocationType::None; 90306c3fb27SDimitry Andric for (auto Id : Node1Ids) { 90406c3fb27SDimitry Andric if (!Node2Ids.count(Id)) 90506c3fb27SDimitry Andric continue; 90606c3fb27SDimitry Andric AllocType |= (uint8_t)ContextIdToAllocationType[Id]; 90706c3fb27SDimitry Andric // Bail early if alloc type reached both, no further refinement. 90806c3fb27SDimitry Andric if (AllocType == BothTypes) 90906c3fb27SDimitry Andric return AllocType; 91006c3fb27SDimitry Andric } 91106c3fb27SDimitry Andric return AllocType; 91206c3fb27SDimitry Andric } 91306c3fb27SDimitry Andric 91406c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 91506c3fb27SDimitry Andric uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypes( 91606c3fb27SDimitry Andric const DenseSet<uint32_t> &Node1Ids, const DenseSet<uint32_t> &Node2Ids) { 91706c3fb27SDimitry Andric if (Node1Ids.size() < Node2Ids.size()) 91806c3fb27SDimitry Andric return intersectAllocTypesImpl(Node1Ids, Node2Ids); 91906c3fb27SDimitry Andric else 92006c3fb27SDimitry Andric return intersectAllocTypesImpl(Node2Ids, Node1Ids); 92106c3fb27SDimitry Andric } 92206c3fb27SDimitry Andric 92306c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 92406c3fb27SDimitry Andric typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode * 92506c3fb27SDimitry Andric CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addAllocNode( 92606c3fb27SDimitry Andric CallInfo Call, const FuncTy *F) { 92706c3fb27SDimitry Andric assert(!getNodeForAlloc(Call)); 92806c3fb27SDimitry Andric NodeOwner.push_back( 92906c3fb27SDimitry Andric std::make_unique<ContextNode>(/*IsAllocation=*/true, Call)); 93006c3fb27SDimitry Andric ContextNode *AllocNode = NodeOwner.back().get(); 93106c3fb27SDimitry Andric AllocationCallToContextNodeMap[Call] = AllocNode; 93206c3fb27SDimitry Andric NodeToCallingFunc[AllocNode] = F; 93306c3fb27SDimitry Andric // Use LastContextId as a uniq id for MIB allocation nodes. 93406c3fb27SDimitry Andric AllocNode->OrigStackOrAllocId = LastContextId; 93506c3fb27SDimitry Andric // Alloc type should be updated as we add in the MIBs. We should assert 93606c3fb27SDimitry Andric // afterwards that it is not still None. 93706c3fb27SDimitry Andric AllocNode->AllocTypes = (uint8_t)AllocationType::None; 93806c3fb27SDimitry Andric 93906c3fb27SDimitry Andric return AllocNode; 94006c3fb27SDimitry Andric } 94106c3fb27SDimitry Andric 94206c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 94306c3fb27SDimitry Andric template <class NodeT, class IteratorT> 94406c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addStackNodesForMIB( 94506c3fb27SDimitry Andric ContextNode *AllocNode, CallStack<NodeT, IteratorT> &StackContext, 94606c3fb27SDimitry Andric CallStack<NodeT, IteratorT> &CallsiteContext, AllocationType AllocType) { 94706c3fb27SDimitry Andric // Treating the hot alloc type as NotCold before the disambiguation for "hot" 94806c3fb27SDimitry Andric // is done. 94906c3fb27SDimitry Andric if (AllocType == AllocationType::Hot) 95006c3fb27SDimitry Andric AllocType = AllocationType::NotCold; 95106c3fb27SDimitry Andric 95206c3fb27SDimitry Andric ContextIdToAllocationType[++LastContextId] = AllocType; 95306c3fb27SDimitry Andric 95406c3fb27SDimitry Andric // Update alloc type and context ids for this MIB. 95506c3fb27SDimitry Andric AllocNode->AllocTypes |= (uint8_t)AllocType; 95606c3fb27SDimitry Andric AllocNode->ContextIds.insert(LastContextId); 95706c3fb27SDimitry Andric 95806c3fb27SDimitry Andric // Now add or update nodes for each stack id in alloc's context. 95906c3fb27SDimitry Andric // Later when processing the stack ids on non-alloc callsites we will adjust 96006c3fb27SDimitry Andric // for any inlining in the context. 96106c3fb27SDimitry Andric ContextNode *PrevNode = AllocNode; 96206c3fb27SDimitry Andric // Look for recursion (direct recursion should have been collapsed by 96306c3fb27SDimitry Andric // module summary analysis, here we should just be detecting mutual 96406c3fb27SDimitry Andric // recursion). Mark these nodes so we don't try to clone. 96506c3fb27SDimitry Andric SmallSet<uint64_t, 8> StackIdSet; 96606c3fb27SDimitry Andric // Skip any on the allocation call (inlining). 96706c3fb27SDimitry Andric for (auto ContextIter = StackContext.beginAfterSharedPrefix(CallsiteContext); 96806c3fb27SDimitry Andric ContextIter != StackContext.end(); ++ContextIter) { 96906c3fb27SDimitry Andric auto StackId = getStackId(*ContextIter); 97006c3fb27SDimitry Andric ContextNode *StackNode = getNodeForStackId(StackId); 97106c3fb27SDimitry Andric if (!StackNode) { 97206c3fb27SDimitry Andric NodeOwner.push_back( 97306c3fb27SDimitry Andric std::make_unique<ContextNode>(/*IsAllocation=*/false)); 97406c3fb27SDimitry Andric StackNode = NodeOwner.back().get(); 97506c3fb27SDimitry Andric StackEntryIdToContextNodeMap[StackId] = StackNode; 97606c3fb27SDimitry Andric StackNode->OrigStackOrAllocId = StackId; 97706c3fb27SDimitry Andric } 97806c3fb27SDimitry Andric auto Ins = StackIdSet.insert(StackId); 97906c3fb27SDimitry Andric if (!Ins.second) 98006c3fb27SDimitry Andric StackNode->Recursive = true; 98106c3fb27SDimitry Andric StackNode->ContextIds.insert(LastContextId); 98206c3fb27SDimitry Andric StackNode->AllocTypes |= (uint8_t)AllocType; 98306c3fb27SDimitry Andric PrevNode->addOrUpdateCallerEdge(StackNode, AllocType, LastContextId); 98406c3fb27SDimitry Andric PrevNode = StackNode; 98506c3fb27SDimitry Andric } 98606c3fb27SDimitry Andric } 98706c3fb27SDimitry Andric 98806c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 98906c3fb27SDimitry Andric DenseSet<uint32_t> 99006c3fb27SDimitry Andric CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::duplicateContextIds( 99106c3fb27SDimitry Andric const DenseSet<uint32_t> &StackSequenceContextIds, 99206c3fb27SDimitry Andric DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds) { 99306c3fb27SDimitry Andric DenseSet<uint32_t> NewContextIds; 99406c3fb27SDimitry Andric for (auto OldId : StackSequenceContextIds) { 99506c3fb27SDimitry Andric NewContextIds.insert(++LastContextId); 99606c3fb27SDimitry Andric OldToNewContextIds[OldId].insert(LastContextId); 99706c3fb27SDimitry Andric assert(ContextIdToAllocationType.count(OldId)); 99806c3fb27SDimitry Andric // The new context has the same allocation type as original. 99906c3fb27SDimitry Andric ContextIdToAllocationType[LastContextId] = ContextIdToAllocationType[OldId]; 100006c3fb27SDimitry Andric } 100106c3fb27SDimitry Andric return NewContextIds; 100206c3fb27SDimitry Andric } 100306c3fb27SDimitry Andric 100406c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 100506c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>:: 100606c3fb27SDimitry Andric propagateDuplicateContextIds( 100706c3fb27SDimitry Andric const DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds) { 100806c3fb27SDimitry Andric // Build a set of duplicated context ids corresponding to the input id set. 100906c3fb27SDimitry Andric auto GetNewIds = [&OldToNewContextIds](const DenseSet<uint32_t> &ContextIds) { 101006c3fb27SDimitry Andric DenseSet<uint32_t> NewIds; 101106c3fb27SDimitry Andric for (auto Id : ContextIds) 101206c3fb27SDimitry Andric if (auto NewId = OldToNewContextIds.find(Id); 101306c3fb27SDimitry Andric NewId != OldToNewContextIds.end()) 101406c3fb27SDimitry Andric NewIds.insert(NewId->second.begin(), NewId->second.end()); 101506c3fb27SDimitry Andric return NewIds; 101606c3fb27SDimitry Andric }; 101706c3fb27SDimitry Andric 101806c3fb27SDimitry Andric // Recursively update context ids sets along caller edges. 101906c3fb27SDimitry Andric auto UpdateCallers = [&](ContextNode *Node, 102006c3fb27SDimitry Andric DenseSet<const ContextEdge *> &Visited, 102106c3fb27SDimitry Andric auto &&UpdateCallers) -> void { 102206c3fb27SDimitry Andric for (const auto &Edge : Node->CallerEdges) { 102306c3fb27SDimitry Andric auto Inserted = Visited.insert(Edge.get()); 102406c3fb27SDimitry Andric if (!Inserted.second) 102506c3fb27SDimitry Andric continue; 102606c3fb27SDimitry Andric ContextNode *NextNode = Edge->Caller; 102706c3fb27SDimitry Andric DenseSet<uint32_t> NewIdsToAdd = GetNewIds(Edge->getContextIds()); 102806c3fb27SDimitry Andric // Only need to recursively iterate to NextNode via this caller edge if 102906c3fb27SDimitry Andric // it resulted in any added ids to NextNode. 103006c3fb27SDimitry Andric if (!NewIdsToAdd.empty()) { 103106c3fb27SDimitry Andric Edge->getContextIds().insert(NewIdsToAdd.begin(), NewIdsToAdd.end()); 103206c3fb27SDimitry Andric NextNode->ContextIds.insert(NewIdsToAdd.begin(), NewIdsToAdd.end()); 103306c3fb27SDimitry Andric UpdateCallers(NextNode, Visited, UpdateCallers); 103406c3fb27SDimitry Andric } 103506c3fb27SDimitry Andric } 103606c3fb27SDimitry Andric }; 103706c3fb27SDimitry Andric 103806c3fb27SDimitry Andric DenseSet<const ContextEdge *> Visited; 103906c3fb27SDimitry Andric for (auto &Entry : AllocationCallToContextNodeMap) { 104006c3fb27SDimitry Andric auto *Node = Entry.second; 104106c3fb27SDimitry Andric // Update ids on the allocation nodes before calling the recursive 104206c3fb27SDimitry Andric // update along caller edges, since this simplifies the logic during 104306c3fb27SDimitry Andric // that traversal. 104406c3fb27SDimitry Andric DenseSet<uint32_t> NewIdsToAdd = GetNewIds(Node->ContextIds); 104506c3fb27SDimitry Andric Node->ContextIds.insert(NewIdsToAdd.begin(), NewIdsToAdd.end()); 104606c3fb27SDimitry Andric UpdateCallers(Node, Visited, UpdateCallers); 104706c3fb27SDimitry Andric } 104806c3fb27SDimitry Andric } 104906c3fb27SDimitry Andric 105006c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 105106c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::connectNewNode( 105206c3fb27SDimitry Andric ContextNode *NewNode, ContextNode *OrigNode, bool TowardsCallee) { 105306c3fb27SDimitry Andric // Make a copy of the context ids, since this will be adjusted below as they 105406c3fb27SDimitry Andric // are moved. 105506c3fb27SDimitry Andric DenseSet<uint32_t> RemainingContextIds = NewNode->ContextIds; 105606c3fb27SDimitry Andric auto &OrigEdges = 105706c3fb27SDimitry Andric TowardsCallee ? OrigNode->CalleeEdges : OrigNode->CallerEdges; 105806c3fb27SDimitry Andric // Increment iterator in loop so that we can remove edges as needed. 105906c3fb27SDimitry Andric for (auto EI = OrigEdges.begin(); EI != OrigEdges.end();) { 106006c3fb27SDimitry Andric auto Edge = *EI; 106106c3fb27SDimitry Andric // Remove any matching context ids from Edge, return set that were found and 106206c3fb27SDimitry Andric // removed, these are the new edge's context ids. Also update the remaining 106306c3fb27SDimitry Andric // (not found ids). 106406c3fb27SDimitry Andric DenseSet<uint32_t> NewEdgeContextIds, NotFoundContextIds; 106506c3fb27SDimitry Andric set_subtract(Edge->getContextIds(), RemainingContextIds, NewEdgeContextIds, 106606c3fb27SDimitry Andric NotFoundContextIds); 106706c3fb27SDimitry Andric RemainingContextIds.swap(NotFoundContextIds); 106806c3fb27SDimitry Andric // If no matching context ids for this edge, skip it. 106906c3fb27SDimitry Andric if (NewEdgeContextIds.empty()) { 107006c3fb27SDimitry Andric ++EI; 107106c3fb27SDimitry Andric continue; 107206c3fb27SDimitry Andric } 107306c3fb27SDimitry Andric if (TowardsCallee) { 107406c3fb27SDimitry Andric auto NewEdge = std::make_shared<ContextEdge>( 107506c3fb27SDimitry Andric Edge->Callee, NewNode, computeAllocType(NewEdgeContextIds), 107606c3fb27SDimitry Andric NewEdgeContextIds); 107706c3fb27SDimitry Andric NewNode->CalleeEdges.push_back(NewEdge); 107806c3fb27SDimitry Andric NewEdge->Callee->CallerEdges.push_back(NewEdge); 107906c3fb27SDimitry Andric } else { 108006c3fb27SDimitry Andric auto NewEdge = std::make_shared<ContextEdge>( 108106c3fb27SDimitry Andric NewNode, Edge->Caller, computeAllocType(NewEdgeContextIds), 108206c3fb27SDimitry Andric NewEdgeContextIds); 108306c3fb27SDimitry Andric NewNode->CallerEdges.push_back(NewEdge); 108406c3fb27SDimitry Andric NewEdge->Caller->CalleeEdges.push_back(NewEdge); 108506c3fb27SDimitry Andric } 108606c3fb27SDimitry Andric // Remove old edge if context ids empty. 108706c3fb27SDimitry Andric if (Edge->getContextIds().empty()) { 108806c3fb27SDimitry Andric if (TowardsCallee) { 108906c3fb27SDimitry Andric Edge->Callee->eraseCallerEdge(Edge.get()); 109006c3fb27SDimitry Andric EI = OrigNode->CalleeEdges.erase(EI); 109106c3fb27SDimitry Andric } else { 109206c3fb27SDimitry Andric Edge->Caller->eraseCalleeEdge(Edge.get()); 109306c3fb27SDimitry Andric EI = OrigNode->CallerEdges.erase(EI); 109406c3fb27SDimitry Andric } 109506c3fb27SDimitry Andric continue; 109606c3fb27SDimitry Andric } 109706c3fb27SDimitry Andric ++EI; 109806c3fb27SDimitry Andric } 109906c3fb27SDimitry Andric } 110006c3fb27SDimitry Andric 110106c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 110206c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>:: 110306c3fb27SDimitry Andric assignStackNodesPostOrder(ContextNode *Node, 110406c3fb27SDimitry Andric DenseSet<const ContextNode *> &Visited, 110506c3fb27SDimitry Andric DenseMap<uint64_t, std::vector<CallContextInfo>> 110606c3fb27SDimitry Andric &StackIdToMatchingCalls) { 110706c3fb27SDimitry Andric auto Inserted = Visited.insert(Node); 110806c3fb27SDimitry Andric if (!Inserted.second) 110906c3fb27SDimitry Andric return; 111006c3fb27SDimitry Andric // Post order traversal. Iterate over a copy since we may add nodes and 111106c3fb27SDimitry Andric // therefore new callers during the recursive call, invalidating any 111206c3fb27SDimitry Andric // iterator over the original edge vector. We don't need to process these 111306c3fb27SDimitry Andric // new nodes as they were already processed on creation. 111406c3fb27SDimitry Andric auto CallerEdges = Node->CallerEdges; 111506c3fb27SDimitry Andric for (auto &Edge : CallerEdges) { 111606c3fb27SDimitry Andric // Skip any that have been removed during the recursion. 111706c3fb27SDimitry Andric if (!Edge) 111806c3fb27SDimitry Andric continue; 111906c3fb27SDimitry Andric assignStackNodesPostOrder(Edge->Caller, Visited, StackIdToMatchingCalls); 112006c3fb27SDimitry Andric } 112106c3fb27SDimitry Andric 112206c3fb27SDimitry Andric // If this node's stack id is in the map, update the graph to contain new 112306c3fb27SDimitry Andric // nodes representing any inlining at interior callsites. Note we move the 112406c3fb27SDimitry Andric // associated context ids over to the new nodes. 112506c3fb27SDimitry Andric 112606c3fb27SDimitry Andric // Ignore this node if it is for an allocation or we didn't record any 112706c3fb27SDimitry Andric // stack id lists ending at it. 112806c3fb27SDimitry Andric if (Node->IsAllocation || 112906c3fb27SDimitry Andric !StackIdToMatchingCalls.count(Node->OrigStackOrAllocId)) 113006c3fb27SDimitry Andric return; 113106c3fb27SDimitry Andric 113206c3fb27SDimitry Andric auto &Calls = StackIdToMatchingCalls[Node->OrigStackOrAllocId]; 113306c3fb27SDimitry Andric // Handle the simple case first. A single call with a single stack id. 113406c3fb27SDimitry Andric // In this case there is no need to create any new context nodes, simply 113506c3fb27SDimitry Andric // assign the context node for stack id to this Call. 113606c3fb27SDimitry Andric if (Calls.size() == 1) { 113706c3fb27SDimitry Andric auto &[Call, Ids, Func, SavedContextIds] = Calls[0]; 113806c3fb27SDimitry Andric if (Ids.size() == 1) { 113906c3fb27SDimitry Andric assert(SavedContextIds.empty()); 114006c3fb27SDimitry Andric // It should be this Node 114106c3fb27SDimitry Andric assert(Node == getNodeForStackId(Ids[0])); 114206c3fb27SDimitry Andric if (Node->Recursive) 114306c3fb27SDimitry Andric return; 114406c3fb27SDimitry Andric Node->setCall(Call); 114506c3fb27SDimitry Andric NonAllocationCallToContextNodeMap[Call] = Node; 114606c3fb27SDimitry Andric NodeToCallingFunc[Node] = Func; 114706c3fb27SDimitry Andric return; 114806c3fb27SDimitry Andric } 114906c3fb27SDimitry Andric } 115006c3fb27SDimitry Andric 115106c3fb27SDimitry Andric // Find the node for the last stack id, which should be the same 115206c3fb27SDimitry Andric // across all calls recorded for this id, and is this node's id. 115306c3fb27SDimitry Andric uint64_t LastId = Node->OrigStackOrAllocId; 115406c3fb27SDimitry Andric ContextNode *LastNode = getNodeForStackId(LastId); 115506c3fb27SDimitry Andric // We should only have kept stack ids that had nodes. 115606c3fb27SDimitry Andric assert(LastNode); 115706c3fb27SDimitry Andric 115806c3fb27SDimitry Andric for (unsigned I = 0; I < Calls.size(); I++) { 115906c3fb27SDimitry Andric auto &[Call, Ids, Func, SavedContextIds] = Calls[I]; 116006c3fb27SDimitry Andric // Skip any for which we didn't assign any ids, these don't get a node in 116106c3fb27SDimitry Andric // the graph. 116206c3fb27SDimitry Andric if (SavedContextIds.empty()) 116306c3fb27SDimitry Andric continue; 116406c3fb27SDimitry Andric 116506c3fb27SDimitry Andric assert(LastId == Ids.back()); 116606c3fb27SDimitry Andric 116706c3fb27SDimitry Andric ContextNode *FirstNode = getNodeForStackId(Ids[0]); 116806c3fb27SDimitry Andric assert(FirstNode); 116906c3fb27SDimitry Andric 117006c3fb27SDimitry Andric // Recompute the context ids for this stack id sequence (the 117106c3fb27SDimitry Andric // intersection of the context ids of the corresponding nodes). 117206c3fb27SDimitry Andric // Start with the ids we saved in the map for this call, which could be 117306c3fb27SDimitry Andric // duplicated context ids. We have to recompute as we might have overlap 117406c3fb27SDimitry Andric // overlap between the saved context ids for different last nodes, and 117506c3fb27SDimitry Andric // removed them already during the post order traversal. 117606c3fb27SDimitry Andric set_intersect(SavedContextIds, FirstNode->ContextIds); 117706c3fb27SDimitry Andric ContextNode *PrevNode = nullptr; 117806c3fb27SDimitry Andric for (auto Id : Ids) { 117906c3fb27SDimitry Andric ContextNode *CurNode = getNodeForStackId(Id); 118006c3fb27SDimitry Andric // We should only have kept stack ids that had nodes and weren't 118106c3fb27SDimitry Andric // recursive. 118206c3fb27SDimitry Andric assert(CurNode); 118306c3fb27SDimitry Andric assert(!CurNode->Recursive); 118406c3fb27SDimitry Andric if (!PrevNode) { 118506c3fb27SDimitry Andric PrevNode = CurNode; 118606c3fb27SDimitry Andric continue; 118706c3fb27SDimitry Andric } 118806c3fb27SDimitry Andric auto *Edge = CurNode->findEdgeFromCallee(PrevNode); 118906c3fb27SDimitry Andric if (!Edge) { 119006c3fb27SDimitry Andric SavedContextIds.clear(); 119106c3fb27SDimitry Andric break; 119206c3fb27SDimitry Andric } 119306c3fb27SDimitry Andric PrevNode = CurNode; 119406c3fb27SDimitry Andric set_intersect(SavedContextIds, Edge->getContextIds()); 119506c3fb27SDimitry Andric 119606c3fb27SDimitry Andric // If we now have no context ids for clone, skip this call. 119706c3fb27SDimitry Andric if (SavedContextIds.empty()) 119806c3fb27SDimitry Andric break; 119906c3fb27SDimitry Andric } 120006c3fb27SDimitry Andric if (SavedContextIds.empty()) 120106c3fb27SDimitry Andric continue; 120206c3fb27SDimitry Andric 120306c3fb27SDimitry Andric // Create new context node. 120406c3fb27SDimitry Andric NodeOwner.push_back( 120506c3fb27SDimitry Andric std::make_unique<ContextNode>(/*IsAllocation=*/false, Call)); 120606c3fb27SDimitry Andric ContextNode *NewNode = NodeOwner.back().get(); 120706c3fb27SDimitry Andric NodeToCallingFunc[NewNode] = Func; 120806c3fb27SDimitry Andric NonAllocationCallToContextNodeMap[Call] = NewNode; 120906c3fb27SDimitry Andric NewNode->ContextIds = SavedContextIds; 121006c3fb27SDimitry Andric NewNode->AllocTypes = computeAllocType(NewNode->ContextIds); 121106c3fb27SDimitry Andric 121206c3fb27SDimitry Andric // Connect to callees of innermost stack frame in inlined call chain. 121306c3fb27SDimitry Andric // This updates context ids for FirstNode's callee's to reflect those 121406c3fb27SDimitry Andric // moved to NewNode. 121506c3fb27SDimitry Andric connectNewNode(NewNode, FirstNode, /*TowardsCallee=*/true); 121606c3fb27SDimitry Andric 121706c3fb27SDimitry Andric // Connect to callers of outermost stack frame in inlined call chain. 121806c3fb27SDimitry Andric // This updates context ids for FirstNode's caller's to reflect those 121906c3fb27SDimitry Andric // moved to NewNode. 122006c3fb27SDimitry Andric connectNewNode(NewNode, LastNode, /*TowardsCallee=*/false); 122106c3fb27SDimitry Andric 122206c3fb27SDimitry Andric // Now we need to remove context ids from edges/nodes between First and 122306c3fb27SDimitry Andric // Last Node. 122406c3fb27SDimitry Andric PrevNode = nullptr; 122506c3fb27SDimitry Andric for (auto Id : Ids) { 122606c3fb27SDimitry Andric ContextNode *CurNode = getNodeForStackId(Id); 122706c3fb27SDimitry Andric // We should only have kept stack ids that had nodes. 122806c3fb27SDimitry Andric assert(CurNode); 122906c3fb27SDimitry Andric 123006c3fb27SDimitry Andric // Remove the context ids moved to NewNode from CurNode, and the 123106c3fb27SDimitry Andric // edge from the prior node. 123206c3fb27SDimitry Andric set_subtract(CurNode->ContextIds, NewNode->ContextIds); 123306c3fb27SDimitry Andric if (PrevNode) { 123406c3fb27SDimitry Andric auto *PrevEdge = CurNode->findEdgeFromCallee(PrevNode); 123506c3fb27SDimitry Andric assert(PrevEdge); 123606c3fb27SDimitry Andric set_subtract(PrevEdge->getContextIds(), NewNode->ContextIds); 123706c3fb27SDimitry Andric if (PrevEdge->getContextIds().empty()) { 123806c3fb27SDimitry Andric PrevNode->eraseCallerEdge(PrevEdge); 123906c3fb27SDimitry Andric CurNode->eraseCalleeEdge(PrevEdge); 124006c3fb27SDimitry Andric } 124106c3fb27SDimitry Andric } 124206c3fb27SDimitry Andric PrevNode = CurNode; 124306c3fb27SDimitry Andric } 124406c3fb27SDimitry Andric } 124506c3fb27SDimitry Andric } 124606c3fb27SDimitry Andric 124706c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 124806c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() { 124906c3fb27SDimitry Andric // Map of stack id to all calls with that as the last (outermost caller) 125006c3fb27SDimitry Andric // callsite id that has a context node (some might not due to pruning 125106c3fb27SDimitry Andric // performed during matching of the allocation profile contexts). 125206c3fb27SDimitry Andric // The CallContextInfo contains the Call and a list of its stack ids with 125306c3fb27SDimitry Andric // ContextNodes, the function containing Call, and the set of context ids 125406c3fb27SDimitry Andric // the analysis will eventually identify for use in any new node created 125506c3fb27SDimitry Andric // for that callsite. 125606c3fb27SDimitry Andric DenseMap<uint64_t, std::vector<CallContextInfo>> StackIdToMatchingCalls; 125706c3fb27SDimitry Andric for (auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) { 125806c3fb27SDimitry Andric for (auto &Call : CallsWithMetadata) { 125906c3fb27SDimitry Andric // Ignore allocations, already handled. 126006c3fb27SDimitry Andric if (AllocationCallToContextNodeMap.count(Call)) 126106c3fb27SDimitry Andric continue; 126206c3fb27SDimitry Andric auto StackIdsWithContextNodes = 126306c3fb27SDimitry Andric getStackIdsWithContextNodesForCall(Call.call()); 126406c3fb27SDimitry Andric // If there were no nodes created for MIBs on allocs (maybe this was in 126506c3fb27SDimitry Andric // the unambiguous part of the MIB stack that was pruned), ignore. 126606c3fb27SDimitry Andric if (StackIdsWithContextNodes.empty()) 126706c3fb27SDimitry Andric continue; 126806c3fb27SDimitry Andric // Otherwise, record this Call along with the list of ids for the last 126906c3fb27SDimitry Andric // (outermost caller) stack id with a node. 127006c3fb27SDimitry Andric StackIdToMatchingCalls[StackIdsWithContextNodes.back()].push_back( 127106c3fb27SDimitry Andric {Call.call(), StackIdsWithContextNodes, Func, {}}); 127206c3fb27SDimitry Andric } 127306c3fb27SDimitry Andric } 127406c3fb27SDimitry Andric 127506c3fb27SDimitry Andric // First make a pass through all stack ids that correspond to a call, 127606c3fb27SDimitry Andric // as identified in the above loop. Compute the context ids corresponding to 127706c3fb27SDimitry Andric // each of these calls when they correspond to multiple stack ids due to 127806c3fb27SDimitry Andric // due to inlining. Perform any duplication of context ids required when 127906c3fb27SDimitry Andric // there is more than one call with the same stack ids. Their (possibly newly 128006c3fb27SDimitry Andric // duplicated) context ids are saved in the StackIdToMatchingCalls map. 128106c3fb27SDimitry Andric DenseMap<uint32_t, DenseSet<uint32_t>> OldToNewContextIds; 128206c3fb27SDimitry Andric for (auto &It : StackIdToMatchingCalls) { 128306c3fb27SDimitry Andric auto &Calls = It.getSecond(); 128406c3fb27SDimitry Andric // Skip single calls with a single stack id. These don't need a new node. 128506c3fb27SDimitry Andric if (Calls.size() == 1) { 128606c3fb27SDimitry Andric auto &Ids = std::get<1>(Calls[0]); 128706c3fb27SDimitry Andric if (Ids.size() == 1) 128806c3fb27SDimitry Andric continue; 128906c3fb27SDimitry Andric } 129006c3fb27SDimitry Andric // In order to do the best and maximal matching of inlined calls to context 129106c3fb27SDimitry Andric // node sequences we will sort the vectors of stack ids in descending order 129206c3fb27SDimitry Andric // of length, and within each length, lexicographically by stack id. The 129306c3fb27SDimitry Andric // latter is so that we can specially handle calls that have identical stack 129406c3fb27SDimitry Andric // id sequences (either due to cloning or artificially because of the MIB 129506c3fb27SDimitry Andric // context pruning). 129606c3fb27SDimitry Andric std::stable_sort(Calls.begin(), Calls.end(), 129706c3fb27SDimitry Andric [](const CallContextInfo &A, const CallContextInfo &B) { 129806c3fb27SDimitry Andric auto &IdsA = std::get<1>(A); 129906c3fb27SDimitry Andric auto &IdsB = std::get<1>(B); 130006c3fb27SDimitry Andric return IdsA.size() > IdsB.size() || 130106c3fb27SDimitry Andric (IdsA.size() == IdsB.size() && IdsA < IdsB); 130206c3fb27SDimitry Andric }); 130306c3fb27SDimitry Andric 130406c3fb27SDimitry Andric // Find the node for the last stack id, which should be the same 130506c3fb27SDimitry Andric // across all calls recorded for this id, and is the id for this 130606c3fb27SDimitry Andric // entry in the StackIdToMatchingCalls map. 130706c3fb27SDimitry Andric uint64_t LastId = It.getFirst(); 130806c3fb27SDimitry Andric ContextNode *LastNode = getNodeForStackId(LastId); 130906c3fb27SDimitry Andric // We should only have kept stack ids that had nodes. 131006c3fb27SDimitry Andric assert(LastNode); 131106c3fb27SDimitry Andric 131206c3fb27SDimitry Andric if (LastNode->Recursive) 131306c3fb27SDimitry Andric continue; 131406c3fb27SDimitry Andric 131506c3fb27SDimitry Andric // Initialize the context ids with the last node's. We will subsequently 131606c3fb27SDimitry Andric // refine the context ids by computing the intersection along all edges. 131706c3fb27SDimitry Andric DenseSet<uint32_t> LastNodeContextIds = LastNode->ContextIds; 131806c3fb27SDimitry Andric assert(!LastNodeContextIds.empty()); 131906c3fb27SDimitry Andric 132006c3fb27SDimitry Andric for (unsigned I = 0; I < Calls.size(); I++) { 132106c3fb27SDimitry Andric auto &[Call, Ids, Func, SavedContextIds] = Calls[I]; 132206c3fb27SDimitry Andric assert(SavedContextIds.empty()); 132306c3fb27SDimitry Andric assert(LastId == Ids.back()); 132406c3fb27SDimitry Andric 132506c3fb27SDimitry Andric // First compute the context ids for this stack id sequence (the 132606c3fb27SDimitry Andric // intersection of the context ids of the corresponding nodes). 132706c3fb27SDimitry Andric // Start with the remaining saved ids for the last node. 132806c3fb27SDimitry Andric assert(!LastNodeContextIds.empty()); 132906c3fb27SDimitry Andric DenseSet<uint32_t> StackSequenceContextIds = LastNodeContextIds; 133006c3fb27SDimitry Andric 133106c3fb27SDimitry Andric ContextNode *PrevNode = LastNode; 133206c3fb27SDimitry Andric ContextNode *CurNode = LastNode; 133306c3fb27SDimitry Andric bool Skip = false; 133406c3fb27SDimitry Andric 133506c3fb27SDimitry Andric // Iterate backwards through the stack Ids, starting after the last Id 133606c3fb27SDimitry Andric // in the list, which was handled once outside for all Calls. 133706c3fb27SDimitry Andric for (auto IdIter = Ids.rbegin() + 1; IdIter != Ids.rend(); IdIter++) { 133806c3fb27SDimitry Andric auto Id = *IdIter; 133906c3fb27SDimitry Andric CurNode = getNodeForStackId(Id); 134006c3fb27SDimitry Andric // We should only have kept stack ids that had nodes. 134106c3fb27SDimitry Andric assert(CurNode); 134206c3fb27SDimitry Andric 134306c3fb27SDimitry Andric if (CurNode->Recursive) { 134406c3fb27SDimitry Andric Skip = true; 134506c3fb27SDimitry Andric break; 134606c3fb27SDimitry Andric } 134706c3fb27SDimitry Andric 134806c3fb27SDimitry Andric auto *Edge = CurNode->findEdgeFromCaller(PrevNode); 134906c3fb27SDimitry Andric // If there is no edge then the nodes belong to different MIB contexts, 135006c3fb27SDimitry Andric // and we should skip this inlined context sequence. For example, this 135106c3fb27SDimitry Andric // particular inlined context may include stack ids A->B, and we may 135206c3fb27SDimitry Andric // indeed have nodes for both A and B, but it is possible that they were 135306c3fb27SDimitry Andric // never profiled in sequence in a single MIB for any allocation (i.e. 135406c3fb27SDimitry Andric // we might have profiled an allocation that involves the callsite A, 135506c3fb27SDimitry Andric // but through a different one of its callee callsites, and we might 135606c3fb27SDimitry Andric // have profiled an allocation that involves callsite B, but reached 135706c3fb27SDimitry Andric // from a different caller callsite). 135806c3fb27SDimitry Andric if (!Edge) { 135906c3fb27SDimitry Andric Skip = true; 136006c3fb27SDimitry Andric break; 136106c3fb27SDimitry Andric } 136206c3fb27SDimitry Andric PrevNode = CurNode; 136306c3fb27SDimitry Andric 136406c3fb27SDimitry Andric // Update the context ids, which is the intersection of the ids along 136506c3fb27SDimitry Andric // all edges in the sequence. 136606c3fb27SDimitry Andric set_intersect(StackSequenceContextIds, Edge->getContextIds()); 136706c3fb27SDimitry Andric 136806c3fb27SDimitry Andric // If we now have no context ids for clone, skip this call. 136906c3fb27SDimitry Andric if (StackSequenceContextIds.empty()) { 137006c3fb27SDimitry Andric Skip = true; 137106c3fb27SDimitry Andric break; 137206c3fb27SDimitry Andric } 137306c3fb27SDimitry Andric } 137406c3fb27SDimitry Andric if (Skip) 137506c3fb27SDimitry Andric continue; 137606c3fb27SDimitry Andric 137706c3fb27SDimitry Andric // If some of this call's stack ids did not have corresponding nodes (due 137806c3fb27SDimitry Andric // to pruning), don't include any context ids for contexts that extend 137906c3fb27SDimitry Andric // beyond these nodes. Otherwise we would be matching part of unrelated / 138006c3fb27SDimitry Andric // not fully matching stack contexts. To do this, subtract any context ids 138106c3fb27SDimitry Andric // found in caller nodes of the last node found above. 138206c3fb27SDimitry Andric if (Ids.back() != getLastStackId(Call)) { 138306c3fb27SDimitry Andric for (const auto &PE : CurNode->CallerEdges) { 138406c3fb27SDimitry Andric set_subtract(StackSequenceContextIds, PE->getContextIds()); 138506c3fb27SDimitry Andric if (StackSequenceContextIds.empty()) 138606c3fb27SDimitry Andric break; 138706c3fb27SDimitry Andric } 138806c3fb27SDimitry Andric // If we now have no context ids for clone, skip this call. 138906c3fb27SDimitry Andric if (StackSequenceContextIds.empty()) 139006c3fb27SDimitry Andric continue; 139106c3fb27SDimitry Andric } 139206c3fb27SDimitry Andric 139306c3fb27SDimitry Andric // Check if the next set of stack ids is the same (since the Calls vector 139406c3fb27SDimitry Andric // of tuples is sorted by the stack ids we can just look at the next one). 139506c3fb27SDimitry Andric bool DuplicateContextIds = false; 139606c3fb27SDimitry Andric if (I + 1 < Calls.size()) { 139706c3fb27SDimitry Andric auto NextIds = std::get<1>(Calls[I + 1]); 139806c3fb27SDimitry Andric DuplicateContextIds = Ids == NextIds; 139906c3fb27SDimitry Andric } 140006c3fb27SDimitry Andric 140106c3fb27SDimitry Andric // If we don't have duplicate context ids, then we can assign all the 140206c3fb27SDimitry Andric // context ids computed for the original node sequence to this call. 140306c3fb27SDimitry Andric // If there are duplicate calls with the same stack ids then we synthesize 140406c3fb27SDimitry Andric // new context ids that are duplicates of the originals. These are 140506c3fb27SDimitry Andric // assigned to SavedContextIds, which is a reference into the map entry 140606c3fb27SDimitry Andric // for this call, allowing us to access these ids later on. 140706c3fb27SDimitry Andric OldToNewContextIds.reserve(OldToNewContextIds.size() + 140806c3fb27SDimitry Andric StackSequenceContextIds.size()); 140906c3fb27SDimitry Andric SavedContextIds = 141006c3fb27SDimitry Andric DuplicateContextIds 141106c3fb27SDimitry Andric ? duplicateContextIds(StackSequenceContextIds, OldToNewContextIds) 141206c3fb27SDimitry Andric : StackSequenceContextIds; 141306c3fb27SDimitry Andric assert(!SavedContextIds.empty()); 141406c3fb27SDimitry Andric 141506c3fb27SDimitry Andric if (!DuplicateContextIds) { 141606c3fb27SDimitry Andric // Update saved last node's context ids to remove those that are 141706c3fb27SDimitry Andric // assigned to other calls, so that it is ready for the next call at 141806c3fb27SDimitry Andric // this stack id. 141906c3fb27SDimitry Andric set_subtract(LastNodeContextIds, StackSequenceContextIds); 142006c3fb27SDimitry Andric if (LastNodeContextIds.empty()) 142106c3fb27SDimitry Andric break; 142206c3fb27SDimitry Andric } 142306c3fb27SDimitry Andric } 142406c3fb27SDimitry Andric } 142506c3fb27SDimitry Andric 142606c3fb27SDimitry Andric // Propagate the duplicate context ids over the graph. 142706c3fb27SDimitry Andric propagateDuplicateContextIds(OldToNewContextIds); 142806c3fb27SDimitry Andric 142906c3fb27SDimitry Andric if (VerifyCCG) 143006c3fb27SDimitry Andric check(); 143106c3fb27SDimitry Andric 143206c3fb27SDimitry Andric // Now perform a post-order traversal over the graph, starting with the 143306c3fb27SDimitry Andric // allocation nodes, essentially processing nodes from callers to callees. 143406c3fb27SDimitry Andric // For any that contains an id in the map, update the graph to contain new 143506c3fb27SDimitry Andric // nodes representing any inlining at interior callsites. Note we move the 143606c3fb27SDimitry Andric // associated context ids over to the new nodes. 143706c3fb27SDimitry Andric DenseSet<const ContextNode *> Visited; 143806c3fb27SDimitry Andric for (auto &Entry : AllocationCallToContextNodeMap) 143906c3fb27SDimitry Andric assignStackNodesPostOrder(Entry.second, Visited, StackIdToMatchingCalls); 144006c3fb27SDimitry Andric } 144106c3fb27SDimitry Andric 144206c3fb27SDimitry Andric uint64_t ModuleCallsiteContextGraph::getLastStackId(Instruction *Call) { 144306c3fb27SDimitry Andric CallStack<MDNode, MDNode::op_iterator> CallsiteContext( 144406c3fb27SDimitry Andric Call->getMetadata(LLVMContext::MD_callsite)); 144506c3fb27SDimitry Andric return CallsiteContext.back(); 144606c3fb27SDimitry Andric } 144706c3fb27SDimitry Andric 144806c3fb27SDimitry Andric uint64_t IndexCallsiteContextGraph::getLastStackId(IndexCall &Call) { 144906c3fb27SDimitry Andric assert(isa<CallsiteInfo *>(Call.getBase())); 145006c3fb27SDimitry Andric CallStack<CallsiteInfo, SmallVector<unsigned>::const_iterator> 145106c3fb27SDimitry Andric CallsiteContext(dyn_cast_if_present<CallsiteInfo *>(Call.getBase())); 145206c3fb27SDimitry Andric // Need to convert index into stack id. 145306c3fb27SDimitry Andric return Index.getStackIdAtIndex(CallsiteContext.back()); 145406c3fb27SDimitry Andric } 145506c3fb27SDimitry Andric 145606c3fb27SDimitry Andric static const std::string MemProfCloneSuffix = ".memprof."; 145706c3fb27SDimitry Andric 145806c3fb27SDimitry Andric static std::string getMemProfFuncName(Twine Base, unsigned CloneNo) { 145906c3fb27SDimitry Andric // We use CloneNo == 0 to refer to the original version, which doesn't get 146006c3fb27SDimitry Andric // renamed with a suffix. 146106c3fb27SDimitry Andric if (!CloneNo) 146206c3fb27SDimitry Andric return Base.str(); 146306c3fb27SDimitry Andric return (Base + MemProfCloneSuffix + Twine(CloneNo)).str(); 146406c3fb27SDimitry Andric } 146506c3fb27SDimitry Andric 146606c3fb27SDimitry Andric std::string ModuleCallsiteContextGraph::getLabel(const Function *Func, 146706c3fb27SDimitry Andric const Instruction *Call, 146806c3fb27SDimitry Andric unsigned CloneNo) const { 146906c3fb27SDimitry Andric return (Twine(Call->getFunction()->getName()) + " -> " + 147006c3fb27SDimitry Andric cast<CallBase>(Call)->getCalledFunction()->getName()) 147106c3fb27SDimitry Andric .str(); 147206c3fb27SDimitry Andric } 147306c3fb27SDimitry Andric 147406c3fb27SDimitry Andric std::string IndexCallsiteContextGraph::getLabel(const FunctionSummary *Func, 147506c3fb27SDimitry Andric const IndexCall &Call, 147606c3fb27SDimitry Andric unsigned CloneNo) const { 147706c3fb27SDimitry Andric auto VI = FSToVIMap.find(Func); 147806c3fb27SDimitry Andric assert(VI != FSToVIMap.end()); 147906c3fb27SDimitry Andric if (isa<AllocInfo *>(Call.getBase())) 148006c3fb27SDimitry Andric return (VI->second.name() + " -> alloc").str(); 148106c3fb27SDimitry Andric else { 148206c3fb27SDimitry Andric auto *Callsite = dyn_cast_if_present<CallsiteInfo *>(Call.getBase()); 148306c3fb27SDimitry Andric return (VI->second.name() + " -> " + 148406c3fb27SDimitry Andric getMemProfFuncName(Callsite->Callee.name(), 148506c3fb27SDimitry Andric Callsite->Clones[CloneNo])) 148606c3fb27SDimitry Andric .str(); 148706c3fb27SDimitry Andric } 148806c3fb27SDimitry Andric } 148906c3fb27SDimitry Andric 149006c3fb27SDimitry Andric std::vector<uint64_t> 149106c3fb27SDimitry Andric ModuleCallsiteContextGraph::getStackIdsWithContextNodesForCall( 149206c3fb27SDimitry Andric Instruction *Call) { 149306c3fb27SDimitry Andric CallStack<MDNode, MDNode::op_iterator> CallsiteContext( 149406c3fb27SDimitry Andric Call->getMetadata(LLVMContext::MD_callsite)); 149506c3fb27SDimitry Andric return getStackIdsWithContextNodes<MDNode, MDNode::op_iterator>( 149606c3fb27SDimitry Andric CallsiteContext); 149706c3fb27SDimitry Andric } 149806c3fb27SDimitry Andric 149906c3fb27SDimitry Andric std::vector<uint64_t> 150006c3fb27SDimitry Andric IndexCallsiteContextGraph::getStackIdsWithContextNodesForCall(IndexCall &Call) { 150106c3fb27SDimitry Andric assert(isa<CallsiteInfo *>(Call.getBase())); 150206c3fb27SDimitry Andric CallStack<CallsiteInfo, SmallVector<unsigned>::const_iterator> 150306c3fb27SDimitry Andric CallsiteContext(dyn_cast_if_present<CallsiteInfo *>(Call.getBase())); 150406c3fb27SDimitry Andric return getStackIdsWithContextNodes<CallsiteInfo, 150506c3fb27SDimitry Andric SmallVector<unsigned>::const_iterator>( 150606c3fb27SDimitry Andric CallsiteContext); 150706c3fb27SDimitry Andric } 150806c3fb27SDimitry Andric 150906c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 151006c3fb27SDimitry Andric template <class NodeT, class IteratorT> 151106c3fb27SDimitry Andric std::vector<uint64_t> 151206c3fb27SDimitry Andric CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getStackIdsWithContextNodes( 151306c3fb27SDimitry Andric CallStack<NodeT, IteratorT> &CallsiteContext) { 151406c3fb27SDimitry Andric std::vector<uint64_t> StackIds; 151506c3fb27SDimitry Andric for (auto IdOrIndex : CallsiteContext) { 151606c3fb27SDimitry Andric auto StackId = getStackId(IdOrIndex); 151706c3fb27SDimitry Andric ContextNode *Node = getNodeForStackId(StackId); 151806c3fb27SDimitry Andric if (!Node) 151906c3fb27SDimitry Andric break; 152006c3fb27SDimitry Andric StackIds.push_back(StackId); 152106c3fb27SDimitry Andric } 152206c3fb27SDimitry Andric return StackIds; 152306c3fb27SDimitry Andric } 152406c3fb27SDimitry Andric 152506c3fb27SDimitry Andric ModuleCallsiteContextGraph::ModuleCallsiteContextGraph( 152606c3fb27SDimitry Andric Module &M, function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter) 152706c3fb27SDimitry Andric : Mod(M), OREGetter(OREGetter) { 152806c3fb27SDimitry Andric for (auto &F : M) { 152906c3fb27SDimitry Andric std::vector<CallInfo> CallsWithMetadata; 153006c3fb27SDimitry Andric for (auto &BB : F) { 153106c3fb27SDimitry Andric for (auto &I : BB) { 153206c3fb27SDimitry Andric if (!isa<CallBase>(I)) 153306c3fb27SDimitry Andric continue; 153406c3fb27SDimitry Andric if (auto *MemProfMD = I.getMetadata(LLVMContext::MD_memprof)) { 153506c3fb27SDimitry Andric CallsWithMetadata.push_back(&I); 153606c3fb27SDimitry Andric auto *AllocNode = addAllocNode(&I, &F); 153706c3fb27SDimitry Andric auto *CallsiteMD = I.getMetadata(LLVMContext::MD_callsite); 153806c3fb27SDimitry Andric assert(CallsiteMD); 153906c3fb27SDimitry Andric CallStack<MDNode, MDNode::op_iterator> CallsiteContext(CallsiteMD); 154006c3fb27SDimitry Andric // Add all of the MIBs and their stack nodes. 154106c3fb27SDimitry Andric for (auto &MDOp : MemProfMD->operands()) { 154206c3fb27SDimitry Andric auto *MIBMD = cast<const MDNode>(MDOp); 154306c3fb27SDimitry Andric MDNode *StackNode = getMIBStackNode(MIBMD); 154406c3fb27SDimitry Andric assert(StackNode); 154506c3fb27SDimitry Andric CallStack<MDNode, MDNode::op_iterator> StackContext(StackNode); 154606c3fb27SDimitry Andric addStackNodesForMIB<MDNode, MDNode::op_iterator>( 154706c3fb27SDimitry Andric AllocNode, StackContext, CallsiteContext, 154806c3fb27SDimitry Andric getMIBAllocType(MIBMD)); 154906c3fb27SDimitry Andric } 155006c3fb27SDimitry Andric assert(AllocNode->AllocTypes != (uint8_t)AllocationType::None); 155106c3fb27SDimitry Andric // Memprof and callsite metadata on memory allocations no longer 155206c3fb27SDimitry Andric // needed. 155306c3fb27SDimitry Andric I.setMetadata(LLVMContext::MD_memprof, nullptr); 155406c3fb27SDimitry Andric I.setMetadata(LLVMContext::MD_callsite, nullptr); 155506c3fb27SDimitry Andric } 155606c3fb27SDimitry Andric // For callsite metadata, add to list for this function for later use. 155706c3fb27SDimitry Andric else if (I.getMetadata(LLVMContext::MD_callsite)) 155806c3fb27SDimitry Andric CallsWithMetadata.push_back(&I); 155906c3fb27SDimitry Andric } 156006c3fb27SDimitry Andric } 156106c3fb27SDimitry Andric if (!CallsWithMetadata.empty()) 1562*297eecfbSDimitry Andric FuncToCallsWithMetadata[&F] = CallsWithMetadata; 156306c3fb27SDimitry Andric } 156406c3fb27SDimitry Andric 156506c3fb27SDimitry Andric if (DumpCCG) { 156606c3fb27SDimitry Andric dbgs() << "CCG before updating call stack chains:\n"; 156706c3fb27SDimitry Andric dbgs() << *this; 156806c3fb27SDimitry Andric } 156906c3fb27SDimitry Andric 157006c3fb27SDimitry Andric if (ExportToDot) 157106c3fb27SDimitry Andric exportToDot("prestackupdate"); 157206c3fb27SDimitry Andric 157306c3fb27SDimitry Andric updateStackNodes(); 157406c3fb27SDimitry Andric 157506c3fb27SDimitry Andric handleCallsitesWithMultipleTargets(); 157606c3fb27SDimitry Andric 157706c3fb27SDimitry Andric // Strip off remaining callsite metadata, no longer needed. 157806c3fb27SDimitry Andric for (auto &FuncEntry : FuncToCallsWithMetadata) 157906c3fb27SDimitry Andric for (auto &Call : FuncEntry.second) 158006c3fb27SDimitry Andric Call.call()->setMetadata(LLVMContext::MD_callsite, nullptr); 158106c3fb27SDimitry Andric } 158206c3fb27SDimitry Andric 158306c3fb27SDimitry Andric IndexCallsiteContextGraph::IndexCallsiteContextGraph( 158406c3fb27SDimitry Andric ModuleSummaryIndex &Index, 158506c3fb27SDimitry Andric function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)> 158606c3fb27SDimitry Andric isPrevailing) 1587*297eecfbSDimitry Andric : Index(Index), isPrevailing(isPrevailing) { 158806c3fb27SDimitry Andric for (auto &I : Index) { 158906c3fb27SDimitry Andric auto VI = Index.getValueInfo(I); 159006c3fb27SDimitry Andric for (auto &S : VI.getSummaryList()) { 159106c3fb27SDimitry Andric // We should only add the prevailing nodes. Otherwise we may try to clone 159206c3fb27SDimitry Andric // in a weak copy that won't be linked (and may be different than the 159306c3fb27SDimitry Andric // prevailing version). 159406c3fb27SDimitry Andric // We only keep the memprof summary on the prevailing copy now when 159506c3fb27SDimitry Andric // building the combined index, as a space optimization, however don't 159606c3fb27SDimitry Andric // rely on this optimization. The linker doesn't resolve local linkage 159706c3fb27SDimitry Andric // values so don't check whether those are prevailing. 159806c3fb27SDimitry Andric if (!GlobalValue::isLocalLinkage(S->linkage()) && 159906c3fb27SDimitry Andric !isPrevailing(VI.getGUID(), S.get())) 160006c3fb27SDimitry Andric continue; 160106c3fb27SDimitry Andric auto *FS = dyn_cast<FunctionSummary>(S.get()); 160206c3fb27SDimitry Andric if (!FS) 160306c3fb27SDimitry Andric continue; 160406c3fb27SDimitry Andric std::vector<CallInfo> CallsWithMetadata; 160506c3fb27SDimitry Andric if (!FS->allocs().empty()) { 160606c3fb27SDimitry Andric for (auto &AN : FS->mutableAllocs()) { 160706c3fb27SDimitry Andric // This can happen because of recursion elimination handling that 160806c3fb27SDimitry Andric // currently exists in ModuleSummaryAnalysis. Skip these for now. 160906c3fb27SDimitry Andric // We still added them to the summary because we need to be able to 161006c3fb27SDimitry Andric // correlate properly in applyImport in the backends. 161106c3fb27SDimitry Andric if (AN.MIBs.empty()) 161206c3fb27SDimitry Andric continue; 161306c3fb27SDimitry Andric CallsWithMetadata.push_back({&AN}); 161406c3fb27SDimitry Andric auto *AllocNode = addAllocNode({&AN}, FS); 161506c3fb27SDimitry Andric // Pass an empty CallStack to the CallsiteContext (second) 161606c3fb27SDimitry Andric // parameter, since for ThinLTO we already collapsed out the inlined 161706c3fb27SDimitry Andric // stack ids on the allocation call during ModuleSummaryAnalysis. 161806c3fb27SDimitry Andric CallStack<MIBInfo, SmallVector<unsigned>::const_iterator> 161906c3fb27SDimitry Andric EmptyContext; 162006c3fb27SDimitry Andric // Now add all of the MIBs and their stack nodes. 162106c3fb27SDimitry Andric for (auto &MIB : AN.MIBs) { 162206c3fb27SDimitry Andric CallStack<MIBInfo, SmallVector<unsigned>::const_iterator> 162306c3fb27SDimitry Andric StackContext(&MIB); 162406c3fb27SDimitry Andric addStackNodesForMIB<MIBInfo, SmallVector<unsigned>::const_iterator>( 162506c3fb27SDimitry Andric AllocNode, StackContext, EmptyContext, MIB.AllocType); 162606c3fb27SDimitry Andric } 162706c3fb27SDimitry Andric assert(AllocNode->AllocTypes != (uint8_t)AllocationType::None); 162806c3fb27SDimitry Andric // Initialize version 0 on the summary alloc node to the current alloc 162906c3fb27SDimitry Andric // type, unless it has both types in which case make it default, so 163006c3fb27SDimitry Andric // that in the case where we aren't able to clone the original version 163106c3fb27SDimitry Andric // always ends up with the default allocation behavior. 163206c3fb27SDimitry Andric AN.Versions[0] = (uint8_t)allocTypeToUse(AllocNode->AllocTypes); 163306c3fb27SDimitry Andric } 163406c3fb27SDimitry Andric } 163506c3fb27SDimitry Andric // For callsite metadata, add to list for this function for later use. 163606c3fb27SDimitry Andric if (!FS->callsites().empty()) 163706c3fb27SDimitry Andric for (auto &SN : FS->mutableCallsites()) 163806c3fb27SDimitry Andric CallsWithMetadata.push_back({&SN}); 163906c3fb27SDimitry Andric 164006c3fb27SDimitry Andric if (!CallsWithMetadata.empty()) 1641*297eecfbSDimitry Andric FuncToCallsWithMetadata[FS] = CallsWithMetadata; 164206c3fb27SDimitry Andric 164306c3fb27SDimitry Andric if (!FS->allocs().empty() || !FS->callsites().empty()) 164406c3fb27SDimitry Andric FSToVIMap[FS] = VI; 164506c3fb27SDimitry Andric } 164606c3fb27SDimitry Andric } 164706c3fb27SDimitry Andric 164806c3fb27SDimitry Andric if (DumpCCG) { 164906c3fb27SDimitry Andric dbgs() << "CCG before updating call stack chains:\n"; 165006c3fb27SDimitry Andric dbgs() << *this; 165106c3fb27SDimitry Andric } 165206c3fb27SDimitry Andric 165306c3fb27SDimitry Andric if (ExportToDot) 165406c3fb27SDimitry Andric exportToDot("prestackupdate"); 165506c3fb27SDimitry Andric 165606c3fb27SDimitry Andric updateStackNodes(); 165706c3fb27SDimitry Andric 165806c3fb27SDimitry Andric handleCallsitesWithMultipleTargets(); 165906c3fb27SDimitry Andric } 166006c3fb27SDimitry Andric 166106c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 166206c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, 166306c3fb27SDimitry Andric CallTy>::handleCallsitesWithMultipleTargets() { 166406c3fb27SDimitry Andric // Look for and workaround callsites that call multiple functions. 166506c3fb27SDimitry Andric // This can happen for indirect calls, which needs better handling, and in 166606c3fb27SDimitry Andric // more rare cases (e.g. macro expansion). 166706c3fb27SDimitry Andric // TODO: To fix this for indirect calls we will want to perform speculative 166806c3fb27SDimitry Andric // devirtualization using either the normal PGO info with ICP, or using the 166906c3fb27SDimitry Andric // information in the profiled MemProf contexts. We can do this prior to 167006c3fb27SDimitry Andric // this transformation for regular LTO, and for ThinLTO we can simulate that 167106c3fb27SDimitry Andric // effect in the summary and perform the actual speculative devirtualization 167206c3fb27SDimitry Andric // while cloning in the ThinLTO backend. 1673*297eecfbSDimitry Andric 1674*297eecfbSDimitry Andric // Keep track of the new nodes synthesized for discovered tail calls missing 1675*297eecfbSDimitry Andric // from the profiled contexts. 1676*297eecfbSDimitry Andric MapVector<CallInfo, ContextNode *> TailCallToContextNodeMap; 1677*297eecfbSDimitry Andric 167806c3fb27SDimitry Andric for (auto Entry = NonAllocationCallToContextNodeMap.begin(); 167906c3fb27SDimitry Andric Entry != NonAllocationCallToContextNodeMap.end();) { 168006c3fb27SDimitry Andric auto *Node = Entry->second; 168106c3fb27SDimitry Andric assert(Node->Clones.empty()); 168206c3fb27SDimitry Andric // Check all node callees and see if in the same function. 168306c3fb27SDimitry Andric bool Removed = false; 168406c3fb27SDimitry Andric auto Call = Node->Call.call(); 1685*297eecfbSDimitry Andric for (auto EI = Node->CalleeEdges.begin(); EI != Node->CalleeEdges.end();) { 1686*297eecfbSDimitry Andric auto Edge = *EI; 1687*297eecfbSDimitry Andric if (!Edge->Callee->hasCall()) { 1688*297eecfbSDimitry Andric ++EI; 168906c3fb27SDimitry Andric continue; 1690*297eecfbSDimitry Andric } 169106c3fb27SDimitry Andric assert(NodeToCallingFunc.count(Edge->Callee)); 169206c3fb27SDimitry Andric // Check if the called function matches that of the callee node. 1693*297eecfbSDimitry Andric if (calleesMatch(Call, EI, TailCallToContextNodeMap)) 169406c3fb27SDimitry Andric continue; 1695*297eecfbSDimitry Andric RemovedEdgesWithMismatchedCallees++; 169606c3fb27SDimitry Andric // Work around by setting Node to have a null call, so it gets 169706c3fb27SDimitry Andric // skipped during cloning. Otherwise assignFunctions will assert 169806c3fb27SDimitry Andric // because its data structures are not designed to handle this case. 169906c3fb27SDimitry Andric Entry = NonAllocationCallToContextNodeMap.erase(Entry); 170006c3fb27SDimitry Andric Node->setCall(CallInfo()); 170106c3fb27SDimitry Andric Removed = true; 170206c3fb27SDimitry Andric break; 170306c3fb27SDimitry Andric } 170406c3fb27SDimitry Andric if (!Removed) 170506c3fb27SDimitry Andric Entry++; 170606c3fb27SDimitry Andric } 1707*297eecfbSDimitry Andric 1708*297eecfbSDimitry Andric // Add the new nodes after the above loop so that the iteration is not 1709*297eecfbSDimitry Andric // invalidated. 1710*297eecfbSDimitry Andric for (auto &[Call, Node] : TailCallToContextNodeMap) 1711*297eecfbSDimitry Andric NonAllocationCallToContextNodeMap[Call] = Node; 171206c3fb27SDimitry Andric } 171306c3fb27SDimitry Andric 171406c3fb27SDimitry Andric uint64_t ModuleCallsiteContextGraph::getStackId(uint64_t IdOrIndex) const { 171506c3fb27SDimitry Andric // In the Module (IR) case this is already the Id. 171606c3fb27SDimitry Andric return IdOrIndex; 171706c3fb27SDimitry Andric } 171806c3fb27SDimitry Andric 171906c3fb27SDimitry Andric uint64_t IndexCallsiteContextGraph::getStackId(uint64_t IdOrIndex) const { 172006c3fb27SDimitry Andric // In the Index case this is an index into the stack id list in the summary 172106c3fb27SDimitry Andric // index, convert it to an Id. 172206c3fb27SDimitry Andric return Index.getStackIdAtIndex(IdOrIndex); 172306c3fb27SDimitry Andric } 172406c3fb27SDimitry Andric 1725*297eecfbSDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 1726*297eecfbSDimitry Andric bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::calleesMatch( 1727*297eecfbSDimitry Andric CallTy Call, EdgeIter &EI, 1728*297eecfbSDimitry Andric MapVector<CallInfo, ContextNode *> &TailCallToContextNodeMap) { 1729*297eecfbSDimitry Andric auto Edge = *EI; 1730*297eecfbSDimitry Andric const FuncTy *ProfiledCalleeFunc = NodeToCallingFunc[Edge->Callee]; 1731*297eecfbSDimitry Andric const FuncTy *CallerFunc = NodeToCallingFunc[Edge->Caller]; 1732*297eecfbSDimitry Andric // Will be populated in order of callee to caller if we find a chain of tail 1733*297eecfbSDimitry Andric // calls between the profiled caller and callee. 1734*297eecfbSDimitry Andric std::vector<std::pair<CallTy, FuncTy *>> FoundCalleeChain; 1735*297eecfbSDimitry Andric if (!calleeMatchesFunc(Call, ProfiledCalleeFunc, CallerFunc, 1736*297eecfbSDimitry Andric FoundCalleeChain)) { 1737*297eecfbSDimitry Andric ++EI; 1738*297eecfbSDimitry Andric return false; 1739*297eecfbSDimitry Andric } 1740*297eecfbSDimitry Andric 1741*297eecfbSDimitry Andric // The usual case where the profiled callee matches that of the IR/summary. 1742*297eecfbSDimitry Andric if (FoundCalleeChain.empty()) { 1743*297eecfbSDimitry Andric ++EI; 1744*297eecfbSDimitry Andric return true; 1745*297eecfbSDimitry Andric } 1746*297eecfbSDimitry Andric 1747*297eecfbSDimitry Andric auto AddEdge = [Edge, &EI](ContextNode *Caller, ContextNode *Callee) { 1748*297eecfbSDimitry Andric auto *CurEdge = Callee->findEdgeFromCaller(Caller); 1749*297eecfbSDimitry Andric // If there is already an edge between these nodes, simply update it and 1750*297eecfbSDimitry Andric // return. 1751*297eecfbSDimitry Andric if (CurEdge) { 1752*297eecfbSDimitry Andric CurEdge->ContextIds.insert(Edge->ContextIds.begin(), 1753*297eecfbSDimitry Andric Edge->ContextIds.end()); 1754*297eecfbSDimitry Andric CurEdge->AllocTypes |= Edge->AllocTypes; 1755*297eecfbSDimitry Andric return; 1756*297eecfbSDimitry Andric } 1757*297eecfbSDimitry Andric // Otherwise, create a new edge and insert it into the caller and callee 1758*297eecfbSDimitry Andric // lists. 1759*297eecfbSDimitry Andric auto NewEdge = std::make_shared<ContextEdge>( 1760*297eecfbSDimitry Andric Callee, Caller, Edge->AllocTypes, Edge->ContextIds); 1761*297eecfbSDimitry Andric Callee->CallerEdges.push_back(NewEdge); 1762*297eecfbSDimitry Andric if (Caller == Edge->Caller) { 1763*297eecfbSDimitry Andric // If we are inserting the new edge into the current edge's caller, insert 1764*297eecfbSDimitry Andric // the new edge before the current iterator position, and then increment 1765*297eecfbSDimitry Andric // back to the current edge. 1766*297eecfbSDimitry Andric EI = Caller->CalleeEdges.insert(EI, NewEdge); 1767*297eecfbSDimitry Andric ++EI; 1768*297eecfbSDimitry Andric assert(*EI == Edge && 1769*297eecfbSDimitry Andric "Iterator position not restored after insert and increment"); 1770*297eecfbSDimitry Andric } else 1771*297eecfbSDimitry Andric Caller->CalleeEdges.push_back(NewEdge); 1772*297eecfbSDimitry Andric }; 1773*297eecfbSDimitry Andric 1774*297eecfbSDimitry Andric // Create new nodes for each found callee and connect in between the profiled 1775*297eecfbSDimitry Andric // caller and callee. 1776*297eecfbSDimitry Andric auto *CurCalleeNode = Edge->Callee; 1777*297eecfbSDimitry Andric for (auto &[NewCall, Func] : FoundCalleeChain) { 1778*297eecfbSDimitry Andric ContextNode *NewNode = nullptr; 1779*297eecfbSDimitry Andric // First check if we have already synthesized a node for this tail call. 1780*297eecfbSDimitry Andric if (TailCallToContextNodeMap.count(NewCall)) { 1781*297eecfbSDimitry Andric NewNode = TailCallToContextNodeMap[NewCall]; 1782*297eecfbSDimitry Andric NewNode->ContextIds.insert(Edge->ContextIds.begin(), 1783*297eecfbSDimitry Andric Edge->ContextIds.end()); 1784*297eecfbSDimitry Andric NewNode->AllocTypes |= Edge->AllocTypes; 1785*297eecfbSDimitry Andric } else { 1786*297eecfbSDimitry Andric FuncToCallsWithMetadata[Func].push_back({NewCall}); 1787*297eecfbSDimitry Andric // Create Node and record node info. 1788*297eecfbSDimitry Andric NodeOwner.push_back( 1789*297eecfbSDimitry Andric std::make_unique<ContextNode>(/*IsAllocation=*/false, NewCall)); 1790*297eecfbSDimitry Andric NewNode = NodeOwner.back().get(); 1791*297eecfbSDimitry Andric NodeToCallingFunc[NewNode] = Func; 1792*297eecfbSDimitry Andric TailCallToContextNodeMap[NewCall] = NewNode; 1793*297eecfbSDimitry Andric NewNode->ContextIds = Edge->ContextIds; 1794*297eecfbSDimitry Andric NewNode->AllocTypes = Edge->AllocTypes; 1795*297eecfbSDimitry Andric } 1796*297eecfbSDimitry Andric 1797*297eecfbSDimitry Andric // Hook up node to its callee node 1798*297eecfbSDimitry Andric AddEdge(NewNode, CurCalleeNode); 1799*297eecfbSDimitry Andric 1800*297eecfbSDimitry Andric CurCalleeNode = NewNode; 1801*297eecfbSDimitry Andric } 1802*297eecfbSDimitry Andric 1803*297eecfbSDimitry Andric // Hook up edge's original caller to new callee node. 1804*297eecfbSDimitry Andric AddEdge(Edge->Caller, CurCalleeNode); 1805*297eecfbSDimitry Andric 1806*297eecfbSDimitry Andric // Remove old edge 1807*297eecfbSDimitry Andric Edge->Callee->eraseCallerEdge(Edge.get()); 1808*297eecfbSDimitry Andric EI = Edge->Caller->CalleeEdges.erase(EI); 1809*297eecfbSDimitry Andric 1810*297eecfbSDimitry Andric return true; 1811*297eecfbSDimitry Andric } 1812*297eecfbSDimitry Andric 1813*297eecfbSDimitry Andric bool ModuleCallsiteContextGraph::findProfiledCalleeThroughTailCalls( 1814*297eecfbSDimitry Andric const Function *ProfiledCallee, Value *CurCallee, unsigned Depth, 1815*297eecfbSDimitry Andric std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain, 1816*297eecfbSDimitry Andric bool &FoundMultipleCalleeChains) { 1817*297eecfbSDimitry Andric // Stop recursive search if we have already explored the maximum specified 1818*297eecfbSDimitry Andric // depth. 1819*297eecfbSDimitry Andric if (Depth > TailCallSearchDepth) 1820*297eecfbSDimitry Andric return false; 1821*297eecfbSDimitry Andric 1822*297eecfbSDimitry Andric auto SaveCallsiteInfo = [&](Instruction *Callsite, Function *F) { 1823*297eecfbSDimitry Andric FoundCalleeChain.push_back({Callsite, F}); 1824*297eecfbSDimitry Andric }; 1825*297eecfbSDimitry Andric 1826*297eecfbSDimitry Andric auto *CalleeFunc = dyn_cast<Function>(CurCallee); 1827*297eecfbSDimitry Andric if (!CalleeFunc) { 1828*297eecfbSDimitry Andric auto *Alias = dyn_cast<GlobalAlias>(CurCallee); 1829*297eecfbSDimitry Andric assert(Alias); 1830*297eecfbSDimitry Andric CalleeFunc = dyn_cast<Function>(Alias->getAliasee()); 1831*297eecfbSDimitry Andric assert(CalleeFunc); 1832*297eecfbSDimitry Andric } 1833*297eecfbSDimitry Andric 1834*297eecfbSDimitry Andric // Look for tail calls in this function, and check if they either call the 1835*297eecfbSDimitry Andric // profiled callee directly, or indirectly (via a recursive search). 1836*297eecfbSDimitry Andric // Only succeed if there is a single unique tail call chain found between the 1837*297eecfbSDimitry Andric // profiled caller and callee, otherwise we could perform incorrect cloning. 1838*297eecfbSDimitry Andric bool FoundSingleCalleeChain = false; 1839*297eecfbSDimitry Andric for (auto &BB : *CalleeFunc) { 1840*297eecfbSDimitry Andric for (auto &I : BB) { 1841*297eecfbSDimitry Andric auto *CB = dyn_cast<CallBase>(&I); 1842*297eecfbSDimitry Andric if (!CB || !CB->isTailCall()) 1843*297eecfbSDimitry Andric continue; 1844*297eecfbSDimitry Andric auto *CalledValue = CB->getCalledOperand(); 1845*297eecfbSDimitry Andric auto *CalledFunction = CB->getCalledFunction(); 1846*297eecfbSDimitry Andric if (CalledValue && !CalledFunction) { 1847*297eecfbSDimitry Andric CalledValue = CalledValue->stripPointerCasts(); 1848*297eecfbSDimitry Andric // Stripping pointer casts can reveal a called function. 1849*297eecfbSDimitry Andric CalledFunction = dyn_cast<Function>(CalledValue); 1850*297eecfbSDimitry Andric } 1851*297eecfbSDimitry Andric // Check if this is an alias to a function. If so, get the 1852*297eecfbSDimitry Andric // called aliasee for the checks below. 1853*297eecfbSDimitry Andric if (auto *GA = dyn_cast<GlobalAlias>(CalledValue)) { 1854*297eecfbSDimitry Andric assert(!CalledFunction && 1855*297eecfbSDimitry Andric "Expected null called function in callsite for alias"); 1856*297eecfbSDimitry Andric CalledFunction = dyn_cast<Function>(GA->getAliaseeObject()); 1857*297eecfbSDimitry Andric } 1858*297eecfbSDimitry Andric if (!CalledFunction) 1859*297eecfbSDimitry Andric continue; 1860*297eecfbSDimitry Andric if (CalledFunction == ProfiledCallee) { 1861*297eecfbSDimitry Andric if (FoundSingleCalleeChain) { 1862*297eecfbSDimitry Andric FoundMultipleCalleeChains = true; 1863*297eecfbSDimitry Andric return false; 1864*297eecfbSDimitry Andric } 1865*297eecfbSDimitry Andric FoundSingleCalleeChain = true; 1866*297eecfbSDimitry Andric FoundProfiledCalleeCount++; 1867*297eecfbSDimitry Andric FoundProfiledCalleeDepth += Depth; 1868*297eecfbSDimitry Andric if (Depth > FoundProfiledCalleeMaxDepth) 1869*297eecfbSDimitry Andric FoundProfiledCalleeMaxDepth = Depth; 1870*297eecfbSDimitry Andric SaveCallsiteInfo(&I, CalleeFunc); 1871*297eecfbSDimitry Andric } else if (findProfiledCalleeThroughTailCalls( 1872*297eecfbSDimitry Andric ProfiledCallee, CalledFunction, Depth + 1, 1873*297eecfbSDimitry Andric FoundCalleeChain, FoundMultipleCalleeChains)) { 1874*297eecfbSDimitry Andric if (FoundMultipleCalleeChains) 1875*297eecfbSDimitry Andric return false; 1876*297eecfbSDimitry Andric if (FoundSingleCalleeChain) { 1877*297eecfbSDimitry Andric FoundMultipleCalleeChains = true; 1878*297eecfbSDimitry Andric return false; 1879*297eecfbSDimitry Andric } 1880*297eecfbSDimitry Andric FoundSingleCalleeChain = true; 1881*297eecfbSDimitry Andric SaveCallsiteInfo(&I, CalleeFunc); 1882*297eecfbSDimitry Andric } 1883*297eecfbSDimitry Andric } 1884*297eecfbSDimitry Andric } 1885*297eecfbSDimitry Andric 1886*297eecfbSDimitry Andric return FoundSingleCalleeChain; 1887*297eecfbSDimitry Andric } 1888*297eecfbSDimitry Andric 1889*297eecfbSDimitry Andric bool ModuleCallsiteContextGraph::calleeMatchesFunc( 1890*297eecfbSDimitry Andric Instruction *Call, const Function *Func, const Function *CallerFunc, 1891*297eecfbSDimitry Andric std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain) { 189206c3fb27SDimitry Andric auto *CB = dyn_cast<CallBase>(Call); 189306c3fb27SDimitry Andric if (!CB->getCalledOperand()) 189406c3fb27SDimitry Andric return false; 189506c3fb27SDimitry Andric auto *CalleeVal = CB->getCalledOperand()->stripPointerCasts(); 189606c3fb27SDimitry Andric auto *CalleeFunc = dyn_cast<Function>(CalleeVal); 189706c3fb27SDimitry Andric if (CalleeFunc == Func) 189806c3fb27SDimitry Andric return true; 189906c3fb27SDimitry Andric auto *Alias = dyn_cast<GlobalAlias>(CalleeVal); 1900*297eecfbSDimitry Andric if (Alias && Alias->getAliasee() == Func) 1901*297eecfbSDimitry Andric return true; 1902*297eecfbSDimitry Andric 1903*297eecfbSDimitry Andric // Recursively search for the profiled callee through tail calls starting with 1904*297eecfbSDimitry Andric // the actual Callee. The discovered tail call chain is saved in 1905*297eecfbSDimitry Andric // FoundCalleeChain, and we will fixup the graph to include these callsites 1906*297eecfbSDimitry Andric // after returning. 1907*297eecfbSDimitry Andric // FIXME: We will currently redo the same recursive walk if we find the same 1908*297eecfbSDimitry Andric // mismatched callee from another callsite. We can improve this with more 1909*297eecfbSDimitry Andric // bookkeeping of the created chain of new nodes for each mismatch. 1910*297eecfbSDimitry Andric unsigned Depth = 1; 1911*297eecfbSDimitry Andric bool FoundMultipleCalleeChains = false; 1912*297eecfbSDimitry Andric if (!findProfiledCalleeThroughTailCalls(Func, CalleeVal, Depth, 1913*297eecfbSDimitry Andric FoundCalleeChain, 1914*297eecfbSDimitry Andric FoundMultipleCalleeChains)) { 1915*297eecfbSDimitry Andric LLVM_DEBUG(dbgs() << "Not found through unique tail call chain: " 1916*297eecfbSDimitry Andric << Func->getName() << " from " << CallerFunc->getName() 1917*297eecfbSDimitry Andric << " that actually called " << CalleeVal->getName() 1918*297eecfbSDimitry Andric << (FoundMultipleCalleeChains 1919*297eecfbSDimitry Andric ? " (found multiple possible chains)" 1920*297eecfbSDimitry Andric : "") 1921*297eecfbSDimitry Andric << "\n"); 1922*297eecfbSDimitry Andric if (FoundMultipleCalleeChains) 1923*297eecfbSDimitry Andric FoundProfiledCalleeNonUniquelyCount++; 1924*297eecfbSDimitry Andric return false; 192506c3fb27SDimitry Andric } 192606c3fb27SDimitry Andric 1927*297eecfbSDimitry Andric return true; 1928*297eecfbSDimitry Andric } 1929*297eecfbSDimitry Andric 1930*297eecfbSDimitry Andric bool IndexCallsiteContextGraph::findProfiledCalleeThroughTailCalls( 1931*297eecfbSDimitry Andric ValueInfo ProfiledCallee, ValueInfo CurCallee, unsigned Depth, 1932*297eecfbSDimitry Andric std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain, 1933*297eecfbSDimitry Andric bool &FoundMultipleCalleeChains) { 1934*297eecfbSDimitry Andric // Stop recursive search if we have already explored the maximum specified 1935*297eecfbSDimitry Andric // depth. 1936*297eecfbSDimitry Andric if (Depth > TailCallSearchDepth) 1937*297eecfbSDimitry Andric return false; 1938*297eecfbSDimitry Andric 1939*297eecfbSDimitry Andric auto CreateAndSaveCallsiteInfo = [&](ValueInfo Callee, FunctionSummary *FS) { 1940*297eecfbSDimitry Andric // Make a CallsiteInfo for each discovered callee, if one hasn't already 1941*297eecfbSDimitry Andric // been synthesized. 1942*297eecfbSDimitry Andric if (!FunctionCalleesToSynthesizedCallsiteInfos.count(FS) || 1943*297eecfbSDimitry Andric !FunctionCalleesToSynthesizedCallsiteInfos[FS].count(Callee)) 1944*297eecfbSDimitry Andric // StackIds is empty (we don't have debug info available in the index for 1945*297eecfbSDimitry Andric // these callsites) 1946*297eecfbSDimitry Andric FunctionCalleesToSynthesizedCallsiteInfos[FS][Callee] = 1947*297eecfbSDimitry Andric std::make_unique<CallsiteInfo>(Callee, SmallVector<unsigned>()); 1948*297eecfbSDimitry Andric CallsiteInfo *NewCallsiteInfo = 1949*297eecfbSDimitry Andric FunctionCalleesToSynthesizedCallsiteInfos[FS][Callee].get(); 1950*297eecfbSDimitry Andric FoundCalleeChain.push_back({NewCallsiteInfo, FS}); 1951*297eecfbSDimitry Andric }; 1952*297eecfbSDimitry Andric 1953*297eecfbSDimitry Andric // Look for tail calls in this function, and check if they either call the 1954*297eecfbSDimitry Andric // profiled callee directly, or indirectly (via a recursive search). 1955*297eecfbSDimitry Andric // Only succeed if there is a single unique tail call chain found between the 1956*297eecfbSDimitry Andric // profiled caller and callee, otherwise we could perform incorrect cloning. 1957*297eecfbSDimitry Andric bool FoundSingleCalleeChain = false; 1958*297eecfbSDimitry Andric for (auto &S : CurCallee.getSummaryList()) { 1959*297eecfbSDimitry Andric if (!GlobalValue::isLocalLinkage(S->linkage()) && 1960*297eecfbSDimitry Andric !isPrevailing(CurCallee.getGUID(), S.get())) 1961*297eecfbSDimitry Andric continue; 1962*297eecfbSDimitry Andric auto *FS = dyn_cast<FunctionSummary>(S->getBaseObject()); 1963*297eecfbSDimitry Andric if (!FS) 1964*297eecfbSDimitry Andric continue; 1965*297eecfbSDimitry Andric auto FSVI = CurCallee; 1966*297eecfbSDimitry Andric auto *AS = dyn_cast<AliasSummary>(S.get()); 1967*297eecfbSDimitry Andric if (AS) 1968*297eecfbSDimitry Andric FSVI = AS->getAliaseeVI(); 1969*297eecfbSDimitry Andric for (auto &CallEdge : FS->calls()) { 1970*297eecfbSDimitry Andric if (!CallEdge.second.hasTailCall()) 1971*297eecfbSDimitry Andric continue; 1972*297eecfbSDimitry Andric if (CallEdge.first == ProfiledCallee) { 1973*297eecfbSDimitry Andric if (FoundSingleCalleeChain) { 1974*297eecfbSDimitry Andric FoundMultipleCalleeChains = true; 1975*297eecfbSDimitry Andric return false; 1976*297eecfbSDimitry Andric } 1977*297eecfbSDimitry Andric FoundSingleCalleeChain = true; 1978*297eecfbSDimitry Andric FoundProfiledCalleeCount++; 1979*297eecfbSDimitry Andric FoundProfiledCalleeDepth += Depth; 1980*297eecfbSDimitry Andric if (Depth > FoundProfiledCalleeMaxDepth) 1981*297eecfbSDimitry Andric FoundProfiledCalleeMaxDepth = Depth; 1982*297eecfbSDimitry Andric CreateAndSaveCallsiteInfo(CallEdge.first, FS); 1983*297eecfbSDimitry Andric // Add FS to FSToVIMap in case it isn't already there. 1984*297eecfbSDimitry Andric assert(!FSToVIMap.count(FS) || FSToVIMap[FS] == FSVI); 1985*297eecfbSDimitry Andric FSToVIMap[FS] = FSVI; 1986*297eecfbSDimitry Andric } else if (findProfiledCalleeThroughTailCalls( 1987*297eecfbSDimitry Andric ProfiledCallee, CallEdge.first, Depth + 1, 1988*297eecfbSDimitry Andric FoundCalleeChain, FoundMultipleCalleeChains)) { 1989*297eecfbSDimitry Andric if (FoundMultipleCalleeChains) 1990*297eecfbSDimitry Andric return false; 1991*297eecfbSDimitry Andric if (FoundSingleCalleeChain) { 1992*297eecfbSDimitry Andric FoundMultipleCalleeChains = true; 1993*297eecfbSDimitry Andric return false; 1994*297eecfbSDimitry Andric } 1995*297eecfbSDimitry Andric FoundSingleCalleeChain = true; 1996*297eecfbSDimitry Andric CreateAndSaveCallsiteInfo(CallEdge.first, FS); 1997*297eecfbSDimitry Andric // Add FS to FSToVIMap in case it isn't already there. 1998*297eecfbSDimitry Andric assert(!FSToVIMap.count(FS) || FSToVIMap[FS] == FSVI); 1999*297eecfbSDimitry Andric FSToVIMap[FS] = FSVI; 2000*297eecfbSDimitry Andric } 2001*297eecfbSDimitry Andric } 2002*297eecfbSDimitry Andric } 2003*297eecfbSDimitry Andric 2004*297eecfbSDimitry Andric return FoundSingleCalleeChain; 2005*297eecfbSDimitry Andric } 2006*297eecfbSDimitry Andric 2007*297eecfbSDimitry Andric bool IndexCallsiteContextGraph::calleeMatchesFunc( 2008*297eecfbSDimitry Andric IndexCall &Call, const FunctionSummary *Func, 2009*297eecfbSDimitry Andric const FunctionSummary *CallerFunc, 2010*297eecfbSDimitry Andric std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain) { 201106c3fb27SDimitry Andric ValueInfo Callee = 201206c3fb27SDimitry Andric dyn_cast_if_present<CallsiteInfo *>(Call.getBase())->Callee; 201306c3fb27SDimitry Andric // If there is no summary list then this is a call to an externally defined 201406c3fb27SDimitry Andric // symbol. 201506c3fb27SDimitry Andric AliasSummary *Alias = 201606c3fb27SDimitry Andric Callee.getSummaryList().empty() 201706c3fb27SDimitry Andric ? nullptr 201806c3fb27SDimitry Andric : dyn_cast<AliasSummary>(Callee.getSummaryList()[0].get()); 201906c3fb27SDimitry Andric assert(FSToVIMap.count(Func)); 2020*297eecfbSDimitry Andric auto FuncVI = FSToVIMap[Func]; 2021*297eecfbSDimitry Andric if (Callee == FuncVI || 202206c3fb27SDimitry Andric // If callee is an alias, check the aliasee, since only function 202306c3fb27SDimitry Andric // summary base objects will contain the stack node summaries and thus 202406c3fb27SDimitry Andric // get a context node. 2025*297eecfbSDimitry Andric (Alias && Alias->getAliaseeVI() == FuncVI)) 2026*297eecfbSDimitry Andric return true; 2027*297eecfbSDimitry Andric 2028*297eecfbSDimitry Andric // Recursively search for the profiled callee through tail calls starting with 2029*297eecfbSDimitry Andric // the actual Callee. The discovered tail call chain is saved in 2030*297eecfbSDimitry Andric // FoundCalleeChain, and we will fixup the graph to include these callsites 2031*297eecfbSDimitry Andric // after returning. 2032*297eecfbSDimitry Andric // FIXME: We will currently redo the same recursive walk if we find the same 2033*297eecfbSDimitry Andric // mismatched callee from another callsite. We can improve this with more 2034*297eecfbSDimitry Andric // bookkeeping of the created chain of new nodes for each mismatch. 2035*297eecfbSDimitry Andric unsigned Depth = 1; 2036*297eecfbSDimitry Andric bool FoundMultipleCalleeChains = false; 2037*297eecfbSDimitry Andric if (!findProfiledCalleeThroughTailCalls( 2038*297eecfbSDimitry Andric FuncVI, Callee, Depth, FoundCalleeChain, FoundMultipleCalleeChains)) { 2039*297eecfbSDimitry Andric LLVM_DEBUG(dbgs() << "Not found through unique tail call chain: " << FuncVI 2040*297eecfbSDimitry Andric << " from " << FSToVIMap[CallerFunc] 2041*297eecfbSDimitry Andric << " that actually called " << Callee 2042*297eecfbSDimitry Andric << (FoundMultipleCalleeChains 2043*297eecfbSDimitry Andric ? " (found multiple possible chains)" 2044*297eecfbSDimitry Andric : "") 2045*297eecfbSDimitry Andric << "\n"); 2046*297eecfbSDimitry Andric if (FoundMultipleCalleeChains) 2047*297eecfbSDimitry Andric FoundProfiledCalleeNonUniquelyCount++; 2048*297eecfbSDimitry Andric return false; 2049*297eecfbSDimitry Andric } 2050*297eecfbSDimitry Andric 2051*297eecfbSDimitry Andric return true; 205206c3fb27SDimitry Andric } 205306c3fb27SDimitry Andric 205406c3fb27SDimitry Andric static std::string getAllocTypeString(uint8_t AllocTypes) { 205506c3fb27SDimitry Andric if (!AllocTypes) 205606c3fb27SDimitry Andric return "None"; 205706c3fb27SDimitry Andric std::string Str; 205806c3fb27SDimitry Andric if (AllocTypes & (uint8_t)AllocationType::NotCold) 205906c3fb27SDimitry Andric Str += "NotCold"; 206006c3fb27SDimitry Andric if (AllocTypes & (uint8_t)AllocationType::Cold) 206106c3fb27SDimitry Andric Str += "Cold"; 206206c3fb27SDimitry Andric return Str; 206306c3fb27SDimitry Andric } 206406c3fb27SDimitry Andric 206506c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 206606c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::dump() 206706c3fb27SDimitry Andric const { 206806c3fb27SDimitry Andric print(dbgs()); 206906c3fb27SDimitry Andric dbgs() << "\n"; 207006c3fb27SDimitry Andric } 207106c3fb27SDimitry Andric 207206c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 207306c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::print( 207406c3fb27SDimitry Andric raw_ostream &OS) const { 207506c3fb27SDimitry Andric OS << "Node " << this << "\n"; 207606c3fb27SDimitry Andric OS << "\t"; 207706c3fb27SDimitry Andric printCall(OS); 207806c3fb27SDimitry Andric if (Recursive) 207906c3fb27SDimitry Andric OS << " (recursive)"; 208006c3fb27SDimitry Andric OS << "\n"; 208106c3fb27SDimitry Andric OS << "\tAllocTypes: " << getAllocTypeString(AllocTypes) << "\n"; 208206c3fb27SDimitry Andric OS << "\tContextIds:"; 208306c3fb27SDimitry Andric std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end()); 208406c3fb27SDimitry Andric std::sort(SortedIds.begin(), SortedIds.end()); 208506c3fb27SDimitry Andric for (auto Id : SortedIds) 208606c3fb27SDimitry Andric OS << " " << Id; 208706c3fb27SDimitry Andric OS << "\n"; 208806c3fb27SDimitry Andric OS << "\tCalleeEdges:\n"; 208906c3fb27SDimitry Andric for (auto &Edge : CalleeEdges) 209006c3fb27SDimitry Andric OS << "\t\t" << *Edge << "\n"; 209106c3fb27SDimitry Andric OS << "\tCallerEdges:\n"; 209206c3fb27SDimitry Andric for (auto &Edge : CallerEdges) 209306c3fb27SDimitry Andric OS << "\t\t" << *Edge << "\n"; 209406c3fb27SDimitry Andric if (!Clones.empty()) { 209506c3fb27SDimitry Andric OS << "\tClones: "; 209606c3fb27SDimitry Andric FieldSeparator FS; 209706c3fb27SDimitry Andric for (auto *Clone : Clones) 209806c3fb27SDimitry Andric OS << FS << Clone; 209906c3fb27SDimitry Andric OS << "\n"; 210006c3fb27SDimitry Andric } else if (CloneOf) { 210106c3fb27SDimitry Andric OS << "\tClone of " << CloneOf << "\n"; 210206c3fb27SDimitry Andric } 210306c3fb27SDimitry Andric } 210406c3fb27SDimitry Andric 210506c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 210606c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::dump() 210706c3fb27SDimitry Andric const { 210806c3fb27SDimitry Andric print(dbgs()); 210906c3fb27SDimitry Andric dbgs() << "\n"; 211006c3fb27SDimitry Andric } 211106c3fb27SDimitry Andric 211206c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 211306c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::print( 211406c3fb27SDimitry Andric raw_ostream &OS) const { 211506c3fb27SDimitry Andric OS << "Edge from Callee " << Callee << " to Caller: " << Caller 211606c3fb27SDimitry Andric << " AllocTypes: " << getAllocTypeString(AllocTypes); 211706c3fb27SDimitry Andric OS << " ContextIds:"; 211806c3fb27SDimitry Andric std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end()); 211906c3fb27SDimitry Andric std::sort(SortedIds.begin(), SortedIds.end()); 212006c3fb27SDimitry Andric for (auto Id : SortedIds) 212106c3fb27SDimitry Andric OS << " " << Id; 212206c3fb27SDimitry Andric } 212306c3fb27SDimitry Andric 212406c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 212506c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::dump() const { 212606c3fb27SDimitry Andric print(dbgs()); 212706c3fb27SDimitry Andric } 212806c3fb27SDimitry Andric 212906c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 213006c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::print( 213106c3fb27SDimitry Andric raw_ostream &OS) const { 213206c3fb27SDimitry Andric OS << "Callsite Context Graph:\n"; 213306c3fb27SDimitry Andric using GraphType = const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *; 213406c3fb27SDimitry Andric for (const auto Node : nodes<GraphType>(this)) { 213506c3fb27SDimitry Andric if (Node->isRemoved()) 213606c3fb27SDimitry Andric continue; 213706c3fb27SDimitry Andric Node->print(OS); 213806c3fb27SDimitry Andric OS << "\n"; 213906c3fb27SDimitry Andric } 214006c3fb27SDimitry Andric } 214106c3fb27SDimitry Andric 214206c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 214306c3fb27SDimitry Andric static void checkEdge( 214406c3fb27SDimitry Andric const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &Edge) { 214506c3fb27SDimitry Andric // Confirm that alloc type is not None and that we have at least one context 214606c3fb27SDimitry Andric // id. 214706c3fb27SDimitry Andric assert(Edge->AllocTypes != (uint8_t)AllocationType::None); 214806c3fb27SDimitry Andric assert(!Edge->ContextIds.empty()); 214906c3fb27SDimitry Andric } 215006c3fb27SDimitry Andric 215106c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 215206c3fb27SDimitry Andric static void checkNode(const ContextNode<DerivedCCG, FuncTy, CallTy> *Node, 215306c3fb27SDimitry Andric bool CheckEdges = true) { 215406c3fb27SDimitry Andric if (Node->isRemoved()) 215506c3fb27SDimitry Andric return; 215606c3fb27SDimitry Andric // Node's context ids should be the union of both its callee and caller edge 215706c3fb27SDimitry Andric // context ids. 215806c3fb27SDimitry Andric if (Node->CallerEdges.size()) { 215906c3fb27SDimitry Andric auto EI = Node->CallerEdges.begin(); 216006c3fb27SDimitry Andric auto &FirstEdge = *EI; 216106c3fb27SDimitry Andric EI++; 216206c3fb27SDimitry Andric DenseSet<uint32_t> CallerEdgeContextIds(FirstEdge->ContextIds); 216306c3fb27SDimitry Andric for (; EI != Node->CallerEdges.end(); EI++) { 216406c3fb27SDimitry Andric const auto &Edge = *EI; 216506c3fb27SDimitry Andric if (CheckEdges) 216606c3fb27SDimitry Andric checkEdge<DerivedCCG, FuncTy, CallTy>(Edge); 216706c3fb27SDimitry Andric set_union(CallerEdgeContextIds, Edge->ContextIds); 216806c3fb27SDimitry Andric } 216906c3fb27SDimitry Andric // Node can have more context ids than callers if some contexts terminate at 217006c3fb27SDimitry Andric // node and some are longer. 217106c3fb27SDimitry Andric assert(Node->ContextIds == CallerEdgeContextIds || 217206c3fb27SDimitry Andric set_is_subset(CallerEdgeContextIds, Node->ContextIds)); 217306c3fb27SDimitry Andric } 217406c3fb27SDimitry Andric if (Node->CalleeEdges.size()) { 217506c3fb27SDimitry Andric auto EI = Node->CalleeEdges.begin(); 217606c3fb27SDimitry Andric auto &FirstEdge = *EI; 217706c3fb27SDimitry Andric EI++; 217806c3fb27SDimitry Andric DenseSet<uint32_t> CalleeEdgeContextIds(FirstEdge->ContextIds); 217906c3fb27SDimitry Andric for (; EI != Node->CalleeEdges.end(); EI++) { 218006c3fb27SDimitry Andric const auto &Edge = *EI; 218106c3fb27SDimitry Andric if (CheckEdges) 218206c3fb27SDimitry Andric checkEdge<DerivedCCG, FuncTy, CallTy>(Edge); 218306c3fb27SDimitry Andric set_union(CalleeEdgeContextIds, Edge->ContextIds); 218406c3fb27SDimitry Andric } 218506c3fb27SDimitry Andric assert(Node->ContextIds == CalleeEdgeContextIds); 218606c3fb27SDimitry Andric } 218706c3fb27SDimitry Andric } 218806c3fb27SDimitry Andric 218906c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 219006c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::check() const { 219106c3fb27SDimitry Andric using GraphType = const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *; 219206c3fb27SDimitry Andric for (const auto Node : nodes<GraphType>(this)) { 219306c3fb27SDimitry Andric checkNode<DerivedCCG, FuncTy, CallTy>(Node, /*CheckEdges=*/false); 219406c3fb27SDimitry Andric for (auto &Edge : Node->CallerEdges) 219506c3fb27SDimitry Andric checkEdge<DerivedCCG, FuncTy, CallTy>(Edge); 219606c3fb27SDimitry Andric } 219706c3fb27SDimitry Andric } 219806c3fb27SDimitry Andric 219906c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 220006c3fb27SDimitry Andric struct GraphTraits<const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *> { 220106c3fb27SDimitry Andric using GraphType = const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *; 220206c3fb27SDimitry Andric using NodeRef = const ContextNode<DerivedCCG, FuncTy, CallTy> *; 220306c3fb27SDimitry Andric 220406c3fb27SDimitry Andric using NodePtrTy = std::unique_ptr<ContextNode<DerivedCCG, FuncTy, CallTy>>; 220506c3fb27SDimitry Andric static NodeRef getNode(const NodePtrTy &P) { return P.get(); } 220606c3fb27SDimitry Andric 220706c3fb27SDimitry Andric using nodes_iterator = 220806c3fb27SDimitry Andric mapped_iterator<typename std::vector<NodePtrTy>::const_iterator, 220906c3fb27SDimitry Andric decltype(&getNode)>; 221006c3fb27SDimitry Andric 221106c3fb27SDimitry Andric static nodes_iterator nodes_begin(GraphType G) { 221206c3fb27SDimitry Andric return nodes_iterator(G->NodeOwner.begin(), &getNode); 221306c3fb27SDimitry Andric } 221406c3fb27SDimitry Andric 221506c3fb27SDimitry Andric static nodes_iterator nodes_end(GraphType G) { 221606c3fb27SDimitry Andric return nodes_iterator(G->NodeOwner.end(), &getNode); 221706c3fb27SDimitry Andric } 221806c3fb27SDimitry Andric 221906c3fb27SDimitry Andric static NodeRef getEntryNode(GraphType G) { 222006c3fb27SDimitry Andric return G->NodeOwner.begin()->get(); 222106c3fb27SDimitry Andric } 222206c3fb27SDimitry Andric 222306c3fb27SDimitry Andric using EdgePtrTy = std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>; 222406c3fb27SDimitry Andric static const ContextNode<DerivedCCG, FuncTy, CallTy> * 222506c3fb27SDimitry Andric GetCallee(const EdgePtrTy &P) { 222606c3fb27SDimitry Andric return P->Callee; 222706c3fb27SDimitry Andric } 222806c3fb27SDimitry Andric 222906c3fb27SDimitry Andric using ChildIteratorType = 223006c3fb27SDimitry Andric mapped_iterator<typename std::vector<std::shared_ptr<ContextEdge< 223106c3fb27SDimitry Andric DerivedCCG, FuncTy, CallTy>>>::const_iterator, 223206c3fb27SDimitry Andric decltype(&GetCallee)>; 223306c3fb27SDimitry Andric 223406c3fb27SDimitry Andric static ChildIteratorType child_begin(NodeRef N) { 223506c3fb27SDimitry Andric return ChildIteratorType(N->CalleeEdges.begin(), &GetCallee); 223606c3fb27SDimitry Andric } 223706c3fb27SDimitry Andric 223806c3fb27SDimitry Andric static ChildIteratorType child_end(NodeRef N) { 223906c3fb27SDimitry Andric return ChildIteratorType(N->CalleeEdges.end(), &GetCallee); 224006c3fb27SDimitry Andric } 224106c3fb27SDimitry Andric }; 224206c3fb27SDimitry Andric 224306c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 224406c3fb27SDimitry Andric struct DOTGraphTraits<const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *> 224506c3fb27SDimitry Andric : public DefaultDOTGraphTraits { 224606c3fb27SDimitry Andric DOTGraphTraits(bool IsSimple = false) : DefaultDOTGraphTraits(IsSimple) {} 224706c3fb27SDimitry Andric 224806c3fb27SDimitry Andric using GraphType = const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *; 224906c3fb27SDimitry Andric using GTraits = GraphTraits<GraphType>; 225006c3fb27SDimitry Andric using NodeRef = typename GTraits::NodeRef; 225106c3fb27SDimitry Andric using ChildIteratorType = typename GTraits::ChildIteratorType; 225206c3fb27SDimitry Andric 225306c3fb27SDimitry Andric static std::string getNodeLabel(NodeRef Node, GraphType G) { 225406c3fb27SDimitry Andric std::string LabelString = 225506c3fb27SDimitry Andric (Twine("OrigId: ") + (Node->IsAllocation ? "Alloc" : "") + 225606c3fb27SDimitry Andric Twine(Node->OrigStackOrAllocId)) 225706c3fb27SDimitry Andric .str(); 225806c3fb27SDimitry Andric LabelString += "\n"; 225906c3fb27SDimitry Andric if (Node->hasCall()) { 226006c3fb27SDimitry Andric auto Func = G->NodeToCallingFunc.find(Node); 226106c3fb27SDimitry Andric assert(Func != G->NodeToCallingFunc.end()); 226206c3fb27SDimitry Andric LabelString += 226306c3fb27SDimitry Andric G->getLabel(Func->second, Node->Call.call(), Node->Call.cloneNo()); 226406c3fb27SDimitry Andric } else { 226506c3fb27SDimitry Andric LabelString += "null call"; 226606c3fb27SDimitry Andric if (Node->Recursive) 226706c3fb27SDimitry Andric LabelString += " (recursive)"; 226806c3fb27SDimitry Andric else 226906c3fb27SDimitry Andric LabelString += " (external)"; 227006c3fb27SDimitry Andric } 227106c3fb27SDimitry Andric return LabelString; 227206c3fb27SDimitry Andric } 227306c3fb27SDimitry Andric 227406c3fb27SDimitry Andric static std::string getNodeAttributes(NodeRef Node, GraphType) { 227506c3fb27SDimitry Andric std::string AttributeString = (Twine("tooltip=\"") + getNodeId(Node) + " " + 227606c3fb27SDimitry Andric getContextIds(Node->ContextIds) + "\"") 227706c3fb27SDimitry Andric .str(); 227806c3fb27SDimitry Andric AttributeString += 227906c3fb27SDimitry Andric (Twine(",fillcolor=\"") + getColor(Node->AllocTypes) + "\"").str(); 228006c3fb27SDimitry Andric AttributeString += ",style=\"filled\""; 228106c3fb27SDimitry Andric if (Node->CloneOf) { 228206c3fb27SDimitry Andric AttributeString += ",color=\"blue\""; 228306c3fb27SDimitry Andric AttributeString += ",style=\"filled,bold,dashed\""; 228406c3fb27SDimitry Andric } else 228506c3fb27SDimitry Andric AttributeString += ",style=\"filled\""; 228606c3fb27SDimitry Andric return AttributeString; 228706c3fb27SDimitry Andric } 228806c3fb27SDimitry Andric 228906c3fb27SDimitry Andric static std::string getEdgeAttributes(NodeRef, ChildIteratorType ChildIter, 229006c3fb27SDimitry Andric GraphType) { 229106c3fb27SDimitry Andric auto &Edge = *(ChildIter.getCurrent()); 229206c3fb27SDimitry Andric return (Twine("tooltip=\"") + getContextIds(Edge->ContextIds) + "\"" + 229306c3fb27SDimitry Andric Twine(",fillcolor=\"") + getColor(Edge->AllocTypes) + "\"") 229406c3fb27SDimitry Andric .str(); 229506c3fb27SDimitry Andric } 229606c3fb27SDimitry Andric 229706c3fb27SDimitry Andric // Since the NodeOwners list includes nodes that are no longer connected to 229806c3fb27SDimitry Andric // the graph, skip them here. 229906c3fb27SDimitry Andric static bool isNodeHidden(NodeRef Node, GraphType) { 230006c3fb27SDimitry Andric return Node->isRemoved(); 230106c3fb27SDimitry Andric } 230206c3fb27SDimitry Andric 230306c3fb27SDimitry Andric private: 230406c3fb27SDimitry Andric static std::string getContextIds(const DenseSet<uint32_t> &ContextIds) { 230506c3fb27SDimitry Andric std::string IdString = "ContextIds:"; 230606c3fb27SDimitry Andric if (ContextIds.size() < 100) { 230706c3fb27SDimitry Andric std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end()); 230806c3fb27SDimitry Andric std::sort(SortedIds.begin(), SortedIds.end()); 230906c3fb27SDimitry Andric for (auto Id : SortedIds) 231006c3fb27SDimitry Andric IdString += (" " + Twine(Id)).str(); 231106c3fb27SDimitry Andric } else { 231206c3fb27SDimitry Andric IdString += (" (" + Twine(ContextIds.size()) + " ids)").str(); 231306c3fb27SDimitry Andric } 231406c3fb27SDimitry Andric return IdString; 231506c3fb27SDimitry Andric } 231606c3fb27SDimitry Andric 231706c3fb27SDimitry Andric static std::string getColor(uint8_t AllocTypes) { 231806c3fb27SDimitry Andric if (AllocTypes == (uint8_t)AllocationType::NotCold) 231906c3fb27SDimitry Andric // Color "brown1" actually looks like a lighter red. 232006c3fb27SDimitry Andric return "brown1"; 232106c3fb27SDimitry Andric if (AllocTypes == (uint8_t)AllocationType::Cold) 232206c3fb27SDimitry Andric return "cyan"; 232306c3fb27SDimitry Andric if (AllocTypes == 232406c3fb27SDimitry Andric ((uint8_t)AllocationType::NotCold | (uint8_t)AllocationType::Cold)) 232506c3fb27SDimitry Andric // Lighter purple. 232606c3fb27SDimitry Andric return "mediumorchid1"; 232706c3fb27SDimitry Andric return "gray"; 232806c3fb27SDimitry Andric } 232906c3fb27SDimitry Andric 233006c3fb27SDimitry Andric static std::string getNodeId(NodeRef Node) { 233106c3fb27SDimitry Andric std::stringstream SStream; 233206c3fb27SDimitry Andric SStream << std::hex << "N0x" << (unsigned long long)Node; 233306c3fb27SDimitry Andric std::string Result = SStream.str(); 233406c3fb27SDimitry Andric return Result; 233506c3fb27SDimitry Andric } 233606c3fb27SDimitry Andric }; 233706c3fb27SDimitry Andric 233806c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 233906c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::exportToDot( 234006c3fb27SDimitry Andric std::string Label) const { 234106c3fb27SDimitry Andric WriteGraph(this, "", false, Label, 234206c3fb27SDimitry Andric DotFilePathPrefix + "ccg." + Label + ".dot"); 234306c3fb27SDimitry Andric } 234406c3fb27SDimitry Andric 234506c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 234606c3fb27SDimitry Andric typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode * 234706c3fb27SDimitry Andric CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::moveEdgeToNewCalleeClone( 234806c3fb27SDimitry Andric const std::shared_ptr<ContextEdge> &Edge, EdgeIter *CallerEdgeI) { 234906c3fb27SDimitry Andric ContextNode *Node = Edge->Callee; 235006c3fb27SDimitry Andric NodeOwner.push_back( 235106c3fb27SDimitry Andric std::make_unique<ContextNode>(Node->IsAllocation, Node->Call)); 235206c3fb27SDimitry Andric ContextNode *Clone = NodeOwner.back().get(); 235306c3fb27SDimitry Andric Node->addClone(Clone); 235406c3fb27SDimitry Andric assert(NodeToCallingFunc.count(Node)); 235506c3fb27SDimitry Andric NodeToCallingFunc[Clone] = NodeToCallingFunc[Node]; 235606c3fb27SDimitry Andric moveEdgeToExistingCalleeClone(Edge, Clone, CallerEdgeI, /*NewClone=*/true); 235706c3fb27SDimitry Andric return Clone; 235806c3fb27SDimitry Andric } 235906c3fb27SDimitry Andric 236006c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 236106c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>:: 236206c3fb27SDimitry Andric moveEdgeToExistingCalleeClone(const std::shared_ptr<ContextEdge> &Edge, 236306c3fb27SDimitry Andric ContextNode *NewCallee, EdgeIter *CallerEdgeI, 236406c3fb27SDimitry Andric bool NewClone) { 236506c3fb27SDimitry Andric // NewCallee and Edge's current callee must be clones of the same original 236606c3fb27SDimitry Andric // node (Edge's current callee may be the original node too). 236706c3fb27SDimitry Andric assert(NewCallee->getOrigNode() == Edge->Callee->getOrigNode()); 236806c3fb27SDimitry Andric auto &EdgeContextIds = Edge->getContextIds(); 236906c3fb27SDimitry Andric ContextNode *OldCallee = Edge->Callee; 237006c3fb27SDimitry Andric if (CallerEdgeI) 237106c3fb27SDimitry Andric *CallerEdgeI = OldCallee->CallerEdges.erase(*CallerEdgeI); 237206c3fb27SDimitry Andric else 237306c3fb27SDimitry Andric OldCallee->eraseCallerEdge(Edge.get()); 237406c3fb27SDimitry Andric Edge->Callee = NewCallee; 237506c3fb27SDimitry Andric NewCallee->CallerEdges.push_back(Edge); 237606c3fb27SDimitry Andric // Don't need to update Edge's context ids since we are simply reconnecting 237706c3fb27SDimitry Andric // it. 237806c3fb27SDimitry Andric set_subtract(OldCallee->ContextIds, EdgeContextIds); 237906c3fb27SDimitry Andric NewCallee->ContextIds.insert(EdgeContextIds.begin(), EdgeContextIds.end()); 238006c3fb27SDimitry Andric NewCallee->AllocTypes |= Edge->AllocTypes; 238106c3fb27SDimitry Andric OldCallee->AllocTypes = computeAllocType(OldCallee->ContextIds); 238206c3fb27SDimitry Andric // OldCallee alloc type should be None iff its context id set is now empty. 238306c3fb27SDimitry Andric assert((OldCallee->AllocTypes == (uint8_t)AllocationType::None) == 238406c3fb27SDimitry Andric OldCallee->ContextIds.empty()); 238506c3fb27SDimitry Andric // Now walk the old callee node's callee edges and move Edge's context ids 238606c3fb27SDimitry Andric // over to the corresponding edge into the clone (which is created here if 238706c3fb27SDimitry Andric // this is a newly created clone). 238806c3fb27SDimitry Andric for (auto &OldCalleeEdge : OldCallee->CalleeEdges) { 238906c3fb27SDimitry Andric // The context ids moving to the new callee are the subset of this edge's 239006c3fb27SDimitry Andric // context ids and the context ids on the caller edge being moved. 239106c3fb27SDimitry Andric DenseSet<uint32_t> EdgeContextIdsToMove = 239206c3fb27SDimitry Andric set_intersection(OldCalleeEdge->getContextIds(), EdgeContextIds); 239306c3fb27SDimitry Andric set_subtract(OldCalleeEdge->getContextIds(), EdgeContextIdsToMove); 239406c3fb27SDimitry Andric OldCalleeEdge->AllocTypes = 239506c3fb27SDimitry Andric computeAllocType(OldCalleeEdge->getContextIds()); 239606c3fb27SDimitry Andric if (!NewClone) { 239706c3fb27SDimitry Andric // Update context ids / alloc type on corresponding edge to NewCallee. 239806c3fb27SDimitry Andric // There is a chance this may not exist if we are reusing an existing 239906c3fb27SDimitry Andric // clone, specifically during function assignment, where we would have 240006c3fb27SDimitry Andric // removed none type edges after creating the clone. If we can't find 240106c3fb27SDimitry Andric // a corresponding edge there, fall through to the cloning below. 240206c3fb27SDimitry Andric if (auto *NewCalleeEdge = 240306c3fb27SDimitry Andric NewCallee->findEdgeFromCallee(OldCalleeEdge->Callee)) { 240406c3fb27SDimitry Andric NewCalleeEdge->getContextIds().insert(EdgeContextIdsToMove.begin(), 240506c3fb27SDimitry Andric EdgeContextIdsToMove.end()); 240606c3fb27SDimitry Andric NewCalleeEdge->AllocTypes |= computeAllocType(EdgeContextIdsToMove); 240706c3fb27SDimitry Andric continue; 240806c3fb27SDimitry Andric } 240906c3fb27SDimitry Andric } 241006c3fb27SDimitry Andric auto NewEdge = std::make_shared<ContextEdge>( 241106c3fb27SDimitry Andric OldCalleeEdge->Callee, NewCallee, 241206c3fb27SDimitry Andric computeAllocType(EdgeContextIdsToMove), EdgeContextIdsToMove); 241306c3fb27SDimitry Andric NewCallee->CalleeEdges.push_back(NewEdge); 241406c3fb27SDimitry Andric NewEdge->Callee->CallerEdges.push_back(NewEdge); 241506c3fb27SDimitry Andric } 241606c3fb27SDimitry Andric if (VerifyCCG) { 241706c3fb27SDimitry Andric checkNode<DerivedCCG, FuncTy, CallTy>(OldCallee, /*CheckEdges=*/false); 241806c3fb27SDimitry Andric checkNode<DerivedCCG, FuncTy, CallTy>(NewCallee, /*CheckEdges=*/false); 241906c3fb27SDimitry Andric for (const auto &OldCalleeEdge : OldCallee->CalleeEdges) 242006c3fb27SDimitry Andric checkNode<DerivedCCG, FuncTy, CallTy>(OldCalleeEdge->Callee, 242106c3fb27SDimitry Andric /*CheckEdges=*/false); 242206c3fb27SDimitry Andric for (const auto &NewCalleeEdge : NewCallee->CalleeEdges) 242306c3fb27SDimitry Andric checkNode<DerivedCCG, FuncTy, CallTy>(NewCalleeEdge->Callee, 242406c3fb27SDimitry Andric /*CheckEdges=*/false); 242506c3fb27SDimitry Andric } 242606c3fb27SDimitry Andric } 242706c3fb27SDimitry Andric 242806c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 242906c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones() { 243006c3fb27SDimitry Andric DenseSet<const ContextNode *> Visited; 243106c3fb27SDimitry Andric for (auto &Entry : AllocationCallToContextNodeMap) 243206c3fb27SDimitry Andric identifyClones(Entry.second, Visited); 243306c3fb27SDimitry Andric } 243406c3fb27SDimitry Andric 243506c3fb27SDimitry Andric // helper function to check an AllocType is cold or notcold or both. 243606c3fb27SDimitry Andric bool checkColdOrNotCold(uint8_t AllocType) { 243706c3fb27SDimitry Andric return (AllocType == (uint8_t)AllocationType::Cold) || 243806c3fb27SDimitry Andric (AllocType == (uint8_t)AllocationType::NotCold) || 243906c3fb27SDimitry Andric (AllocType == 244006c3fb27SDimitry Andric ((uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold)); 244106c3fb27SDimitry Andric } 244206c3fb27SDimitry Andric 244306c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 244406c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones( 244506c3fb27SDimitry Andric ContextNode *Node, DenseSet<const ContextNode *> &Visited) { 244606c3fb27SDimitry Andric if (VerifyNodes) 244706c3fb27SDimitry Andric checkNode<DerivedCCG, FuncTy, CallTy>(Node); 244806c3fb27SDimitry Andric assert(!Node->CloneOf); 244906c3fb27SDimitry Andric 245006c3fb27SDimitry Andric // If Node as a null call, then either it wasn't found in the module (regular 245106c3fb27SDimitry Andric // LTO) or summary index (ThinLTO), or there were other conditions blocking 245206c3fb27SDimitry Andric // cloning (e.g. recursion, calls multiple targets, etc). 245306c3fb27SDimitry Andric // Do this here so that we don't try to recursively clone callers below, which 245406c3fb27SDimitry Andric // isn't useful at least for this node. 245506c3fb27SDimitry Andric if (!Node->hasCall()) 245606c3fb27SDimitry Andric return; 245706c3fb27SDimitry Andric 245806c3fb27SDimitry Andric #ifndef NDEBUG 245906c3fb27SDimitry Andric auto Insert = 246006c3fb27SDimitry Andric #endif 246106c3fb27SDimitry Andric Visited.insert(Node); 246206c3fb27SDimitry Andric // We should not have visited this node yet. 246306c3fb27SDimitry Andric assert(Insert.second); 246406c3fb27SDimitry Andric // The recursive call to identifyClones may delete the current edge from the 246506c3fb27SDimitry Andric // CallerEdges vector. Make a copy and iterate on that, simpler than passing 246606c3fb27SDimitry Andric // in an iterator and having recursive call erase from it. Other edges may 246706c3fb27SDimitry Andric // also get removed during the recursion, which will have null Callee and 246806c3fb27SDimitry Andric // Caller pointers (and are deleted later), so we skip those below. 246906c3fb27SDimitry Andric { 247006c3fb27SDimitry Andric auto CallerEdges = Node->CallerEdges; 247106c3fb27SDimitry Andric for (auto &Edge : CallerEdges) { 247206c3fb27SDimitry Andric // Skip any that have been removed by an earlier recursive call. 247306c3fb27SDimitry Andric if (Edge->Callee == nullptr && Edge->Caller == nullptr) { 24745f757f3fSDimitry Andric assert(!llvm::count(Node->CallerEdges, Edge)); 247506c3fb27SDimitry Andric continue; 247606c3fb27SDimitry Andric } 247706c3fb27SDimitry Andric // Ignore any caller we previously visited via another edge. 247806c3fb27SDimitry Andric if (!Visited.count(Edge->Caller) && !Edge->Caller->CloneOf) { 247906c3fb27SDimitry Andric identifyClones(Edge->Caller, Visited); 248006c3fb27SDimitry Andric } 248106c3fb27SDimitry Andric } 248206c3fb27SDimitry Andric } 248306c3fb27SDimitry Andric 248406c3fb27SDimitry Andric // Check if we reached an unambiguous call or have have only a single caller. 248506c3fb27SDimitry Andric if (hasSingleAllocType(Node->AllocTypes) || Node->CallerEdges.size() <= 1) 248606c3fb27SDimitry Andric return; 248706c3fb27SDimitry Andric 248806c3fb27SDimitry Andric // We need to clone. 248906c3fb27SDimitry Andric 249006c3fb27SDimitry Andric // Try to keep the original version as alloc type NotCold. This will make 249106c3fb27SDimitry Andric // cases with indirect calls or any other situation with an unknown call to 249206c3fb27SDimitry Andric // the original function get the default behavior. We do this by sorting the 249306c3fb27SDimitry Andric // CallerEdges of the Node we will clone by alloc type. 249406c3fb27SDimitry Andric // 249506c3fb27SDimitry Andric // Give NotCold edge the lowest sort priority so those edges are at the end of 249606c3fb27SDimitry Andric // the caller edges vector, and stay on the original version (since the below 249706c3fb27SDimitry Andric // code clones greedily until it finds all remaining edges have the same type 249806c3fb27SDimitry Andric // and leaves the remaining ones on the original Node). 249906c3fb27SDimitry Andric // 250006c3fb27SDimitry Andric // We shouldn't actually have any None type edges, so the sorting priority for 250106c3fb27SDimitry Andric // that is arbitrary, and we assert in that case below. 250206c3fb27SDimitry Andric const unsigned AllocTypeCloningPriority[] = {/*None*/ 3, /*NotCold*/ 4, 250306c3fb27SDimitry Andric /*Cold*/ 1, 250406c3fb27SDimitry Andric /*NotColdCold*/ 2}; 250506c3fb27SDimitry Andric std::stable_sort(Node->CallerEdges.begin(), Node->CallerEdges.end(), 250606c3fb27SDimitry Andric [&](const std::shared_ptr<ContextEdge> &A, 250706c3fb27SDimitry Andric const std::shared_ptr<ContextEdge> &B) { 250806c3fb27SDimitry Andric assert(checkColdOrNotCold(A->AllocTypes) && 250906c3fb27SDimitry Andric checkColdOrNotCold(B->AllocTypes)); 251006c3fb27SDimitry Andric 251106c3fb27SDimitry Andric if (A->AllocTypes == B->AllocTypes) 251206c3fb27SDimitry Andric // Use the first context id for each edge as a 251306c3fb27SDimitry Andric // tie-breaker. 251406c3fb27SDimitry Andric return *A->ContextIds.begin() < *B->ContextIds.begin(); 251506c3fb27SDimitry Andric return AllocTypeCloningPriority[A->AllocTypes] < 251606c3fb27SDimitry Andric AllocTypeCloningPriority[B->AllocTypes]; 251706c3fb27SDimitry Andric }); 251806c3fb27SDimitry Andric 251906c3fb27SDimitry Andric assert(Node->AllocTypes != (uint8_t)AllocationType::None); 252006c3fb27SDimitry Andric 252106c3fb27SDimitry Andric // Iterate until we find no more opportunities for disambiguating the alloc 252206c3fb27SDimitry Andric // types via cloning. In most cases this loop will terminate once the Node 252306c3fb27SDimitry Andric // has a single allocation type, in which case no more cloning is needed. 252406c3fb27SDimitry Andric // We need to be able to remove Edge from CallerEdges, so need to adjust 252506c3fb27SDimitry Andric // iterator inside the loop. 252606c3fb27SDimitry Andric for (auto EI = Node->CallerEdges.begin(); EI != Node->CallerEdges.end();) { 252706c3fb27SDimitry Andric auto CallerEdge = *EI; 252806c3fb27SDimitry Andric 252906c3fb27SDimitry Andric // See if cloning the prior caller edge left this node with a single alloc 253006c3fb27SDimitry Andric // type or a single caller. In that case no more cloning of Node is needed. 253106c3fb27SDimitry Andric if (hasSingleAllocType(Node->AllocTypes) || Node->CallerEdges.size() <= 1) 253206c3fb27SDimitry Andric break; 253306c3fb27SDimitry Andric 253406c3fb27SDimitry Andric // Compute the node callee edge alloc types corresponding to the context ids 253506c3fb27SDimitry Andric // for this caller edge. 253606c3fb27SDimitry Andric std::vector<uint8_t> CalleeEdgeAllocTypesForCallerEdge; 253706c3fb27SDimitry Andric CalleeEdgeAllocTypesForCallerEdge.reserve(Node->CalleeEdges.size()); 253806c3fb27SDimitry Andric for (auto &CalleeEdge : Node->CalleeEdges) 253906c3fb27SDimitry Andric CalleeEdgeAllocTypesForCallerEdge.push_back(intersectAllocTypes( 254006c3fb27SDimitry Andric CalleeEdge->getContextIds(), CallerEdge->getContextIds())); 254106c3fb27SDimitry Andric 254206c3fb27SDimitry Andric // Don't clone if doing so will not disambiguate any alloc types amongst 254306c3fb27SDimitry Andric // caller edges (including the callee edges that would be cloned). 254406c3fb27SDimitry Andric // Otherwise we will simply move all edges to the clone. 254506c3fb27SDimitry Andric // 254606c3fb27SDimitry Andric // First check if by cloning we will disambiguate the caller allocation 254706c3fb27SDimitry Andric // type from node's allocation type. Query allocTypeToUse so that we don't 254806c3fb27SDimitry Andric // bother cloning to distinguish NotCold+Cold from NotCold. Note that 254906c3fb27SDimitry Andric // neither of these should be None type. 255006c3fb27SDimitry Andric // 255106c3fb27SDimitry Andric // Then check if by cloning node at least one of the callee edges will be 255206c3fb27SDimitry Andric // disambiguated by splitting out different context ids. 255306c3fb27SDimitry Andric assert(CallerEdge->AllocTypes != (uint8_t)AllocationType::None); 255406c3fb27SDimitry Andric assert(Node->AllocTypes != (uint8_t)AllocationType::None); 255506c3fb27SDimitry Andric if (allocTypeToUse(CallerEdge->AllocTypes) == 255606c3fb27SDimitry Andric allocTypeToUse(Node->AllocTypes) && 255706c3fb27SDimitry Andric allocTypesMatch<DerivedCCG, FuncTy, CallTy>( 255806c3fb27SDimitry Andric CalleeEdgeAllocTypesForCallerEdge, Node->CalleeEdges)) { 255906c3fb27SDimitry Andric ++EI; 256006c3fb27SDimitry Andric continue; 256106c3fb27SDimitry Andric } 256206c3fb27SDimitry Andric 256306c3fb27SDimitry Andric // First see if we can use an existing clone. Check each clone and its 256406c3fb27SDimitry Andric // callee edges for matching alloc types. 256506c3fb27SDimitry Andric ContextNode *Clone = nullptr; 256606c3fb27SDimitry Andric for (auto *CurClone : Node->Clones) { 256706c3fb27SDimitry Andric if (allocTypeToUse(CurClone->AllocTypes) != 256806c3fb27SDimitry Andric allocTypeToUse(CallerEdge->AllocTypes)) 256906c3fb27SDimitry Andric continue; 257006c3fb27SDimitry Andric 257106c3fb27SDimitry Andric if (!allocTypesMatch<DerivedCCG, FuncTy, CallTy>( 257206c3fb27SDimitry Andric CalleeEdgeAllocTypesForCallerEdge, CurClone->CalleeEdges)) 257306c3fb27SDimitry Andric continue; 257406c3fb27SDimitry Andric Clone = CurClone; 257506c3fb27SDimitry Andric break; 257606c3fb27SDimitry Andric } 257706c3fb27SDimitry Andric 257806c3fb27SDimitry Andric // The edge iterator is adjusted when we move the CallerEdge to the clone. 257906c3fb27SDimitry Andric if (Clone) 258006c3fb27SDimitry Andric moveEdgeToExistingCalleeClone(CallerEdge, Clone, &EI); 258106c3fb27SDimitry Andric else 258206c3fb27SDimitry Andric Clone = moveEdgeToNewCalleeClone(CallerEdge, &EI); 258306c3fb27SDimitry Andric 258406c3fb27SDimitry Andric assert(EI == Node->CallerEdges.end() || 258506c3fb27SDimitry Andric Node->AllocTypes != (uint8_t)AllocationType::None); 258606c3fb27SDimitry Andric // Sanity check that no alloc types on clone or its edges are None. 258706c3fb27SDimitry Andric assert(Clone->AllocTypes != (uint8_t)AllocationType::None); 258806c3fb27SDimitry Andric assert(llvm::none_of( 258906c3fb27SDimitry Andric Clone->CallerEdges, [&](const std::shared_ptr<ContextEdge> &E) { 259006c3fb27SDimitry Andric return E->AllocTypes == (uint8_t)AllocationType::None; 259106c3fb27SDimitry Andric })); 259206c3fb27SDimitry Andric } 259306c3fb27SDimitry Andric 259406c3fb27SDimitry Andric // Cloning may have resulted in some cloned callee edges with type None, 259506c3fb27SDimitry Andric // because they aren't carrying any contexts. Remove those edges. 259606c3fb27SDimitry Andric for (auto *Clone : Node->Clones) { 259706c3fb27SDimitry Andric removeNoneTypeCalleeEdges(Clone); 259806c3fb27SDimitry Andric if (VerifyNodes) 259906c3fb27SDimitry Andric checkNode<DerivedCCG, FuncTy, CallTy>(Clone); 260006c3fb27SDimitry Andric } 260106c3fb27SDimitry Andric // We should still have some context ids on the original Node. 260206c3fb27SDimitry Andric assert(!Node->ContextIds.empty()); 260306c3fb27SDimitry Andric 260406c3fb27SDimitry Andric // Remove any callee edges that ended up with alloc type None after creating 260506c3fb27SDimitry Andric // clones and updating callee edges. 260606c3fb27SDimitry Andric removeNoneTypeCalleeEdges(Node); 260706c3fb27SDimitry Andric 260806c3fb27SDimitry Andric // Sanity check that no alloc types on node or edges are None. 260906c3fb27SDimitry Andric assert(Node->AllocTypes != (uint8_t)AllocationType::None); 261006c3fb27SDimitry Andric assert(llvm::none_of(Node->CalleeEdges, 261106c3fb27SDimitry Andric [&](const std::shared_ptr<ContextEdge> &E) { 261206c3fb27SDimitry Andric return E->AllocTypes == (uint8_t)AllocationType::None; 261306c3fb27SDimitry Andric })); 261406c3fb27SDimitry Andric assert(llvm::none_of(Node->CallerEdges, 261506c3fb27SDimitry Andric [&](const std::shared_ptr<ContextEdge> &E) { 261606c3fb27SDimitry Andric return E->AllocTypes == (uint8_t)AllocationType::None; 261706c3fb27SDimitry Andric })); 261806c3fb27SDimitry Andric 261906c3fb27SDimitry Andric if (VerifyNodes) 262006c3fb27SDimitry Andric checkNode<DerivedCCG, FuncTy, CallTy>(Node); 262106c3fb27SDimitry Andric } 262206c3fb27SDimitry Andric 262306c3fb27SDimitry Andric void ModuleCallsiteContextGraph::updateAllocationCall( 262406c3fb27SDimitry Andric CallInfo &Call, AllocationType AllocType) { 262506c3fb27SDimitry Andric std::string AllocTypeString = getAllocTypeAttributeString(AllocType); 262606c3fb27SDimitry Andric auto A = llvm::Attribute::get(Call.call()->getFunction()->getContext(), 262706c3fb27SDimitry Andric "memprof", AllocTypeString); 262806c3fb27SDimitry Andric cast<CallBase>(Call.call())->addFnAttr(A); 262906c3fb27SDimitry Andric OREGetter(Call.call()->getFunction()) 263006c3fb27SDimitry Andric .emit(OptimizationRemark(DEBUG_TYPE, "MemprofAttribute", Call.call()) 263106c3fb27SDimitry Andric << ore::NV("AllocationCall", Call.call()) << " in clone " 263206c3fb27SDimitry Andric << ore::NV("Caller", Call.call()->getFunction()) 263306c3fb27SDimitry Andric << " marked with memprof allocation attribute " 263406c3fb27SDimitry Andric << ore::NV("Attribute", AllocTypeString)); 263506c3fb27SDimitry Andric } 263606c3fb27SDimitry Andric 263706c3fb27SDimitry Andric void IndexCallsiteContextGraph::updateAllocationCall(CallInfo &Call, 263806c3fb27SDimitry Andric AllocationType AllocType) { 263906c3fb27SDimitry Andric auto *AI = Call.call().dyn_cast<AllocInfo *>(); 264006c3fb27SDimitry Andric assert(AI); 264106c3fb27SDimitry Andric assert(AI->Versions.size() > Call.cloneNo()); 264206c3fb27SDimitry Andric AI->Versions[Call.cloneNo()] = (uint8_t)AllocType; 264306c3fb27SDimitry Andric } 264406c3fb27SDimitry Andric 264506c3fb27SDimitry Andric void ModuleCallsiteContextGraph::updateCall(CallInfo &CallerCall, 264606c3fb27SDimitry Andric FuncInfo CalleeFunc) { 264706c3fb27SDimitry Andric if (CalleeFunc.cloneNo() > 0) 264806c3fb27SDimitry Andric cast<CallBase>(CallerCall.call())->setCalledFunction(CalleeFunc.func()); 264906c3fb27SDimitry Andric OREGetter(CallerCall.call()->getFunction()) 265006c3fb27SDimitry Andric .emit(OptimizationRemark(DEBUG_TYPE, "MemprofCall", CallerCall.call()) 265106c3fb27SDimitry Andric << ore::NV("Call", CallerCall.call()) << " in clone " 265206c3fb27SDimitry Andric << ore::NV("Caller", CallerCall.call()->getFunction()) 265306c3fb27SDimitry Andric << " assigned to call function clone " 265406c3fb27SDimitry Andric << ore::NV("Callee", CalleeFunc.func())); 265506c3fb27SDimitry Andric } 265606c3fb27SDimitry Andric 265706c3fb27SDimitry Andric void IndexCallsiteContextGraph::updateCall(CallInfo &CallerCall, 265806c3fb27SDimitry Andric FuncInfo CalleeFunc) { 265906c3fb27SDimitry Andric auto *CI = CallerCall.call().dyn_cast<CallsiteInfo *>(); 266006c3fb27SDimitry Andric assert(CI && 266106c3fb27SDimitry Andric "Caller cannot be an allocation which should not have profiled calls"); 266206c3fb27SDimitry Andric assert(CI->Clones.size() > CallerCall.cloneNo()); 266306c3fb27SDimitry Andric CI->Clones[CallerCall.cloneNo()] = CalleeFunc.cloneNo(); 266406c3fb27SDimitry Andric } 266506c3fb27SDimitry Andric 266606c3fb27SDimitry Andric CallsiteContextGraph<ModuleCallsiteContextGraph, Function, 266706c3fb27SDimitry Andric Instruction *>::FuncInfo 266806c3fb27SDimitry Andric ModuleCallsiteContextGraph::cloneFunctionForCallsite( 266906c3fb27SDimitry Andric FuncInfo &Func, CallInfo &Call, std::map<CallInfo, CallInfo> &CallMap, 267006c3fb27SDimitry Andric std::vector<CallInfo> &CallsWithMetadataInFunc, unsigned CloneNo) { 267106c3fb27SDimitry Andric // Use existing LLVM facilities for cloning and obtaining Call in clone 267206c3fb27SDimitry Andric ValueToValueMapTy VMap; 267306c3fb27SDimitry Andric auto *NewFunc = CloneFunction(Func.func(), VMap); 267406c3fb27SDimitry Andric std::string Name = getMemProfFuncName(Func.func()->getName(), CloneNo); 267506c3fb27SDimitry Andric assert(!Func.func()->getParent()->getFunction(Name)); 267606c3fb27SDimitry Andric NewFunc->setName(Name); 267706c3fb27SDimitry Andric for (auto &Inst : CallsWithMetadataInFunc) { 267806c3fb27SDimitry Andric // This map always has the initial version in it. 267906c3fb27SDimitry Andric assert(Inst.cloneNo() == 0); 268006c3fb27SDimitry Andric CallMap[Inst] = {cast<Instruction>(VMap[Inst.call()]), CloneNo}; 268106c3fb27SDimitry Andric } 268206c3fb27SDimitry Andric OREGetter(Func.func()) 268306c3fb27SDimitry Andric .emit(OptimizationRemark(DEBUG_TYPE, "MemprofClone", Func.func()) 268406c3fb27SDimitry Andric << "created clone " << ore::NV("NewFunction", NewFunc)); 268506c3fb27SDimitry Andric return {NewFunc, CloneNo}; 268606c3fb27SDimitry Andric } 268706c3fb27SDimitry Andric 268806c3fb27SDimitry Andric CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary, 268906c3fb27SDimitry Andric IndexCall>::FuncInfo 269006c3fb27SDimitry Andric IndexCallsiteContextGraph::cloneFunctionForCallsite( 269106c3fb27SDimitry Andric FuncInfo &Func, CallInfo &Call, std::map<CallInfo, CallInfo> &CallMap, 269206c3fb27SDimitry Andric std::vector<CallInfo> &CallsWithMetadataInFunc, unsigned CloneNo) { 269306c3fb27SDimitry Andric // Check how many clones we have of Call (and therefore function). 269406c3fb27SDimitry Andric // The next clone number is the current size of versions array. 269506c3fb27SDimitry Andric // Confirm this matches the CloneNo provided by the caller, which is based on 269606c3fb27SDimitry Andric // the number of function clones we have. 269706c3fb27SDimitry Andric assert(CloneNo == 269806c3fb27SDimitry Andric (Call.call().is<AllocInfo *>() 269906c3fb27SDimitry Andric ? Call.call().dyn_cast<AllocInfo *>()->Versions.size() 270006c3fb27SDimitry Andric : Call.call().dyn_cast<CallsiteInfo *>()->Clones.size())); 270106c3fb27SDimitry Andric // Walk all the instructions in this function. Create a new version for 270206c3fb27SDimitry Andric // each (by adding an entry to the Versions/Clones summary array), and copy 270306c3fb27SDimitry Andric // over the version being called for the function clone being cloned here. 270406c3fb27SDimitry Andric // Additionally, add an entry to the CallMap for the new function clone, 270506c3fb27SDimitry Andric // mapping the original call (clone 0, what is in CallsWithMetadataInFunc) 270606c3fb27SDimitry Andric // to the new call clone. 270706c3fb27SDimitry Andric for (auto &Inst : CallsWithMetadataInFunc) { 270806c3fb27SDimitry Andric // This map always has the initial version in it. 270906c3fb27SDimitry Andric assert(Inst.cloneNo() == 0); 271006c3fb27SDimitry Andric if (auto *AI = Inst.call().dyn_cast<AllocInfo *>()) { 271106c3fb27SDimitry Andric assert(AI->Versions.size() == CloneNo); 271206c3fb27SDimitry Andric // We assign the allocation type later (in updateAllocationCall), just add 271306c3fb27SDimitry Andric // an entry for it here. 271406c3fb27SDimitry Andric AI->Versions.push_back(0); 271506c3fb27SDimitry Andric } else { 271606c3fb27SDimitry Andric auto *CI = Inst.call().dyn_cast<CallsiteInfo *>(); 271706c3fb27SDimitry Andric assert(CI && CI->Clones.size() == CloneNo); 271806c3fb27SDimitry Andric // We assign the clone number later (in updateCall), just add an entry for 271906c3fb27SDimitry Andric // it here. 272006c3fb27SDimitry Andric CI->Clones.push_back(0); 272106c3fb27SDimitry Andric } 272206c3fb27SDimitry Andric CallMap[Inst] = {Inst.call(), CloneNo}; 272306c3fb27SDimitry Andric } 272406c3fb27SDimitry Andric return {Func.func(), CloneNo}; 272506c3fb27SDimitry Andric } 272606c3fb27SDimitry Andric 272706c3fb27SDimitry Andric // This method assigns cloned callsites to functions, cloning the functions as 272806c3fb27SDimitry Andric // needed. The assignment is greedy and proceeds roughly as follows: 272906c3fb27SDimitry Andric // 273006c3fb27SDimitry Andric // For each function Func: 273106c3fb27SDimitry Andric // For each call with graph Node having clones: 273206c3fb27SDimitry Andric // Initialize ClonesWorklist to Node and its clones 273306c3fb27SDimitry Andric // Initialize NodeCloneCount to 0 273406c3fb27SDimitry Andric // While ClonesWorklist is not empty: 273506c3fb27SDimitry Andric // Clone = pop front ClonesWorklist 273606c3fb27SDimitry Andric // NodeCloneCount++ 273706c3fb27SDimitry Andric // If Func has been cloned less than NodeCloneCount times: 273806c3fb27SDimitry Andric // If NodeCloneCount is 1: 273906c3fb27SDimitry Andric // Assign Clone to original Func 274006c3fb27SDimitry Andric // Continue 274106c3fb27SDimitry Andric // Create a new function clone 274206c3fb27SDimitry Andric // If other callers not assigned to call a function clone yet: 274306c3fb27SDimitry Andric // Assign them to call new function clone 274406c3fb27SDimitry Andric // Continue 274506c3fb27SDimitry Andric // Assign any other caller calling the cloned version to new clone 274606c3fb27SDimitry Andric // 274706c3fb27SDimitry Andric // For each caller of Clone: 274806c3fb27SDimitry Andric // If caller is assigned to call a specific function clone: 274906c3fb27SDimitry Andric // If we cannot assign Clone to that function clone: 275006c3fb27SDimitry Andric // Create new callsite Clone NewClone 275106c3fb27SDimitry Andric // Add NewClone to ClonesWorklist 275206c3fb27SDimitry Andric // Continue 275306c3fb27SDimitry Andric // Assign Clone to existing caller's called function clone 275406c3fb27SDimitry Andric // Else: 275506c3fb27SDimitry Andric // If Clone not already assigned to a function clone: 275606c3fb27SDimitry Andric // Assign to first function clone without assignment 275706c3fb27SDimitry Andric // Assign caller to selected function clone 275806c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 275906c3fb27SDimitry Andric bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() { 276006c3fb27SDimitry Andric bool Changed = false; 276106c3fb27SDimitry Andric 276206c3fb27SDimitry Andric // Keep track of the assignment of nodes (callsites) to function clones they 276306c3fb27SDimitry Andric // call. 276406c3fb27SDimitry Andric DenseMap<ContextNode *, FuncInfo> CallsiteToCalleeFuncCloneMap; 276506c3fb27SDimitry Andric 276606c3fb27SDimitry Andric // Update caller node to call function version CalleeFunc, by recording the 276706c3fb27SDimitry Andric // assignment in CallsiteToCalleeFuncCloneMap. 276806c3fb27SDimitry Andric auto RecordCalleeFuncOfCallsite = [&](ContextNode *Caller, 276906c3fb27SDimitry Andric const FuncInfo &CalleeFunc) { 277006c3fb27SDimitry Andric assert(Caller->hasCall()); 277106c3fb27SDimitry Andric CallsiteToCalleeFuncCloneMap[Caller] = CalleeFunc; 277206c3fb27SDimitry Andric }; 277306c3fb27SDimitry Andric 277406c3fb27SDimitry Andric // Walk all functions for which we saw calls with memprof metadata, and handle 277506c3fb27SDimitry Andric // cloning for each of its calls. 277606c3fb27SDimitry Andric for (auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) { 277706c3fb27SDimitry Andric FuncInfo OrigFunc(Func); 277806c3fb27SDimitry Andric // Map from each clone of OrigFunc to a map of remappings of each call of 277906c3fb27SDimitry Andric // interest (from original uncloned call to the corresponding cloned call in 278006c3fb27SDimitry Andric // that function clone). 278106c3fb27SDimitry Andric std::map<FuncInfo, std::map<CallInfo, CallInfo>> FuncClonesToCallMap; 278206c3fb27SDimitry Andric for (auto &Call : CallsWithMetadata) { 278306c3fb27SDimitry Andric ContextNode *Node = getNodeForInst(Call); 278406c3fb27SDimitry Andric // Skip call if we do not have a node for it (all uses of its stack ids 278506c3fb27SDimitry Andric // were either on inlined chains or pruned from the MIBs), or if we did 278606c3fb27SDimitry Andric // not create any clones for it. 278706c3fb27SDimitry Andric if (!Node || Node->Clones.empty()) 278806c3fb27SDimitry Andric continue; 278906c3fb27SDimitry Andric assert(Node->hasCall() && 279006c3fb27SDimitry Andric "Not having a call should have prevented cloning"); 279106c3fb27SDimitry Andric 279206c3fb27SDimitry Andric // Track the assignment of function clones to clones of the current 279306c3fb27SDimitry Andric // callsite Node being handled. 279406c3fb27SDimitry Andric std::map<FuncInfo, ContextNode *> FuncCloneToCurNodeCloneMap; 279506c3fb27SDimitry Andric 279606c3fb27SDimitry Andric // Assign callsite version CallsiteClone to function version FuncClone, 279706c3fb27SDimitry Andric // and also assign (possibly cloned) Call to CallsiteClone. 279806c3fb27SDimitry Andric auto AssignCallsiteCloneToFuncClone = [&](const FuncInfo &FuncClone, 279906c3fb27SDimitry Andric CallInfo &Call, 280006c3fb27SDimitry Andric ContextNode *CallsiteClone, 280106c3fb27SDimitry Andric bool IsAlloc) { 280206c3fb27SDimitry Andric // Record the clone of callsite node assigned to this function clone. 280306c3fb27SDimitry Andric FuncCloneToCurNodeCloneMap[FuncClone] = CallsiteClone; 280406c3fb27SDimitry Andric 280506c3fb27SDimitry Andric assert(FuncClonesToCallMap.count(FuncClone)); 280606c3fb27SDimitry Andric std::map<CallInfo, CallInfo> &CallMap = FuncClonesToCallMap[FuncClone]; 280706c3fb27SDimitry Andric CallInfo CallClone(Call); 280806c3fb27SDimitry Andric if (CallMap.count(Call)) 280906c3fb27SDimitry Andric CallClone = CallMap[Call]; 281006c3fb27SDimitry Andric CallsiteClone->setCall(CallClone); 281106c3fb27SDimitry Andric }; 281206c3fb27SDimitry Andric 281306c3fb27SDimitry Andric // Keep track of the clones of callsite Node that need to be assigned to 281406c3fb27SDimitry Andric // function clones. This list may be expanded in the loop body below if we 281506c3fb27SDimitry Andric // find additional cloning is required. 281606c3fb27SDimitry Andric std::deque<ContextNode *> ClonesWorklist; 281706c3fb27SDimitry Andric // Ignore original Node if we moved all of its contexts to clones. 281806c3fb27SDimitry Andric if (!Node->ContextIds.empty()) 281906c3fb27SDimitry Andric ClonesWorklist.push_back(Node); 282006c3fb27SDimitry Andric ClonesWorklist.insert(ClonesWorklist.end(), Node->Clones.begin(), 282106c3fb27SDimitry Andric Node->Clones.end()); 282206c3fb27SDimitry Andric 282306c3fb27SDimitry Andric // Now walk through all of the clones of this callsite Node that we need, 282406c3fb27SDimitry Andric // and determine the assignment to a corresponding clone of the current 282506c3fb27SDimitry Andric // function (creating new function clones as needed). 282606c3fb27SDimitry Andric unsigned NodeCloneCount = 0; 282706c3fb27SDimitry Andric while (!ClonesWorklist.empty()) { 282806c3fb27SDimitry Andric ContextNode *Clone = ClonesWorklist.front(); 282906c3fb27SDimitry Andric ClonesWorklist.pop_front(); 283006c3fb27SDimitry Andric NodeCloneCount++; 283106c3fb27SDimitry Andric if (VerifyNodes) 283206c3fb27SDimitry Andric checkNode<DerivedCCG, FuncTy, CallTy>(Clone); 283306c3fb27SDimitry Andric 283406c3fb27SDimitry Andric // Need to create a new function clone if we have more callsite clones 283506c3fb27SDimitry Andric // than existing function clones, which would have been assigned to an 283606c3fb27SDimitry Andric // earlier clone in the list (we assign callsite clones to function 283706c3fb27SDimitry Andric // clones greedily). 283806c3fb27SDimitry Andric if (FuncClonesToCallMap.size() < NodeCloneCount) { 283906c3fb27SDimitry Andric // If this is the first callsite copy, assign to original function. 284006c3fb27SDimitry Andric if (NodeCloneCount == 1) { 284106c3fb27SDimitry Andric // Since FuncClonesToCallMap is empty in this case, no clones have 284206c3fb27SDimitry Andric // been created for this function yet, and no callers should have 284306c3fb27SDimitry Andric // been assigned a function clone for this callee node yet. 284406c3fb27SDimitry Andric assert(llvm::none_of( 284506c3fb27SDimitry Andric Clone->CallerEdges, [&](const std::shared_ptr<ContextEdge> &E) { 284606c3fb27SDimitry Andric return CallsiteToCalleeFuncCloneMap.count(E->Caller); 284706c3fb27SDimitry Andric })); 284806c3fb27SDimitry Andric // Initialize with empty call map, assign Clone to original function 284906c3fb27SDimitry Andric // and its callers, and skip to the next clone. 285006c3fb27SDimitry Andric FuncClonesToCallMap[OrigFunc] = {}; 285106c3fb27SDimitry Andric AssignCallsiteCloneToFuncClone( 285206c3fb27SDimitry Andric OrigFunc, Call, Clone, 285306c3fb27SDimitry Andric AllocationCallToContextNodeMap.count(Call)); 285406c3fb27SDimitry Andric for (auto &CE : Clone->CallerEdges) { 285506c3fb27SDimitry Andric // Ignore any caller that does not have a recorded callsite Call. 285606c3fb27SDimitry Andric if (!CE->Caller->hasCall()) 285706c3fb27SDimitry Andric continue; 285806c3fb27SDimitry Andric RecordCalleeFuncOfCallsite(CE->Caller, OrigFunc); 285906c3fb27SDimitry Andric } 286006c3fb27SDimitry Andric continue; 286106c3fb27SDimitry Andric } 286206c3fb27SDimitry Andric 286306c3fb27SDimitry Andric // First locate which copy of OrigFunc to clone again. If a caller 286406c3fb27SDimitry Andric // of this callsite clone was already assigned to call a particular 286506c3fb27SDimitry Andric // function clone, we need to redirect all of those callers to the 286606c3fb27SDimitry Andric // new function clone, and update their other callees within this 286706c3fb27SDimitry Andric // function. 286806c3fb27SDimitry Andric FuncInfo PreviousAssignedFuncClone; 286906c3fb27SDimitry Andric auto EI = llvm::find_if( 287006c3fb27SDimitry Andric Clone->CallerEdges, [&](const std::shared_ptr<ContextEdge> &E) { 287106c3fb27SDimitry Andric return CallsiteToCalleeFuncCloneMap.count(E->Caller); 287206c3fb27SDimitry Andric }); 287306c3fb27SDimitry Andric bool CallerAssignedToCloneOfFunc = false; 287406c3fb27SDimitry Andric if (EI != Clone->CallerEdges.end()) { 287506c3fb27SDimitry Andric const std::shared_ptr<ContextEdge> &Edge = *EI; 287606c3fb27SDimitry Andric PreviousAssignedFuncClone = 287706c3fb27SDimitry Andric CallsiteToCalleeFuncCloneMap[Edge->Caller]; 287806c3fb27SDimitry Andric CallerAssignedToCloneOfFunc = true; 287906c3fb27SDimitry Andric } 288006c3fb27SDimitry Andric 288106c3fb27SDimitry Andric // Clone function and save it along with the CallInfo map created 288206c3fb27SDimitry Andric // during cloning in the FuncClonesToCallMap. 288306c3fb27SDimitry Andric std::map<CallInfo, CallInfo> NewCallMap; 288406c3fb27SDimitry Andric unsigned CloneNo = FuncClonesToCallMap.size(); 288506c3fb27SDimitry Andric assert(CloneNo > 0 && "Clone 0 is the original function, which " 288606c3fb27SDimitry Andric "should already exist in the map"); 288706c3fb27SDimitry Andric FuncInfo NewFuncClone = cloneFunctionForCallsite( 288806c3fb27SDimitry Andric OrigFunc, Call, NewCallMap, CallsWithMetadata, CloneNo); 288906c3fb27SDimitry Andric FuncClonesToCallMap.emplace(NewFuncClone, std::move(NewCallMap)); 289006c3fb27SDimitry Andric FunctionClonesAnalysis++; 289106c3fb27SDimitry Andric Changed = true; 289206c3fb27SDimitry Andric 289306c3fb27SDimitry Andric // If no caller callsites were already assigned to a clone of this 289406c3fb27SDimitry Andric // function, we can simply assign this clone to the new func clone 289506c3fb27SDimitry Andric // and update all callers to it, then skip to the next clone. 289606c3fb27SDimitry Andric if (!CallerAssignedToCloneOfFunc) { 289706c3fb27SDimitry Andric AssignCallsiteCloneToFuncClone( 289806c3fb27SDimitry Andric NewFuncClone, Call, Clone, 289906c3fb27SDimitry Andric AllocationCallToContextNodeMap.count(Call)); 290006c3fb27SDimitry Andric for (auto &CE : Clone->CallerEdges) { 290106c3fb27SDimitry Andric // Ignore any caller that does not have a recorded callsite Call. 290206c3fb27SDimitry Andric if (!CE->Caller->hasCall()) 290306c3fb27SDimitry Andric continue; 290406c3fb27SDimitry Andric RecordCalleeFuncOfCallsite(CE->Caller, NewFuncClone); 290506c3fb27SDimitry Andric } 290606c3fb27SDimitry Andric continue; 290706c3fb27SDimitry Andric } 290806c3fb27SDimitry Andric 290906c3fb27SDimitry Andric // We may need to do additional node cloning in this case. 291006c3fb27SDimitry Andric // Reset the CallsiteToCalleeFuncCloneMap entry for any callers 291106c3fb27SDimitry Andric // that were previously assigned to call PreviousAssignedFuncClone, 291206c3fb27SDimitry Andric // to record that they now call NewFuncClone. 291306c3fb27SDimitry Andric for (auto CE : Clone->CallerEdges) { 2914*297eecfbSDimitry Andric // Skip any that have been removed on an earlier iteration. 2915*297eecfbSDimitry Andric if (!CE) 2916*297eecfbSDimitry Andric continue; 291706c3fb27SDimitry Andric // Ignore any caller that does not have a recorded callsite Call. 291806c3fb27SDimitry Andric if (!CE->Caller->hasCall()) 291906c3fb27SDimitry Andric continue; 292006c3fb27SDimitry Andric 292106c3fb27SDimitry Andric if (!CallsiteToCalleeFuncCloneMap.count(CE->Caller) || 292206c3fb27SDimitry Andric // We subsequently fall through to later handling that 292306c3fb27SDimitry Andric // will perform any additional cloning required for 292406c3fb27SDimitry Andric // callers that were calling other function clones. 292506c3fb27SDimitry Andric CallsiteToCalleeFuncCloneMap[CE->Caller] != 292606c3fb27SDimitry Andric PreviousAssignedFuncClone) 292706c3fb27SDimitry Andric continue; 292806c3fb27SDimitry Andric 292906c3fb27SDimitry Andric RecordCalleeFuncOfCallsite(CE->Caller, NewFuncClone); 293006c3fb27SDimitry Andric 293106c3fb27SDimitry Andric // If we are cloning a function that was already assigned to some 293206c3fb27SDimitry Andric // callers, then essentially we are creating new callsite clones 293306c3fb27SDimitry Andric // of the other callsites in that function that are reached by those 293406c3fb27SDimitry Andric // callers. Clone the other callees of the current callsite's caller 293506c3fb27SDimitry Andric // that were already assigned to PreviousAssignedFuncClone 293606c3fb27SDimitry Andric // accordingly. This is important since we subsequently update the 293706c3fb27SDimitry Andric // calls from the nodes in the graph and their assignments to callee 293806c3fb27SDimitry Andric // functions recorded in CallsiteToCalleeFuncCloneMap. 293906c3fb27SDimitry Andric for (auto CalleeEdge : CE->Caller->CalleeEdges) { 294006c3fb27SDimitry Andric // Skip any that have been removed on an earlier iteration when 294106c3fb27SDimitry Andric // cleaning up newly None type callee edges. 294206c3fb27SDimitry Andric if (!CalleeEdge) 294306c3fb27SDimitry Andric continue; 294406c3fb27SDimitry Andric ContextNode *Callee = CalleeEdge->Callee; 294506c3fb27SDimitry Andric // Skip the current callsite, we are looking for other 294606c3fb27SDimitry Andric // callsites Caller calls, as well as any that does not have a 294706c3fb27SDimitry Andric // recorded callsite Call. 294806c3fb27SDimitry Andric if (Callee == Clone || !Callee->hasCall()) 294906c3fb27SDimitry Andric continue; 295006c3fb27SDimitry Andric ContextNode *NewClone = moveEdgeToNewCalleeClone(CalleeEdge); 295106c3fb27SDimitry Andric removeNoneTypeCalleeEdges(NewClone); 295206c3fb27SDimitry Andric // Moving the edge may have resulted in some none type 295306c3fb27SDimitry Andric // callee edges on the original Callee. 295406c3fb27SDimitry Andric removeNoneTypeCalleeEdges(Callee); 295506c3fb27SDimitry Andric assert(NewClone->AllocTypes != (uint8_t)AllocationType::None); 295606c3fb27SDimitry Andric // If the Callee node was already assigned to call a specific 295706c3fb27SDimitry Andric // function version, make sure its new clone is assigned to call 295806c3fb27SDimitry Andric // that same function clone. 295906c3fb27SDimitry Andric if (CallsiteToCalleeFuncCloneMap.count(Callee)) 296006c3fb27SDimitry Andric RecordCalleeFuncOfCallsite( 296106c3fb27SDimitry Andric NewClone, CallsiteToCalleeFuncCloneMap[Callee]); 296206c3fb27SDimitry Andric // Update NewClone with the new Call clone of this callsite's Call 296306c3fb27SDimitry Andric // created for the new function clone created earlier. 296406c3fb27SDimitry Andric // Recall that we have already ensured when building the graph 296506c3fb27SDimitry Andric // that each caller can only call callsites within the same 296606c3fb27SDimitry Andric // function, so we are guaranteed that Callee Call is in the 296706c3fb27SDimitry Andric // current OrigFunc. 296806c3fb27SDimitry Andric // CallMap is set up as indexed by original Call at clone 0. 296906c3fb27SDimitry Andric CallInfo OrigCall(Callee->getOrigNode()->Call); 297006c3fb27SDimitry Andric OrigCall.setCloneNo(0); 297106c3fb27SDimitry Andric std::map<CallInfo, CallInfo> &CallMap = 297206c3fb27SDimitry Andric FuncClonesToCallMap[NewFuncClone]; 297306c3fb27SDimitry Andric assert(CallMap.count(OrigCall)); 297406c3fb27SDimitry Andric CallInfo NewCall(CallMap[OrigCall]); 297506c3fb27SDimitry Andric assert(NewCall); 297606c3fb27SDimitry Andric NewClone->setCall(NewCall); 297706c3fb27SDimitry Andric } 297806c3fb27SDimitry Andric } 297906c3fb27SDimitry Andric // Fall through to handling below to perform the recording of the 298006c3fb27SDimitry Andric // function for this callsite clone. This enables handling of cases 298106c3fb27SDimitry Andric // where the callers were assigned to different clones of a function. 298206c3fb27SDimitry Andric } 298306c3fb27SDimitry Andric 298406c3fb27SDimitry Andric // See if we can use existing function clone. Walk through 298506c3fb27SDimitry Andric // all caller edges to see if any have already been assigned to 298606c3fb27SDimitry Andric // a clone of this callsite's function. If we can use it, do so. If not, 298706c3fb27SDimitry Andric // because that function clone is already assigned to a different clone 298806c3fb27SDimitry Andric // of this callsite, then we need to clone again. 298906c3fb27SDimitry Andric // Basically, this checking is needed to handle the case where different 299006c3fb27SDimitry Andric // caller functions/callsites may need versions of this function 299106c3fb27SDimitry Andric // containing different mixes of callsite clones across the different 299206c3fb27SDimitry Andric // callsites within the function. If that happens, we need to create 299306c3fb27SDimitry Andric // additional function clones to handle the various combinations. 299406c3fb27SDimitry Andric // 299506c3fb27SDimitry Andric // Keep track of any new clones of this callsite created by the 299606c3fb27SDimitry Andric // following loop, as well as any existing clone that we decided to 299706c3fb27SDimitry Andric // assign this clone to. 299806c3fb27SDimitry Andric std::map<FuncInfo, ContextNode *> FuncCloneToNewCallsiteCloneMap; 299906c3fb27SDimitry Andric FuncInfo FuncCloneAssignedToCurCallsiteClone; 300006c3fb27SDimitry Andric // We need to be able to remove Edge from CallerEdges, so need to adjust 300106c3fb27SDimitry Andric // iterator in the loop. 300206c3fb27SDimitry Andric for (auto EI = Clone->CallerEdges.begin(); 300306c3fb27SDimitry Andric EI != Clone->CallerEdges.end();) { 300406c3fb27SDimitry Andric auto Edge = *EI; 300506c3fb27SDimitry Andric // Ignore any caller that does not have a recorded callsite Call. 300606c3fb27SDimitry Andric if (!Edge->Caller->hasCall()) { 300706c3fb27SDimitry Andric EI++; 300806c3fb27SDimitry Andric continue; 300906c3fb27SDimitry Andric } 301006c3fb27SDimitry Andric // If this caller already assigned to call a version of OrigFunc, need 301106c3fb27SDimitry Andric // to ensure we can assign this callsite clone to that function clone. 301206c3fb27SDimitry Andric if (CallsiteToCalleeFuncCloneMap.count(Edge->Caller)) { 301306c3fb27SDimitry Andric FuncInfo FuncCloneCalledByCaller = 301406c3fb27SDimitry Andric CallsiteToCalleeFuncCloneMap[Edge->Caller]; 301506c3fb27SDimitry Andric // First we need to confirm that this function clone is available 301606c3fb27SDimitry Andric // for use by this callsite node clone. 301706c3fb27SDimitry Andric // 301806c3fb27SDimitry Andric // While FuncCloneToCurNodeCloneMap is built only for this Node and 301906c3fb27SDimitry Andric // its callsite clones, one of those callsite clones X could have 302006c3fb27SDimitry Andric // been assigned to the same function clone called by Edge's caller 302106c3fb27SDimitry Andric // - if Edge's caller calls another callsite within Node's original 302206c3fb27SDimitry Andric // function, and that callsite has another caller reaching clone X. 302306c3fb27SDimitry Andric // We need to clone Node again in this case. 302406c3fb27SDimitry Andric if ((FuncCloneToCurNodeCloneMap.count(FuncCloneCalledByCaller) && 302506c3fb27SDimitry Andric FuncCloneToCurNodeCloneMap[FuncCloneCalledByCaller] != 302606c3fb27SDimitry Andric Clone) || 302706c3fb27SDimitry Andric // Detect when we have multiple callers of this callsite that 302806c3fb27SDimitry Andric // have already been assigned to specific, and different, clones 302906c3fb27SDimitry Andric // of OrigFunc (due to other unrelated callsites in Func they 303006c3fb27SDimitry Andric // reach via call contexts). Is this Clone of callsite Node 303106c3fb27SDimitry Andric // assigned to a different clone of OrigFunc? If so, clone Node 303206c3fb27SDimitry Andric // again. 303306c3fb27SDimitry Andric (FuncCloneAssignedToCurCallsiteClone && 303406c3fb27SDimitry Andric FuncCloneAssignedToCurCallsiteClone != 303506c3fb27SDimitry Andric FuncCloneCalledByCaller)) { 303606c3fb27SDimitry Andric // We need to use a different newly created callsite clone, in 303706c3fb27SDimitry Andric // order to assign it to another new function clone on a 303806c3fb27SDimitry Andric // subsequent iteration over the Clones array (adjusted below). 303906c3fb27SDimitry Andric // Note we specifically do not reset the 304006c3fb27SDimitry Andric // CallsiteToCalleeFuncCloneMap entry for this caller, so that 304106c3fb27SDimitry Andric // when this new clone is processed later we know which version of 304206c3fb27SDimitry Andric // the function to copy (so that other callsite clones we have 304306c3fb27SDimitry Andric // assigned to that function clone are properly cloned over). See 304406c3fb27SDimitry Andric // comments in the function cloning handling earlier. 304506c3fb27SDimitry Andric 304606c3fb27SDimitry Andric // Check if we already have cloned this callsite again while 304706c3fb27SDimitry Andric // walking through caller edges, for a caller calling the same 304806c3fb27SDimitry Andric // function clone. If so, we can move this edge to that new clone 304906c3fb27SDimitry Andric // rather than creating yet another new clone. 305006c3fb27SDimitry Andric if (FuncCloneToNewCallsiteCloneMap.count( 305106c3fb27SDimitry Andric FuncCloneCalledByCaller)) { 305206c3fb27SDimitry Andric ContextNode *NewClone = 305306c3fb27SDimitry Andric FuncCloneToNewCallsiteCloneMap[FuncCloneCalledByCaller]; 305406c3fb27SDimitry Andric moveEdgeToExistingCalleeClone(Edge, NewClone, &EI); 305506c3fb27SDimitry Andric // Cleanup any none type edges cloned over. 305606c3fb27SDimitry Andric removeNoneTypeCalleeEdges(NewClone); 305706c3fb27SDimitry Andric } else { 305806c3fb27SDimitry Andric // Create a new callsite clone. 305906c3fb27SDimitry Andric ContextNode *NewClone = moveEdgeToNewCalleeClone(Edge, &EI); 306006c3fb27SDimitry Andric removeNoneTypeCalleeEdges(NewClone); 306106c3fb27SDimitry Andric FuncCloneToNewCallsiteCloneMap[FuncCloneCalledByCaller] = 306206c3fb27SDimitry Andric NewClone; 306306c3fb27SDimitry Andric // Add to list of clones and process later. 306406c3fb27SDimitry Andric ClonesWorklist.push_back(NewClone); 306506c3fb27SDimitry Andric assert(EI == Clone->CallerEdges.end() || 306606c3fb27SDimitry Andric Clone->AllocTypes != (uint8_t)AllocationType::None); 306706c3fb27SDimitry Andric assert(NewClone->AllocTypes != (uint8_t)AllocationType::None); 306806c3fb27SDimitry Andric } 306906c3fb27SDimitry Andric // Moving the caller edge may have resulted in some none type 307006c3fb27SDimitry Andric // callee edges. 307106c3fb27SDimitry Andric removeNoneTypeCalleeEdges(Clone); 307206c3fb27SDimitry Andric // We will handle the newly created callsite clone in a subsequent 307306c3fb27SDimitry Andric // iteration over this Node's Clones. Continue here since we 307406c3fb27SDimitry Andric // already adjusted iterator EI while moving the edge. 307506c3fb27SDimitry Andric continue; 307606c3fb27SDimitry Andric } 307706c3fb27SDimitry Andric 307806c3fb27SDimitry Andric // Otherwise, we can use the function clone already assigned to this 307906c3fb27SDimitry Andric // caller. 308006c3fb27SDimitry Andric if (!FuncCloneAssignedToCurCallsiteClone) { 308106c3fb27SDimitry Andric FuncCloneAssignedToCurCallsiteClone = FuncCloneCalledByCaller; 308206c3fb27SDimitry Andric // Assign Clone to FuncCloneCalledByCaller 308306c3fb27SDimitry Andric AssignCallsiteCloneToFuncClone( 308406c3fb27SDimitry Andric FuncCloneCalledByCaller, Call, Clone, 308506c3fb27SDimitry Andric AllocationCallToContextNodeMap.count(Call)); 308606c3fb27SDimitry Andric } else 308706c3fb27SDimitry Andric // Don't need to do anything - callsite is already calling this 308806c3fb27SDimitry Andric // function clone. 308906c3fb27SDimitry Andric assert(FuncCloneAssignedToCurCallsiteClone == 309006c3fb27SDimitry Andric FuncCloneCalledByCaller); 309106c3fb27SDimitry Andric 309206c3fb27SDimitry Andric } else { 309306c3fb27SDimitry Andric // We have not already assigned this caller to a version of 309406c3fb27SDimitry Andric // OrigFunc. Do the assignment now. 309506c3fb27SDimitry Andric 309606c3fb27SDimitry Andric // First check if we have already assigned this callsite clone to a 309706c3fb27SDimitry Andric // clone of OrigFunc for another caller during this iteration over 309806c3fb27SDimitry Andric // its caller edges. 309906c3fb27SDimitry Andric if (!FuncCloneAssignedToCurCallsiteClone) { 310006c3fb27SDimitry Andric // Find first function in FuncClonesToCallMap without an assigned 310106c3fb27SDimitry Andric // clone of this callsite Node. We should always have one 310206c3fb27SDimitry Andric // available at this point due to the earlier cloning when the 310306c3fb27SDimitry Andric // FuncClonesToCallMap size was smaller than the clone number. 310406c3fb27SDimitry Andric for (auto &CF : FuncClonesToCallMap) { 310506c3fb27SDimitry Andric if (!FuncCloneToCurNodeCloneMap.count(CF.first)) { 310606c3fb27SDimitry Andric FuncCloneAssignedToCurCallsiteClone = CF.first; 310706c3fb27SDimitry Andric break; 310806c3fb27SDimitry Andric } 310906c3fb27SDimitry Andric } 311006c3fb27SDimitry Andric assert(FuncCloneAssignedToCurCallsiteClone); 311106c3fb27SDimitry Andric // Assign Clone to FuncCloneAssignedToCurCallsiteClone 311206c3fb27SDimitry Andric AssignCallsiteCloneToFuncClone( 311306c3fb27SDimitry Andric FuncCloneAssignedToCurCallsiteClone, Call, Clone, 311406c3fb27SDimitry Andric AllocationCallToContextNodeMap.count(Call)); 311506c3fb27SDimitry Andric } else 311606c3fb27SDimitry Andric assert(FuncCloneToCurNodeCloneMap 311706c3fb27SDimitry Andric [FuncCloneAssignedToCurCallsiteClone] == Clone); 311806c3fb27SDimitry Andric // Update callers to record function version called. 311906c3fb27SDimitry Andric RecordCalleeFuncOfCallsite(Edge->Caller, 312006c3fb27SDimitry Andric FuncCloneAssignedToCurCallsiteClone); 312106c3fb27SDimitry Andric } 312206c3fb27SDimitry Andric 312306c3fb27SDimitry Andric EI++; 312406c3fb27SDimitry Andric } 312506c3fb27SDimitry Andric } 312606c3fb27SDimitry Andric if (VerifyCCG) { 312706c3fb27SDimitry Andric checkNode<DerivedCCG, FuncTy, CallTy>(Node); 312806c3fb27SDimitry Andric for (const auto &PE : Node->CalleeEdges) 312906c3fb27SDimitry Andric checkNode<DerivedCCG, FuncTy, CallTy>(PE->Callee); 313006c3fb27SDimitry Andric for (const auto &CE : Node->CallerEdges) 313106c3fb27SDimitry Andric checkNode<DerivedCCG, FuncTy, CallTy>(CE->Caller); 313206c3fb27SDimitry Andric for (auto *Clone : Node->Clones) { 313306c3fb27SDimitry Andric checkNode<DerivedCCG, FuncTy, CallTy>(Clone); 313406c3fb27SDimitry Andric for (const auto &PE : Clone->CalleeEdges) 313506c3fb27SDimitry Andric checkNode<DerivedCCG, FuncTy, CallTy>(PE->Callee); 313606c3fb27SDimitry Andric for (const auto &CE : Clone->CallerEdges) 313706c3fb27SDimitry Andric checkNode<DerivedCCG, FuncTy, CallTy>(CE->Caller); 313806c3fb27SDimitry Andric } 313906c3fb27SDimitry Andric } 314006c3fb27SDimitry Andric } 314106c3fb27SDimitry Andric } 314206c3fb27SDimitry Andric 314306c3fb27SDimitry Andric auto UpdateCalls = [&](ContextNode *Node, 314406c3fb27SDimitry Andric DenseSet<const ContextNode *> &Visited, 314506c3fb27SDimitry Andric auto &&UpdateCalls) { 314606c3fb27SDimitry Andric auto Inserted = Visited.insert(Node); 314706c3fb27SDimitry Andric if (!Inserted.second) 314806c3fb27SDimitry Andric return; 314906c3fb27SDimitry Andric 315006c3fb27SDimitry Andric for (auto *Clone : Node->Clones) 315106c3fb27SDimitry Andric UpdateCalls(Clone, Visited, UpdateCalls); 315206c3fb27SDimitry Andric 315306c3fb27SDimitry Andric for (auto &Edge : Node->CallerEdges) 315406c3fb27SDimitry Andric UpdateCalls(Edge->Caller, Visited, UpdateCalls); 315506c3fb27SDimitry Andric 315606c3fb27SDimitry Andric // Skip if either no call to update, or if we ended up with no context ids 315706c3fb27SDimitry Andric // (we moved all edges onto other clones). 315806c3fb27SDimitry Andric if (!Node->hasCall() || Node->ContextIds.empty()) 315906c3fb27SDimitry Andric return; 316006c3fb27SDimitry Andric 316106c3fb27SDimitry Andric if (Node->IsAllocation) { 316206c3fb27SDimitry Andric updateAllocationCall(Node->Call, allocTypeToUse(Node->AllocTypes)); 316306c3fb27SDimitry Andric return; 316406c3fb27SDimitry Andric } 316506c3fb27SDimitry Andric 316606c3fb27SDimitry Andric if (!CallsiteToCalleeFuncCloneMap.count(Node)) 316706c3fb27SDimitry Andric return; 316806c3fb27SDimitry Andric 316906c3fb27SDimitry Andric auto CalleeFunc = CallsiteToCalleeFuncCloneMap[Node]; 317006c3fb27SDimitry Andric updateCall(Node->Call, CalleeFunc); 317106c3fb27SDimitry Andric }; 317206c3fb27SDimitry Andric 317306c3fb27SDimitry Andric // Performs DFS traversal starting from allocation nodes to update calls to 317406c3fb27SDimitry Andric // reflect cloning decisions recorded earlier. For regular LTO this will 317506c3fb27SDimitry Andric // update the actual calls in the IR to call the appropriate function clone 317606c3fb27SDimitry Andric // (and add attributes to allocation calls), whereas for ThinLTO the decisions 317706c3fb27SDimitry Andric // are recorded in the summary entries. 317806c3fb27SDimitry Andric DenseSet<const ContextNode *> Visited; 317906c3fb27SDimitry Andric for (auto &Entry : AllocationCallToContextNodeMap) 318006c3fb27SDimitry Andric UpdateCalls(Entry.second, Visited, UpdateCalls); 318106c3fb27SDimitry Andric 318206c3fb27SDimitry Andric return Changed; 318306c3fb27SDimitry Andric } 318406c3fb27SDimitry Andric 318506c3fb27SDimitry Andric static SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> createFunctionClones( 318606c3fb27SDimitry Andric Function &F, unsigned NumClones, Module &M, OptimizationRemarkEmitter &ORE, 318706c3fb27SDimitry Andric std::map<const Function *, SmallPtrSet<const GlobalAlias *, 1>> 318806c3fb27SDimitry Andric &FuncToAliasMap) { 318906c3fb27SDimitry Andric // The first "clone" is the original copy, we should only call this if we 319006c3fb27SDimitry Andric // needed to create new clones. 319106c3fb27SDimitry Andric assert(NumClones > 1); 319206c3fb27SDimitry Andric SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> VMaps; 319306c3fb27SDimitry Andric VMaps.reserve(NumClones - 1); 319406c3fb27SDimitry Andric FunctionsClonedThinBackend++; 319506c3fb27SDimitry Andric for (unsigned I = 1; I < NumClones; I++) { 319606c3fb27SDimitry Andric VMaps.emplace_back(std::make_unique<ValueToValueMapTy>()); 319706c3fb27SDimitry Andric auto *NewF = CloneFunction(&F, *VMaps.back()); 319806c3fb27SDimitry Andric FunctionClonesThinBackend++; 319906c3fb27SDimitry Andric // Strip memprof and callsite metadata from clone as they are no longer 320006c3fb27SDimitry Andric // needed. 320106c3fb27SDimitry Andric for (auto &BB : *NewF) { 320206c3fb27SDimitry Andric for (auto &Inst : BB) { 320306c3fb27SDimitry Andric Inst.setMetadata(LLVMContext::MD_memprof, nullptr); 320406c3fb27SDimitry Andric Inst.setMetadata(LLVMContext::MD_callsite, nullptr); 320506c3fb27SDimitry Andric } 320606c3fb27SDimitry Andric } 320706c3fb27SDimitry Andric std::string Name = getMemProfFuncName(F.getName(), I); 320806c3fb27SDimitry Andric auto *PrevF = M.getFunction(Name); 320906c3fb27SDimitry Andric if (PrevF) { 321006c3fb27SDimitry Andric // We might have created this when adjusting callsite in another 321106c3fb27SDimitry Andric // function. It should be a declaration. 321206c3fb27SDimitry Andric assert(PrevF->isDeclaration()); 321306c3fb27SDimitry Andric NewF->takeName(PrevF); 321406c3fb27SDimitry Andric PrevF->replaceAllUsesWith(NewF); 321506c3fb27SDimitry Andric PrevF->eraseFromParent(); 321606c3fb27SDimitry Andric } else 321706c3fb27SDimitry Andric NewF->setName(Name); 321806c3fb27SDimitry Andric ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofClone", &F) 321906c3fb27SDimitry Andric << "created clone " << ore::NV("NewFunction", NewF)); 322006c3fb27SDimitry Andric 322106c3fb27SDimitry Andric // Now handle aliases to this function, and clone those as well. 322206c3fb27SDimitry Andric if (!FuncToAliasMap.count(&F)) 322306c3fb27SDimitry Andric continue; 322406c3fb27SDimitry Andric for (auto *A : FuncToAliasMap[&F]) { 322506c3fb27SDimitry Andric std::string Name = getMemProfFuncName(A->getName(), I); 322606c3fb27SDimitry Andric auto *PrevA = M.getNamedAlias(Name); 322706c3fb27SDimitry Andric auto *NewA = GlobalAlias::create(A->getValueType(), 322806c3fb27SDimitry Andric A->getType()->getPointerAddressSpace(), 322906c3fb27SDimitry Andric A->getLinkage(), Name, NewF); 323006c3fb27SDimitry Andric NewA->copyAttributesFrom(A); 323106c3fb27SDimitry Andric if (PrevA) { 323206c3fb27SDimitry Andric // We might have created this when adjusting callsite in another 323306c3fb27SDimitry Andric // function. It should be a declaration. 323406c3fb27SDimitry Andric assert(PrevA->isDeclaration()); 323506c3fb27SDimitry Andric NewA->takeName(PrevA); 323606c3fb27SDimitry Andric PrevA->replaceAllUsesWith(NewA); 323706c3fb27SDimitry Andric PrevA->eraseFromParent(); 323806c3fb27SDimitry Andric } 323906c3fb27SDimitry Andric } 324006c3fb27SDimitry Andric } 324106c3fb27SDimitry Andric return VMaps; 324206c3fb27SDimitry Andric } 324306c3fb27SDimitry Andric 324406c3fb27SDimitry Andric // Locate the summary for F. This is complicated by the fact that it might 324506c3fb27SDimitry Andric // have been internalized or promoted. 324606c3fb27SDimitry Andric static ValueInfo findValueInfoForFunc(const Function &F, const Module &M, 324706c3fb27SDimitry Andric const ModuleSummaryIndex *ImportSummary) { 324806c3fb27SDimitry Andric // FIXME: Ideally we would retain the original GUID in some fashion on the 324906c3fb27SDimitry Andric // function (e.g. as metadata), but for now do our best to locate the 325006c3fb27SDimitry Andric // summary without that information. 325106c3fb27SDimitry Andric ValueInfo TheFnVI = ImportSummary->getValueInfo(F.getGUID()); 325206c3fb27SDimitry Andric if (!TheFnVI) 325306c3fb27SDimitry Andric // See if theFn was internalized, by checking index directly with 325406c3fb27SDimitry Andric // original name (this avoids the name adjustment done by getGUID() for 325506c3fb27SDimitry Andric // internal symbols). 325606c3fb27SDimitry Andric TheFnVI = ImportSummary->getValueInfo(GlobalValue::getGUID(F.getName())); 325706c3fb27SDimitry Andric if (TheFnVI) 325806c3fb27SDimitry Andric return TheFnVI; 325906c3fb27SDimitry Andric // Now query with the original name before any promotion was performed. 326006c3fb27SDimitry Andric StringRef OrigName = 326106c3fb27SDimitry Andric ModuleSummaryIndex::getOriginalNameBeforePromote(F.getName()); 326206c3fb27SDimitry Andric std::string OrigId = GlobalValue::getGlobalIdentifier( 326306c3fb27SDimitry Andric OrigName, GlobalValue::InternalLinkage, M.getSourceFileName()); 326406c3fb27SDimitry Andric TheFnVI = ImportSummary->getValueInfo(GlobalValue::getGUID(OrigId)); 326506c3fb27SDimitry Andric if (TheFnVI) 326606c3fb27SDimitry Andric return TheFnVI; 326706c3fb27SDimitry Andric // Could be a promoted local imported from another module. We need to pass 326806c3fb27SDimitry Andric // down more info here to find the original module id. For now, try with 326906c3fb27SDimitry Andric // the OrigName which might have been stored in the OidGuidMap in the 327006c3fb27SDimitry Andric // index. This would not work if there were same-named locals in multiple 327106c3fb27SDimitry Andric // modules, however. 327206c3fb27SDimitry Andric auto OrigGUID = 327306c3fb27SDimitry Andric ImportSummary->getGUIDFromOriginalID(GlobalValue::getGUID(OrigName)); 327406c3fb27SDimitry Andric if (OrigGUID) 327506c3fb27SDimitry Andric TheFnVI = ImportSummary->getValueInfo(OrigGUID); 327606c3fb27SDimitry Andric return TheFnVI; 327706c3fb27SDimitry Andric } 327806c3fb27SDimitry Andric 327906c3fb27SDimitry Andric bool MemProfContextDisambiguation::applyImport(Module &M) { 328006c3fb27SDimitry Andric assert(ImportSummary); 328106c3fb27SDimitry Andric bool Changed = false; 328206c3fb27SDimitry Andric 328306c3fb27SDimitry Andric auto IsMemProfClone = [](const Function &F) { 328406c3fb27SDimitry Andric return F.getName().contains(MemProfCloneSuffix); 328506c3fb27SDimitry Andric }; 328606c3fb27SDimitry Andric 328706c3fb27SDimitry Andric // We also need to clone any aliases that reference cloned functions, because 328806c3fb27SDimitry Andric // the modified callsites may invoke via the alias. Keep track of the aliases 328906c3fb27SDimitry Andric // for each function. 329006c3fb27SDimitry Andric std::map<const Function *, SmallPtrSet<const GlobalAlias *, 1>> 329106c3fb27SDimitry Andric FuncToAliasMap; 329206c3fb27SDimitry Andric for (auto &A : M.aliases()) { 329306c3fb27SDimitry Andric auto *Aliasee = A.getAliaseeObject(); 329406c3fb27SDimitry Andric if (auto *F = dyn_cast<Function>(Aliasee)) 329506c3fb27SDimitry Andric FuncToAliasMap[F].insert(&A); 329606c3fb27SDimitry Andric } 329706c3fb27SDimitry Andric 329806c3fb27SDimitry Andric for (auto &F : M) { 329906c3fb27SDimitry Andric if (F.isDeclaration() || IsMemProfClone(F)) 330006c3fb27SDimitry Andric continue; 330106c3fb27SDimitry Andric 330206c3fb27SDimitry Andric OptimizationRemarkEmitter ORE(&F); 330306c3fb27SDimitry Andric 330406c3fb27SDimitry Andric SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> VMaps; 330506c3fb27SDimitry Andric bool ClonesCreated = false; 330606c3fb27SDimitry Andric unsigned NumClonesCreated = 0; 330706c3fb27SDimitry Andric auto CloneFuncIfNeeded = [&](unsigned NumClones) { 330806c3fb27SDimitry Andric // We should at least have version 0 which is the original copy. 330906c3fb27SDimitry Andric assert(NumClones > 0); 331006c3fb27SDimitry Andric // If only one copy needed use original. 331106c3fb27SDimitry Andric if (NumClones == 1) 331206c3fb27SDimitry Andric return; 331306c3fb27SDimitry Andric // If we already performed cloning of this function, confirm that the 331406c3fb27SDimitry Andric // requested number of clones matches (the thin link should ensure the 331506c3fb27SDimitry Andric // number of clones for each constituent callsite is consistent within 331606c3fb27SDimitry Andric // each function), before returning. 331706c3fb27SDimitry Andric if (ClonesCreated) { 331806c3fb27SDimitry Andric assert(NumClonesCreated == NumClones); 331906c3fb27SDimitry Andric return; 332006c3fb27SDimitry Andric } 332106c3fb27SDimitry Andric VMaps = createFunctionClones(F, NumClones, M, ORE, FuncToAliasMap); 332206c3fb27SDimitry Andric // The first "clone" is the original copy, which doesn't have a VMap. 332306c3fb27SDimitry Andric assert(VMaps.size() == NumClones - 1); 332406c3fb27SDimitry Andric Changed = true; 332506c3fb27SDimitry Andric ClonesCreated = true; 332606c3fb27SDimitry Andric NumClonesCreated = NumClones; 332706c3fb27SDimitry Andric }; 332806c3fb27SDimitry Andric 3329*297eecfbSDimitry Andric auto CloneCallsite = [&](const CallsiteInfo &StackNode, CallBase *CB, 3330*297eecfbSDimitry Andric Function *CalledFunction) { 3331*297eecfbSDimitry Andric // Perform cloning if not yet done. 3332*297eecfbSDimitry Andric CloneFuncIfNeeded(/*NumClones=*/StackNode.Clones.size()); 3333*297eecfbSDimitry Andric 3334*297eecfbSDimitry Andric // Should have skipped indirect calls via mayHaveMemprofSummary. 3335*297eecfbSDimitry Andric assert(CalledFunction); 3336*297eecfbSDimitry Andric assert(!IsMemProfClone(*CalledFunction)); 3337*297eecfbSDimitry Andric 3338*297eecfbSDimitry Andric // Update the calls per the summary info. 3339*297eecfbSDimitry Andric // Save orig name since it gets updated in the first iteration 3340*297eecfbSDimitry Andric // below. 3341*297eecfbSDimitry Andric auto CalleeOrigName = CalledFunction->getName(); 3342*297eecfbSDimitry Andric for (unsigned J = 0; J < StackNode.Clones.size(); J++) { 3343*297eecfbSDimitry Andric // Do nothing if this version calls the original version of its 3344*297eecfbSDimitry Andric // callee. 3345*297eecfbSDimitry Andric if (!StackNode.Clones[J]) 3346*297eecfbSDimitry Andric continue; 3347*297eecfbSDimitry Andric auto NewF = M.getOrInsertFunction( 3348*297eecfbSDimitry Andric getMemProfFuncName(CalleeOrigName, StackNode.Clones[J]), 3349*297eecfbSDimitry Andric CalledFunction->getFunctionType()); 3350*297eecfbSDimitry Andric CallBase *CBClone; 3351*297eecfbSDimitry Andric // Copy 0 is the original function. 3352*297eecfbSDimitry Andric if (!J) 3353*297eecfbSDimitry Andric CBClone = CB; 3354*297eecfbSDimitry Andric else 3355*297eecfbSDimitry Andric CBClone = cast<CallBase>((*VMaps[J - 1])[CB]); 3356*297eecfbSDimitry Andric CBClone->setCalledFunction(NewF); 3357*297eecfbSDimitry Andric ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofCall", CBClone) 3358*297eecfbSDimitry Andric << ore::NV("Call", CBClone) << " in clone " 3359*297eecfbSDimitry Andric << ore::NV("Caller", CBClone->getFunction()) 3360*297eecfbSDimitry Andric << " assigned to call function clone " 3361*297eecfbSDimitry Andric << ore::NV("Callee", NewF.getCallee())); 3362*297eecfbSDimitry Andric } 3363*297eecfbSDimitry Andric }; 3364*297eecfbSDimitry Andric 336506c3fb27SDimitry Andric // Locate the summary for F. 336606c3fb27SDimitry Andric ValueInfo TheFnVI = findValueInfoForFunc(F, M, ImportSummary); 336706c3fb27SDimitry Andric // If not found, this could be an imported local (see comment in 336806c3fb27SDimitry Andric // findValueInfoForFunc). Skip for now as it will be cloned in its original 336906c3fb27SDimitry Andric // module (where it would have been promoted to global scope so should 337006c3fb27SDimitry Andric // satisfy any reference in this module). 337106c3fb27SDimitry Andric if (!TheFnVI) 337206c3fb27SDimitry Andric continue; 337306c3fb27SDimitry Andric 337406c3fb27SDimitry Andric auto *GVSummary = 337506c3fb27SDimitry Andric ImportSummary->findSummaryInModule(TheFnVI, M.getModuleIdentifier()); 337606c3fb27SDimitry Andric if (!GVSummary) 337706c3fb27SDimitry Andric // Must have been imported, use the first summary (might be multiple if 337806c3fb27SDimitry Andric // this was a linkonce_odr). 337906c3fb27SDimitry Andric GVSummary = TheFnVI.getSummaryList().front().get(); 338006c3fb27SDimitry Andric 338106c3fb27SDimitry Andric // If this was an imported alias skip it as we won't have the function 338206c3fb27SDimitry Andric // summary, and it should be cloned in the original module. 338306c3fb27SDimitry Andric if (isa<AliasSummary>(GVSummary)) 338406c3fb27SDimitry Andric continue; 338506c3fb27SDimitry Andric 338606c3fb27SDimitry Andric auto *FS = cast<FunctionSummary>(GVSummary->getBaseObject()); 338706c3fb27SDimitry Andric 338806c3fb27SDimitry Andric if (FS->allocs().empty() && FS->callsites().empty()) 338906c3fb27SDimitry Andric continue; 339006c3fb27SDimitry Andric 339106c3fb27SDimitry Andric auto SI = FS->callsites().begin(); 339206c3fb27SDimitry Andric auto AI = FS->allocs().begin(); 339306c3fb27SDimitry Andric 3394*297eecfbSDimitry Andric // To handle callsite infos synthesized for tail calls which have missing 3395*297eecfbSDimitry Andric // frames in the profiled context, map callee VI to the synthesized callsite 3396*297eecfbSDimitry Andric // info. 3397*297eecfbSDimitry Andric DenseMap<ValueInfo, CallsiteInfo> MapTailCallCalleeVIToCallsite; 3398*297eecfbSDimitry Andric // Iterate the callsites for this function in reverse, since we place all 3399*297eecfbSDimitry Andric // those synthesized for tail calls at the end. 3400*297eecfbSDimitry Andric for (auto CallsiteIt = FS->callsites().rbegin(); 3401*297eecfbSDimitry Andric CallsiteIt != FS->callsites().rend(); CallsiteIt++) { 3402*297eecfbSDimitry Andric auto &Callsite = *CallsiteIt; 3403*297eecfbSDimitry Andric // Stop as soon as we see a non-synthesized callsite info (see comment 3404*297eecfbSDimitry Andric // above loop). All the entries added for discovered tail calls have empty 3405*297eecfbSDimitry Andric // stack ids. 3406*297eecfbSDimitry Andric if (!Callsite.StackIdIndices.empty()) 3407*297eecfbSDimitry Andric break; 3408*297eecfbSDimitry Andric MapTailCallCalleeVIToCallsite.insert({Callsite.Callee, Callsite}); 3409*297eecfbSDimitry Andric } 3410*297eecfbSDimitry Andric 341106c3fb27SDimitry Andric // Assume for now that the instructions are in the exact same order 341206c3fb27SDimitry Andric // as when the summary was created, but confirm this is correct by 341306c3fb27SDimitry Andric // matching the stack ids. 341406c3fb27SDimitry Andric for (auto &BB : F) { 341506c3fb27SDimitry Andric for (auto &I : BB) { 341606c3fb27SDimitry Andric auto *CB = dyn_cast<CallBase>(&I); 341706c3fb27SDimitry Andric // Same handling as when creating module summary. 341806c3fb27SDimitry Andric if (!mayHaveMemprofSummary(CB)) 341906c3fb27SDimitry Andric continue; 342006c3fb27SDimitry Andric 34215f757f3fSDimitry Andric auto *CalledValue = CB->getCalledOperand(); 34225f757f3fSDimitry Andric auto *CalledFunction = CB->getCalledFunction(); 34235f757f3fSDimitry Andric if (CalledValue && !CalledFunction) { 34245f757f3fSDimitry Andric CalledValue = CalledValue->stripPointerCasts(); 34255f757f3fSDimitry Andric // Stripping pointer casts can reveal a called function. 34265f757f3fSDimitry Andric CalledFunction = dyn_cast<Function>(CalledValue); 34275f757f3fSDimitry Andric } 34285f757f3fSDimitry Andric // Check if this is an alias to a function. If so, get the 34295f757f3fSDimitry Andric // called aliasee for the checks below. 34305f757f3fSDimitry Andric if (auto *GA = dyn_cast<GlobalAlias>(CalledValue)) { 34315f757f3fSDimitry Andric assert(!CalledFunction && 34325f757f3fSDimitry Andric "Expected null called function in callsite for alias"); 34335f757f3fSDimitry Andric CalledFunction = dyn_cast<Function>(GA->getAliaseeObject()); 34345f757f3fSDimitry Andric } 34355f757f3fSDimitry Andric 343606c3fb27SDimitry Andric CallStack<MDNode, MDNode::op_iterator> CallsiteContext( 343706c3fb27SDimitry Andric I.getMetadata(LLVMContext::MD_callsite)); 343806c3fb27SDimitry Andric auto *MemProfMD = I.getMetadata(LLVMContext::MD_memprof); 343906c3fb27SDimitry Andric 344006c3fb27SDimitry Andric // Include allocs that were already assigned a memprof function 344106c3fb27SDimitry Andric // attribute in the statistics. 344206c3fb27SDimitry Andric if (CB->getAttributes().hasFnAttr("memprof")) { 344306c3fb27SDimitry Andric assert(!MemProfMD); 344406c3fb27SDimitry Andric CB->getAttributes().getFnAttr("memprof").getValueAsString() == "cold" 344506c3fb27SDimitry Andric ? AllocTypeColdThinBackend++ 344606c3fb27SDimitry Andric : AllocTypeNotColdThinBackend++; 344706c3fb27SDimitry Andric OrigAllocsThinBackend++; 344806c3fb27SDimitry Andric AllocVersionsThinBackend++; 344906c3fb27SDimitry Andric if (!MaxAllocVersionsThinBackend) 345006c3fb27SDimitry Andric MaxAllocVersionsThinBackend = 1; 345106c3fb27SDimitry Andric // Remove any remaining callsite metadata and we can skip the rest of 345206c3fb27SDimitry Andric // the handling for this instruction, since no cloning needed. 345306c3fb27SDimitry Andric I.setMetadata(LLVMContext::MD_callsite, nullptr); 345406c3fb27SDimitry Andric continue; 345506c3fb27SDimitry Andric } 345606c3fb27SDimitry Andric 345706c3fb27SDimitry Andric if (MemProfMD) { 345806c3fb27SDimitry Andric // Consult the next alloc node. 345906c3fb27SDimitry Andric assert(AI != FS->allocs().end()); 346006c3fb27SDimitry Andric auto &AllocNode = *(AI++); 346106c3fb27SDimitry Andric 346206c3fb27SDimitry Andric // Sanity check that the MIB stack ids match between the summary and 346306c3fb27SDimitry Andric // instruction metadata. 346406c3fb27SDimitry Andric auto MIBIter = AllocNode.MIBs.begin(); 346506c3fb27SDimitry Andric for (auto &MDOp : MemProfMD->operands()) { 346606c3fb27SDimitry Andric assert(MIBIter != AllocNode.MIBs.end()); 346706c3fb27SDimitry Andric LLVM_ATTRIBUTE_UNUSED auto StackIdIndexIter = 346806c3fb27SDimitry Andric MIBIter->StackIdIndices.begin(); 346906c3fb27SDimitry Andric auto *MIBMD = cast<const MDNode>(MDOp); 347006c3fb27SDimitry Andric MDNode *StackMDNode = getMIBStackNode(MIBMD); 347106c3fb27SDimitry Andric assert(StackMDNode); 347206c3fb27SDimitry Andric SmallVector<unsigned> StackIdsFromMetadata; 347306c3fb27SDimitry Andric CallStack<MDNode, MDNode::op_iterator> StackContext(StackMDNode); 347406c3fb27SDimitry Andric for (auto ContextIter = 347506c3fb27SDimitry Andric StackContext.beginAfterSharedPrefix(CallsiteContext); 347606c3fb27SDimitry Andric ContextIter != StackContext.end(); ++ContextIter) { 347706c3fb27SDimitry Andric // If this is a direct recursion, simply skip the duplicate 347806c3fb27SDimitry Andric // entries, to be consistent with how the summary ids were 347906c3fb27SDimitry Andric // generated during ModuleSummaryAnalysis. 348006c3fb27SDimitry Andric if (!StackIdsFromMetadata.empty() && 348106c3fb27SDimitry Andric StackIdsFromMetadata.back() == *ContextIter) 348206c3fb27SDimitry Andric continue; 348306c3fb27SDimitry Andric assert(StackIdIndexIter != MIBIter->StackIdIndices.end()); 348406c3fb27SDimitry Andric assert(ImportSummary->getStackIdAtIndex(*StackIdIndexIter) == 348506c3fb27SDimitry Andric *ContextIter); 348606c3fb27SDimitry Andric StackIdIndexIter++; 348706c3fb27SDimitry Andric } 348806c3fb27SDimitry Andric MIBIter++; 348906c3fb27SDimitry Andric } 349006c3fb27SDimitry Andric 349106c3fb27SDimitry Andric // Perform cloning if not yet done. 349206c3fb27SDimitry Andric CloneFuncIfNeeded(/*NumClones=*/AllocNode.Versions.size()); 349306c3fb27SDimitry Andric 349406c3fb27SDimitry Andric OrigAllocsThinBackend++; 349506c3fb27SDimitry Andric AllocVersionsThinBackend += AllocNode.Versions.size(); 349606c3fb27SDimitry Andric if (MaxAllocVersionsThinBackend < AllocNode.Versions.size()) 349706c3fb27SDimitry Andric MaxAllocVersionsThinBackend = AllocNode.Versions.size(); 349806c3fb27SDimitry Andric 349906c3fb27SDimitry Andric // If there is only one version that means we didn't end up 350006c3fb27SDimitry Andric // considering this function for cloning, and in that case the alloc 350106c3fb27SDimitry Andric // will still be none type or should have gotten the default NotCold. 350206c3fb27SDimitry Andric // Skip that after calling clone helper since that does some sanity 350306c3fb27SDimitry Andric // checks that confirm we haven't decided yet that we need cloning. 350406c3fb27SDimitry Andric if (AllocNode.Versions.size() == 1) { 350506c3fb27SDimitry Andric assert((AllocationType)AllocNode.Versions[0] == 350606c3fb27SDimitry Andric AllocationType::NotCold || 350706c3fb27SDimitry Andric (AllocationType)AllocNode.Versions[0] == 350806c3fb27SDimitry Andric AllocationType::None); 350906c3fb27SDimitry Andric UnclonableAllocsThinBackend++; 351006c3fb27SDimitry Andric continue; 351106c3fb27SDimitry Andric } 351206c3fb27SDimitry Andric 351306c3fb27SDimitry Andric // All versions should have a singular allocation type. 351406c3fb27SDimitry Andric assert(llvm::none_of(AllocNode.Versions, [](uint8_t Type) { 351506c3fb27SDimitry Andric return Type == ((uint8_t)AllocationType::NotCold | 351606c3fb27SDimitry Andric (uint8_t)AllocationType::Cold); 351706c3fb27SDimitry Andric })); 351806c3fb27SDimitry Andric 351906c3fb27SDimitry Andric // Update the allocation types per the summary info. 352006c3fb27SDimitry Andric for (unsigned J = 0; J < AllocNode.Versions.size(); J++) { 352106c3fb27SDimitry Andric // Ignore any that didn't get an assigned allocation type. 352206c3fb27SDimitry Andric if (AllocNode.Versions[J] == (uint8_t)AllocationType::None) 352306c3fb27SDimitry Andric continue; 352406c3fb27SDimitry Andric AllocationType AllocTy = (AllocationType)AllocNode.Versions[J]; 352506c3fb27SDimitry Andric AllocTy == AllocationType::Cold ? AllocTypeColdThinBackend++ 352606c3fb27SDimitry Andric : AllocTypeNotColdThinBackend++; 352706c3fb27SDimitry Andric std::string AllocTypeString = getAllocTypeAttributeString(AllocTy); 352806c3fb27SDimitry Andric auto A = llvm::Attribute::get(F.getContext(), "memprof", 352906c3fb27SDimitry Andric AllocTypeString); 353006c3fb27SDimitry Andric CallBase *CBClone; 353106c3fb27SDimitry Andric // Copy 0 is the original function. 353206c3fb27SDimitry Andric if (!J) 353306c3fb27SDimitry Andric CBClone = CB; 353406c3fb27SDimitry Andric else 353506c3fb27SDimitry Andric // Since VMaps are only created for new clones, we index with 353606c3fb27SDimitry Andric // clone J-1 (J==0 is the original clone and does not have a VMaps 353706c3fb27SDimitry Andric // entry). 353806c3fb27SDimitry Andric CBClone = cast<CallBase>((*VMaps[J - 1])[CB]); 353906c3fb27SDimitry Andric CBClone->addFnAttr(A); 354006c3fb27SDimitry Andric ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofAttribute", CBClone) 354106c3fb27SDimitry Andric << ore::NV("AllocationCall", CBClone) << " in clone " 354206c3fb27SDimitry Andric << ore::NV("Caller", CBClone->getFunction()) 354306c3fb27SDimitry Andric << " marked with memprof allocation attribute " 354406c3fb27SDimitry Andric << ore::NV("Attribute", AllocTypeString)); 354506c3fb27SDimitry Andric } 354606c3fb27SDimitry Andric } else if (!CallsiteContext.empty()) { 354706c3fb27SDimitry Andric // Consult the next callsite node. 354806c3fb27SDimitry Andric assert(SI != FS->callsites().end()); 354906c3fb27SDimitry Andric auto &StackNode = *(SI++); 355006c3fb27SDimitry Andric 355106c3fb27SDimitry Andric #ifndef NDEBUG 355206c3fb27SDimitry Andric // Sanity check that the stack ids match between the summary and 355306c3fb27SDimitry Andric // instruction metadata. 355406c3fb27SDimitry Andric auto StackIdIndexIter = StackNode.StackIdIndices.begin(); 355506c3fb27SDimitry Andric for (auto StackId : CallsiteContext) { 355606c3fb27SDimitry Andric assert(StackIdIndexIter != StackNode.StackIdIndices.end()); 355706c3fb27SDimitry Andric assert(ImportSummary->getStackIdAtIndex(*StackIdIndexIter) == 355806c3fb27SDimitry Andric StackId); 355906c3fb27SDimitry Andric StackIdIndexIter++; 356006c3fb27SDimitry Andric } 356106c3fb27SDimitry Andric #endif 356206c3fb27SDimitry Andric 3563*297eecfbSDimitry Andric CloneCallsite(StackNode, CB, CalledFunction); 3564*297eecfbSDimitry Andric } else if (CB->isTailCall()) { 3565*297eecfbSDimitry Andric // Locate the synthesized callsite info for the callee VI, if any was 3566*297eecfbSDimitry Andric // created, and use that for cloning. 3567*297eecfbSDimitry Andric ValueInfo CalleeVI = 3568*297eecfbSDimitry Andric findValueInfoForFunc(*CalledFunction, M, ImportSummary); 3569*297eecfbSDimitry Andric if (CalleeVI && MapTailCallCalleeVIToCallsite.count(CalleeVI)) { 3570*297eecfbSDimitry Andric auto Callsite = MapTailCallCalleeVIToCallsite.find(CalleeVI); 3571*297eecfbSDimitry Andric assert(Callsite != MapTailCallCalleeVIToCallsite.end()); 3572*297eecfbSDimitry Andric CloneCallsite(Callsite->second, CB, CalledFunction); 357306c3fb27SDimitry Andric } 357406c3fb27SDimitry Andric } 357506c3fb27SDimitry Andric // Memprof and callsite metadata on memory allocations no longer needed. 357606c3fb27SDimitry Andric I.setMetadata(LLVMContext::MD_memprof, nullptr); 357706c3fb27SDimitry Andric I.setMetadata(LLVMContext::MD_callsite, nullptr); 357806c3fb27SDimitry Andric } 357906c3fb27SDimitry Andric } 358006c3fb27SDimitry Andric } 358106c3fb27SDimitry Andric 358206c3fb27SDimitry Andric return Changed; 358306c3fb27SDimitry Andric } 358406c3fb27SDimitry Andric 358506c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 358606c3fb27SDimitry Andric bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::process() { 358706c3fb27SDimitry Andric if (DumpCCG) { 358806c3fb27SDimitry Andric dbgs() << "CCG before cloning:\n"; 358906c3fb27SDimitry Andric dbgs() << *this; 359006c3fb27SDimitry Andric } 359106c3fb27SDimitry Andric if (ExportToDot) 359206c3fb27SDimitry Andric exportToDot("postbuild"); 359306c3fb27SDimitry Andric 359406c3fb27SDimitry Andric if (VerifyCCG) { 359506c3fb27SDimitry Andric check(); 359606c3fb27SDimitry Andric } 359706c3fb27SDimitry Andric 359806c3fb27SDimitry Andric identifyClones(); 359906c3fb27SDimitry Andric 360006c3fb27SDimitry Andric if (VerifyCCG) { 360106c3fb27SDimitry Andric check(); 360206c3fb27SDimitry Andric } 360306c3fb27SDimitry Andric 360406c3fb27SDimitry Andric if (DumpCCG) { 360506c3fb27SDimitry Andric dbgs() << "CCG after cloning:\n"; 360606c3fb27SDimitry Andric dbgs() << *this; 360706c3fb27SDimitry Andric } 360806c3fb27SDimitry Andric if (ExportToDot) 360906c3fb27SDimitry Andric exportToDot("cloned"); 361006c3fb27SDimitry Andric 361106c3fb27SDimitry Andric bool Changed = assignFunctions(); 361206c3fb27SDimitry Andric 361306c3fb27SDimitry Andric if (DumpCCG) { 361406c3fb27SDimitry Andric dbgs() << "CCG after assigning function clones:\n"; 361506c3fb27SDimitry Andric dbgs() << *this; 361606c3fb27SDimitry Andric } 361706c3fb27SDimitry Andric if (ExportToDot) 361806c3fb27SDimitry Andric exportToDot("clonefuncassign"); 361906c3fb27SDimitry Andric 362006c3fb27SDimitry Andric return Changed; 362106c3fb27SDimitry Andric } 362206c3fb27SDimitry Andric 362306c3fb27SDimitry Andric bool MemProfContextDisambiguation::processModule( 362406c3fb27SDimitry Andric Module &M, 362506c3fb27SDimitry Andric function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter) { 362606c3fb27SDimitry Andric 362706c3fb27SDimitry Andric // If we have an import summary, then the cloning decisions were made during 362806c3fb27SDimitry Andric // the thin link on the index. Apply them and return. 362906c3fb27SDimitry Andric if (ImportSummary) 363006c3fb27SDimitry Andric return applyImport(M); 363106c3fb27SDimitry Andric 363206c3fb27SDimitry Andric // TODO: If/when other types of memprof cloning are enabled beyond just for 363306c3fb27SDimitry Andric // hot and cold, we will need to change this to individually control the 363406c3fb27SDimitry Andric // AllocationType passed to addStackNodesForMIB during CCG construction. 363506c3fb27SDimitry Andric // Note that we specifically check this after applying imports above, so that 363606c3fb27SDimitry Andric // the option isn't needed to be passed to distributed ThinLTO backend 363706c3fb27SDimitry Andric // clang processes, which won't necessarily have visibility into the linker 363806c3fb27SDimitry Andric // dependences. Instead the information is communicated from the LTO link to 363906c3fb27SDimitry Andric // the backends via the combined summary index. 364006c3fb27SDimitry Andric if (!SupportsHotColdNew) 364106c3fb27SDimitry Andric return false; 364206c3fb27SDimitry Andric 364306c3fb27SDimitry Andric ModuleCallsiteContextGraph CCG(M, OREGetter); 364406c3fb27SDimitry Andric return CCG.process(); 364506c3fb27SDimitry Andric } 364606c3fb27SDimitry Andric 364706c3fb27SDimitry Andric MemProfContextDisambiguation::MemProfContextDisambiguation( 364806c3fb27SDimitry Andric const ModuleSummaryIndex *Summary) 364906c3fb27SDimitry Andric : ImportSummary(Summary) { 365006c3fb27SDimitry Andric if (ImportSummary) { 365106c3fb27SDimitry Andric // The MemProfImportSummary should only be used for testing ThinLTO 365206c3fb27SDimitry Andric // distributed backend handling via opt, in which case we don't have a 365306c3fb27SDimitry Andric // summary from the pass pipeline. 365406c3fb27SDimitry Andric assert(MemProfImportSummary.empty()); 365506c3fb27SDimitry Andric return; 365606c3fb27SDimitry Andric } 365706c3fb27SDimitry Andric if (MemProfImportSummary.empty()) 365806c3fb27SDimitry Andric return; 365906c3fb27SDimitry Andric 366006c3fb27SDimitry Andric auto ReadSummaryFile = 366106c3fb27SDimitry Andric errorOrToExpected(MemoryBuffer::getFile(MemProfImportSummary)); 366206c3fb27SDimitry Andric if (!ReadSummaryFile) { 366306c3fb27SDimitry Andric logAllUnhandledErrors(ReadSummaryFile.takeError(), errs(), 366406c3fb27SDimitry Andric "Error loading file '" + MemProfImportSummary + 366506c3fb27SDimitry Andric "': "); 366606c3fb27SDimitry Andric return; 366706c3fb27SDimitry Andric } 366806c3fb27SDimitry Andric auto ImportSummaryForTestingOrErr = getModuleSummaryIndex(**ReadSummaryFile); 366906c3fb27SDimitry Andric if (!ImportSummaryForTestingOrErr) { 367006c3fb27SDimitry Andric logAllUnhandledErrors(ImportSummaryForTestingOrErr.takeError(), errs(), 367106c3fb27SDimitry Andric "Error parsing file '" + MemProfImportSummary + 367206c3fb27SDimitry Andric "': "); 367306c3fb27SDimitry Andric return; 367406c3fb27SDimitry Andric } 367506c3fb27SDimitry Andric ImportSummaryForTesting = std::move(*ImportSummaryForTestingOrErr); 367606c3fb27SDimitry Andric ImportSummary = ImportSummaryForTesting.get(); 367706c3fb27SDimitry Andric } 367806c3fb27SDimitry Andric 367906c3fb27SDimitry Andric PreservedAnalyses MemProfContextDisambiguation::run(Module &M, 368006c3fb27SDimitry Andric ModuleAnalysisManager &AM) { 368106c3fb27SDimitry Andric auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 368206c3fb27SDimitry Andric auto OREGetter = [&](Function *F) -> OptimizationRemarkEmitter & { 368306c3fb27SDimitry Andric return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*F); 368406c3fb27SDimitry Andric }; 368506c3fb27SDimitry Andric if (!processModule(M, OREGetter)) 368606c3fb27SDimitry Andric return PreservedAnalyses::all(); 368706c3fb27SDimitry Andric return PreservedAnalyses::none(); 368806c3fb27SDimitry Andric } 368906c3fb27SDimitry Andric 369006c3fb27SDimitry Andric void MemProfContextDisambiguation::run( 369106c3fb27SDimitry Andric ModuleSummaryIndex &Index, 369206c3fb27SDimitry Andric function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)> 369306c3fb27SDimitry Andric isPrevailing) { 369406c3fb27SDimitry Andric // TODO: If/when other types of memprof cloning are enabled beyond just for 369506c3fb27SDimitry Andric // hot and cold, we will need to change this to individually control the 369606c3fb27SDimitry Andric // AllocationType passed to addStackNodesForMIB during CCG construction. 369706c3fb27SDimitry Andric // The index was set from the option, so these should be in sync. 369806c3fb27SDimitry Andric assert(Index.withSupportsHotColdNew() == SupportsHotColdNew); 369906c3fb27SDimitry Andric if (!SupportsHotColdNew) 370006c3fb27SDimitry Andric return; 370106c3fb27SDimitry Andric 370206c3fb27SDimitry Andric IndexCallsiteContextGraph CCG(Index, isPrevailing); 370306c3fb27SDimitry Andric CCG.process(); 370406c3fb27SDimitry Andric } 3705