106c3fb27SDimitry Andric //==-- MemProfContextDisambiguation.cpp - Disambiguate contexts -------------=// 206c3fb27SDimitry Andric // 306c3fb27SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 406c3fb27SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 506c3fb27SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 606c3fb27SDimitry Andric // 706c3fb27SDimitry Andric //===----------------------------------------------------------------------===// 806c3fb27SDimitry Andric // 906c3fb27SDimitry Andric // This file implements support for context disambiguation of allocation 1006c3fb27SDimitry Andric // calls for profile guided heap optimization. Specifically, it uses Memprof 1106c3fb27SDimitry Andric // profiles which indicate context specific allocation behavior (currently 1206c3fb27SDimitry Andric // distinguishing cold vs hot memory allocations). Cloning is performed to 1306c3fb27SDimitry Andric // expose the cold allocation call contexts, and the allocation calls are 1406c3fb27SDimitry Andric // subsequently annotated with an attribute for later transformation. 1506c3fb27SDimitry Andric // 1606c3fb27SDimitry Andric // The transformations can be performed either directly on IR (regular LTO), or 1706c3fb27SDimitry Andric // on a ThinLTO index (and later applied to the IR during the ThinLTO backend). 1806c3fb27SDimitry Andric // Both types of LTO operate on a the same base graph representation, which 1906c3fb27SDimitry Andric // uses CRTP to support either IR or Index formats. 2006c3fb27SDimitry Andric // 2106c3fb27SDimitry Andric //===----------------------------------------------------------------------===// 2206c3fb27SDimitry Andric 2306c3fb27SDimitry Andric #include "llvm/Transforms/IPO/MemProfContextDisambiguation.h" 2406c3fb27SDimitry Andric #include "llvm/ADT/DenseMap.h" 2506c3fb27SDimitry Andric #include "llvm/ADT/DenseSet.h" 2606c3fb27SDimitry Andric #include "llvm/ADT/MapVector.h" 2706c3fb27SDimitry Andric #include "llvm/ADT/SetOperations.h" 2806c3fb27SDimitry Andric #include "llvm/ADT/SmallPtrSet.h" 2906c3fb27SDimitry Andric #include "llvm/ADT/SmallSet.h" 3006c3fb27SDimitry Andric #include "llvm/ADT/SmallVector.h" 3106c3fb27SDimitry Andric #include "llvm/ADT/Statistic.h" 3206c3fb27SDimitry Andric #include "llvm/Analysis/MemoryProfileInfo.h" 3306c3fb27SDimitry Andric #include "llvm/Analysis/ModuleSummaryAnalysis.h" 3406c3fb27SDimitry Andric #include "llvm/Analysis/OptimizationRemarkEmitter.h" 3506c3fb27SDimitry Andric #include "llvm/Bitcode/BitcodeReader.h" 3606c3fb27SDimitry Andric #include "llvm/IR/Constants.h" 3706c3fb27SDimitry Andric #include "llvm/IR/Instructions.h" 3806c3fb27SDimitry Andric #include "llvm/IR/Module.h" 3906c3fb27SDimitry Andric #include "llvm/IR/ModuleSummaryIndex.h" 4006c3fb27SDimitry Andric #include "llvm/Pass.h" 4106c3fb27SDimitry Andric #include "llvm/Support/CommandLine.h" 4206c3fb27SDimitry Andric #include "llvm/Support/FileSystem.h" 4306c3fb27SDimitry Andric #include "llvm/Support/GraphWriter.h" 4406c3fb27SDimitry Andric #include "llvm/Support/raw_ostream.h" 4506c3fb27SDimitry Andric #include "llvm/Transforms/IPO.h" 4606c3fb27SDimitry Andric #include "llvm/Transforms/Utils/Cloning.h" 4706c3fb27SDimitry Andric #include <sstream> 48*7a6dacacSDimitry Andric #include <unordered_map> 4906c3fb27SDimitry Andric #include <vector> 5006c3fb27SDimitry Andric using namespace llvm; 5106c3fb27SDimitry Andric using namespace llvm::memprof; 5206c3fb27SDimitry Andric 5306c3fb27SDimitry Andric #define DEBUG_TYPE "memprof-context-disambiguation" 5406c3fb27SDimitry Andric 5506c3fb27SDimitry Andric STATISTIC(FunctionClonesAnalysis, 5606c3fb27SDimitry Andric "Number of function clones created during whole program analysis"); 5706c3fb27SDimitry Andric STATISTIC(FunctionClonesThinBackend, 5806c3fb27SDimitry Andric "Number of function clones created during ThinLTO backend"); 5906c3fb27SDimitry Andric STATISTIC(FunctionsClonedThinBackend, 6006c3fb27SDimitry Andric "Number of functions that had clones created during ThinLTO backend"); 6106c3fb27SDimitry Andric STATISTIC(AllocTypeNotCold, "Number of not cold static allocations (possibly " 6206c3fb27SDimitry Andric "cloned) during whole program analysis"); 6306c3fb27SDimitry Andric STATISTIC(AllocTypeCold, "Number of cold static allocations (possibly cloned) " 6406c3fb27SDimitry Andric "during whole program analysis"); 6506c3fb27SDimitry Andric STATISTIC(AllocTypeNotColdThinBackend, 6606c3fb27SDimitry Andric "Number of not cold static allocations (possibly cloned) during " 6706c3fb27SDimitry Andric "ThinLTO backend"); 6806c3fb27SDimitry Andric STATISTIC(AllocTypeColdThinBackend, "Number of cold static allocations " 6906c3fb27SDimitry Andric "(possibly cloned) during ThinLTO backend"); 7006c3fb27SDimitry Andric STATISTIC(OrigAllocsThinBackend, 7106c3fb27SDimitry Andric "Number of original (not cloned) allocations with memprof profiles " 7206c3fb27SDimitry Andric "during ThinLTO backend"); 7306c3fb27SDimitry Andric STATISTIC( 7406c3fb27SDimitry Andric AllocVersionsThinBackend, 7506c3fb27SDimitry Andric "Number of allocation versions (including clones) during ThinLTO backend"); 7606c3fb27SDimitry Andric STATISTIC(MaxAllocVersionsThinBackend, 7706c3fb27SDimitry Andric "Maximum number of allocation versions created for an original " 7806c3fb27SDimitry Andric "allocation during ThinLTO backend"); 7906c3fb27SDimitry Andric STATISTIC(UnclonableAllocsThinBackend, 8006c3fb27SDimitry Andric "Number of unclonable ambigous allocations during ThinLTO backend"); 81297eecfbSDimitry Andric STATISTIC(RemovedEdgesWithMismatchedCallees, 82297eecfbSDimitry Andric "Number of edges removed due to mismatched callees (profiled vs IR)"); 83297eecfbSDimitry Andric STATISTIC(FoundProfiledCalleeCount, 84297eecfbSDimitry Andric "Number of profiled callees found via tail calls"); 85297eecfbSDimitry Andric STATISTIC(FoundProfiledCalleeDepth, 86297eecfbSDimitry Andric "Aggregate depth of profiled callees found via tail calls"); 87297eecfbSDimitry Andric STATISTIC(FoundProfiledCalleeMaxDepth, 88297eecfbSDimitry Andric "Maximum depth of profiled callees found via tail calls"); 89297eecfbSDimitry Andric STATISTIC(FoundProfiledCalleeNonUniquelyCount, 90297eecfbSDimitry Andric "Number of profiled callees found via multiple tail call chains"); 9106c3fb27SDimitry Andric 9206c3fb27SDimitry Andric static cl::opt<std::string> DotFilePathPrefix( 9306c3fb27SDimitry Andric "memprof-dot-file-path-prefix", cl::init(""), cl::Hidden, 9406c3fb27SDimitry Andric cl::value_desc("filename"), 9506c3fb27SDimitry Andric cl::desc("Specify the path prefix of the MemProf dot files.")); 9606c3fb27SDimitry Andric 9706c3fb27SDimitry Andric static cl::opt<bool> ExportToDot("memprof-export-to-dot", cl::init(false), 9806c3fb27SDimitry Andric cl::Hidden, 9906c3fb27SDimitry Andric cl::desc("Export graph to dot files.")); 10006c3fb27SDimitry Andric 10106c3fb27SDimitry Andric static cl::opt<bool> 10206c3fb27SDimitry Andric DumpCCG("memprof-dump-ccg", cl::init(false), cl::Hidden, 10306c3fb27SDimitry Andric cl::desc("Dump CallingContextGraph to stdout after each stage.")); 10406c3fb27SDimitry Andric 10506c3fb27SDimitry Andric static cl::opt<bool> 10606c3fb27SDimitry Andric VerifyCCG("memprof-verify-ccg", cl::init(false), cl::Hidden, 10706c3fb27SDimitry Andric cl::desc("Perform verification checks on CallingContextGraph.")); 10806c3fb27SDimitry Andric 10906c3fb27SDimitry Andric static cl::opt<bool> 11006c3fb27SDimitry Andric VerifyNodes("memprof-verify-nodes", cl::init(false), cl::Hidden, 11106c3fb27SDimitry Andric cl::desc("Perform frequent verification checks on nodes.")); 11206c3fb27SDimitry Andric 11306c3fb27SDimitry Andric static cl::opt<std::string> MemProfImportSummary( 11406c3fb27SDimitry Andric "memprof-import-summary", 11506c3fb27SDimitry Andric cl::desc("Import summary to use for testing the ThinLTO backend via opt"), 11606c3fb27SDimitry Andric cl::Hidden); 11706c3fb27SDimitry Andric 118297eecfbSDimitry Andric static cl::opt<unsigned> 119297eecfbSDimitry Andric TailCallSearchDepth("memprof-tail-call-search-depth", cl::init(5), 120297eecfbSDimitry Andric cl::Hidden, 121297eecfbSDimitry Andric cl::desc("Max depth to recursively search for missing " 122297eecfbSDimitry Andric "frames through tail calls.")); 123297eecfbSDimitry Andric 1245f757f3fSDimitry Andric namespace llvm { 12506c3fb27SDimitry Andric // Indicate we are linking with an allocator that supports hot/cold operator 12606c3fb27SDimitry Andric // new interfaces. 12706c3fb27SDimitry Andric cl::opt<bool> SupportsHotColdNew( 12806c3fb27SDimitry Andric "supports-hot-cold-new", cl::init(false), cl::Hidden, 12906c3fb27SDimitry Andric cl::desc("Linking with hot/cold operator new interfaces")); 1305f757f3fSDimitry Andric } // namespace llvm 13106c3fb27SDimitry Andric 13206c3fb27SDimitry Andric namespace { 13306c3fb27SDimitry Andric /// CRTP base for graphs built from either IR or ThinLTO summary index. 13406c3fb27SDimitry Andric /// 13506c3fb27SDimitry Andric /// The graph represents the call contexts in all memprof metadata on allocation 13606c3fb27SDimitry Andric /// calls, with nodes for the allocations themselves, as well as for the calls 13706c3fb27SDimitry Andric /// in each context. The graph is initially built from the allocation memprof 13806c3fb27SDimitry Andric /// metadata (or summary) MIBs. It is then updated to match calls with callsite 13906c3fb27SDimitry Andric /// metadata onto the nodes, updating it to reflect any inlining performed on 14006c3fb27SDimitry Andric /// those calls. 14106c3fb27SDimitry Andric /// 14206c3fb27SDimitry Andric /// Each MIB (representing an allocation's call context with allocation 14306c3fb27SDimitry Andric /// behavior) is assigned a unique context id during the graph build. The edges 14406c3fb27SDimitry Andric /// and nodes in the graph are decorated with the context ids they carry. This 14506c3fb27SDimitry Andric /// is used to correctly update the graph when cloning is performed so that we 14606c3fb27SDimitry Andric /// can uniquify the context for a single (possibly cloned) allocation. 14706c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 14806c3fb27SDimitry Andric class CallsiteContextGraph { 14906c3fb27SDimitry Andric public: 15006c3fb27SDimitry Andric CallsiteContextGraph() = default; 15106c3fb27SDimitry Andric CallsiteContextGraph(const CallsiteContextGraph &) = default; 15206c3fb27SDimitry Andric CallsiteContextGraph(CallsiteContextGraph &&) = default; 15306c3fb27SDimitry Andric 15406c3fb27SDimitry Andric /// Main entry point to perform analysis and transformations on graph. 15506c3fb27SDimitry Andric bool process(); 15606c3fb27SDimitry Andric 15706c3fb27SDimitry Andric /// Perform cloning on the graph necessary to uniquely identify the allocation 15806c3fb27SDimitry Andric /// behavior of an allocation based on its context. 15906c3fb27SDimitry Andric void identifyClones(); 16006c3fb27SDimitry Andric 16106c3fb27SDimitry Andric /// Assign callsite clones to functions, cloning functions as needed to 16206c3fb27SDimitry Andric /// accommodate the combinations of their callsite clones reached by callers. 16306c3fb27SDimitry Andric /// For regular LTO this clones functions and callsites in the IR, but for 16406c3fb27SDimitry Andric /// ThinLTO the cloning decisions are noted in the summaries and later applied 16506c3fb27SDimitry Andric /// in applyImport. 16606c3fb27SDimitry Andric bool assignFunctions(); 16706c3fb27SDimitry Andric 16806c3fb27SDimitry Andric void dump() const; 16906c3fb27SDimitry Andric void print(raw_ostream &OS) const; 17006c3fb27SDimitry Andric 17106c3fb27SDimitry Andric friend raw_ostream &operator<<(raw_ostream &OS, 17206c3fb27SDimitry Andric const CallsiteContextGraph &CCG) { 17306c3fb27SDimitry Andric CCG.print(OS); 17406c3fb27SDimitry Andric return OS; 17506c3fb27SDimitry Andric } 17606c3fb27SDimitry Andric 17706c3fb27SDimitry Andric friend struct GraphTraits< 17806c3fb27SDimitry Andric const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>; 17906c3fb27SDimitry Andric friend struct DOTGraphTraits< 18006c3fb27SDimitry Andric const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>; 18106c3fb27SDimitry Andric 18206c3fb27SDimitry Andric void exportToDot(std::string Label) const; 18306c3fb27SDimitry Andric 18406c3fb27SDimitry Andric /// Represents a function clone via FuncTy pointer and clone number pair. 18506c3fb27SDimitry Andric struct FuncInfo final 18606c3fb27SDimitry Andric : public std::pair<FuncTy *, unsigned /*Clone number*/> { 18706c3fb27SDimitry Andric using Base = std::pair<FuncTy *, unsigned>; 18806c3fb27SDimitry Andric FuncInfo(const Base &B) : Base(B) {} 18906c3fb27SDimitry Andric FuncInfo(FuncTy *F = nullptr, unsigned CloneNo = 0) : Base(F, CloneNo) {} 19006c3fb27SDimitry Andric explicit operator bool() const { return this->first != nullptr; } 19106c3fb27SDimitry Andric FuncTy *func() const { return this->first; } 19206c3fb27SDimitry Andric unsigned cloneNo() const { return this->second; } 19306c3fb27SDimitry Andric }; 19406c3fb27SDimitry Andric 19506c3fb27SDimitry Andric /// Represents a callsite clone via CallTy and clone number pair. 19606c3fb27SDimitry Andric struct CallInfo final : public std::pair<CallTy, unsigned /*Clone number*/> { 19706c3fb27SDimitry Andric using Base = std::pair<CallTy, unsigned>; 19806c3fb27SDimitry Andric CallInfo(const Base &B) : Base(B) {} 19906c3fb27SDimitry Andric CallInfo(CallTy Call = nullptr, unsigned CloneNo = 0) 20006c3fb27SDimitry Andric : Base(Call, CloneNo) {} 20106c3fb27SDimitry Andric explicit operator bool() const { return (bool)this->first; } 20206c3fb27SDimitry Andric CallTy call() const { return this->first; } 20306c3fb27SDimitry Andric unsigned cloneNo() const { return this->second; } 20406c3fb27SDimitry Andric void setCloneNo(unsigned N) { this->second = N; } 20506c3fb27SDimitry Andric void print(raw_ostream &OS) const { 20606c3fb27SDimitry Andric if (!operator bool()) { 20706c3fb27SDimitry Andric assert(!cloneNo()); 20806c3fb27SDimitry Andric OS << "null Call"; 20906c3fb27SDimitry Andric return; 21006c3fb27SDimitry Andric } 21106c3fb27SDimitry Andric call()->print(OS); 21206c3fb27SDimitry Andric OS << "\t(clone " << cloneNo() << ")"; 21306c3fb27SDimitry Andric } 21406c3fb27SDimitry Andric void dump() const { 21506c3fb27SDimitry Andric print(dbgs()); 21606c3fb27SDimitry Andric dbgs() << "\n"; 21706c3fb27SDimitry Andric } 21806c3fb27SDimitry Andric friend raw_ostream &operator<<(raw_ostream &OS, const CallInfo &Call) { 21906c3fb27SDimitry Andric Call.print(OS); 22006c3fb27SDimitry Andric return OS; 22106c3fb27SDimitry Andric } 22206c3fb27SDimitry Andric }; 22306c3fb27SDimitry Andric 22406c3fb27SDimitry Andric struct ContextEdge; 22506c3fb27SDimitry Andric 22606c3fb27SDimitry Andric /// Node in the Callsite Context Graph 22706c3fb27SDimitry Andric struct ContextNode { 22806c3fb27SDimitry Andric // Keep this for now since in the IR case where we have an Instruction* it 22906c3fb27SDimitry Andric // is not as immediately discoverable. Used for printing richer information 23006c3fb27SDimitry Andric // when dumping graph. 23106c3fb27SDimitry Andric bool IsAllocation; 23206c3fb27SDimitry Andric 23306c3fb27SDimitry Andric // Keeps track of when the Call was reset to null because there was 23406c3fb27SDimitry Andric // recursion. 23506c3fb27SDimitry Andric bool Recursive = false; 23606c3fb27SDimitry Andric 23706c3fb27SDimitry Andric // The corresponding allocation or interior call. 23806c3fb27SDimitry Andric CallInfo Call; 23906c3fb27SDimitry Andric 24006c3fb27SDimitry Andric // For alloc nodes this is a unique id assigned when constructed, and for 24106c3fb27SDimitry Andric // callsite stack nodes it is the original stack id when the node is 24206c3fb27SDimitry Andric // constructed from the memprof MIB metadata on the alloc nodes. Note that 24306c3fb27SDimitry Andric // this is only used when matching callsite metadata onto the stack nodes 24406c3fb27SDimitry Andric // created when processing the allocation memprof MIBs, and for labeling 24506c3fb27SDimitry Andric // nodes in the dot graph. Therefore we don't bother to assign a value for 24606c3fb27SDimitry Andric // clones. 24706c3fb27SDimitry Andric uint64_t OrigStackOrAllocId = 0; 24806c3fb27SDimitry Andric 24906c3fb27SDimitry Andric // This will be formed by ORing together the AllocationType enum values 25006c3fb27SDimitry Andric // for contexts including this node. 25106c3fb27SDimitry Andric uint8_t AllocTypes = 0; 25206c3fb27SDimitry Andric 25306c3fb27SDimitry Andric // Edges to all callees in the profiled call stacks. 25406c3fb27SDimitry Andric // TODO: Should this be a map (from Callee node) for more efficient lookup? 25506c3fb27SDimitry Andric std::vector<std::shared_ptr<ContextEdge>> CalleeEdges; 25606c3fb27SDimitry Andric 25706c3fb27SDimitry Andric // Edges to all callers in the profiled call stacks. 25806c3fb27SDimitry Andric // TODO: Should this be a map (from Caller node) for more efficient lookup? 25906c3fb27SDimitry Andric std::vector<std::shared_ptr<ContextEdge>> CallerEdges; 26006c3fb27SDimitry Andric 26106c3fb27SDimitry Andric // The set of IDs for contexts including this node. 26206c3fb27SDimitry Andric DenseSet<uint32_t> ContextIds; 26306c3fb27SDimitry Andric 26406c3fb27SDimitry Andric // List of clones of this ContextNode, initially empty. 26506c3fb27SDimitry Andric std::vector<ContextNode *> Clones; 26606c3fb27SDimitry Andric 26706c3fb27SDimitry Andric // If a clone, points to the original uncloned node. 26806c3fb27SDimitry Andric ContextNode *CloneOf = nullptr; 26906c3fb27SDimitry Andric 27006c3fb27SDimitry Andric ContextNode(bool IsAllocation) : IsAllocation(IsAllocation), Call() {} 27106c3fb27SDimitry Andric 27206c3fb27SDimitry Andric ContextNode(bool IsAllocation, CallInfo C) 27306c3fb27SDimitry Andric : IsAllocation(IsAllocation), Call(C) {} 27406c3fb27SDimitry Andric 27506c3fb27SDimitry Andric void addClone(ContextNode *Clone) { 27606c3fb27SDimitry Andric if (CloneOf) { 27706c3fb27SDimitry Andric CloneOf->Clones.push_back(Clone); 27806c3fb27SDimitry Andric Clone->CloneOf = CloneOf; 27906c3fb27SDimitry Andric } else { 28006c3fb27SDimitry Andric Clones.push_back(Clone); 28106c3fb27SDimitry Andric assert(!Clone->CloneOf); 28206c3fb27SDimitry Andric Clone->CloneOf = this; 28306c3fb27SDimitry Andric } 28406c3fb27SDimitry Andric } 28506c3fb27SDimitry Andric 28606c3fb27SDimitry Andric ContextNode *getOrigNode() { 28706c3fb27SDimitry Andric if (!CloneOf) 28806c3fb27SDimitry Andric return this; 28906c3fb27SDimitry Andric return CloneOf; 29006c3fb27SDimitry Andric } 29106c3fb27SDimitry Andric 29206c3fb27SDimitry Andric void addOrUpdateCallerEdge(ContextNode *Caller, AllocationType AllocType, 29306c3fb27SDimitry Andric unsigned int ContextId); 29406c3fb27SDimitry Andric 29506c3fb27SDimitry Andric ContextEdge *findEdgeFromCallee(const ContextNode *Callee); 29606c3fb27SDimitry Andric ContextEdge *findEdgeFromCaller(const ContextNode *Caller); 29706c3fb27SDimitry Andric void eraseCalleeEdge(const ContextEdge *Edge); 29806c3fb27SDimitry Andric void eraseCallerEdge(const ContextEdge *Edge); 29906c3fb27SDimitry Andric 30006c3fb27SDimitry Andric void setCall(CallInfo C) { Call = C; } 30106c3fb27SDimitry Andric 30206c3fb27SDimitry Andric bool hasCall() const { return (bool)Call.call(); } 30306c3fb27SDimitry Andric 30406c3fb27SDimitry Andric void printCall(raw_ostream &OS) const { Call.print(OS); } 30506c3fb27SDimitry Andric 30606c3fb27SDimitry Andric // True if this node was effectively removed from the graph, in which case 30706c3fb27SDimitry Andric // its context id set, caller edges, and callee edges should all be empty. 30806c3fb27SDimitry Andric bool isRemoved() const { 30906c3fb27SDimitry Andric assert(ContextIds.empty() == 31006c3fb27SDimitry Andric (CalleeEdges.empty() && CallerEdges.empty())); 31106c3fb27SDimitry Andric return ContextIds.empty(); 31206c3fb27SDimitry Andric } 31306c3fb27SDimitry Andric 31406c3fb27SDimitry Andric void dump() const; 31506c3fb27SDimitry Andric void print(raw_ostream &OS) const; 31606c3fb27SDimitry Andric 31706c3fb27SDimitry Andric friend raw_ostream &operator<<(raw_ostream &OS, const ContextNode &Node) { 31806c3fb27SDimitry Andric Node.print(OS); 31906c3fb27SDimitry Andric return OS; 32006c3fb27SDimitry Andric } 32106c3fb27SDimitry Andric }; 32206c3fb27SDimitry Andric 32306c3fb27SDimitry Andric /// Edge in the Callsite Context Graph from a ContextNode N to a caller or 32406c3fb27SDimitry Andric /// callee. 32506c3fb27SDimitry Andric struct ContextEdge { 32606c3fb27SDimitry Andric ContextNode *Callee; 32706c3fb27SDimitry Andric ContextNode *Caller; 32806c3fb27SDimitry Andric 32906c3fb27SDimitry Andric // This will be formed by ORing together the AllocationType enum values 33006c3fb27SDimitry Andric // for contexts including this edge. 33106c3fb27SDimitry Andric uint8_t AllocTypes = 0; 33206c3fb27SDimitry Andric 33306c3fb27SDimitry Andric // The set of IDs for contexts including this edge. 33406c3fb27SDimitry Andric DenseSet<uint32_t> ContextIds; 33506c3fb27SDimitry Andric 33606c3fb27SDimitry Andric ContextEdge(ContextNode *Callee, ContextNode *Caller, uint8_t AllocType, 33706c3fb27SDimitry Andric DenseSet<uint32_t> ContextIds) 33806c3fb27SDimitry Andric : Callee(Callee), Caller(Caller), AllocTypes(AllocType), 33906c3fb27SDimitry Andric ContextIds(ContextIds) {} 34006c3fb27SDimitry Andric 34106c3fb27SDimitry Andric DenseSet<uint32_t> &getContextIds() { return ContextIds; } 34206c3fb27SDimitry Andric 34306c3fb27SDimitry Andric void dump() const; 34406c3fb27SDimitry Andric void print(raw_ostream &OS) const; 34506c3fb27SDimitry Andric 34606c3fb27SDimitry Andric friend raw_ostream &operator<<(raw_ostream &OS, const ContextEdge &Edge) { 34706c3fb27SDimitry Andric Edge.print(OS); 34806c3fb27SDimitry Andric return OS; 34906c3fb27SDimitry Andric } 35006c3fb27SDimitry Andric }; 35106c3fb27SDimitry Andric 35206c3fb27SDimitry Andric /// Helper to remove callee edges that have allocation type None (due to not 35306c3fb27SDimitry Andric /// carrying any context ids) after transformations. 35406c3fb27SDimitry Andric void removeNoneTypeCalleeEdges(ContextNode *Node); 35506c3fb27SDimitry Andric 35606c3fb27SDimitry Andric protected: 35706c3fb27SDimitry Andric /// Get a list of nodes corresponding to the stack ids in the given callsite 35806c3fb27SDimitry Andric /// context. 35906c3fb27SDimitry Andric template <class NodeT, class IteratorT> 36006c3fb27SDimitry Andric std::vector<uint64_t> 36106c3fb27SDimitry Andric getStackIdsWithContextNodes(CallStack<NodeT, IteratorT> &CallsiteContext); 36206c3fb27SDimitry Andric 36306c3fb27SDimitry Andric /// Adds nodes for the given allocation and any stack ids on its memprof MIB 36406c3fb27SDimitry Andric /// metadata (or summary). 36506c3fb27SDimitry Andric ContextNode *addAllocNode(CallInfo Call, const FuncTy *F); 36606c3fb27SDimitry Andric 36706c3fb27SDimitry Andric /// Adds nodes for the given MIB stack ids. 36806c3fb27SDimitry Andric template <class NodeT, class IteratorT> 36906c3fb27SDimitry Andric void addStackNodesForMIB(ContextNode *AllocNode, 37006c3fb27SDimitry Andric CallStack<NodeT, IteratorT> &StackContext, 37106c3fb27SDimitry Andric CallStack<NodeT, IteratorT> &CallsiteContext, 37206c3fb27SDimitry Andric AllocationType AllocType); 37306c3fb27SDimitry Andric 37406c3fb27SDimitry Andric /// Matches all callsite metadata (or summary) to the nodes created for 37506c3fb27SDimitry Andric /// allocation memprof MIB metadata, synthesizing new nodes to reflect any 37606c3fb27SDimitry Andric /// inlining performed on those callsite instructions. 37706c3fb27SDimitry Andric void updateStackNodes(); 37806c3fb27SDimitry Andric 37906c3fb27SDimitry Andric /// Update graph to conservatively handle any callsite stack nodes that target 38006c3fb27SDimitry Andric /// multiple different callee target functions. 38106c3fb27SDimitry Andric void handleCallsitesWithMultipleTargets(); 38206c3fb27SDimitry Andric 38306c3fb27SDimitry Andric /// Save lists of calls with MemProf metadata in each function, for faster 38406c3fb27SDimitry Andric /// iteration. 385297eecfbSDimitry Andric MapVector<FuncTy *, std::vector<CallInfo>> FuncToCallsWithMetadata; 38606c3fb27SDimitry Andric 38706c3fb27SDimitry Andric /// Map from callsite node to the enclosing caller function. 38806c3fb27SDimitry Andric std::map<const ContextNode *, const FuncTy *> NodeToCallingFunc; 38906c3fb27SDimitry Andric 39006c3fb27SDimitry Andric private: 39106c3fb27SDimitry Andric using EdgeIter = typename std::vector<std::shared_ptr<ContextEdge>>::iterator; 39206c3fb27SDimitry Andric 39306c3fb27SDimitry Andric using CallContextInfo = std::tuple<CallTy, std::vector<uint64_t>, 39406c3fb27SDimitry Andric const FuncTy *, DenseSet<uint32_t>>; 39506c3fb27SDimitry Andric 39606c3fb27SDimitry Andric /// Assigns the given Node to calls at or inlined into the location with 39706c3fb27SDimitry Andric /// the Node's stack id, after post order traversing and processing its 39806c3fb27SDimitry Andric /// caller nodes. Uses the call information recorded in the given 39906c3fb27SDimitry Andric /// StackIdToMatchingCalls map, and creates new nodes for inlined sequences 40006c3fb27SDimitry Andric /// as needed. Called by updateStackNodes which sets up the given 40106c3fb27SDimitry Andric /// StackIdToMatchingCalls map. 40206c3fb27SDimitry Andric void assignStackNodesPostOrder( 40306c3fb27SDimitry Andric ContextNode *Node, DenseSet<const ContextNode *> &Visited, 40406c3fb27SDimitry Andric DenseMap<uint64_t, std::vector<CallContextInfo>> &StackIdToMatchingCalls); 40506c3fb27SDimitry Andric 40606c3fb27SDimitry Andric /// Duplicates the given set of context ids, updating the provided 40706c3fb27SDimitry Andric /// map from each original id with the newly generated context ids, 40806c3fb27SDimitry Andric /// and returning the new duplicated id set. 40906c3fb27SDimitry Andric DenseSet<uint32_t> duplicateContextIds( 41006c3fb27SDimitry Andric const DenseSet<uint32_t> &StackSequenceContextIds, 41106c3fb27SDimitry Andric DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds); 41206c3fb27SDimitry Andric 41306c3fb27SDimitry Andric /// Propagates all duplicated context ids across the graph. 41406c3fb27SDimitry Andric void propagateDuplicateContextIds( 41506c3fb27SDimitry Andric const DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds); 41606c3fb27SDimitry Andric 41706c3fb27SDimitry Andric /// Connect the NewNode to OrigNode's callees if TowardsCallee is true, 41806c3fb27SDimitry Andric /// else to its callers. Also updates OrigNode's edges to remove any context 41906c3fb27SDimitry Andric /// ids moved to the newly created edge. 42006c3fb27SDimitry Andric void connectNewNode(ContextNode *NewNode, ContextNode *OrigNode, 42106c3fb27SDimitry Andric bool TowardsCallee); 42206c3fb27SDimitry Andric 42306c3fb27SDimitry Andric /// Get the stack id corresponding to the given Id or Index (for IR this will 42406c3fb27SDimitry Andric /// return itself, for a summary index this will return the id recorded in the 42506c3fb27SDimitry Andric /// index for that stack id index value). 42606c3fb27SDimitry Andric uint64_t getStackId(uint64_t IdOrIndex) const { 42706c3fb27SDimitry Andric return static_cast<const DerivedCCG *>(this)->getStackId(IdOrIndex); 42806c3fb27SDimitry Andric } 42906c3fb27SDimitry Andric 430297eecfbSDimitry Andric /// Returns true if the given call targets the callee of the given edge, or if 431297eecfbSDimitry Andric /// we were able to identify the call chain through intermediate tail calls. 432297eecfbSDimitry Andric /// In the latter case new context nodes are added to the graph for the 433297eecfbSDimitry Andric /// identified tail calls, and their synthesized nodes are added to 434297eecfbSDimitry Andric /// TailCallToContextNodeMap. The EdgeIter is updated in either case to the 435297eecfbSDimitry Andric /// next element after the input position (either incremented or updated after 436297eecfbSDimitry Andric /// removing the old edge). 437297eecfbSDimitry Andric bool 438297eecfbSDimitry Andric calleesMatch(CallTy Call, EdgeIter &EI, 439297eecfbSDimitry Andric MapVector<CallInfo, ContextNode *> &TailCallToContextNodeMap); 440297eecfbSDimitry Andric 441297eecfbSDimitry Andric /// Returns true if the given call targets the given function, or if we were 442297eecfbSDimitry Andric /// able to identify the call chain through intermediate tail calls (in which 443297eecfbSDimitry Andric /// case FoundCalleeChain will be populated). 444297eecfbSDimitry Andric bool calleeMatchesFunc( 445297eecfbSDimitry Andric CallTy Call, const FuncTy *Func, const FuncTy *CallerFunc, 446297eecfbSDimitry Andric std::vector<std::pair<CallTy, FuncTy *>> &FoundCalleeChain) { 447297eecfbSDimitry Andric return static_cast<DerivedCCG *>(this)->calleeMatchesFunc( 448297eecfbSDimitry Andric Call, Func, CallerFunc, FoundCalleeChain); 44906c3fb27SDimitry Andric } 45006c3fb27SDimitry Andric 45106c3fb27SDimitry Andric /// Get a list of nodes corresponding to the stack ids in the given 45206c3fb27SDimitry Andric /// callsite's context. 45306c3fb27SDimitry Andric std::vector<uint64_t> getStackIdsWithContextNodesForCall(CallTy Call) { 45406c3fb27SDimitry Andric return static_cast<DerivedCCG *>(this)->getStackIdsWithContextNodesForCall( 45506c3fb27SDimitry Andric Call); 45606c3fb27SDimitry Andric } 45706c3fb27SDimitry Andric 45806c3fb27SDimitry Andric /// Get the last stack id in the context for callsite. 45906c3fb27SDimitry Andric uint64_t getLastStackId(CallTy Call) { 46006c3fb27SDimitry Andric return static_cast<DerivedCCG *>(this)->getLastStackId(Call); 46106c3fb27SDimitry Andric } 46206c3fb27SDimitry Andric 46306c3fb27SDimitry Andric /// Update the allocation call to record type of allocated memory. 46406c3fb27SDimitry Andric void updateAllocationCall(CallInfo &Call, AllocationType AllocType) { 46506c3fb27SDimitry Andric AllocType == AllocationType::Cold ? AllocTypeCold++ : AllocTypeNotCold++; 46606c3fb27SDimitry Andric static_cast<DerivedCCG *>(this)->updateAllocationCall(Call, AllocType); 46706c3fb27SDimitry Andric } 46806c3fb27SDimitry Andric 46906c3fb27SDimitry Andric /// Update non-allocation call to invoke (possibly cloned) function 47006c3fb27SDimitry Andric /// CalleeFunc. 47106c3fb27SDimitry Andric void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc) { 47206c3fb27SDimitry Andric static_cast<DerivedCCG *>(this)->updateCall(CallerCall, CalleeFunc); 47306c3fb27SDimitry Andric } 47406c3fb27SDimitry Andric 47506c3fb27SDimitry Andric /// Clone the given function for the given callsite, recording mapping of all 47606c3fb27SDimitry Andric /// of the functions tracked calls to their new versions in the CallMap. 47706c3fb27SDimitry Andric /// Assigns new clones to clone number CloneNo. 47806c3fb27SDimitry Andric FuncInfo cloneFunctionForCallsite( 47906c3fb27SDimitry Andric FuncInfo &Func, CallInfo &Call, std::map<CallInfo, CallInfo> &CallMap, 48006c3fb27SDimitry Andric std::vector<CallInfo> &CallsWithMetadataInFunc, unsigned CloneNo) { 48106c3fb27SDimitry Andric return static_cast<DerivedCCG *>(this)->cloneFunctionForCallsite( 48206c3fb27SDimitry Andric Func, Call, CallMap, CallsWithMetadataInFunc, CloneNo); 48306c3fb27SDimitry Andric } 48406c3fb27SDimitry Andric 48506c3fb27SDimitry Andric /// Gets a label to use in the dot graph for the given call clone in the given 48606c3fb27SDimitry Andric /// function. 48706c3fb27SDimitry Andric std::string getLabel(const FuncTy *Func, const CallTy Call, 48806c3fb27SDimitry Andric unsigned CloneNo) const { 48906c3fb27SDimitry Andric return static_cast<const DerivedCCG *>(this)->getLabel(Func, Call, CloneNo); 49006c3fb27SDimitry Andric } 49106c3fb27SDimitry Andric 49206c3fb27SDimitry Andric /// Helpers to find the node corresponding to the given call or stackid. 49306c3fb27SDimitry Andric ContextNode *getNodeForInst(const CallInfo &C); 49406c3fb27SDimitry Andric ContextNode *getNodeForAlloc(const CallInfo &C); 49506c3fb27SDimitry Andric ContextNode *getNodeForStackId(uint64_t StackId); 49606c3fb27SDimitry Andric 49706c3fb27SDimitry Andric /// Removes the node information recorded for the given call. 49806c3fb27SDimitry Andric void unsetNodeForInst(const CallInfo &C); 49906c3fb27SDimitry Andric 50006c3fb27SDimitry Andric /// Computes the alloc type corresponding to the given context ids, by 50106c3fb27SDimitry Andric /// unioning their recorded alloc types. 50206c3fb27SDimitry Andric uint8_t computeAllocType(DenseSet<uint32_t> &ContextIds); 50306c3fb27SDimitry Andric 50406c3fb27SDimitry Andric /// Returns the alloction type of the intersection of the contexts of two 50506c3fb27SDimitry Andric /// nodes (based on their provided context id sets), optimized for the case 50606c3fb27SDimitry Andric /// when Node1Ids is smaller than Node2Ids. 50706c3fb27SDimitry Andric uint8_t intersectAllocTypesImpl(const DenseSet<uint32_t> &Node1Ids, 50806c3fb27SDimitry Andric const DenseSet<uint32_t> &Node2Ids); 50906c3fb27SDimitry Andric 51006c3fb27SDimitry Andric /// Returns the alloction type of the intersection of the contexts of two 51106c3fb27SDimitry Andric /// nodes (based on their provided context id sets). 51206c3fb27SDimitry Andric uint8_t intersectAllocTypes(const DenseSet<uint32_t> &Node1Ids, 51306c3fb27SDimitry Andric const DenseSet<uint32_t> &Node2Ids); 51406c3fb27SDimitry Andric 51506c3fb27SDimitry Andric /// Create a clone of Edge's callee and move Edge to that new callee node, 51606c3fb27SDimitry Andric /// performing the necessary context id and allocation type updates. 51706c3fb27SDimitry Andric /// If callee's caller edge iterator is supplied, it is updated when removing 51806c3fb27SDimitry Andric /// the edge from that list. 51906c3fb27SDimitry Andric ContextNode * 52006c3fb27SDimitry Andric moveEdgeToNewCalleeClone(const std::shared_ptr<ContextEdge> &Edge, 52106c3fb27SDimitry Andric EdgeIter *CallerEdgeI = nullptr); 52206c3fb27SDimitry Andric 52306c3fb27SDimitry Andric /// Change the callee of Edge to existing callee clone NewCallee, performing 52406c3fb27SDimitry Andric /// the necessary context id and allocation type updates. 52506c3fb27SDimitry Andric /// If callee's caller edge iterator is supplied, it is updated when removing 52606c3fb27SDimitry Andric /// the edge from that list. 52706c3fb27SDimitry Andric void moveEdgeToExistingCalleeClone(const std::shared_ptr<ContextEdge> &Edge, 52806c3fb27SDimitry Andric ContextNode *NewCallee, 52906c3fb27SDimitry Andric EdgeIter *CallerEdgeI = nullptr, 53006c3fb27SDimitry Andric bool NewClone = false); 53106c3fb27SDimitry Andric 53206c3fb27SDimitry Andric /// Recursively perform cloning on the graph for the given Node and its 53306c3fb27SDimitry Andric /// callers, in order to uniquely identify the allocation behavior of an 53406c3fb27SDimitry Andric /// allocation given its context. 53506c3fb27SDimitry Andric void identifyClones(ContextNode *Node, 53606c3fb27SDimitry Andric DenseSet<const ContextNode *> &Visited); 53706c3fb27SDimitry Andric 53806c3fb27SDimitry Andric /// Map from each context ID to the AllocationType assigned to that context. 53906c3fb27SDimitry Andric std::map<uint32_t, AllocationType> ContextIdToAllocationType; 54006c3fb27SDimitry Andric 54106c3fb27SDimitry Andric /// Identifies the context node created for a stack id when adding the MIB 54206c3fb27SDimitry Andric /// contexts to the graph. This is used to locate the context nodes when 54306c3fb27SDimitry Andric /// trying to assign the corresponding callsites with those stack ids to these 54406c3fb27SDimitry Andric /// nodes. 54506c3fb27SDimitry Andric std::map<uint64_t, ContextNode *> StackEntryIdToContextNodeMap; 54606c3fb27SDimitry Andric 54706c3fb27SDimitry Andric /// Maps to track the calls to their corresponding nodes in the graph. 54806c3fb27SDimitry Andric MapVector<CallInfo, ContextNode *> AllocationCallToContextNodeMap; 54906c3fb27SDimitry Andric MapVector<CallInfo, ContextNode *> NonAllocationCallToContextNodeMap; 55006c3fb27SDimitry Andric 55106c3fb27SDimitry Andric /// Owner of all ContextNode unique_ptrs. 55206c3fb27SDimitry Andric std::vector<std::unique_ptr<ContextNode>> NodeOwner; 55306c3fb27SDimitry Andric 55406c3fb27SDimitry Andric /// Perform sanity checks on graph when requested. 55506c3fb27SDimitry Andric void check() const; 55606c3fb27SDimitry Andric 55706c3fb27SDimitry Andric /// Keeps track of the last unique context id assigned. 55806c3fb27SDimitry Andric unsigned int LastContextId = 0; 55906c3fb27SDimitry Andric }; 56006c3fb27SDimitry Andric 56106c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 56206c3fb27SDimitry Andric using ContextNode = 56306c3fb27SDimitry Andric typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode; 56406c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 56506c3fb27SDimitry Andric using ContextEdge = 56606c3fb27SDimitry Andric typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge; 56706c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 56806c3fb27SDimitry Andric using FuncInfo = 56906c3fb27SDimitry Andric typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::FuncInfo; 57006c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 57106c3fb27SDimitry Andric using CallInfo = 57206c3fb27SDimitry Andric typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::CallInfo; 57306c3fb27SDimitry Andric 57406c3fb27SDimitry Andric /// CRTP derived class for graphs built from IR (regular LTO). 57506c3fb27SDimitry Andric class ModuleCallsiteContextGraph 57606c3fb27SDimitry Andric : public CallsiteContextGraph<ModuleCallsiteContextGraph, Function, 57706c3fb27SDimitry Andric Instruction *> { 57806c3fb27SDimitry Andric public: 57906c3fb27SDimitry Andric ModuleCallsiteContextGraph( 58006c3fb27SDimitry Andric Module &M, 581*7a6dacacSDimitry Andric llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter); 58206c3fb27SDimitry Andric 58306c3fb27SDimitry Andric private: 58406c3fb27SDimitry Andric friend CallsiteContextGraph<ModuleCallsiteContextGraph, Function, 58506c3fb27SDimitry Andric Instruction *>; 58606c3fb27SDimitry Andric 58706c3fb27SDimitry Andric uint64_t getStackId(uint64_t IdOrIndex) const; 588297eecfbSDimitry Andric bool calleeMatchesFunc( 589297eecfbSDimitry Andric Instruction *Call, const Function *Func, const Function *CallerFunc, 590297eecfbSDimitry Andric std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain); 591297eecfbSDimitry Andric bool findProfiledCalleeThroughTailCalls( 592297eecfbSDimitry Andric const Function *ProfiledCallee, Value *CurCallee, unsigned Depth, 593297eecfbSDimitry Andric std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain, 594297eecfbSDimitry Andric bool &FoundMultipleCalleeChains); 59506c3fb27SDimitry Andric uint64_t getLastStackId(Instruction *Call); 59606c3fb27SDimitry Andric std::vector<uint64_t> getStackIdsWithContextNodesForCall(Instruction *Call); 59706c3fb27SDimitry Andric void updateAllocationCall(CallInfo &Call, AllocationType AllocType); 59806c3fb27SDimitry Andric void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc); 59906c3fb27SDimitry Andric CallsiteContextGraph<ModuleCallsiteContextGraph, Function, 60006c3fb27SDimitry Andric Instruction *>::FuncInfo 60106c3fb27SDimitry Andric cloneFunctionForCallsite(FuncInfo &Func, CallInfo &Call, 60206c3fb27SDimitry Andric std::map<CallInfo, CallInfo> &CallMap, 60306c3fb27SDimitry Andric std::vector<CallInfo> &CallsWithMetadataInFunc, 60406c3fb27SDimitry Andric unsigned CloneNo); 60506c3fb27SDimitry Andric std::string getLabel(const Function *Func, const Instruction *Call, 60606c3fb27SDimitry Andric unsigned CloneNo) const; 60706c3fb27SDimitry Andric 60806c3fb27SDimitry Andric const Module &Mod; 609*7a6dacacSDimitry Andric llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter; 61006c3fb27SDimitry Andric }; 61106c3fb27SDimitry Andric 61206c3fb27SDimitry Andric /// Represents a call in the summary index graph, which can either be an 61306c3fb27SDimitry Andric /// allocation or an interior callsite node in an allocation's context. 61406c3fb27SDimitry Andric /// Holds a pointer to the corresponding data structure in the index. 61506c3fb27SDimitry Andric struct IndexCall : public PointerUnion<CallsiteInfo *, AllocInfo *> { 61606c3fb27SDimitry Andric IndexCall() : PointerUnion() {} 61706c3fb27SDimitry Andric IndexCall(std::nullptr_t) : IndexCall() {} 61806c3fb27SDimitry Andric IndexCall(CallsiteInfo *StackNode) : PointerUnion(StackNode) {} 61906c3fb27SDimitry Andric IndexCall(AllocInfo *AllocNode) : PointerUnion(AllocNode) {} 62006c3fb27SDimitry Andric IndexCall(PointerUnion PT) : PointerUnion(PT) {} 62106c3fb27SDimitry Andric 62206c3fb27SDimitry Andric IndexCall *operator->() { return this; } 62306c3fb27SDimitry Andric 62406c3fb27SDimitry Andric PointerUnion<CallsiteInfo *, AllocInfo *> getBase() const { return *this; } 62506c3fb27SDimitry Andric 62606c3fb27SDimitry Andric void print(raw_ostream &OS) const { 62706c3fb27SDimitry Andric if (auto *AI = llvm::dyn_cast_if_present<AllocInfo *>(getBase())) { 62806c3fb27SDimitry Andric OS << *AI; 62906c3fb27SDimitry Andric } else { 63006c3fb27SDimitry Andric auto *CI = llvm::dyn_cast_if_present<CallsiteInfo *>(getBase()); 63106c3fb27SDimitry Andric assert(CI); 63206c3fb27SDimitry Andric OS << *CI; 63306c3fb27SDimitry Andric } 63406c3fb27SDimitry Andric } 63506c3fb27SDimitry Andric }; 63606c3fb27SDimitry Andric 63706c3fb27SDimitry Andric /// CRTP derived class for graphs built from summary index (ThinLTO). 63806c3fb27SDimitry Andric class IndexCallsiteContextGraph 63906c3fb27SDimitry Andric : public CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary, 64006c3fb27SDimitry Andric IndexCall> { 64106c3fb27SDimitry Andric public: 64206c3fb27SDimitry Andric IndexCallsiteContextGraph( 64306c3fb27SDimitry Andric ModuleSummaryIndex &Index, 644*7a6dacacSDimitry Andric llvm::function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)> 64506c3fb27SDimitry Andric isPrevailing); 64606c3fb27SDimitry Andric 647297eecfbSDimitry Andric ~IndexCallsiteContextGraph() { 648297eecfbSDimitry Andric // Now that we are done with the graph it is safe to add the new 649297eecfbSDimitry Andric // CallsiteInfo structs to the function summary vectors. The graph nodes 650297eecfbSDimitry Andric // point into locations within these vectors, so we don't want to add them 651297eecfbSDimitry Andric // any earlier. 652297eecfbSDimitry Andric for (auto &I : FunctionCalleesToSynthesizedCallsiteInfos) { 653297eecfbSDimitry Andric auto *FS = I.first; 654297eecfbSDimitry Andric for (auto &Callsite : I.second) 655297eecfbSDimitry Andric FS->addCallsite(*Callsite.second); 656297eecfbSDimitry Andric } 657297eecfbSDimitry Andric } 658297eecfbSDimitry Andric 65906c3fb27SDimitry Andric private: 66006c3fb27SDimitry Andric friend CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary, 66106c3fb27SDimitry Andric IndexCall>; 66206c3fb27SDimitry Andric 66306c3fb27SDimitry Andric uint64_t getStackId(uint64_t IdOrIndex) const; 664297eecfbSDimitry Andric bool calleeMatchesFunc( 665297eecfbSDimitry Andric IndexCall &Call, const FunctionSummary *Func, 666297eecfbSDimitry Andric const FunctionSummary *CallerFunc, 667297eecfbSDimitry Andric std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain); 668297eecfbSDimitry Andric bool findProfiledCalleeThroughTailCalls( 669297eecfbSDimitry Andric ValueInfo ProfiledCallee, ValueInfo CurCallee, unsigned Depth, 670297eecfbSDimitry Andric std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain, 671297eecfbSDimitry Andric bool &FoundMultipleCalleeChains); 67206c3fb27SDimitry Andric uint64_t getLastStackId(IndexCall &Call); 67306c3fb27SDimitry Andric std::vector<uint64_t> getStackIdsWithContextNodesForCall(IndexCall &Call); 67406c3fb27SDimitry Andric void updateAllocationCall(CallInfo &Call, AllocationType AllocType); 67506c3fb27SDimitry Andric void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc); 67606c3fb27SDimitry Andric CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary, 67706c3fb27SDimitry Andric IndexCall>::FuncInfo 67806c3fb27SDimitry Andric cloneFunctionForCallsite(FuncInfo &Func, CallInfo &Call, 67906c3fb27SDimitry Andric std::map<CallInfo, CallInfo> &CallMap, 68006c3fb27SDimitry Andric std::vector<CallInfo> &CallsWithMetadataInFunc, 68106c3fb27SDimitry Andric unsigned CloneNo); 68206c3fb27SDimitry Andric std::string getLabel(const FunctionSummary *Func, const IndexCall &Call, 68306c3fb27SDimitry Andric unsigned CloneNo) const; 68406c3fb27SDimitry Andric 68506c3fb27SDimitry Andric // Saves mapping from function summaries containing memprof records back to 68606c3fb27SDimitry Andric // its VI, for use in checking and debugging. 68706c3fb27SDimitry Andric std::map<const FunctionSummary *, ValueInfo> FSToVIMap; 68806c3fb27SDimitry Andric 68906c3fb27SDimitry Andric const ModuleSummaryIndex &Index; 690*7a6dacacSDimitry Andric llvm::function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)> 691297eecfbSDimitry Andric isPrevailing; 692297eecfbSDimitry Andric 693297eecfbSDimitry Andric // Saves/owns the callsite info structures synthesized for missing tail call 694297eecfbSDimitry Andric // frames that we discover while building the graph. 695297eecfbSDimitry Andric // It maps from the summary of the function making the tail call, to a map 696297eecfbSDimitry Andric // of callee ValueInfo to corresponding synthesized callsite info. 697297eecfbSDimitry Andric std::unordered_map<FunctionSummary *, 698297eecfbSDimitry Andric std::map<ValueInfo, std::unique_ptr<CallsiteInfo>>> 699297eecfbSDimitry Andric FunctionCalleesToSynthesizedCallsiteInfos; 70006c3fb27SDimitry Andric }; 70106c3fb27SDimitry Andric } // namespace 70206c3fb27SDimitry Andric 70306c3fb27SDimitry Andric namespace llvm { 70406c3fb27SDimitry Andric template <> 70506c3fb27SDimitry Andric struct DenseMapInfo<typename CallsiteContextGraph< 70606c3fb27SDimitry Andric ModuleCallsiteContextGraph, Function, Instruction *>::CallInfo> 70706c3fb27SDimitry Andric : public DenseMapInfo<std::pair<Instruction *, unsigned>> {}; 70806c3fb27SDimitry Andric template <> 70906c3fb27SDimitry Andric struct DenseMapInfo<typename CallsiteContextGraph< 71006c3fb27SDimitry Andric IndexCallsiteContextGraph, FunctionSummary, IndexCall>::CallInfo> 71106c3fb27SDimitry Andric : public DenseMapInfo<std::pair<IndexCall, unsigned>> {}; 71206c3fb27SDimitry Andric template <> 71306c3fb27SDimitry Andric struct DenseMapInfo<IndexCall> 71406c3fb27SDimitry Andric : public DenseMapInfo<PointerUnion<CallsiteInfo *, AllocInfo *>> {}; 71506c3fb27SDimitry Andric } // end namespace llvm 71606c3fb27SDimitry Andric 71706c3fb27SDimitry Andric namespace { 71806c3fb27SDimitry Andric 71906c3fb27SDimitry Andric struct FieldSeparator { 72006c3fb27SDimitry Andric bool Skip = true; 72106c3fb27SDimitry Andric const char *Sep; 72206c3fb27SDimitry Andric 72306c3fb27SDimitry Andric FieldSeparator(const char *Sep = ", ") : Sep(Sep) {} 72406c3fb27SDimitry Andric }; 72506c3fb27SDimitry Andric 72606c3fb27SDimitry Andric raw_ostream &operator<<(raw_ostream &OS, FieldSeparator &FS) { 72706c3fb27SDimitry Andric if (FS.Skip) { 72806c3fb27SDimitry Andric FS.Skip = false; 72906c3fb27SDimitry Andric return OS; 73006c3fb27SDimitry Andric } 73106c3fb27SDimitry Andric return OS << FS.Sep; 73206c3fb27SDimitry Andric } 73306c3fb27SDimitry Andric 73406c3fb27SDimitry Andric // Map the uint8_t alloc types (which may contain NotCold|Cold) to the alloc 73506c3fb27SDimitry Andric // type we should actually use on the corresponding allocation. 73606c3fb27SDimitry Andric // If we can't clone a node that has NotCold+Cold alloc type, we will fall 73706c3fb27SDimitry Andric // back to using NotCold. So don't bother cloning to distinguish NotCold+Cold 73806c3fb27SDimitry Andric // from NotCold. 73906c3fb27SDimitry Andric AllocationType allocTypeToUse(uint8_t AllocTypes) { 74006c3fb27SDimitry Andric assert(AllocTypes != (uint8_t)AllocationType::None); 74106c3fb27SDimitry Andric if (AllocTypes == 74206c3fb27SDimitry Andric ((uint8_t)AllocationType::NotCold | (uint8_t)AllocationType::Cold)) 74306c3fb27SDimitry Andric return AllocationType::NotCold; 74406c3fb27SDimitry Andric else 74506c3fb27SDimitry Andric return (AllocationType)AllocTypes; 74606c3fb27SDimitry Andric } 74706c3fb27SDimitry Andric 74806c3fb27SDimitry Andric // Helper to check if the alloc types for all edges recorded in the 74906c3fb27SDimitry Andric // InAllocTypes vector match the alloc types for all edges in the Edges 75006c3fb27SDimitry Andric // vector. 75106c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 75206c3fb27SDimitry Andric bool allocTypesMatch( 75306c3fb27SDimitry Andric const std::vector<uint8_t> &InAllocTypes, 75406c3fb27SDimitry Andric const std::vector<std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>> 75506c3fb27SDimitry Andric &Edges) { 75606c3fb27SDimitry Andric return std::equal( 75706c3fb27SDimitry Andric InAllocTypes.begin(), InAllocTypes.end(), Edges.begin(), 75806c3fb27SDimitry Andric [](const uint8_t &l, 75906c3fb27SDimitry Andric const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &r) { 76006c3fb27SDimitry Andric // Can share if one of the edges is None type - don't 76106c3fb27SDimitry Andric // care about the type along that edge as it doesn't 76206c3fb27SDimitry Andric // exist for those context ids. 76306c3fb27SDimitry Andric if (l == (uint8_t)AllocationType::None || 76406c3fb27SDimitry Andric r->AllocTypes == (uint8_t)AllocationType::None) 76506c3fb27SDimitry Andric return true; 76606c3fb27SDimitry Andric return allocTypeToUse(l) == allocTypeToUse(r->AllocTypes); 76706c3fb27SDimitry Andric }); 76806c3fb27SDimitry Andric } 76906c3fb27SDimitry Andric 77006c3fb27SDimitry Andric } // end anonymous namespace 77106c3fb27SDimitry Andric 77206c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 77306c3fb27SDimitry Andric typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode * 77406c3fb27SDimitry Andric CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForInst( 77506c3fb27SDimitry Andric const CallInfo &C) { 77606c3fb27SDimitry Andric ContextNode *Node = getNodeForAlloc(C); 77706c3fb27SDimitry Andric if (Node) 77806c3fb27SDimitry Andric return Node; 77906c3fb27SDimitry Andric 78006c3fb27SDimitry Andric return NonAllocationCallToContextNodeMap.lookup(C); 78106c3fb27SDimitry Andric } 78206c3fb27SDimitry Andric 78306c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 78406c3fb27SDimitry Andric typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode * 78506c3fb27SDimitry Andric CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForAlloc( 78606c3fb27SDimitry Andric const CallInfo &C) { 78706c3fb27SDimitry Andric return AllocationCallToContextNodeMap.lookup(C); 78806c3fb27SDimitry Andric } 78906c3fb27SDimitry Andric 79006c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 79106c3fb27SDimitry Andric typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode * 79206c3fb27SDimitry Andric CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForStackId( 79306c3fb27SDimitry Andric uint64_t StackId) { 79406c3fb27SDimitry Andric auto StackEntryNode = StackEntryIdToContextNodeMap.find(StackId); 79506c3fb27SDimitry Andric if (StackEntryNode != StackEntryIdToContextNodeMap.end()) 79606c3fb27SDimitry Andric return StackEntryNode->second; 79706c3fb27SDimitry Andric return nullptr; 79806c3fb27SDimitry Andric } 79906c3fb27SDimitry Andric 80006c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 80106c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::unsetNodeForInst( 80206c3fb27SDimitry Andric const CallInfo &C) { 80306c3fb27SDimitry Andric AllocationCallToContextNodeMap.erase(C) || 80406c3fb27SDimitry Andric NonAllocationCallToContextNodeMap.erase(C); 80506c3fb27SDimitry Andric assert(!AllocationCallToContextNodeMap.count(C) && 80606c3fb27SDimitry Andric !NonAllocationCallToContextNodeMap.count(C)); 80706c3fb27SDimitry Andric } 80806c3fb27SDimitry Andric 80906c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 81006c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode:: 81106c3fb27SDimitry Andric addOrUpdateCallerEdge(ContextNode *Caller, AllocationType AllocType, 81206c3fb27SDimitry Andric unsigned int ContextId) { 81306c3fb27SDimitry Andric for (auto &Edge : CallerEdges) { 81406c3fb27SDimitry Andric if (Edge->Caller == Caller) { 81506c3fb27SDimitry Andric Edge->AllocTypes |= (uint8_t)AllocType; 81606c3fb27SDimitry Andric Edge->getContextIds().insert(ContextId); 81706c3fb27SDimitry Andric return; 81806c3fb27SDimitry Andric } 81906c3fb27SDimitry Andric } 82006c3fb27SDimitry Andric std::shared_ptr<ContextEdge> Edge = std::make_shared<ContextEdge>( 82106c3fb27SDimitry Andric this, Caller, (uint8_t)AllocType, DenseSet<uint32_t>({ContextId})); 82206c3fb27SDimitry Andric CallerEdges.push_back(Edge); 82306c3fb27SDimitry Andric Caller->CalleeEdges.push_back(Edge); 82406c3fb27SDimitry Andric } 82506c3fb27SDimitry Andric 82606c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 82706c3fb27SDimitry Andric void CallsiteContextGraph< 82806c3fb27SDimitry Andric DerivedCCG, FuncTy, CallTy>::removeNoneTypeCalleeEdges(ContextNode *Node) { 82906c3fb27SDimitry Andric for (auto EI = Node->CalleeEdges.begin(); EI != Node->CalleeEdges.end();) { 83006c3fb27SDimitry Andric auto Edge = *EI; 83106c3fb27SDimitry Andric if (Edge->AllocTypes == (uint8_t)AllocationType::None) { 83206c3fb27SDimitry Andric assert(Edge->ContextIds.empty()); 83306c3fb27SDimitry Andric Edge->Callee->eraseCallerEdge(Edge.get()); 83406c3fb27SDimitry Andric EI = Node->CalleeEdges.erase(EI); 83506c3fb27SDimitry Andric } else 83606c3fb27SDimitry Andric ++EI; 83706c3fb27SDimitry Andric } 83806c3fb27SDimitry Andric } 83906c3fb27SDimitry Andric 84006c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 84106c3fb27SDimitry Andric typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge * 84206c3fb27SDimitry Andric CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode:: 84306c3fb27SDimitry Andric findEdgeFromCallee(const ContextNode *Callee) { 84406c3fb27SDimitry Andric for (const auto &Edge : CalleeEdges) 84506c3fb27SDimitry Andric if (Edge->Callee == Callee) 84606c3fb27SDimitry Andric return Edge.get(); 84706c3fb27SDimitry Andric return nullptr; 84806c3fb27SDimitry Andric } 84906c3fb27SDimitry Andric 85006c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 85106c3fb27SDimitry Andric typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge * 85206c3fb27SDimitry Andric CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode:: 85306c3fb27SDimitry Andric findEdgeFromCaller(const ContextNode *Caller) { 85406c3fb27SDimitry Andric for (const auto &Edge : CallerEdges) 85506c3fb27SDimitry Andric if (Edge->Caller == Caller) 85606c3fb27SDimitry Andric return Edge.get(); 85706c3fb27SDimitry Andric return nullptr; 85806c3fb27SDimitry Andric } 85906c3fb27SDimitry Andric 86006c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 86106c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode:: 86206c3fb27SDimitry Andric eraseCalleeEdge(const ContextEdge *Edge) { 8635f757f3fSDimitry Andric auto EI = llvm::find_if( 8645f757f3fSDimitry Andric CalleeEdges, [Edge](const std::shared_ptr<ContextEdge> &CalleeEdge) { 86506c3fb27SDimitry Andric return CalleeEdge.get() == Edge; 86606c3fb27SDimitry Andric }); 86706c3fb27SDimitry Andric assert(EI != CalleeEdges.end()); 86806c3fb27SDimitry Andric CalleeEdges.erase(EI); 86906c3fb27SDimitry Andric } 87006c3fb27SDimitry Andric 87106c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 87206c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode:: 87306c3fb27SDimitry Andric eraseCallerEdge(const ContextEdge *Edge) { 8745f757f3fSDimitry Andric auto EI = llvm::find_if( 8755f757f3fSDimitry Andric CallerEdges, [Edge](const std::shared_ptr<ContextEdge> &CallerEdge) { 87606c3fb27SDimitry Andric return CallerEdge.get() == Edge; 87706c3fb27SDimitry Andric }); 87806c3fb27SDimitry Andric assert(EI != CallerEdges.end()); 87906c3fb27SDimitry Andric CallerEdges.erase(EI); 88006c3fb27SDimitry Andric } 88106c3fb27SDimitry Andric 88206c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 88306c3fb27SDimitry Andric uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::computeAllocType( 88406c3fb27SDimitry Andric DenseSet<uint32_t> &ContextIds) { 88506c3fb27SDimitry Andric uint8_t BothTypes = 88606c3fb27SDimitry Andric (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold; 88706c3fb27SDimitry Andric uint8_t AllocType = (uint8_t)AllocationType::None; 88806c3fb27SDimitry Andric for (auto Id : ContextIds) { 88906c3fb27SDimitry Andric AllocType |= (uint8_t)ContextIdToAllocationType[Id]; 89006c3fb27SDimitry Andric // Bail early if alloc type reached both, no further refinement. 89106c3fb27SDimitry Andric if (AllocType == BothTypes) 89206c3fb27SDimitry Andric return AllocType; 89306c3fb27SDimitry Andric } 89406c3fb27SDimitry Andric return AllocType; 89506c3fb27SDimitry Andric } 89606c3fb27SDimitry Andric 89706c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 89806c3fb27SDimitry Andric uint8_t 89906c3fb27SDimitry Andric CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypesImpl( 90006c3fb27SDimitry Andric const DenseSet<uint32_t> &Node1Ids, const DenseSet<uint32_t> &Node2Ids) { 90106c3fb27SDimitry Andric uint8_t BothTypes = 90206c3fb27SDimitry Andric (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold; 90306c3fb27SDimitry Andric uint8_t AllocType = (uint8_t)AllocationType::None; 90406c3fb27SDimitry Andric for (auto Id : Node1Ids) { 90506c3fb27SDimitry Andric if (!Node2Ids.count(Id)) 90606c3fb27SDimitry Andric continue; 90706c3fb27SDimitry Andric AllocType |= (uint8_t)ContextIdToAllocationType[Id]; 90806c3fb27SDimitry Andric // Bail early if alloc type reached both, no further refinement. 90906c3fb27SDimitry Andric if (AllocType == BothTypes) 91006c3fb27SDimitry Andric return AllocType; 91106c3fb27SDimitry Andric } 91206c3fb27SDimitry Andric return AllocType; 91306c3fb27SDimitry Andric } 91406c3fb27SDimitry Andric 91506c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 91606c3fb27SDimitry Andric uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypes( 91706c3fb27SDimitry Andric const DenseSet<uint32_t> &Node1Ids, const DenseSet<uint32_t> &Node2Ids) { 91806c3fb27SDimitry Andric if (Node1Ids.size() < Node2Ids.size()) 91906c3fb27SDimitry Andric return intersectAllocTypesImpl(Node1Ids, Node2Ids); 92006c3fb27SDimitry Andric else 92106c3fb27SDimitry Andric return intersectAllocTypesImpl(Node2Ids, Node1Ids); 92206c3fb27SDimitry Andric } 92306c3fb27SDimitry Andric 92406c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 92506c3fb27SDimitry Andric typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode * 92606c3fb27SDimitry Andric CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addAllocNode( 92706c3fb27SDimitry Andric CallInfo Call, const FuncTy *F) { 92806c3fb27SDimitry Andric assert(!getNodeForAlloc(Call)); 92906c3fb27SDimitry Andric NodeOwner.push_back( 93006c3fb27SDimitry Andric std::make_unique<ContextNode>(/*IsAllocation=*/true, Call)); 93106c3fb27SDimitry Andric ContextNode *AllocNode = NodeOwner.back().get(); 93206c3fb27SDimitry Andric AllocationCallToContextNodeMap[Call] = AllocNode; 93306c3fb27SDimitry Andric NodeToCallingFunc[AllocNode] = F; 93406c3fb27SDimitry Andric // Use LastContextId as a uniq id for MIB allocation nodes. 93506c3fb27SDimitry Andric AllocNode->OrigStackOrAllocId = LastContextId; 93606c3fb27SDimitry Andric // Alloc type should be updated as we add in the MIBs. We should assert 93706c3fb27SDimitry Andric // afterwards that it is not still None. 93806c3fb27SDimitry Andric AllocNode->AllocTypes = (uint8_t)AllocationType::None; 93906c3fb27SDimitry Andric 94006c3fb27SDimitry Andric return AllocNode; 94106c3fb27SDimitry Andric } 94206c3fb27SDimitry Andric 94306c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 94406c3fb27SDimitry Andric template <class NodeT, class IteratorT> 94506c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addStackNodesForMIB( 94606c3fb27SDimitry Andric ContextNode *AllocNode, CallStack<NodeT, IteratorT> &StackContext, 94706c3fb27SDimitry Andric CallStack<NodeT, IteratorT> &CallsiteContext, AllocationType AllocType) { 94806c3fb27SDimitry Andric // Treating the hot alloc type as NotCold before the disambiguation for "hot" 94906c3fb27SDimitry Andric // is done. 95006c3fb27SDimitry Andric if (AllocType == AllocationType::Hot) 95106c3fb27SDimitry Andric AllocType = AllocationType::NotCold; 95206c3fb27SDimitry Andric 95306c3fb27SDimitry Andric ContextIdToAllocationType[++LastContextId] = AllocType; 95406c3fb27SDimitry Andric 95506c3fb27SDimitry Andric // Update alloc type and context ids for this MIB. 95606c3fb27SDimitry Andric AllocNode->AllocTypes |= (uint8_t)AllocType; 95706c3fb27SDimitry Andric AllocNode->ContextIds.insert(LastContextId); 95806c3fb27SDimitry Andric 95906c3fb27SDimitry Andric // Now add or update nodes for each stack id in alloc's context. 96006c3fb27SDimitry Andric // Later when processing the stack ids on non-alloc callsites we will adjust 96106c3fb27SDimitry Andric // for any inlining in the context. 96206c3fb27SDimitry Andric ContextNode *PrevNode = AllocNode; 96306c3fb27SDimitry Andric // Look for recursion (direct recursion should have been collapsed by 96406c3fb27SDimitry Andric // module summary analysis, here we should just be detecting mutual 96506c3fb27SDimitry Andric // recursion). Mark these nodes so we don't try to clone. 96606c3fb27SDimitry Andric SmallSet<uint64_t, 8> StackIdSet; 96706c3fb27SDimitry Andric // Skip any on the allocation call (inlining). 96806c3fb27SDimitry Andric for (auto ContextIter = StackContext.beginAfterSharedPrefix(CallsiteContext); 96906c3fb27SDimitry Andric ContextIter != StackContext.end(); ++ContextIter) { 97006c3fb27SDimitry Andric auto StackId = getStackId(*ContextIter); 97106c3fb27SDimitry Andric ContextNode *StackNode = getNodeForStackId(StackId); 97206c3fb27SDimitry Andric if (!StackNode) { 97306c3fb27SDimitry Andric NodeOwner.push_back( 97406c3fb27SDimitry Andric std::make_unique<ContextNode>(/*IsAllocation=*/false)); 97506c3fb27SDimitry Andric StackNode = NodeOwner.back().get(); 97606c3fb27SDimitry Andric StackEntryIdToContextNodeMap[StackId] = StackNode; 97706c3fb27SDimitry Andric StackNode->OrigStackOrAllocId = StackId; 97806c3fb27SDimitry Andric } 97906c3fb27SDimitry Andric auto Ins = StackIdSet.insert(StackId); 98006c3fb27SDimitry Andric if (!Ins.second) 98106c3fb27SDimitry Andric StackNode->Recursive = true; 98206c3fb27SDimitry Andric StackNode->ContextIds.insert(LastContextId); 98306c3fb27SDimitry Andric StackNode->AllocTypes |= (uint8_t)AllocType; 98406c3fb27SDimitry Andric PrevNode->addOrUpdateCallerEdge(StackNode, AllocType, LastContextId); 98506c3fb27SDimitry Andric PrevNode = StackNode; 98606c3fb27SDimitry Andric } 98706c3fb27SDimitry Andric } 98806c3fb27SDimitry Andric 98906c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 99006c3fb27SDimitry Andric DenseSet<uint32_t> 99106c3fb27SDimitry Andric CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::duplicateContextIds( 99206c3fb27SDimitry Andric const DenseSet<uint32_t> &StackSequenceContextIds, 99306c3fb27SDimitry Andric DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds) { 99406c3fb27SDimitry Andric DenseSet<uint32_t> NewContextIds; 99506c3fb27SDimitry Andric for (auto OldId : StackSequenceContextIds) { 99606c3fb27SDimitry Andric NewContextIds.insert(++LastContextId); 99706c3fb27SDimitry Andric OldToNewContextIds[OldId].insert(LastContextId); 99806c3fb27SDimitry Andric assert(ContextIdToAllocationType.count(OldId)); 99906c3fb27SDimitry Andric // The new context has the same allocation type as original. 100006c3fb27SDimitry Andric ContextIdToAllocationType[LastContextId] = ContextIdToAllocationType[OldId]; 100106c3fb27SDimitry Andric } 100206c3fb27SDimitry Andric return NewContextIds; 100306c3fb27SDimitry Andric } 100406c3fb27SDimitry Andric 100506c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 100606c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>:: 100706c3fb27SDimitry Andric propagateDuplicateContextIds( 100806c3fb27SDimitry Andric const DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds) { 100906c3fb27SDimitry Andric // Build a set of duplicated context ids corresponding to the input id set. 101006c3fb27SDimitry Andric auto GetNewIds = [&OldToNewContextIds](const DenseSet<uint32_t> &ContextIds) { 101106c3fb27SDimitry Andric DenseSet<uint32_t> NewIds; 101206c3fb27SDimitry Andric for (auto Id : ContextIds) 101306c3fb27SDimitry Andric if (auto NewId = OldToNewContextIds.find(Id); 101406c3fb27SDimitry Andric NewId != OldToNewContextIds.end()) 101506c3fb27SDimitry Andric NewIds.insert(NewId->second.begin(), NewId->second.end()); 101606c3fb27SDimitry Andric return NewIds; 101706c3fb27SDimitry Andric }; 101806c3fb27SDimitry Andric 101906c3fb27SDimitry Andric // Recursively update context ids sets along caller edges. 102006c3fb27SDimitry Andric auto UpdateCallers = [&](ContextNode *Node, 102106c3fb27SDimitry Andric DenseSet<const ContextEdge *> &Visited, 102206c3fb27SDimitry Andric auto &&UpdateCallers) -> void { 102306c3fb27SDimitry Andric for (const auto &Edge : Node->CallerEdges) { 102406c3fb27SDimitry Andric auto Inserted = Visited.insert(Edge.get()); 102506c3fb27SDimitry Andric if (!Inserted.second) 102606c3fb27SDimitry Andric continue; 102706c3fb27SDimitry Andric ContextNode *NextNode = Edge->Caller; 102806c3fb27SDimitry Andric DenseSet<uint32_t> NewIdsToAdd = GetNewIds(Edge->getContextIds()); 102906c3fb27SDimitry Andric // Only need to recursively iterate to NextNode via this caller edge if 103006c3fb27SDimitry Andric // it resulted in any added ids to NextNode. 103106c3fb27SDimitry Andric if (!NewIdsToAdd.empty()) { 103206c3fb27SDimitry Andric Edge->getContextIds().insert(NewIdsToAdd.begin(), NewIdsToAdd.end()); 103306c3fb27SDimitry Andric NextNode->ContextIds.insert(NewIdsToAdd.begin(), NewIdsToAdd.end()); 103406c3fb27SDimitry Andric UpdateCallers(NextNode, Visited, UpdateCallers); 103506c3fb27SDimitry Andric } 103606c3fb27SDimitry Andric } 103706c3fb27SDimitry Andric }; 103806c3fb27SDimitry Andric 103906c3fb27SDimitry Andric DenseSet<const ContextEdge *> Visited; 104006c3fb27SDimitry Andric for (auto &Entry : AllocationCallToContextNodeMap) { 104106c3fb27SDimitry Andric auto *Node = Entry.second; 104206c3fb27SDimitry Andric // Update ids on the allocation nodes before calling the recursive 104306c3fb27SDimitry Andric // update along caller edges, since this simplifies the logic during 104406c3fb27SDimitry Andric // that traversal. 104506c3fb27SDimitry Andric DenseSet<uint32_t> NewIdsToAdd = GetNewIds(Node->ContextIds); 104606c3fb27SDimitry Andric Node->ContextIds.insert(NewIdsToAdd.begin(), NewIdsToAdd.end()); 104706c3fb27SDimitry Andric UpdateCallers(Node, Visited, UpdateCallers); 104806c3fb27SDimitry Andric } 104906c3fb27SDimitry Andric } 105006c3fb27SDimitry Andric 105106c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 105206c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::connectNewNode( 105306c3fb27SDimitry Andric ContextNode *NewNode, ContextNode *OrigNode, bool TowardsCallee) { 105406c3fb27SDimitry Andric // Make a copy of the context ids, since this will be adjusted below as they 105506c3fb27SDimitry Andric // are moved. 105606c3fb27SDimitry Andric DenseSet<uint32_t> RemainingContextIds = NewNode->ContextIds; 105706c3fb27SDimitry Andric auto &OrigEdges = 105806c3fb27SDimitry Andric TowardsCallee ? OrigNode->CalleeEdges : OrigNode->CallerEdges; 105906c3fb27SDimitry Andric // Increment iterator in loop so that we can remove edges as needed. 106006c3fb27SDimitry Andric for (auto EI = OrigEdges.begin(); EI != OrigEdges.end();) { 106106c3fb27SDimitry Andric auto Edge = *EI; 106206c3fb27SDimitry Andric // Remove any matching context ids from Edge, return set that were found and 106306c3fb27SDimitry Andric // removed, these are the new edge's context ids. Also update the remaining 106406c3fb27SDimitry Andric // (not found ids). 106506c3fb27SDimitry Andric DenseSet<uint32_t> NewEdgeContextIds, NotFoundContextIds; 106606c3fb27SDimitry Andric set_subtract(Edge->getContextIds(), RemainingContextIds, NewEdgeContextIds, 106706c3fb27SDimitry Andric NotFoundContextIds); 106806c3fb27SDimitry Andric RemainingContextIds.swap(NotFoundContextIds); 106906c3fb27SDimitry Andric // If no matching context ids for this edge, skip it. 107006c3fb27SDimitry Andric if (NewEdgeContextIds.empty()) { 107106c3fb27SDimitry Andric ++EI; 107206c3fb27SDimitry Andric continue; 107306c3fb27SDimitry Andric } 107406c3fb27SDimitry Andric if (TowardsCallee) { 107506c3fb27SDimitry Andric auto NewEdge = std::make_shared<ContextEdge>( 107606c3fb27SDimitry Andric Edge->Callee, NewNode, computeAllocType(NewEdgeContextIds), 107706c3fb27SDimitry Andric NewEdgeContextIds); 107806c3fb27SDimitry Andric NewNode->CalleeEdges.push_back(NewEdge); 107906c3fb27SDimitry Andric NewEdge->Callee->CallerEdges.push_back(NewEdge); 108006c3fb27SDimitry Andric } else { 108106c3fb27SDimitry Andric auto NewEdge = std::make_shared<ContextEdge>( 108206c3fb27SDimitry Andric NewNode, Edge->Caller, computeAllocType(NewEdgeContextIds), 108306c3fb27SDimitry Andric NewEdgeContextIds); 108406c3fb27SDimitry Andric NewNode->CallerEdges.push_back(NewEdge); 108506c3fb27SDimitry Andric NewEdge->Caller->CalleeEdges.push_back(NewEdge); 108606c3fb27SDimitry Andric } 108706c3fb27SDimitry Andric // Remove old edge if context ids empty. 108806c3fb27SDimitry Andric if (Edge->getContextIds().empty()) { 108906c3fb27SDimitry Andric if (TowardsCallee) { 109006c3fb27SDimitry Andric Edge->Callee->eraseCallerEdge(Edge.get()); 109106c3fb27SDimitry Andric EI = OrigNode->CalleeEdges.erase(EI); 109206c3fb27SDimitry Andric } else { 109306c3fb27SDimitry Andric Edge->Caller->eraseCalleeEdge(Edge.get()); 109406c3fb27SDimitry Andric EI = OrigNode->CallerEdges.erase(EI); 109506c3fb27SDimitry Andric } 109606c3fb27SDimitry Andric continue; 109706c3fb27SDimitry Andric } 109806c3fb27SDimitry Andric ++EI; 109906c3fb27SDimitry Andric } 110006c3fb27SDimitry Andric } 110106c3fb27SDimitry Andric 110206c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 110306c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>:: 110406c3fb27SDimitry Andric assignStackNodesPostOrder(ContextNode *Node, 110506c3fb27SDimitry Andric DenseSet<const ContextNode *> &Visited, 110606c3fb27SDimitry Andric DenseMap<uint64_t, std::vector<CallContextInfo>> 110706c3fb27SDimitry Andric &StackIdToMatchingCalls) { 110806c3fb27SDimitry Andric auto Inserted = Visited.insert(Node); 110906c3fb27SDimitry Andric if (!Inserted.second) 111006c3fb27SDimitry Andric return; 111106c3fb27SDimitry Andric // Post order traversal. Iterate over a copy since we may add nodes and 111206c3fb27SDimitry Andric // therefore new callers during the recursive call, invalidating any 111306c3fb27SDimitry Andric // iterator over the original edge vector. We don't need to process these 111406c3fb27SDimitry Andric // new nodes as they were already processed on creation. 111506c3fb27SDimitry Andric auto CallerEdges = Node->CallerEdges; 111606c3fb27SDimitry Andric for (auto &Edge : CallerEdges) { 111706c3fb27SDimitry Andric // Skip any that have been removed during the recursion. 111806c3fb27SDimitry Andric if (!Edge) 111906c3fb27SDimitry Andric continue; 112006c3fb27SDimitry Andric assignStackNodesPostOrder(Edge->Caller, Visited, StackIdToMatchingCalls); 112106c3fb27SDimitry Andric } 112206c3fb27SDimitry Andric 112306c3fb27SDimitry Andric // If this node's stack id is in the map, update the graph to contain new 112406c3fb27SDimitry Andric // nodes representing any inlining at interior callsites. Note we move the 112506c3fb27SDimitry Andric // associated context ids over to the new nodes. 112606c3fb27SDimitry Andric 112706c3fb27SDimitry Andric // Ignore this node if it is for an allocation or we didn't record any 112806c3fb27SDimitry Andric // stack id lists ending at it. 112906c3fb27SDimitry Andric if (Node->IsAllocation || 113006c3fb27SDimitry Andric !StackIdToMatchingCalls.count(Node->OrigStackOrAllocId)) 113106c3fb27SDimitry Andric return; 113206c3fb27SDimitry Andric 113306c3fb27SDimitry Andric auto &Calls = StackIdToMatchingCalls[Node->OrigStackOrAllocId]; 113406c3fb27SDimitry Andric // Handle the simple case first. A single call with a single stack id. 113506c3fb27SDimitry Andric // In this case there is no need to create any new context nodes, simply 113606c3fb27SDimitry Andric // assign the context node for stack id to this Call. 113706c3fb27SDimitry Andric if (Calls.size() == 1) { 113806c3fb27SDimitry Andric auto &[Call, Ids, Func, SavedContextIds] = Calls[0]; 113906c3fb27SDimitry Andric if (Ids.size() == 1) { 114006c3fb27SDimitry Andric assert(SavedContextIds.empty()); 114106c3fb27SDimitry Andric // It should be this Node 114206c3fb27SDimitry Andric assert(Node == getNodeForStackId(Ids[0])); 114306c3fb27SDimitry Andric if (Node->Recursive) 114406c3fb27SDimitry Andric return; 114506c3fb27SDimitry Andric Node->setCall(Call); 114606c3fb27SDimitry Andric NonAllocationCallToContextNodeMap[Call] = Node; 114706c3fb27SDimitry Andric NodeToCallingFunc[Node] = Func; 114806c3fb27SDimitry Andric return; 114906c3fb27SDimitry Andric } 115006c3fb27SDimitry Andric } 115106c3fb27SDimitry Andric 115206c3fb27SDimitry Andric // Find the node for the last stack id, which should be the same 115306c3fb27SDimitry Andric // across all calls recorded for this id, and is this node's id. 115406c3fb27SDimitry Andric uint64_t LastId = Node->OrigStackOrAllocId; 115506c3fb27SDimitry Andric ContextNode *LastNode = getNodeForStackId(LastId); 115606c3fb27SDimitry Andric // We should only have kept stack ids that had nodes. 115706c3fb27SDimitry Andric assert(LastNode); 115806c3fb27SDimitry Andric 115906c3fb27SDimitry Andric for (unsigned I = 0; I < Calls.size(); I++) { 116006c3fb27SDimitry Andric auto &[Call, Ids, Func, SavedContextIds] = Calls[I]; 116106c3fb27SDimitry Andric // Skip any for which we didn't assign any ids, these don't get a node in 116206c3fb27SDimitry Andric // the graph. 116306c3fb27SDimitry Andric if (SavedContextIds.empty()) 116406c3fb27SDimitry Andric continue; 116506c3fb27SDimitry Andric 116606c3fb27SDimitry Andric assert(LastId == Ids.back()); 116706c3fb27SDimitry Andric 116806c3fb27SDimitry Andric ContextNode *FirstNode = getNodeForStackId(Ids[0]); 116906c3fb27SDimitry Andric assert(FirstNode); 117006c3fb27SDimitry Andric 117106c3fb27SDimitry Andric // Recompute the context ids for this stack id sequence (the 117206c3fb27SDimitry Andric // intersection of the context ids of the corresponding nodes). 117306c3fb27SDimitry Andric // Start with the ids we saved in the map for this call, which could be 117406c3fb27SDimitry Andric // duplicated context ids. We have to recompute as we might have overlap 117506c3fb27SDimitry Andric // overlap between the saved context ids for different last nodes, and 117606c3fb27SDimitry Andric // removed them already during the post order traversal. 117706c3fb27SDimitry Andric set_intersect(SavedContextIds, FirstNode->ContextIds); 117806c3fb27SDimitry Andric ContextNode *PrevNode = nullptr; 117906c3fb27SDimitry Andric for (auto Id : Ids) { 118006c3fb27SDimitry Andric ContextNode *CurNode = getNodeForStackId(Id); 118106c3fb27SDimitry Andric // We should only have kept stack ids that had nodes and weren't 118206c3fb27SDimitry Andric // recursive. 118306c3fb27SDimitry Andric assert(CurNode); 118406c3fb27SDimitry Andric assert(!CurNode->Recursive); 118506c3fb27SDimitry Andric if (!PrevNode) { 118606c3fb27SDimitry Andric PrevNode = CurNode; 118706c3fb27SDimitry Andric continue; 118806c3fb27SDimitry Andric } 118906c3fb27SDimitry Andric auto *Edge = CurNode->findEdgeFromCallee(PrevNode); 119006c3fb27SDimitry Andric if (!Edge) { 119106c3fb27SDimitry Andric SavedContextIds.clear(); 119206c3fb27SDimitry Andric break; 119306c3fb27SDimitry Andric } 119406c3fb27SDimitry Andric PrevNode = CurNode; 119506c3fb27SDimitry Andric set_intersect(SavedContextIds, Edge->getContextIds()); 119606c3fb27SDimitry Andric 119706c3fb27SDimitry Andric // If we now have no context ids for clone, skip this call. 119806c3fb27SDimitry Andric if (SavedContextIds.empty()) 119906c3fb27SDimitry Andric break; 120006c3fb27SDimitry Andric } 120106c3fb27SDimitry Andric if (SavedContextIds.empty()) 120206c3fb27SDimitry Andric continue; 120306c3fb27SDimitry Andric 120406c3fb27SDimitry Andric // Create new context node. 120506c3fb27SDimitry Andric NodeOwner.push_back( 120606c3fb27SDimitry Andric std::make_unique<ContextNode>(/*IsAllocation=*/false, Call)); 120706c3fb27SDimitry Andric ContextNode *NewNode = NodeOwner.back().get(); 120806c3fb27SDimitry Andric NodeToCallingFunc[NewNode] = Func; 120906c3fb27SDimitry Andric NonAllocationCallToContextNodeMap[Call] = NewNode; 121006c3fb27SDimitry Andric NewNode->ContextIds = SavedContextIds; 121106c3fb27SDimitry Andric NewNode->AllocTypes = computeAllocType(NewNode->ContextIds); 121206c3fb27SDimitry Andric 121306c3fb27SDimitry Andric // Connect to callees of innermost stack frame in inlined call chain. 121406c3fb27SDimitry Andric // This updates context ids for FirstNode's callee's to reflect those 121506c3fb27SDimitry Andric // moved to NewNode. 121606c3fb27SDimitry Andric connectNewNode(NewNode, FirstNode, /*TowardsCallee=*/true); 121706c3fb27SDimitry Andric 121806c3fb27SDimitry Andric // Connect to callers of outermost stack frame in inlined call chain. 121906c3fb27SDimitry Andric // This updates context ids for FirstNode's caller's to reflect those 122006c3fb27SDimitry Andric // moved to NewNode. 122106c3fb27SDimitry Andric connectNewNode(NewNode, LastNode, /*TowardsCallee=*/false); 122206c3fb27SDimitry Andric 122306c3fb27SDimitry Andric // Now we need to remove context ids from edges/nodes between First and 122406c3fb27SDimitry Andric // Last Node. 122506c3fb27SDimitry Andric PrevNode = nullptr; 122606c3fb27SDimitry Andric for (auto Id : Ids) { 122706c3fb27SDimitry Andric ContextNode *CurNode = getNodeForStackId(Id); 122806c3fb27SDimitry Andric // We should only have kept stack ids that had nodes. 122906c3fb27SDimitry Andric assert(CurNode); 123006c3fb27SDimitry Andric 123106c3fb27SDimitry Andric // Remove the context ids moved to NewNode from CurNode, and the 123206c3fb27SDimitry Andric // edge from the prior node. 123306c3fb27SDimitry Andric set_subtract(CurNode->ContextIds, NewNode->ContextIds); 123406c3fb27SDimitry Andric if (PrevNode) { 123506c3fb27SDimitry Andric auto *PrevEdge = CurNode->findEdgeFromCallee(PrevNode); 123606c3fb27SDimitry Andric assert(PrevEdge); 123706c3fb27SDimitry Andric set_subtract(PrevEdge->getContextIds(), NewNode->ContextIds); 123806c3fb27SDimitry Andric if (PrevEdge->getContextIds().empty()) { 123906c3fb27SDimitry Andric PrevNode->eraseCallerEdge(PrevEdge); 124006c3fb27SDimitry Andric CurNode->eraseCalleeEdge(PrevEdge); 124106c3fb27SDimitry Andric } 124206c3fb27SDimitry Andric } 124306c3fb27SDimitry Andric PrevNode = CurNode; 124406c3fb27SDimitry Andric } 124506c3fb27SDimitry Andric } 124606c3fb27SDimitry Andric } 124706c3fb27SDimitry Andric 124806c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 124906c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() { 125006c3fb27SDimitry Andric // Map of stack id to all calls with that as the last (outermost caller) 125106c3fb27SDimitry Andric // callsite id that has a context node (some might not due to pruning 125206c3fb27SDimitry Andric // performed during matching of the allocation profile contexts). 125306c3fb27SDimitry Andric // The CallContextInfo contains the Call and a list of its stack ids with 125406c3fb27SDimitry Andric // ContextNodes, the function containing Call, and the set of context ids 125506c3fb27SDimitry Andric // the analysis will eventually identify for use in any new node created 125606c3fb27SDimitry Andric // for that callsite. 125706c3fb27SDimitry Andric DenseMap<uint64_t, std::vector<CallContextInfo>> StackIdToMatchingCalls; 125806c3fb27SDimitry Andric for (auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) { 125906c3fb27SDimitry Andric for (auto &Call : CallsWithMetadata) { 126006c3fb27SDimitry Andric // Ignore allocations, already handled. 126106c3fb27SDimitry Andric if (AllocationCallToContextNodeMap.count(Call)) 126206c3fb27SDimitry Andric continue; 126306c3fb27SDimitry Andric auto StackIdsWithContextNodes = 126406c3fb27SDimitry Andric getStackIdsWithContextNodesForCall(Call.call()); 126506c3fb27SDimitry Andric // If there were no nodes created for MIBs on allocs (maybe this was in 126606c3fb27SDimitry Andric // the unambiguous part of the MIB stack that was pruned), ignore. 126706c3fb27SDimitry Andric if (StackIdsWithContextNodes.empty()) 126806c3fb27SDimitry Andric continue; 126906c3fb27SDimitry Andric // Otherwise, record this Call along with the list of ids for the last 127006c3fb27SDimitry Andric // (outermost caller) stack id with a node. 127106c3fb27SDimitry Andric StackIdToMatchingCalls[StackIdsWithContextNodes.back()].push_back( 127206c3fb27SDimitry Andric {Call.call(), StackIdsWithContextNodes, Func, {}}); 127306c3fb27SDimitry Andric } 127406c3fb27SDimitry Andric } 127506c3fb27SDimitry Andric 127606c3fb27SDimitry Andric // First make a pass through all stack ids that correspond to a call, 127706c3fb27SDimitry Andric // as identified in the above loop. Compute the context ids corresponding to 127806c3fb27SDimitry Andric // each of these calls when they correspond to multiple stack ids due to 127906c3fb27SDimitry Andric // due to inlining. Perform any duplication of context ids required when 128006c3fb27SDimitry Andric // there is more than one call with the same stack ids. Their (possibly newly 128106c3fb27SDimitry Andric // duplicated) context ids are saved in the StackIdToMatchingCalls map. 128206c3fb27SDimitry Andric DenseMap<uint32_t, DenseSet<uint32_t>> OldToNewContextIds; 128306c3fb27SDimitry Andric for (auto &It : StackIdToMatchingCalls) { 128406c3fb27SDimitry Andric auto &Calls = It.getSecond(); 128506c3fb27SDimitry Andric // Skip single calls with a single stack id. These don't need a new node. 128606c3fb27SDimitry Andric if (Calls.size() == 1) { 128706c3fb27SDimitry Andric auto &Ids = std::get<1>(Calls[0]); 128806c3fb27SDimitry Andric if (Ids.size() == 1) 128906c3fb27SDimitry Andric continue; 129006c3fb27SDimitry Andric } 129106c3fb27SDimitry Andric // In order to do the best and maximal matching of inlined calls to context 129206c3fb27SDimitry Andric // node sequences we will sort the vectors of stack ids in descending order 129306c3fb27SDimitry Andric // of length, and within each length, lexicographically by stack id. The 129406c3fb27SDimitry Andric // latter is so that we can specially handle calls that have identical stack 129506c3fb27SDimitry Andric // id sequences (either due to cloning or artificially because of the MIB 129606c3fb27SDimitry Andric // context pruning). 129706c3fb27SDimitry Andric std::stable_sort(Calls.begin(), Calls.end(), 129806c3fb27SDimitry Andric [](const CallContextInfo &A, const CallContextInfo &B) { 129906c3fb27SDimitry Andric auto &IdsA = std::get<1>(A); 130006c3fb27SDimitry Andric auto &IdsB = std::get<1>(B); 130106c3fb27SDimitry Andric return IdsA.size() > IdsB.size() || 130206c3fb27SDimitry Andric (IdsA.size() == IdsB.size() && IdsA < IdsB); 130306c3fb27SDimitry Andric }); 130406c3fb27SDimitry Andric 130506c3fb27SDimitry Andric // Find the node for the last stack id, which should be the same 130606c3fb27SDimitry Andric // across all calls recorded for this id, and is the id for this 130706c3fb27SDimitry Andric // entry in the StackIdToMatchingCalls map. 130806c3fb27SDimitry Andric uint64_t LastId = It.getFirst(); 130906c3fb27SDimitry Andric ContextNode *LastNode = getNodeForStackId(LastId); 131006c3fb27SDimitry Andric // We should only have kept stack ids that had nodes. 131106c3fb27SDimitry Andric assert(LastNode); 131206c3fb27SDimitry Andric 131306c3fb27SDimitry Andric if (LastNode->Recursive) 131406c3fb27SDimitry Andric continue; 131506c3fb27SDimitry Andric 131606c3fb27SDimitry Andric // Initialize the context ids with the last node's. We will subsequently 131706c3fb27SDimitry Andric // refine the context ids by computing the intersection along all edges. 131806c3fb27SDimitry Andric DenseSet<uint32_t> LastNodeContextIds = LastNode->ContextIds; 131906c3fb27SDimitry Andric assert(!LastNodeContextIds.empty()); 132006c3fb27SDimitry Andric 132106c3fb27SDimitry Andric for (unsigned I = 0; I < Calls.size(); I++) { 132206c3fb27SDimitry Andric auto &[Call, Ids, Func, SavedContextIds] = Calls[I]; 132306c3fb27SDimitry Andric assert(SavedContextIds.empty()); 132406c3fb27SDimitry Andric assert(LastId == Ids.back()); 132506c3fb27SDimitry Andric 132606c3fb27SDimitry Andric // First compute the context ids for this stack id sequence (the 132706c3fb27SDimitry Andric // intersection of the context ids of the corresponding nodes). 132806c3fb27SDimitry Andric // Start with the remaining saved ids for the last node. 132906c3fb27SDimitry Andric assert(!LastNodeContextIds.empty()); 133006c3fb27SDimitry Andric DenseSet<uint32_t> StackSequenceContextIds = LastNodeContextIds; 133106c3fb27SDimitry Andric 133206c3fb27SDimitry Andric ContextNode *PrevNode = LastNode; 133306c3fb27SDimitry Andric ContextNode *CurNode = LastNode; 133406c3fb27SDimitry Andric bool Skip = false; 133506c3fb27SDimitry Andric 133606c3fb27SDimitry Andric // Iterate backwards through the stack Ids, starting after the last Id 133706c3fb27SDimitry Andric // in the list, which was handled once outside for all Calls. 133806c3fb27SDimitry Andric for (auto IdIter = Ids.rbegin() + 1; IdIter != Ids.rend(); IdIter++) { 133906c3fb27SDimitry Andric auto Id = *IdIter; 134006c3fb27SDimitry Andric CurNode = getNodeForStackId(Id); 134106c3fb27SDimitry Andric // We should only have kept stack ids that had nodes. 134206c3fb27SDimitry Andric assert(CurNode); 134306c3fb27SDimitry Andric 134406c3fb27SDimitry Andric if (CurNode->Recursive) { 134506c3fb27SDimitry Andric Skip = true; 134606c3fb27SDimitry Andric break; 134706c3fb27SDimitry Andric } 134806c3fb27SDimitry Andric 134906c3fb27SDimitry Andric auto *Edge = CurNode->findEdgeFromCaller(PrevNode); 135006c3fb27SDimitry Andric // If there is no edge then the nodes belong to different MIB contexts, 135106c3fb27SDimitry Andric // and we should skip this inlined context sequence. For example, this 135206c3fb27SDimitry Andric // particular inlined context may include stack ids A->B, and we may 135306c3fb27SDimitry Andric // indeed have nodes for both A and B, but it is possible that they were 135406c3fb27SDimitry Andric // never profiled in sequence in a single MIB for any allocation (i.e. 135506c3fb27SDimitry Andric // we might have profiled an allocation that involves the callsite A, 135606c3fb27SDimitry Andric // but through a different one of its callee callsites, and we might 135706c3fb27SDimitry Andric // have profiled an allocation that involves callsite B, but reached 135806c3fb27SDimitry Andric // from a different caller callsite). 135906c3fb27SDimitry Andric if (!Edge) { 136006c3fb27SDimitry Andric Skip = true; 136106c3fb27SDimitry Andric break; 136206c3fb27SDimitry Andric } 136306c3fb27SDimitry Andric PrevNode = CurNode; 136406c3fb27SDimitry Andric 136506c3fb27SDimitry Andric // Update the context ids, which is the intersection of the ids along 136606c3fb27SDimitry Andric // all edges in the sequence. 136706c3fb27SDimitry Andric set_intersect(StackSequenceContextIds, Edge->getContextIds()); 136806c3fb27SDimitry Andric 136906c3fb27SDimitry Andric // If we now have no context ids for clone, skip this call. 137006c3fb27SDimitry Andric if (StackSequenceContextIds.empty()) { 137106c3fb27SDimitry Andric Skip = true; 137206c3fb27SDimitry Andric break; 137306c3fb27SDimitry Andric } 137406c3fb27SDimitry Andric } 137506c3fb27SDimitry Andric if (Skip) 137606c3fb27SDimitry Andric continue; 137706c3fb27SDimitry Andric 137806c3fb27SDimitry Andric // If some of this call's stack ids did not have corresponding nodes (due 137906c3fb27SDimitry Andric // to pruning), don't include any context ids for contexts that extend 138006c3fb27SDimitry Andric // beyond these nodes. Otherwise we would be matching part of unrelated / 138106c3fb27SDimitry Andric // not fully matching stack contexts. To do this, subtract any context ids 138206c3fb27SDimitry Andric // found in caller nodes of the last node found above. 138306c3fb27SDimitry Andric if (Ids.back() != getLastStackId(Call)) { 138406c3fb27SDimitry Andric for (const auto &PE : CurNode->CallerEdges) { 138506c3fb27SDimitry Andric set_subtract(StackSequenceContextIds, PE->getContextIds()); 138606c3fb27SDimitry Andric if (StackSequenceContextIds.empty()) 138706c3fb27SDimitry Andric break; 138806c3fb27SDimitry Andric } 138906c3fb27SDimitry Andric // If we now have no context ids for clone, skip this call. 139006c3fb27SDimitry Andric if (StackSequenceContextIds.empty()) 139106c3fb27SDimitry Andric continue; 139206c3fb27SDimitry Andric } 139306c3fb27SDimitry Andric 139406c3fb27SDimitry Andric // Check if the next set of stack ids is the same (since the Calls vector 139506c3fb27SDimitry Andric // of tuples is sorted by the stack ids we can just look at the next one). 139606c3fb27SDimitry Andric bool DuplicateContextIds = false; 139706c3fb27SDimitry Andric if (I + 1 < Calls.size()) { 139806c3fb27SDimitry Andric auto NextIds = std::get<1>(Calls[I + 1]); 139906c3fb27SDimitry Andric DuplicateContextIds = Ids == NextIds; 140006c3fb27SDimitry Andric } 140106c3fb27SDimitry Andric 140206c3fb27SDimitry Andric // If we don't have duplicate context ids, then we can assign all the 140306c3fb27SDimitry Andric // context ids computed for the original node sequence to this call. 140406c3fb27SDimitry Andric // If there are duplicate calls with the same stack ids then we synthesize 140506c3fb27SDimitry Andric // new context ids that are duplicates of the originals. These are 140606c3fb27SDimitry Andric // assigned to SavedContextIds, which is a reference into the map entry 140706c3fb27SDimitry Andric // for this call, allowing us to access these ids later on. 140806c3fb27SDimitry Andric OldToNewContextIds.reserve(OldToNewContextIds.size() + 140906c3fb27SDimitry Andric StackSequenceContextIds.size()); 141006c3fb27SDimitry Andric SavedContextIds = 141106c3fb27SDimitry Andric DuplicateContextIds 141206c3fb27SDimitry Andric ? duplicateContextIds(StackSequenceContextIds, OldToNewContextIds) 141306c3fb27SDimitry Andric : StackSequenceContextIds; 141406c3fb27SDimitry Andric assert(!SavedContextIds.empty()); 141506c3fb27SDimitry Andric 141606c3fb27SDimitry Andric if (!DuplicateContextIds) { 141706c3fb27SDimitry Andric // Update saved last node's context ids to remove those that are 141806c3fb27SDimitry Andric // assigned to other calls, so that it is ready for the next call at 141906c3fb27SDimitry Andric // this stack id. 142006c3fb27SDimitry Andric set_subtract(LastNodeContextIds, StackSequenceContextIds); 142106c3fb27SDimitry Andric if (LastNodeContextIds.empty()) 142206c3fb27SDimitry Andric break; 142306c3fb27SDimitry Andric } 142406c3fb27SDimitry Andric } 142506c3fb27SDimitry Andric } 142606c3fb27SDimitry Andric 142706c3fb27SDimitry Andric // Propagate the duplicate context ids over the graph. 142806c3fb27SDimitry Andric propagateDuplicateContextIds(OldToNewContextIds); 142906c3fb27SDimitry Andric 143006c3fb27SDimitry Andric if (VerifyCCG) 143106c3fb27SDimitry Andric check(); 143206c3fb27SDimitry Andric 143306c3fb27SDimitry Andric // Now perform a post-order traversal over the graph, starting with the 143406c3fb27SDimitry Andric // allocation nodes, essentially processing nodes from callers to callees. 143506c3fb27SDimitry Andric // For any that contains an id in the map, update the graph to contain new 143606c3fb27SDimitry Andric // nodes representing any inlining at interior callsites. Note we move the 143706c3fb27SDimitry Andric // associated context ids over to the new nodes. 143806c3fb27SDimitry Andric DenseSet<const ContextNode *> Visited; 143906c3fb27SDimitry Andric for (auto &Entry : AllocationCallToContextNodeMap) 144006c3fb27SDimitry Andric assignStackNodesPostOrder(Entry.second, Visited, StackIdToMatchingCalls); 144106c3fb27SDimitry Andric } 144206c3fb27SDimitry Andric 144306c3fb27SDimitry Andric uint64_t ModuleCallsiteContextGraph::getLastStackId(Instruction *Call) { 144406c3fb27SDimitry Andric CallStack<MDNode, MDNode::op_iterator> CallsiteContext( 144506c3fb27SDimitry Andric Call->getMetadata(LLVMContext::MD_callsite)); 144606c3fb27SDimitry Andric return CallsiteContext.back(); 144706c3fb27SDimitry Andric } 144806c3fb27SDimitry Andric 144906c3fb27SDimitry Andric uint64_t IndexCallsiteContextGraph::getLastStackId(IndexCall &Call) { 145006c3fb27SDimitry Andric assert(isa<CallsiteInfo *>(Call.getBase())); 145106c3fb27SDimitry Andric CallStack<CallsiteInfo, SmallVector<unsigned>::const_iterator> 145206c3fb27SDimitry Andric CallsiteContext(dyn_cast_if_present<CallsiteInfo *>(Call.getBase())); 145306c3fb27SDimitry Andric // Need to convert index into stack id. 145406c3fb27SDimitry Andric return Index.getStackIdAtIndex(CallsiteContext.back()); 145506c3fb27SDimitry Andric } 145606c3fb27SDimitry Andric 145706c3fb27SDimitry Andric static const std::string MemProfCloneSuffix = ".memprof."; 145806c3fb27SDimitry Andric 145906c3fb27SDimitry Andric static std::string getMemProfFuncName(Twine Base, unsigned CloneNo) { 146006c3fb27SDimitry Andric // We use CloneNo == 0 to refer to the original version, which doesn't get 146106c3fb27SDimitry Andric // renamed with a suffix. 146206c3fb27SDimitry Andric if (!CloneNo) 146306c3fb27SDimitry Andric return Base.str(); 146406c3fb27SDimitry Andric return (Base + MemProfCloneSuffix + Twine(CloneNo)).str(); 146506c3fb27SDimitry Andric } 146606c3fb27SDimitry Andric 146706c3fb27SDimitry Andric std::string ModuleCallsiteContextGraph::getLabel(const Function *Func, 146806c3fb27SDimitry Andric const Instruction *Call, 146906c3fb27SDimitry Andric unsigned CloneNo) const { 147006c3fb27SDimitry Andric return (Twine(Call->getFunction()->getName()) + " -> " + 147106c3fb27SDimitry Andric cast<CallBase>(Call)->getCalledFunction()->getName()) 147206c3fb27SDimitry Andric .str(); 147306c3fb27SDimitry Andric } 147406c3fb27SDimitry Andric 147506c3fb27SDimitry Andric std::string IndexCallsiteContextGraph::getLabel(const FunctionSummary *Func, 147606c3fb27SDimitry Andric const IndexCall &Call, 147706c3fb27SDimitry Andric unsigned CloneNo) const { 147806c3fb27SDimitry Andric auto VI = FSToVIMap.find(Func); 147906c3fb27SDimitry Andric assert(VI != FSToVIMap.end()); 148006c3fb27SDimitry Andric if (isa<AllocInfo *>(Call.getBase())) 148106c3fb27SDimitry Andric return (VI->second.name() + " -> alloc").str(); 148206c3fb27SDimitry Andric else { 148306c3fb27SDimitry Andric auto *Callsite = dyn_cast_if_present<CallsiteInfo *>(Call.getBase()); 148406c3fb27SDimitry Andric return (VI->second.name() + " -> " + 148506c3fb27SDimitry Andric getMemProfFuncName(Callsite->Callee.name(), 148606c3fb27SDimitry Andric Callsite->Clones[CloneNo])) 148706c3fb27SDimitry Andric .str(); 148806c3fb27SDimitry Andric } 148906c3fb27SDimitry Andric } 149006c3fb27SDimitry Andric 149106c3fb27SDimitry Andric std::vector<uint64_t> 149206c3fb27SDimitry Andric ModuleCallsiteContextGraph::getStackIdsWithContextNodesForCall( 149306c3fb27SDimitry Andric Instruction *Call) { 149406c3fb27SDimitry Andric CallStack<MDNode, MDNode::op_iterator> CallsiteContext( 149506c3fb27SDimitry Andric Call->getMetadata(LLVMContext::MD_callsite)); 149606c3fb27SDimitry Andric return getStackIdsWithContextNodes<MDNode, MDNode::op_iterator>( 149706c3fb27SDimitry Andric CallsiteContext); 149806c3fb27SDimitry Andric } 149906c3fb27SDimitry Andric 150006c3fb27SDimitry Andric std::vector<uint64_t> 150106c3fb27SDimitry Andric IndexCallsiteContextGraph::getStackIdsWithContextNodesForCall(IndexCall &Call) { 150206c3fb27SDimitry Andric assert(isa<CallsiteInfo *>(Call.getBase())); 150306c3fb27SDimitry Andric CallStack<CallsiteInfo, SmallVector<unsigned>::const_iterator> 150406c3fb27SDimitry Andric CallsiteContext(dyn_cast_if_present<CallsiteInfo *>(Call.getBase())); 150506c3fb27SDimitry Andric return getStackIdsWithContextNodes<CallsiteInfo, 150606c3fb27SDimitry Andric SmallVector<unsigned>::const_iterator>( 150706c3fb27SDimitry Andric CallsiteContext); 150806c3fb27SDimitry Andric } 150906c3fb27SDimitry Andric 151006c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 151106c3fb27SDimitry Andric template <class NodeT, class IteratorT> 151206c3fb27SDimitry Andric std::vector<uint64_t> 151306c3fb27SDimitry Andric CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getStackIdsWithContextNodes( 151406c3fb27SDimitry Andric CallStack<NodeT, IteratorT> &CallsiteContext) { 151506c3fb27SDimitry Andric std::vector<uint64_t> StackIds; 151606c3fb27SDimitry Andric for (auto IdOrIndex : CallsiteContext) { 151706c3fb27SDimitry Andric auto StackId = getStackId(IdOrIndex); 151806c3fb27SDimitry Andric ContextNode *Node = getNodeForStackId(StackId); 151906c3fb27SDimitry Andric if (!Node) 152006c3fb27SDimitry Andric break; 152106c3fb27SDimitry Andric StackIds.push_back(StackId); 152206c3fb27SDimitry Andric } 152306c3fb27SDimitry Andric return StackIds; 152406c3fb27SDimitry Andric } 152506c3fb27SDimitry Andric 152606c3fb27SDimitry Andric ModuleCallsiteContextGraph::ModuleCallsiteContextGraph( 1527*7a6dacacSDimitry Andric Module &M, 1528*7a6dacacSDimitry Andric llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter) 152906c3fb27SDimitry Andric : Mod(M), OREGetter(OREGetter) { 153006c3fb27SDimitry Andric for (auto &F : M) { 153106c3fb27SDimitry Andric std::vector<CallInfo> CallsWithMetadata; 153206c3fb27SDimitry Andric for (auto &BB : F) { 153306c3fb27SDimitry Andric for (auto &I : BB) { 153406c3fb27SDimitry Andric if (!isa<CallBase>(I)) 153506c3fb27SDimitry Andric continue; 153606c3fb27SDimitry Andric if (auto *MemProfMD = I.getMetadata(LLVMContext::MD_memprof)) { 153706c3fb27SDimitry Andric CallsWithMetadata.push_back(&I); 153806c3fb27SDimitry Andric auto *AllocNode = addAllocNode(&I, &F); 153906c3fb27SDimitry Andric auto *CallsiteMD = I.getMetadata(LLVMContext::MD_callsite); 154006c3fb27SDimitry Andric assert(CallsiteMD); 154106c3fb27SDimitry Andric CallStack<MDNode, MDNode::op_iterator> CallsiteContext(CallsiteMD); 154206c3fb27SDimitry Andric // Add all of the MIBs and their stack nodes. 154306c3fb27SDimitry Andric for (auto &MDOp : MemProfMD->operands()) { 154406c3fb27SDimitry Andric auto *MIBMD = cast<const MDNode>(MDOp); 154506c3fb27SDimitry Andric MDNode *StackNode = getMIBStackNode(MIBMD); 154606c3fb27SDimitry Andric assert(StackNode); 154706c3fb27SDimitry Andric CallStack<MDNode, MDNode::op_iterator> StackContext(StackNode); 154806c3fb27SDimitry Andric addStackNodesForMIB<MDNode, MDNode::op_iterator>( 154906c3fb27SDimitry Andric AllocNode, StackContext, CallsiteContext, 155006c3fb27SDimitry Andric getMIBAllocType(MIBMD)); 155106c3fb27SDimitry Andric } 155206c3fb27SDimitry Andric assert(AllocNode->AllocTypes != (uint8_t)AllocationType::None); 155306c3fb27SDimitry Andric // Memprof and callsite metadata on memory allocations no longer 155406c3fb27SDimitry Andric // needed. 155506c3fb27SDimitry Andric I.setMetadata(LLVMContext::MD_memprof, nullptr); 155606c3fb27SDimitry Andric I.setMetadata(LLVMContext::MD_callsite, nullptr); 155706c3fb27SDimitry Andric } 155806c3fb27SDimitry Andric // For callsite metadata, add to list for this function for later use. 155906c3fb27SDimitry Andric else if (I.getMetadata(LLVMContext::MD_callsite)) 156006c3fb27SDimitry Andric CallsWithMetadata.push_back(&I); 156106c3fb27SDimitry Andric } 156206c3fb27SDimitry Andric } 156306c3fb27SDimitry Andric if (!CallsWithMetadata.empty()) 1564297eecfbSDimitry Andric FuncToCallsWithMetadata[&F] = CallsWithMetadata; 156506c3fb27SDimitry Andric } 156606c3fb27SDimitry Andric 156706c3fb27SDimitry Andric if (DumpCCG) { 156806c3fb27SDimitry Andric dbgs() << "CCG before updating call stack chains:\n"; 156906c3fb27SDimitry Andric dbgs() << *this; 157006c3fb27SDimitry Andric } 157106c3fb27SDimitry Andric 157206c3fb27SDimitry Andric if (ExportToDot) 157306c3fb27SDimitry Andric exportToDot("prestackupdate"); 157406c3fb27SDimitry Andric 157506c3fb27SDimitry Andric updateStackNodes(); 157606c3fb27SDimitry Andric 157706c3fb27SDimitry Andric handleCallsitesWithMultipleTargets(); 157806c3fb27SDimitry Andric 157906c3fb27SDimitry Andric // Strip off remaining callsite metadata, no longer needed. 158006c3fb27SDimitry Andric for (auto &FuncEntry : FuncToCallsWithMetadata) 158106c3fb27SDimitry Andric for (auto &Call : FuncEntry.second) 158206c3fb27SDimitry Andric Call.call()->setMetadata(LLVMContext::MD_callsite, nullptr); 158306c3fb27SDimitry Andric } 158406c3fb27SDimitry Andric 158506c3fb27SDimitry Andric IndexCallsiteContextGraph::IndexCallsiteContextGraph( 158606c3fb27SDimitry Andric ModuleSummaryIndex &Index, 1587*7a6dacacSDimitry Andric llvm::function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)> 158806c3fb27SDimitry Andric isPrevailing) 1589297eecfbSDimitry Andric : Index(Index), isPrevailing(isPrevailing) { 159006c3fb27SDimitry Andric for (auto &I : Index) { 159106c3fb27SDimitry Andric auto VI = Index.getValueInfo(I); 159206c3fb27SDimitry Andric for (auto &S : VI.getSummaryList()) { 159306c3fb27SDimitry Andric // We should only add the prevailing nodes. Otherwise we may try to clone 159406c3fb27SDimitry Andric // in a weak copy that won't be linked (and may be different than the 159506c3fb27SDimitry Andric // prevailing version). 159606c3fb27SDimitry Andric // We only keep the memprof summary on the prevailing copy now when 159706c3fb27SDimitry Andric // building the combined index, as a space optimization, however don't 159806c3fb27SDimitry Andric // rely on this optimization. The linker doesn't resolve local linkage 159906c3fb27SDimitry Andric // values so don't check whether those are prevailing. 160006c3fb27SDimitry Andric if (!GlobalValue::isLocalLinkage(S->linkage()) && 160106c3fb27SDimitry Andric !isPrevailing(VI.getGUID(), S.get())) 160206c3fb27SDimitry Andric continue; 160306c3fb27SDimitry Andric auto *FS = dyn_cast<FunctionSummary>(S.get()); 160406c3fb27SDimitry Andric if (!FS) 160506c3fb27SDimitry Andric continue; 160606c3fb27SDimitry Andric std::vector<CallInfo> CallsWithMetadata; 160706c3fb27SDimitry Andric if (!FS->allocs().empty()) { 160806c3fb27SDimitry Andric for (auto &AN : FS->mutableAllocs()) { 160906c3fb27SDimitry Andric // This can happen because of recursion elimination handling that 161006c3fb27SDimitry Andric // currently exists in ModuleSummaryAnalysis. Skip these for now. 161106c3fb27SDimitry Andric // We still added them to the summary because we need to be able to 161206c3fb27SDimitry Andric // correlate properly in applyImport in the backends. 161306c3fb27SDimitry Andric if (AN.MIBs.empty()) 161406c3fb27SDimitry Andric continue; 161506c3fb27SDimitry Andric CallsWithMetadata.push_back({&AN}); 161606c3fb27SDimitry Andric auto *AllocNode = addAllocNode({&AN}, FS); 161706c3fb27SDimitry Andric // Pass an empty CallStack to the CallsiteContext (second) 161806c3fb27SDimitry Andric // parameter, since for ThinLTO we already collapsed out the inlined 161906c3fb27SDimitry Andric // stack ids on the allocation call during ModuleSummaryAnalysis. 162006c3fb27SDimitry Andric CallStack<MIBInfo, SmallVector<unsigned>::const_iterator> 162106c3fb27SDimitry Andric EmptyContext; 162206c3fb27SDimitry Andric // Now add all of the MIBs and their stack nodes. 162306c3fb27SDimitry Andric for (auto &MIB : AN.MIBs) { 162406c3fb27SDimitry Andric CallStack<MIBInfo, SmallVector<unsigned>::const_iterator> 162506c3fb27SDimitry Andric StackContext(&MIB); 162606c3fb27SDimitry Andric addStackNodesForMIB<MIBInfo, SmallVector<unsigned>::const_iterator>( 162706c3fb27SDimitry Andric AllocNode, StackContext, EmptyContext, MIB.AllocType); 162806c3fb27SDimitry Andric } 162906c3fb27SDimitry Andric assert(AllocNode->AllocTypes != (uint8_t)AllocationType::None); 163006c3fb27SDimitry Andric // Initialize version 0 on the summary alloc node to the current alloc 163106c3fb27SDimitry Andric // type, unless it has both types in which case make it default, so 163206c3fb27SDimitry Andric // that in the case where we aren't able to clone the original version 163306c3fb27SDimitry Andric // always ends up with the default allocation behavior. 163406c3fb27SDimitry Andric AN.Versions[0] = (uint8_t)allocTypeToUse(AllocNode->AllocTypes); 163506c3fb27SDimitry Andric } 163606c3fb27SDimitry Andric } 163706c3fb27SDimitry Andric // For callsite metadata, add to list for this function for later use. 163806c3fb27SDimitry Andric if (!FS->callsites().empty()) 163906c3fb27SDimitry Andric for (auto &SN : FS->mutableCallsites()) 164006c3fb27SDimitry Andric CallsWithMetadata.push_back({&SN}); 164106c3fb27SDimitry Andric 164206c3fb27SDimitry Andric if (!CallsWithMetadata.empty()) 1643297eecfbSDimitry Andric FuncToCallsWithMetadata[FS] = CallsWithMetadata; 164406c3fb27SDimitry Andric 164506c3fb27SDimitry Andric if (!FS->allocs().empty() || !FS->callsites().empty()) 164606c3fb27SDimitry Andric FSToVIMap[FS] = VI; 164706c3fb27SDimitry Andric } 164806c3fb27SDimitry Andric } 164906c3fb27SDimitry Andric 165006c3fb27SDimitry Andric if (DumpCCG) { 165106c3fb27SDimitry Andric dbgs() << "CCG before updating call stack chains:\n"; 165206c3fb27SDimitry Andric dbgs() << *this; 165306c3fb27SDimitry Andric } 165406c3fb27SDimitry Andric 165506c3fb27SDimitry Andric if (ExportToDot) 165606c3fb27SDimitry Andric exportToDot("prestackupdate"); 165706c3fb27SDimitry Andric 165806c3fb27SDimitry Andric updateStackNodes(); 165906c3fb27SDimitry Andric 166006c3fb27SDimitry Andric handleCallsitesWithMultipleTargets(); 166106c3fb27SDimitry Andric } 166206c3fb27SDimitry Andric 166306c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 166406c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, 166506c3fb27SDimitry Andric CallTy>::handleCallsitesWithMultipleTargets() { 166606c3fb27SDimitry Andric // Look for and workaround callsites that call multiple functions. 166706c3fb27SDimitry Andric // This can happen for indirect calls, which needs better handling, and in 166806c3fb27SDimitry Andric // more rare cases (e.g. macro expansion). 166906c3fb27SDimitry Andric // TODO: To fix this for indirect calls we will want to perform speculative 167006c3fb27SDimitry Andric // devirtualization using either the normal PGO info with ICP, or using the 167106c3fb27SDimitry Andric // information in the profiled MemProf contexts. We can do this prior to 167206c3fb27SDimitry Andric // this transformation for regular LTO, and for ThinLTO we can simulate that 167306c3fb27SDimitry Andric // effect in the summary and perform the actual speculative devirtualization 167406c3fb27SDimitry Andric // while cloning in the ThinLTO backend. 1675297eecfbSDimitry Andric 1676297eecfbSDimitry Andric // Keep track of the new nodes synthesized for discovered tail calls missing 1677297eecfbSDimitry Andric // from the profiled contexts. 1678297eecfbSDimitry Andric MapVector<CallInfo, ContextNode *> TailCallToContextNodeMap; 1679297eecfbSDimitry Andric 168006c3fb27SDimitry Andric for (auto Entry = NonAllocationCallToContextNodeMap.begin(); 168106c3fb27SDimitry Andric Entry != NonAllocationCallToContextNodeMap.end();) { 168206c3fb27SDimitry Andric auto *Node = Entry->second; 168306c3fb27SDimitry Andric assert(Node->Clones.empty()); 168406c3fb27SDimitry Andric // Check all node callees and see if in the same function. 168506c3fb27SDimitry Andric bool Removed = false; 168606c3fb27SDimitry Andric auto Call = Node->Call.call(); 1687297eecfbSDimitry Andric for (auto EI = Node->CalleeEdges.begin(); EI != Node->CalleeEdges.end();) { 1688297eecfbSDimitry Andric auto Edge = *EI; 1689297eecfbSDimitry Andric if (!Edge->Callee->hasCall()) { 1690297eecfbSDimitry Andric ++EI; 169106c3fb27SDimitry Andric continue; 1692297eecfbSDimitry Andric } 169306c3fb27SDimitry Andric assert(NodeToCallingFunc.count(Edge->Callee)); 169406c3fb27SDimitry Andric // Check if the called function matches that of the callee node. 1695297eecfbSDimitry Andric if (calleesMatch(Call, EI, TailCallToContextNodeMap)) 169606c3fb27SDimitry Andric continue; 1697297eecfbSDimitry Andric RemovedEdgesWithMismatchedCallees++; 169806c3fb27SDimitry Andric // Work around by setting Node to have a null call, so it gets 169906c3fb27SDimitry Andric // skipped during cloning. Otherwise assignFunctions will assert 170006c3fb27SDimitry Andric // because its data structures are not designed to handle this case. 170106c3fb27SDimitry Andric Entry = NonAllocationCallToContextNodeMap.erase(Entry); 170206c3fb27SDimitry Andric Node->setCall(CallInfo()); 170306c3fb27SDimitry Andric Removed = true; 170406c3fb27SDimitry Andric break; 170506c3fb27SDimitry Andric } 170606c3fb27SDimitry Andric if (!Removed) 170706c3fb27SDimitry Andric Entry++; 170806c3fb27SDimitry Andric } 1709297eecfbSDimitry Andric 1710297eecfbSDimitry Andric // Add the new nodes after the above loop so that the iteration is not 1711297eecfbSDimitry Andric // invalidated. 1712297eecfbSDimitry Andric for (auto &[Call, Node] : TailCallToContextNodeMap) 1713297eecfbSDimitry Andric NonAllocationCallToContextNodeMap[Call] = Node; 171406c3fb27SDimitry Andric } 171506c3fb27SDimitry Andric 171606c3fb27SDimitry Andric uint64_t ModuleCallsiteContextGraph::getStackId(uint64_t IdOrIndex) const { 171706c3fb27SDimitry Andric // In the Module (IR) case this is already the Id. 171806c3fb27SDimitry Andric return IdOrIndex; 171906c3fb27SDimitry Andric } 172006c3fb27SDimitry Andric 172106c3fb27SDimitry Andric uint64_t IndexCallsiteContextGraph::getStackId(uint64_t IdOrIndex) const { 172206c3fb27SDimitry Andric // In the Index case this is an index into the stack id list in the summary 172306c3fb27SDimitry Andric // index, convert it to an Id. 172406c3fb27SDimitry Andric return Index.getStackIdAtIndex(IdOrIndex); 172506c3fb27SDimitry Andric } 172606c3fb27SDimitry Andric 1727297eecfbSDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 1728297eecfbSDimitry Andric bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::calleesMatch( 1729297eecfbSDimitry Andric CallTy Call, EdgeIter &EI, 1730297eecfbSDimitry Andric MapVector<CallInfo, ContextNode *> &TailCallToContextNodeMap) { 1731297eecfbSDimitry Andric auto Edge = *EI; 1732297eecfbSDimitry Andric const FuncTy *ProfiledCalleeFunc = NodeToCallingFunc[Edge->Callee]; 1733297eecfbSDimitry Andric const FuncTy *CallerFunc = NodeToCallingFunc[Edge->Caller]; 1734297eecfbSDimitry Andric // Will be populated in order of callee to caller if we find a chain of tail 1735297eecfbSDimitry Andric // calls between the profiled caller and callee. 1736297eecfbSDimitry Andric std::vector<std::pair<CallTy, FuncTy *>> FoundCalleeChain; 1737297eecfbSDimitry Andric if (!calleeMatchesFunc(Call, ProfiledCalleeFunc, CallerFunc, 1738297eecfbSDimitry Andric FoundCalleeChain)) { 1739297eecfbSDimitry Andric ++EI; 1740297eecfbSDimitry Andric return false; 1741297eecfbSDimitry Andric } 1742297eecfbSDimitry Andric 1743297eecfbSDimitry Andric // The usual case where the profiled callee matches that of the IR/summary. 1744297eecfbSDimitry Andric if (FoundCalleeChain.empty()) { 1745297eecfbSDimitry Andric ++EI; 1746297eecfbSDimitry Andric return true; 1747297eecfbSDimitry Andric } 1748297eecfbSDimitry Andric 1749297eecfbSDimitry Andric auto AddEdge = [Edge, &EI](ContextNode *Caller, ContextNode *Callee) { 1750297eecfbSDimitry Andric auto *CurEdge = Callee->findEdgeFromCaller(Caller); 1751297eecfbSDimitry Andric // If there is already an edge between these nodes, simply update it and 1752297eecfbSDimitry Andric // return. 1753297eecfbSDimitry Andric if (CurEdge) { 1754297eecfbSDimitry Andric CurEdge->ContextIds.insert(Edge->ContextIds.begin(), 1755297eecfbSDimitry Andric Edge->ContextIds.end()); 1756297eecfbSDimitry Andric CurEdge->AllocTypes |= Edge->AllocTypes; 1757297eecfbSDimitry Andric return; 1758297eecfbSDimitry Andric } 1759297eecfbSDimitry Andric // Otherwise, create a new edge and insert it into the caller and callee 1760297eecfbSDimitry Andric // lists. 1761297eecfbSDimitry Andric auto NewEdge = std::make_shared<ContextEdge>( 1762297eecfbSDimitry Andric Callee, Caller, Edge->AllocTypes, Edge->ContextIds); 1763297eecfbSDimitry Andric Callee->CallerEdges.push_back(NewEdge); 1764297eecfbSDimitry Andric if (Caller == Edge->Caller) { 1765297eecfbSDimitry Andric // If we are inserting the new edge into the current edge's caller, insert 1766297eecfbSDimitry Andric // the new edge before the current iterator position, and then increment 1767297eecfbSDimitry Andric // back to the current edge. 1768297eecfbSDimitry Andric EI = Caller->CalleeEdges.insert(EI, NewEdge); 1769297eecfbSDimitry Andric ++EI; 1770297eecfbSDimitry Andric assert(*EI == Edge && 1771297eecfbSDimitry Andric "Iterator position not restored after insert and increment"); 1772297eecfbSDimitry Andric } else 1773297eecfbSDimitry Andric Caller->CalleeEdges.push_back(NewEdge); 1774297eecfbSDimitry Andric }; 1775297eecfbSDimitry Andric 1776297eecfbSDimitry Andric // Create new nodes for each found callee and connect in between the profiled 1777297eecfbSDimitry Andric // caller and callee. 1778297eecfbSDimitry Andric auto *CurCalleeNode = Edge->Callee; 1779297eecfbSDimitry Andric for (auto &[NewCall, Func] : FoundCalleeChain) { 1780297eecfbSDimitry Andric ContextNode *NewNode = nullptr; 1781297eecfbSDimitry Andric // First check if we have already synthesized a node for this tail call. 1782297eecfbSDimitry Andric if (TailCallToContextNodeMap.count(NewCall)) { 1783297eecfbSDimitry Andric NewNode = TailCallToContextNodeMap[NewCall]; 1784297eecfbSDimitry Andric NewNode->ContextIds.insert(Edge->ContextIds.begin(), 1785297eecfbSDimitry Andric Edge->ContextIds.end()); 1786297eecfbSDimitry Andric NewNode->AllocTypes |= Edge->AllocTypes; 1787297eecfbSDimitry Andric } else { 1788297eecfbSDimitry Andric FuncToCallsWithMetadata[Func].push_back({NewCall}); 1789297eecfbSDimitry Andric // Create Node and record node info. 1790297eecfbSDimitry Andric NodeOwner.push_back( 1791297eecfbSDimitry Andric std::make_unique<ContextNode>(/*IsAllocation=*/false, NewCall)); 1792297eecfbSDimitry Andric NewNode = NodeOwner.back().get(); 1793297eecfbSDimitry Andric NodeToCallingFunc[NewNode] = Func; 1794297eecfbSDimitry Andric TailCallToContextNodeMap[NewCall] = NewNode; 1795297eecfbSDimitry Andric NewNode->ContextIds = Edge->ContextIds; 1796297eecfbSDimitry Andric NewNode->AllocTypes = Edge->AllocTypes; 1797297eecfbSDimitry Andric } 1798297eecfbSDimitry Andric 1799297eecfbSDimitry Andric // Hook up node to its callee node 1800297eecfbSDimitry Andric AddEdge(NewNode, CurCalleeNode); 1801297eecfbSDimitry Andric 1802297eecfbSDimitry Andric CurCalleeNode = NewNode; 1803297eecfbSDimitry Andric } 1804297eecfbSDimitry Andric 1805297eecfbSDimitry Andric // Hook up edge's original caller to new callee node. 1806297eecfbSDimitry Andric AddEdge(Edge->Caller, CurCalleeNode); 1807297eecfbSDimitry Andric 1808297eecfbSDimitry Andric // Remove old edge 1809297eecfbSDimitry Andric Edge->Callee->eraseCallerEdge(Edge.get()); 1810297eecfbSDimitry Andric EI = Edge->Caller->CalleeEdges.erase(EI); 1811297eecfbSDimitry Andric 1812297eecfbSDimitry Andric return true; 1813297eecfbSDimitry Andric } 1814297eecfbSDimitry Andric 1815297eecfbSDimitry Andric bool ModuleCallsiteContextGraph::findProfiledCalleeThroughTailCalls( 1816297eecfbSDimitry Andric const Function *ProfiledCallee, Value *CurCallee, unsigned Depth, 1817297eecfbSDimitry Andric std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain, 1818297eecfbSDimitry Andric bool &FoundMultipleCalleeChains) { 1819297eecfbSDimitry Andric // Stop recursive search if we have already explored the maximum specified 1820297eecfbSDimitry Andric // depth. 1821297eecfbSDimitry Andric if (Depth > TailCallSearchDepth) 1822297eecfbSDimitry Andric return false; 1823297eecfbSDimitry Andric 1824297eecfbSDimitry Andric auto SaveCallsiteInfo = [&](Instruction *Callsite, Function *F) { 1825297eecfbSDimitry Andric FoundCalleeChain.push_back({Callsite, F}); 1826297eecfbSDimitry Andric }; 1827297eecfbSDimitry Andric 1828297eecfbSDimitry Andric auto *CalleeFunc = dyn_cast<Function>(CurCallee); 1829297eecfbSDimitry Andric if (!CalleeFunc) { 1830297eecfbSDimitry Andric auto *Alias = dyn_cast<GlobalAlias>(CurCallee); 1831297eecfbSDimitry Andric assert(Alias); 1832297eecfbSDimitry Andric CalleeFunc = dyn_cast<Function>(Alias->getAliasee()); 1833297eecfbSDimitry Andric assert(CalleeFunc); 1834297eecfbSDimitry Andric } 1835297eecfbSDimitry Andric 1836297eecfbSDimitry Andric // Look for tail calls in this function, and check if they either call the 1837297eecfbSDimitry Andric // profiled callee directly, or indirectly (via a recursive search). 1838297eecfbSDimitry Andric // Only succeed if there is a single unique tail call chain found between the 1839297eecfbSDimitry Andric // profiled caller and callee, otherwise we could perform incorrect cloning. 1840297eecfbSDimitry Andric bool FoundSingleCalleeChain = false; 1841297eecfbSDimitry Andric for (auto &BB : *CalleeFunc) { 1842297eecfbSDimitry Andric for (auto &I : BB) { 1843297eecfbSDimitry Andric auto *CB = dyn_cast<CallBase>(&I); 1844297eecfbSDimitry Andric if (!CB || !CB->isTailCall()) 1845297eecfbSDimitry Andric continue; 1846297eecfbSDimitry Andric auto *CalledValue = CB->getCalledOperand(); 1847297eecfbSDimitry Andric auto *CalledFunction = CB->getCalledFunction(); 1848297eecfbSDimitry Andric if (CalledValue && !CalledFunction) { 1849297eecfbSDimitry Andric CalledValue = CalledValue->stripPointerCasts(); 1850297eecfbSDimitry Andric // Stripping pointer casts can reveal a called function. 1851297eecfbSDimitry Andric CalledFunction = dyn_cast<Function>(CalledValue); 1852297eecfbSDimitry Andric } 1853297eecfbSDimitry Andric // Check if this is an alias to a function. If so, get the 1854297eecfbSDimitry Andric // called aliasee for the checks below. 1855297eecfbSDimitry Andric if (auto *GA = dyn_cast<GlobalAlias>(CalledValue)) { 1856297eecfbSDimitry Andric assert(!CalledFunction && 1857297eecfbSDimitry Andric "Expected null called function in callsite for alias"); 1858297eecfbSDimitry Andric CalledFunction = dyn_cast<Function>(GA->getAliaseeObject()); 1859297eecfbSDimitry Andric } 1860297eecfbSDimitry Andric if (!CalledFunction) 1861297eecfbSDimitry Andric continue; 1862297eecfbSDimitry Andric if (CalledFunction == ProfiledCallee) { 1863297eecfbSDimitry Andric if (FoundSingleCalleeChain) { 1864297eecfbSDimitry Andric FoundMultipleCalleeChains = true; 1865297eecfbSDimitry Andric return false; 1866297eecfbSDimitry Andric } 1867297eecfbSDimitry Andric FoundSingleCalleeChain = true; 1868297eecfbSDimitry Andric FoundProfiledCalleeCount++; 1869297eecfbSDimitry Andric FoundProfiledCalleeDepth += Depth; 1870297eecfbSDimitry Andric if (Depth > FoundProfiledCalleeMaxDepth) 1871297eecfbSDimitry Andric FoundProfiledCalleeMaxDepth = Depth; 1872297eecfbSDimitry Andric SaveCallsiteInfo(&I, CalleeFunc); 1873297eecfbSDimitry Andric } else if (findProfiledCalleeThroughTailCalls( 1874297eecfbSDimitry Andric ProfiledCallee, CalledFunction, Depth + 1, 1875297eecfbSDimitry Andric FoundCalleeChain, FoundMultipleCalleeChains)) { 1876297eecfbSDimitry Andric if (FoundMultipleCalleeChains) 1877297eecfbSDimitry Andric return false; 1878297eecfbSDimitry Andric if (FoundSingleCalleeChain) { 1879297eecfbSDimitry Andric FoundMultipleCalleeChains = true; 1880297eecfbSDimitry Andric return false; 1881297eecfbSDimitry Andric } 1882297eecfbSDimitry Andric FoundSingleCalleeChain = true; 1883297eecfbSDimitry Andric SaveCallsiteInfo(&I, CalleeFunc); 1884297eecfbSDimitry Andric } 1885297eecfbSDimitry Andric } 1886297eecfbSDimitry Andric } 1887297eecfbSDimitry Andric 1888297eecfbSDimitry Andric return FoundSingleCalleeChain; 1889297eecfbSDimitry Andric } 1890297eecfbSDimitry Andric 1891297eecfbSDimitry Andric bool ModuleCallsiteContextGraph::calleeMatchesFunc( 1892297eecfbSDimitry Andric Instruction *Call, const Function *Func, const Function *CallerFunc, 1893297eecfbSDimitry Andric std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain) { 189406c3fb27SDimitry Andric auto *CB = dyn_cast<CallBase>(Call); 189506c3fb27SDimitry Andric if (!CB->getCalledOperand()) 189606c3fb27SDimitry Andric return false; 189706c3fb27SDimitry Andric auto *CalleeVal = CB->getCalledOperand()->stripPointerCasts(); 189806c3fb27SDimitry Andric auto *CalleeFunc = dyn_cast<Function>(CalleeVal); 189906c3fb27SDimitry Andric if (CalleeFunc == Func) 190006c3fb27SDimitry Andric return true; 190106c3fb27SDimitry Andric auto *Alias = dyn_cast<GlobalAlias>(CalleeVal); 1902297eecfbSDimitry Andric if (Alias && Alias->getAliasee() == Func) 1903297eecfbSDimitry Andric return true; 1904297eecfbSDimitry Andric 1905297eecfbSDimitry Andric // Recursively search for the profiled callee through tail calls starting with 1906297eecfbSDimitry Andric // the actual Callee. The discovered tail call chain is saved in 1907297eecfbSDimitry Andric // FoundCalleeChain, and we will fixup the graph to include these callsites 1908297eecfbSDimitry Andric // after returning. 1909297eecfbSDimitry Andric // FIXME: We will currently redo the same recursive walk if we find the same 1910297eecfbSDimitry Andric // mismatched callee from another callsite. We can improve this with more 1911297eecfbSDimitry Andric // bookkeeping of the created chain of new nodes for each mismatch. 1912297eecfbSDimitry Andric unsigned Depth = 1; 1913297eecfbSDimitry Andric bool FoundMultipleCalleeChains = false; 1914297eecfbSDimitry Andric if (!findProfiledCalleeThroughTailCalls(Func, CalleeVal, Depth, 1915297eecfbSDimitry Andric FoundCalleeChain, 1916297eecfbSDimitry Andric FoundMultipleCalleeChains)) { 1917297eecfbSDimitry Andric LLVM_DEBUG(dbgs() << "Not found through unique tail call chain: " 1918297eecfbSDimitry Andric << Func->getName() << " from " << CallerFunc->getName() 1919297eecfbSDimitry Andric << " that actually called " << CalleeVal->getName() 1920297eecfbSDimitry Andric << (FoundMultipleCalleeChains 1921297eecfbSDimitry Andric ? " (found multiple possible chains)" 1922297eecfbSDimitry Andric : "") 1923297eecfbSDimitry Andric << "\n"); 1924297eecfbSDimitry Andric if (FoundMultipleCalleeChains) 1925297eecfbSDimitry Andric FoundProfiledCalleeNonUniquelyCount++; 1926297eecfbSDimitry Andric return false; 192706c3fb27SDimitry Andric } 192806c3fb27SDimitry Andric 1929297eecfbSDimitry Andric return true; 1930297eecfbSDimitry Andric } 1931297eecfbSDimitry Andric 1932297eecfbSDimitry Andric bool IndexCallsiteContextGraph::findProfiledCalleeThroughTailCalls( 1933297eecfbSDimitry Andric ValueInfo ProfiledCallee, ValueInfo CurCallee, unsigned Depth, 1934297eecfbSDimitry Andric std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain, 1935297eecfbSDimitry Andric bool &FoundMultipleCalleeChains) { 1936297eecfbSDimitry Andric // Stop recursive search if we have already explored the maximum specified 1937297eecfbSDimitry Andric // depth. 1938297eecfbSDimitry Andric if (Depth > TailCallSearchDepth) 1939297eecfbSDimitry Andric return false; 1940297eecfbSDimitry Andric 1941297eecfbSDimitry Andric auto CreateAndSaveCallsiteInfo = [&](ValueInfo Callee, FunctionSummary *FS) { 1942297eecfbSDimitry Andric // Make a CallsiteInfo for each discovered callee, if one hasn't already 1943297eecfbSDimitry Andric // been synthesized. 1944297eecfbSDimitry Andric if (!FunctionCalleesToSynthesizedCallsiteInfos.count(FS) || 1945297eecfbSDimitry Andric !FunctionCalleesToSynthesizedCallsiteInfos[FS].count(Callee)) 1946297eecfbSDimitry Andric // StackIds is empty (we don't have debug info available in the index for 1947297eecfbSDimitry Andric // these callsites) 1948297eecfbSDimitry Andric FunctionCalleesToSynthesizedCallsiteInfos[FS][Callee] = 1949297eecfbSDimitry Andric std::make_unique<CallsiteInfo>(Callee, SmallVector<unsigned>()); 1950297eecfbSDimitry Andric CallsiteInfo *NewCallsiteInfo = 1951297eecfbSDimitry Andric FunctionCalleesToSynthesizedCallsiteInfos[FS][Callee].get(); 1952297eecfbSDimitry Andric FoundCalleeChain.push_back({NewCallsiteInfo, FS}); 1953297eecfbSDimitry Andric }; 1954297eecfbSDimitry Andric 1955297eecfbSDimitry Andric // Look for tail calls in this function, and check if they either call the 1956297eecfbSDimitry Andric // profiled callee directly, or indirectly (via a recursive search). 1957297eecfbSDimitry Andric // Only succeed if there is a single unique tail call chain found between the 1958297eecfbSDimitry Andric // profiled caller and callee, otherwise we could perform incorrect cloning. 1959297eecfbSDimitry Andric bool FoundSingleCalleeChain = false; 1960297eecfbSDimitry Andric for (auto &S : CurCallee.getSummaryList()) { 1961297eecfbSDimitry Andric if (!GlobalValue::isLocalLinkage(S->linkage()) && 1962297eecfbSDimitry Andric !isPrevailing(CurCallee.getGUID(), S.get())) 1963297eecfbSDimitry Andric continue; 1964297eecfbSDimitry Andric auto *FS = dyn_cast<FunctionSummary>(S->getBaseObject()); 1965297eecfbSDimitry Andric if (!FS) 1966297eecfbSDimitry Andric continue; 1967297eecfbSDimitry Andric auto FSVI = CurCallee; 1968297eecfbSDimitry Andric auto *AS = dyn_cast<AliasSummary>(S.get()); 1969297eecfbSDimitry Andric if (AS) 1970297eecfbSDimitry Andric FSVI = AS->getAliaseeVI(); 1971297eecfbSDimitry Andric for (auto &CallEdge : FS->calls()) { 1972297eecfbSDimitry Andric if (!CallEdge.second.hasTailCall()) 1973297eecfbSDimitry Andric continue; 1974297eecfbSDimitry Andric if (CallEdge.first == ProfiledCallee) { 1975297eecfbSDimitry Andric if (FoundSingleCalleeChain) { 1976297eecfbSDimitry Andric FoundMultipleCalleeChains = true; 1977297eecfbSDimitry Andric return false; 1978297eecfbSDimitry Andric } 1979297eecfbSDimitry Andric FoundSingleCalleeChain = true; 1980297eecfbSDimitry Andric FoundProfiledCalleeCount++; 1981297eecfbSDimitry Andric FoundProfiledCalleeDepth += Depth; 1982297eecfbSDimitry Andric if (Depth > FoundProfiledCalleeMaxDepth) 1983297eecfbSDimitry Andric FoundProfiledCalleeMaxDepth = Depth; 1984297eecfbSDimitry Andric CreateAndSaveCallsiteInfo(CallEdge.first, FS); 1985297eecfbSDimitry Andric // Add FS to FSToVIMap in case it isn't already there. 1986297eecfbSDimitry Andric assert(!FSToVIMap.count(FS) || FSToVIMap[FS] == FSVI); 1987297eecfbSDimitry Andric FSToVIMap[FS] = FSVI; 1988297eecfbSDimitry Andric } else if (findProfiledCalleeThroughTailCalls( 1989297eecfbSDimitry Andric ProfiledCallee, CallEdge.first, Depth + 1, 1990297eecfbSDimitry Andric FoundCalleeChain, FoundMultipleCalleeChains)) { 1991297eecfbSDimitry Andric if (FoundMultipleCalleeChains) 1992297eecfbSDimitry Andric return false; 1993297eecfbSDimitry Andric if (FoundSingleCalleeChain) { 1994297eecfbSDimitry Andric FoundMultipleCalleeChains = true; 1995297eecfbSDimitry Andric return false; 1996297eecfbSDimitry Andric } 1997297eecfbSDimitry Andric FoundSingleCalleeChain = true; 1998297eecfbSDimitry Andric CreateAndSaveCallsiteInfo(CallEdge.first, FS); 1999297eecfbSDimitry Andric // Add FS to FSToVIMap in case it isn't already there. 2000297eecfbSDimitry Andric assert(!FSToVIMap.count(FS) || FSToVIMap[FS] == FSVI); 2001297eecfbSDimitry Andric FSToVIMap[FS] = FSVI; 2002297eecfbSDimitry Andric } 2003297eecfbSDimitry Andric } 2004297eecfbSDimitry Andric } 2005297eecfbSDimitry Andric 2006297eecfbSDimitry Andric return FoundSingleCalleeChain; 2007297eecfbSDimitry Andric } 2008297eecfbSDimitry Andric 2009297eecfbSDimitry Andric bool IndexCallsiteContextGraph::calleeMatchesFunc( 2010297eecfbSDimitry Andric IndexCall &Call, const FunctionSummary *Func, 2011297eecfbSDimitry Andric const FunctionSummary *CallerFunc, 2012297eecfbSDimitry Andric std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain) { 201306c3fb27SDimitry Andric ValueInfo Callee = 201406c3fb27SDimitry Andric dyn_cast_if_present<CallsiteInfo *>(Call.getBase())->Callee; 201506c3fb27SDimitry Andric // If there is no summary list then this is a call to an externally defined 201606c3fb27SDimitry Andric // symbol. 201706c3fb27SDimitry Andric AliasSummary *Alias = 201806c3fb27SDimitry Andric Callee.getSummaryList().empty() 201906c3fb27SDimitry Andric ? nullptr 202006c3fb27SDimitry Andric : dyn_cast<AliasSummary>(Callee.getSummaryList()[0].get()); 202106c3fb27SDimitry Andric assert(FSToVIMap.count(Func)); 2022297eecfbSDimitry Andric auto FuncVI = FSToVIMap[Func]; 2023297eecfbSDimitry Andric if (Callee == FuncVI || 202406c3fb27SDimitry Andric // If callee is an alias, check the aliasee, since only function 202506c3fb27SDimitry Andric // summary base objects will contain the stack node summaries and thus 202606c3fb27SDimitry Andric // get a context node. 2027297eecfbSDimitry Andric (Alias && Alias->getAliaseeVI() == FuncVI)) 2028297eecfbSDimitry Andric return true; 2029297eecfbSDimitry Andric 2030297eecfbSDimitry Andric // Recursively search for the profiled callee through tail calls starting with 2031297eecfbSDimitry Andric // the actual Callee. The discovered tail call chain is saved in 2032297eecfbSDimitry Andric // FoundCalleeChain, and we will fixup the graph to include these callsites 2033297eecfbSDimitry Andric // after returning. 2034297eecfbSDimitry Andric // FIXME: We will currently redo the same recursive walk if we find the same 2035297eecfbSDimitry Andric // mismatched callee from another callsite. We can improve this with more 2036297eecfbSDimitry Andric // bookkeeping of the created chain of new nodes for each mismatch. 2037297eecfbSDimitry Andric unsigned Depth = 1; 2038297eecfbSDimitry Andric bool FoundMultipleCalleeChains = false; 2039297eecfbSDimitry Andric if (!findProfiledCalleeThroughTailCalls( 2040297eecfbSDimitry Andric FuncVI, Callee, Depth, FoundCalleeChain, FoundMultipleCalleeChains)) { 2041297eecfbSDimitry Andric LLVM_DEBUG(dbgs() << "Not found through unique tail call chain: " << FuncVI 2042297eecfbSDimitry Andric << " from " << FSToVIMap[CallerFunc] 2043297eecfbSDimitry Andric << " that actually called " << Callee 2044297eecfbSDimitry Andric << (FoundMultipleCalleeChains 2045297eecfbSDimitry Andric ? " (found multiple possible chains)" 2046297eecfbSDimitry Andric : "") 2047297eecfbSDimitry Andric << "\n"); 2048297eecfbSDimitry Andric if (FoundMultipleCalleeChains) 2049297eecfbSDimitry Andric FoundProfiledCalleeNonUniquelyCount++; 2050297eecfbSDimitry Andric return false; 2051297eecfbSDimitry Andric } 2052297eecfbSDimitry Andric 2053297eecfbSDimitry Andric return true; 205406c3fb27SDimitry Andric } 205506c3fb27SDimitry Andric 205606c3fb27SDimitry Andric static std::string getAllocTypeString(uint8_t AllocTypes) { 205706c3fb27SDimitry Andric if (!AllocTypes) 205806c3fb27SDimitry Andric return "None"; 205906c3fb27SDimitry Andric std::string Str; 206006c3fb27SDimitry Andric if (AllocTypes & (uint8_t)AllocationType::NotCold) 206106c3fb27SDimitry Andric Str += "NotCold"; 206206c3fb27SDimitry Andric if (AllocTypes & (uint8_t)AllocationType::Cold) 206306c3fb27SDimitry Andric Str += "Cold"; 206406c3fb27SDimitry Andric return Str; 206506c3fb27SDimitry Andric } 206606c3fb27SDimitry Andric 206706c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 206806c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::dump() 206906c3fb27SDimitry Andric const { 207006c3fb27SDimitry Andric print(dbgs()); 207106c3fb27SDimitry Andric dbgs() << "\n"; 207206c3fb27SDimitry Andric } 207306c3fb27SDimitry Andric 207406c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 207506c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::print( 207606c3fb27SDimitry Andric raw_ostream &OS) const { 207706c3fb27SDimitry Andric OS << "Node " << this << "\n"; 207806c3fb27SDimitry Andric OS << "\t"; 207906c3fb27SDimitry Andric printCall(OS); 208006c3fb27SDimitry Andric if (Recursive) 208106c3fb27SDimitry Andric OS << " (recursive)"; 208206c3fb27SDimitry Andric OS << "\n"; 208306c3fb27SDimitry Andric OS << "\tAllocTypes: " << getAllocTypeString(AllocTypes) << "\n"; 208406c3fb27SDimitry Andric OS << "\tContextIds:"; 208506c3fb27SDimitry Andric std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end()); 208606c3fb27SDimitry Andric std::sort(SortedIds.begin(), SortedIds.end()); 208706c3fb27SDimitry Andric for (auto Id : SortedIds) 208806c3fb27SDimitry Andric OS << " " << Id; 208906c3fb27SDimitry Andric OS << "\n"; 209006c3fb27SDimitry Andric OS << "\tCalleeEdges:\n"; 209106c3fb27SDimitry Andric for (auto &Edge : CalleeEdges) 209206c3fb27SDimitry Andric OS << "\t\t" << *Edge << "\n"; 209306c3fb27SDimitry Andric OS << "\tCallerEdges:\n"; 209406c3fb27SDimitry Andric for (auto &Edge : CallerEdges) 209506c3fb27SDimitry Andric OS << "\t\t" << *Edge << "\n"; 209606c3fb27SDimitry Andric if (!Clones.empty()) { 209706c3fb27SDimitry Andric OS << "\tClones: "; 209806c3fb27SDimitry Andric FieldSeparator FS; 209906c3fb27SDimitry Andric for (auto *Clone : Clones) 210006c3fb27SDimitry Andric OS << FS << Clone; 210106c3fb27SDimitry Andric OS << "\n"; 210206c3fb27SDimitry Andric } else if (CloneOf) { 210306c3fb27SDimitry Andric OS << "\tClone of " << CloneOf << "\n"; 210406c3fb27SDimitry Andric } 210506c3fb27SDimitry Andric } 210606c3fb27SDimitry Andric 210706c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 210806c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::dump() 210906c3fb27SDimitry Andric const { 211006c3fb27SDimitry Andric print(dbgs()); 211106c3fb27SDimitry Andric dbgs() << "\n"; 211206c3fb27SDimitry Andric } 211306c3fb27SDimitry Andric 211406c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 211506c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::print( 211606c3fb27SDimitry Andric raw_ostream &OS) const { 211706c3fb27SDimitry Andric OS << "Edge from Callee " << Callee << " to Caller: " << Caller 211806c3fb27SDimitry Andric << " AllocTypes: " << getAllocTypeString(AllocTypes); 211906c3fb27SDimitry Andric OS << " ContextIds:"; 212006c3fb27SDimitry Andric std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end()); 212106c3fb27SDimitry Andric std::sort(SortedIds.begin(), SortedIds.end()); 212206c3fb27SDimitry Andric for (auto Id : SortedIds) 212306c3fb27SDimitry Andric OS << " " << Id; 212406c3fb27SDimitry Andric } 212506c3fb27SDimitry Andric 212606c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 212706c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::dump() const { 212806c3fb27SDimitry Andric print(dbgs()); 212906c3fb27SDimitry Andric } 213006c3fb27SDimitry Andric 213106c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 213206c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::print( 213306c3fb27SDimitry Andric raw_ostream &OS) const { 213406c3fb27SDimitry Andric OS << "Callsite Context Graph:\n"; 213506c3fb27SDimitry Andric using GraphType = const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *; 213606c3fb27SDimitry Andric for (const auto Node : nodes<GraphType>(this)) { 213706c3fb27SDimitry Andric if (Node->isRemoved()) 213806c3fb27SDimitry Andric continue; 213906c3fb27SDimitry Andric Node->print(OS); 214006c3fb27SDimitry Andric OS << "\n"; 214106c3fb27SDimitry Andric } 214206c3fb27SDimitry Andric } 214306c3fb27SDimitry Andric 214406c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 214506c3fb27SDimitry Andric static void checkEdge( 214606c3fb27SDimitry Andric const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &Edge) { 214706c3fb27SDimitry Andric // Confirm that alloc type is not None and that we have at least one context 214806c3fb27SDimitry Andric // id. 214906c3fb27SDimitry Andric assert(Edge->AllocTypes != (uint8_t)AllocationType::None); 215006c3fb27SDimitry Andric assert(!Edge->ContextIds.empty()); 215106c3fb27SDimitry Andric } 215206c3fb27SDimitry Andric 215306c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 215406c3fb27SDimitry Andric static void checkNode(const ContextNode<DerivedCCG, FuncTy, CallTy> *Node, 215506c3fb27SDimitry Andric bool CheckEdges = true) { 215606c3fb27SDimitry Andric if (Node->isRemoved()) 215706c3fb27SDimitry Andric return; 215806c3fb27SDimitry Andric // Node's context ids should be the union of both its callee and caller edge 215906c3fb27SDimitry Andric // context ids. 216006c3fb27SDimitry Andric if (Node->CallerEdges.size()) { 216106c3fb27SDimitry Andric auto EI = Node->CallerEdges.begin(); 216206c3fb27SDimitry Andric auto &FirstEdge = *EI; 216306c3fb27SDimitry Andric EI++; 216406c3fb27SDimitry Andric DenseSet<uint32_t> CallerEdgeContextIds(FirstEdge->ContextIds); 216506c3fb27SDimitry Andric for (; EI != Node->CallerEdges.end(); EI++) { 216606c3fb27SDimitry Andric const auto &Edge = *EI; 216706c3fb27SDimitry Andric if (CheckEdges) 216806c3fb27SDimitry Andric checkEdge<DerivedCCG, FuncTy, CallTy>(Edge); 216906c3fb27SDimitry Andric set_union(CallerEdgeContextIds, Edge->ContextIds); 217006c3fb27SDimitry Andric } 217106c3fb27SDimitry Andric // Node can have more context ids than callers if some contexts terminate at 217206c3fb27SDimitry Andric // node and some are longer. 217306c3fb27SDimitry Andric assert(Node->ContextIds == CallerEdgeContextIds || 217406c3fb27SDimitry Andric set_is_subset(CallerEdgeContextIds, Node->ContextIds)); 217506c3fb27SDimitry Andric } 217606c3fb27SDimitry Andric if (Node->CalleeEdges.size()) { 217706c3fb27SDimitry Andric auto EI = Node->CalleeEdges.begin(); 217806c3fb27SDimitry Andric auto &FirstEdge = *EI; 217906c3fb27SDimitry Andric EI++; 218006c3fb27SDimitry Andric DenseSet<uint32_t> CalleeEdgeContextIds(FirstEdge->ContextIds); 218106c3fb27SDimitry Andric for (; EI != Node->CalleeEdges.end(); EI++) { 218206c3fb27SDimitry Andric const auto &Edge = *EI; 218306c3fb27SDimitry Andric if (CheckEdges) 218406c3fb27SDimitry Andric checkEdge<DerivedCCG, FuncTy, CallTy>(Edge); 218506c3fb27SDimitry Andric set_union(CalleeEdgeContextIds, Edge->ContextIds); 218606c3fb27SDimitry Andric } 218706c3fb27SDimitry Andric assert(Node->ContextIds == CalleeEdgeContextIds); 218806c3fb27SDimitry Andric } 218906c3fb27SDimitry Andric } 219006c3fb27SDimitry Andric 219106c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 219206c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::check() const { 219306c3fb27SDimitry Andric using GraphType = const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *; 219406c3fb27SDimitry Andric for (const auto Node : nodes<GraphType>(this)) { 219506c3fb27SDimitry Andric checkNode<DerivedCCG, FuncTy, CallTy>(Node, /*CheckEdges=*/false); 219606c3fb27SDimitry Andric for (auto &Edge : Node->CallerEdges) 219706c3fb27SDimitry Andric checkEdge<DerivedCCG, FuncTy, CallTy>(Edge); 219806c3fb27SDimitry Andric } 219906c3fb27SDimitry Andric } 220006c3fb27SDimitry Andric 220106c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 220206c3fb27SDimitry Andric struct GraphTraits<const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *> { 220306c3fb27SDimitry Andric using GraphType = const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *; 220406c3fb27SDimitry Andric using NodeRef = const ContextNode<DerivedCCG, FuncTy, CallTy> *; 220506c3fb27SDimitry Andric 220606c3fb27SDimitry Andric using NodePtrTy = std::unique_ptr<ContextNode<DerivedCCG, FuncTy, CallTy>>; 220706c3fb27SDimitry Andric static NodeRef getNode(const NodePtrTy &P) { return P.get(); } 220806c3fb27SDimitry Andric 220906c3fb27SDimitry Andric using nodes_iterator = 221006c3fb27SDimitry Andric mapped_iterator<typename std::vector<NodePtrTy>::const_iterator, 221106c3fb27SDimitry Andric decltype(&getNode)>; 221206c3fb27SDimitry Andric 221306c3fb27SDimitry Andric static nodes_iterator nodes_begin(GraphType G) { 221406c3fb27SDimitry Andric return nodes_iterator(G->NodeOwner.begin(), &getNode); 221506c3fb27SDimitry Andric } 221606c3fb27SDimitry Andric 221706c3fb27SDimitry Andric static nodes_iterator nodes_end(GraphType G) { 221806c3fb27SDimitry Andric return nodes_iterator(G->NodeOwner.end(), &getNode); 221906c3fb27SDimitry Andric } 222006c3fb27SDimitry Andric 222106c3fb27SDimitry Andric static NodeRef getEntryNode(GraphType G) { 222206c3fb27SDimitry Andric return G->NodeOwner.begin()->get(); 222306c3fb27SDimitry Andric } 222406c3fb27SDimitry Andric 222506c3fb27SDimitry Andric using EdgePtrTy = std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>; 222606c3fb27SDimitry Andric static const ContextNode<DerivedCCG, FuncTy, CallTy> * 222706c3fb27SDimitry Andric GetCallee(const EdgePtrTy &P) { 222806c3fb27SDimitry Andric return P->Callee; 222906c3fb27SDimitry Andric } 223006c3fb27SDimitry Andric 223106c3fb27SDimitry Andric using ChildIteratorType = 223206c3fb27SDimitry Andric mapped_iterator<typename std::vector<std::shared_ptr<ContextEdge< 223306c3fb27SDimitry Andric DerivedCCG, FuncTy, CallTy>>>::const_iterator, 223406c3fb27SDimitry Andric decltype(&GetCallee)>; 223506c3fb27SDimitry Andric 223606c3fb27SDimitry Andric static ChildIteratorType child_begin(NodeRef N) { 223706c3fb27SDimitry Andric return ChildIteratorType(N->CalleeEdges.begin(), &GetCallee); 223806c3fb27SDimitry Andric } 223906c3fb27SDimitry Andric 224006c3fb27SDimitry Andric static ChildIteratorType child_end(NodeRef N) { 224106c3fb27SDimitry Andric return ChildIteratorType(N->CalleeEdges.end(), &GetCallee); 224206c3fb27SDimitry Andric } 224306c3fb27SDimitry Andric }; 224406c3fb27SDimitry Andric 224506c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 224606c3fb27SDimitry Andric struct DOTGraphTraits<const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *> 224706c3fb27SDimitry Andric : public DefaultDOTGraphTraits { 224806c3fb27SDimitry Andric DOTGraphTraits(bool IsSimple = false) : DefaultDOTGraphTraits(IsSimple) {} 224906c3fb27SDimitry Andric 225006c3fb27SDimitry Andric using GraphType = const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *; 225106c3fb27SDimitry Andric using GTraits = GraphTraits<GraphType>; 225206c3fb27SDimitry Andric using NodeRef = typename GTraits::NodeRef; 225306c3fb27SDimitry Andric using ChildIteratorType = typename GTraits::ChildIteratorType; 225406c3fb27SDimitry Andric 225506c3fb27SDimitry Andric static std::string getNodeLabel(NodeRef Node, GraphType G) { 225606c3fb27SDimitry Andric std::string LabelString = 225706c3fb27SDimitry Andric (Twine("OrigId: ") + (Node->IsAllocation ? "Alloc" : "") + 225806c3fb27SDimitry Andric Twine(Node->OrigStackOrAllocId)) 225906c3fb27SDimitry Andric .str(); 226006c3fb27SDimitry Andric LabelString += "\n"; 226106c3fb27SDimitry Andric if (Node->hasCall()) { 226206c3fb27SDimitry Andric auto Func = G->NodeToCallingFunc.find(Node); 226306c3fb27SDimitry Andric assert(Func != G->NodeToCallingFunc.end()); 226406c3fb27SDimitry Andric LabelString += 226506c3fb27SDimitry Andric G->getLabel(Func->second, Node->Call.call(), Node->Call.cloneNo()); 226606c3fb27SDimitry Andric } else { 226706c3fb27SDimitry Andric LabelString += "null call"; 226806c3fb27SDimitry Andric if (Node->Recursive) 226906c3fb27SDimitry Andric LabelString += " (recursive)"; 227006c3fb27SDimitry Andric else 227106c3fb27SDimitry Andric LabelString += " (external)"; 227206c3fb27SDimitry Andric } 227306c3fb27SDimitry Andric return LabelString; 227406c3fb27SDimitry Andric } 227506c3fb27SDimitry Andric 227606c3fb27SDimitry Andric static std::string getNodeAttributes(NodeRef Node, GraphType) { 227706c3fb27SDimitry Andric std::string AttributeString = (Twine("tooltip=\"") + getNodeId(Node) + " " + 227806c3fb27SDimitry Andric getContextIds(Node->ContextIds) + "\"") 227906c3fb27SDimitry Andric .str(); 228006c3fb27SDimitry Andric AttributeString += 228106c3fb27SDimitry Andric (Twine(",fillcolor=\"") + getColor(Node->AllocTypes) + "\"").str(); 228206c3fb27SDimitry Andric AttributeString += ",style=\"filled\""; 228306c3fb27SDimitry Andric if (Node->CloneOf) { 228406c3fb27SDimitry Andric AttributeString += ",color=\"blue\""; 228506c3fb27SDimitry Andric AttributeString += ",style=\"filled,bold,dashed\""; 228606c3fb27SDimitry Andric } else 228706c3fb27SDimitry Andric AttributeString += ",style=\"filled\""; 228806c3fb27SDimitry Andric return AttributeString; 228906c3fb27SDimitry Andric } 229006c3fb27SDimitry Andric 229106c3fb27SDimitry Andric static std::string getEdgeAttributes(NodeRef, ChildIteratorType ChildIter, 229206c3fb27SDimitry Andric GraphType) { 229306c3fb27SDimitry Andric auto &Edge = *(ChildIter.getCurrent()); 229406c3fb27SDimitry Andric return (Twine("tooltip=\"") + getContextIds(Edge->ContextIds) + "\"" + 229506c3fb27SDimitry Andric Twine(",fillcolor=\"") + getColor(Edge->AllocTypes) + "\"") 229606c3fb27SDimitry Andric .str(); 229706c3fb27SDimitry Andric } 229806c3fb27SDimitry Andric 229906c3fb27SDimitry Andric // Since the NodeOwners list includes nodes that are no longer connected to 230006c3fb27SDimitry Andric // the graph, skip them here. 230106c3fb27SDimitry Andric static bool isNodeHidden(NodeRef Node, GraphType) { 230206c3fb27SDimitry Andric return Node->isRemoved(); 230306c3fb27SDimitry Andric } 230406c3fb27SDimitry Andric 230506c3fb27SDimitry Andric private: 230606c3fb27SDimitry Andric static std::string getContextIds(const DenseSet<uint32_t> &ContextIds) { 230706c3fb27SDimitry Andric std::string IdString = "ContextIds:"; 230806c3fb27SDimitry Andric if (ContextIds.size() < 100) { 230906c3fb27SDimitry Andric std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end()); 231006c3fb27SDimitry Andric std::sort(SortedIds.begin(), SortedIds.end()); 231106c3fb27SDimitry Andric for (auto Id : SortedIds) 231206c3fb27SDimitry Andric IdString += (" " + Twine(Id)).str(); 231306c3fb27SDimitry Andric } else { 231406c3fb27SDimitry Andric IdString += (" (" + Twine(ContextIds.size()) + " ids)").str(); 231506c3fb27SDimitry Andric } 231606c3fb27SDimitry Andric return IdString; 231706c3fb27SDimitry Andric } 231806c3fb27SDimitry Andric 231906c3fb27SDimitry Andric static std::string getColor(uint8_t AllocTypes) { 232006c3fb27SDimitry Andric if (AllocTypes == (uint8_t)AllocationType::NotCold) 232106c3fb27SDimitry Andric // Color "brown1" actually looks like a lighter red. 232206c3fb27SDimitry Andric return "brown1"; 232306c3fb27SDimitry Andric if (AllocTypes == (uint8_t)AllocationType::Cold) 232406c3fb27SDimitry Andric return "cyan"; 232506c3fb27SDimitry Andric if (AllocTypes == 232606c3fb27SDimitry Andric ((uint8_t)AllocationType::NotCold | (uint8_t)AllocationType::Cold)) 232706c3fb27SDimitry Andric // Lighter purple. 232806c3fb27SDimitry Andric return "mediumorchid1"; 232906c3fb27SDimitry Andric return "gray"; 233006c3fb27SDimitry Andric } 233106c3fb27SDimitry Andric 233206c3fb27SDimitry Andric static std::string getNodeId(NodeRef Node) { 233306c3fb27SDimitry Andric std::stringstream SStream; 233406c3fb27SDimitry Andric SStream << std::hex << "N0x" << (unsigned long long)Node; 233506c3fb27SDimitry Andric std::string Result = SStream.str(); 233606c3fb27SDimitry Andric return Result; 233706c3fb27SDimitry Andric } 233806c3fb27SDimitry Andric }; 233906c3fb27SDimitry Andric 234006c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 234106c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::exportToDot( 234206c3fb27SDimitry Andric std::string Label) const { 234306c3fb27SDimitry Andric WriteGraph(this, "", false, Label, 234406c3fb27SDimitry Andric DotFilePathPrefix + "ccg." + Label + ".dot"); 234506c3fb27SDimitry Andric } 234606c3fb27SDimitry Andric 234706c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 234806c3fb27SDimitry Andric typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode * 234906c3fb27SDimitry Andric CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::moveEdgeToNewCalleeClone( 235006c3fb27SDimitry Andric const std::shared_ptr<ContextEdge> &Edge, EdgeIter *CallerEdgeI) { 235106c3fb27SDimitry Andric ContextNode *Node = Edge->Callee; 235206c3fb27SDimitry Andric NodeOwner.push_back( 235306c3fb27SDimitry Andric std::make_unique<ContextNode>(Node->IsAllocation, Node->Call)); 235406c3fb27SDimitry Andric ContextNode *Clone = NodeOwner.back().get(); 235506c3fb27SDimitry Andric Node->addClone(Clone); 235606c3fb27SDimitry Andric assert(NodeToCallingFunc.count(Node)); 235706c3fb27SDimitry Andric NodeToCallingFunc[Clone] = NodeToCallingFunc[Node]; 235806c3fb27SDimitry Andric moveEdgeToExistingCalleeClone(Edge, Clone, CallerEdgeI, /*NewClone=*/true); 235906c3fb27SDimitry Andric return Clone; 236006c3fb27SDimitry Andric } 236106c3fb27SDimitry Andric 236206c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 236306c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>:: 236406c3fb27SDimitry Andric moveEdgeToExistingCalleeClone(const std::shared_ptr<ContextEdge> &Edge, 236506c3fb27SDimitry Andric ContextNode *NewCallee, EdgeIter *CallerEdgeI, 236606c3fb27SDimitry Andric bool NewClone) { 236706c3fb27SDimitry Andric // NewCallee and Edge's current callee must be clones of the same original 236806c3fb27SDimitry Andric // node (Edge's current callee may be the original node too). 236906c3fb27SDimitry Andric assert(NewCallee->getOrigNode() == Edge->Callee->getOrigNode()); 237006c3fb27SDimitry Andric auto &EdgeContextIds = Edge->getContextIds(); 237106c3fb27SDimitry Andric ContextNode *OldCallee = Edge->Callee; 237206c3fb27SDimitry Andric if (CallerEdgeI) 237306c3fb27SDimitry Andric *CallerEdgeI = OldCallee->CallerEdges.erase(*CallerEdgeI); 237406c3fb27SDimitry Andric else 237506c3fb27SDimitry Andric OldCallee->eraseCallerEdge(Edge.get()); 237606c3fb27SDimitry Andric Edge->Callee = NewCallee; 237706c3fb27SDimitry Andric NewCallee->CallerEdges.push_back(Edge); 237806c3fb27SDimitry Andric // Don't need to update Edge's context ids since we are simply reconnecting 237906c3fb27SDimitry Andric // it. 238006c3fb27SDimitry Andric set_subtract(OldCallee->ContextIds, EdgeContextIds); 238106c3fb27SDimitry Andric NewCallee->ContextIds.insert(EdgeContextIds.begin(), EdgeContextIds.end()); 238206c3fb27SDimitry Andric NewCallee->AllocTypes |= Edge->AllocTypes; 238306c3fb27SDimitry Andric OldCallee->AllocTypes = computeAllocType(OldCallee->ContextIds); 238406c3fb27SDimitry Andric // OldCallee alloc type should be None iff its context id set is now empty. 238506c3fb27SDimitry Andric assert((OldCallee->AllocTypes == (uint8_t)AllocationType::None) == 238606c3fb27SDimitry Andric OldCallee->ContextIds.empty()); 238706c3fb27SDimitry Andric // Now walk the old callee node's callee edges and move Edge's context ids 238806c3fb27SDimitry Andric // over to the corresponding edge into the clone (which is created here if 238906c3fb27SDimitry Andric // this is a newly created clone). 239006c3fb27SDimitry Andric for (auto &OldCalleeEdge : OldCallee->CalleeEdges) { 239106c3fb27SDimitry Andric // The context ids moving to the new callee are the subset of this edge's 239206c3fb27SDimitry Andric // context ids and the context ids on the caller edge being moved. 239306c3fb27SDimitry Andric DenseSet<uint32_t> EdgeContextIdsToMove = 239406c3fb27SDimitry Andric set_intersection(OldCalleeEdge->getContextIds(), EdgeContextIds); 239506c3fb27SDimitry Andric set_subtract(OldCalleeEdge->getContextIds(), EdgeContextIdsToMove); 239606c3fb27SDimitry Andric OldCalleeEdge->AllocTypes = 239706c3fb27SDimitry Andric computeAllocType(OldCalleeEdge->getContextIds()); 239806c3fb27SDimitry Andric if (!NewClone) { 239906c3fb27SDimitry Andric // Update context ids / alloc type on corresponding edge to NewCallee. 240006c3fb27SDimitry Andric // There is a chance this may not exist if we are reusing an existing 240106c3fb27SDimitry Andric // clone, specifically during function assignment, where we would have 240206c3fb27SDimitry Andric // removed none type edges after creating the clone. If we can't find 240306c3fb27SDimitry Andric // a corresponding edge there, fall through to the cloning below. 240406c3fb27SDimitry Andric if (auto *NewCalleeEdge = 240506c3fb27SDimitry Andric NewCallee->findEdgeFromCallee(OldCalleeEdge->Callee)) { 240606c3fb27SDimitry Andric NewCalleeEdge->getContextIds().insert(EdgeContextIdsToMove.begin(), 240706c3fb27SDimitry Andric EdgeContextIdsToMove.end()); 240806c3fb27SDimitry Andric NewCalleeEdge->AllocTypes |= computeAllocType(EdgeContextIdsToMove); 240906c3fb27SDimitry Andric continue; 241006c3fb27SDimitry Andric } 241106c3fb27SDimitry Andric } 241206c3fb27SDimitry Andric auto NewEdge = std::make_shared<ContextEdge>( 241306c3fb27SDimitry Andric OldCalleeEdge->Callee, NewCallee, 241406c3fb27SDimitry Andric computeAllocType(EdgeContextIdsToMove), EdgeContextIdsToMove); 241506c3fb27SDimitry Andric NewCallee->CalleeEdges.push_back(NewEdge); 241606c3fb27SDimitry Andric NewEdge->Callee->CallerEdges.push_back(NewEdge); 241706c3fb27SDimitry Andric } 241806c3fb27SDimitry Andric if (VerifyCCG) { 241906c3fb27SDimitry Andric checkNode<DerivedCCG, FuncTy, CallTy>(OldCallee, /*CheckEdges=*/false); 242006c3fb27SDimitry Andric checkNode<DerivedCCG, FuncTy, CallTy>(NewCallee, /*CheckEdges=*/false); 242106c3fb27SDimitry Andric for (const auto &OldCalleeEdge : OldCallee->CalleeEdges) 242206c3fb27SDimitry Andric checkNode<DerivedCCG, FuncTy, CallTy>(OldCalleeEdge->Callee, 242306c3fb27SDimitry Andric /*CheckEdges=*/false); 242406c3fb27SDimitry Andric for (const auto &NewCalleeEdge : NewCallee->CalleeEdges) 242506c3fb27SDimitry Andric checkNode<DerivedCCG, FuncTy, CallTy>(NewCalleeEdge->Callee, 242606c3fb27SDimitry Andric /*CheckEdges=*/false); 242706c3fb27SDimitry Andric } 242806c3fb27SDimitry Andric } 242906c3fb27SDimitry Andric 243006c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 243106c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones() { 243206c3fb27SDimitry Andric DenseSet<const ContextNode *> Visited; 243306c3fb27SDimitry Andric for (auto &Entry : AllocationCallToContextNodeMap) 243406c3fb27SDimitry Andric identifyClones(Entry.second, Visited); 243506c3fb27SDimitry Andric } 243606c3fb27SDimitry Andric 243706c3fb27SDimitry Andric // helper function to check an AllocType is cold or notcold or both. 243806c3fb27SDimitry Andric bool checkColdOrNotCold(uint8_t AllocType) { 243906c3fb27SDimitry Andric return (AllocType == (uint8_t)AllocationType::Cold) || 244006c3fb27SDimitry Andric (AllocType == (uint8_t)AllocationType::NotCold) || 244106c3fb27SDimitry Andric (AllocType == 244206c3fb27SDimitry Andric ((uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold)); 244306c3fb27SDimitry Andric } 244406c3fb27SDimitry Andric 244506c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 244606c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones( 244706c3fb27SDimitry Andric ContextNode *Node, DenseSet<const ContextNode *> &Visited) { 244806c3fb27SDimitry Andric if (VerifyNodes) 244906c3fb27SDimitry Andric checkNode<DerivedCCG, FuncTy, CallTy>(Node); 245006c3fb27SDimitry Andric assert(!Node->CloneOf); 245106c3fb27SDimitry Andric 245206c3fb27SDimitry Andric // If Node as a null call, then either it wasn't found in the module (regular 245306c3fb27SDimitry Andric // LTO) or summary index (ThinLTO), or there were other conditions blocking 245406c3fb27SDimitry Andric // cloning (e.g. recursion, calls multiple targets, etc). 245506c3fb27SDimitry Andric // Do this here so that we don't try to recursively clone callers below, which 245606c3fb27SDimitry Andric // isn't useful at least for this node. 245706c3fb27SDimitry Andric if (!Node->hasCall()) 245806c3fb27SDimitry Andric return; 245906c3fb27SDimitry Andric 246006c3fb27SDimitry Andric #ifndef NDEBUG 246106c3fb27SDimitry Andric auto Insert = 246206c3fb27SDimitry Andric #endif 246306c3fb27SDimitry Andric Visited.insert(Node); 246406c3fb27SDimitry Andric // We should not have visited this node yet. 246506c3fb27SDimitry Andric assert(Insert.second); 246606c3fb27SDimitry Andric // The recursive call to identifyClones may delete the current edge from the 246706c3fb27SDimitry Andric // CallerEdges vector. Make a copy and iterate on that, simpler than passing 246806c3fb27SDimitry Andric // in an iterator and having recursive call erase from it. Other edges may 246906c3fb27SDimitry Andric // also get removed during the recursion, which will have null Callee and 247006c3fb27SDimitry Andric // Caller pointers (and are deleted later), so we skip those below. 247106c3fb27SDimitry Andric { 247206c3fb27SDimitry Andric auto CallerEdges = Node->CallerEdges; 247306c3fb27SDimitry Andric for (auto &Edge : CallerEdges) { 247406c3fb27SDimitry Andric // Skip any that have been removed by an earlier recursive call. 247506c3fb27SDimitry Andric if (Edge->Callee == nullptr && Edge->Caller == nullptr) { 24765f757f3fSDimitry Andric assert(!llvm::count(Node->CallerEdges, Edge)); 247706c3fb27SDimitry Andric continue; 247806c3fb27SDimitry Andric } 247906c3fb27SDimitry Andric // Ignore any caller we previously visited via another edge. 248006c3fb27SDimitry Andric if (!Visited.count(Edge->Caller) && !Edge->Caller->CloneOf) { 248106c3fb27SDimitry Andric identifyClones(Edge->Caller, Visited); 248206c3fb27SDimitry Andric } 248306c3fb27SDimitry Andric } 248406c3fb27SDimitry Andric } 248506c3fb27SDimitry Andric 248606c3fb27SDimitry Andric // Check if we reached an unambiguous call or have have only a single caller. 248706c3fb27SDimitry Andric if (hasSingleAllocType(Node->AllocTypes) || Node->CallerEdges.size() <= 1) 248806c3fb27SDimitry Andric return; 248906c3fb27SDimitry Andric 249006c3fb27SDimitry Andric // We need to clone. 249106c3fb27SDimitry Andric 249206c3fb27SDimitry Andric // Try to keep the original version as alloc type NotCold. This will make 249306c3fb27SDimitry Andric // cases with indirect calls or any other situation with an unknown call to 249406c3fb27SDimitry Andric // the original function get the default behavior. We do this by sorting the 249506c3fb27SDimitry Andric // CallerEdges of the Node we will clone by alloc type. 249606c3fb27SDimitry Andric // 249706c3fb27SDimitry Andric // Give NotCold edge the lowest sort priority so those edges are at the end of 249806c3fb27SDimitry Andric // the caller edges vector, and stay on the original version (since the below 249906c3fb27SDimitry Andric // code clones greedily until it finds all remaining edges have the same type 250006c3fb27SDimitry Andric // and leaves the remaining ones on the original Node). 250106c3fb27SDimitry Andric // 250206c3fb27SDimitry Andric // We shouldn't actually have any None type edges, so the sorting priority for 250306c3fb27SDimitry Andric // that is arbitrary, and we assert in that case below. 250406c3fb27SDimitry Andric const unsigned AllocTypeCloningPriority[] = {/*None*/ 3, /*NotCold*/ 4, 250506c3fb27SDimitry Andric /*Cold*/ 1, 250606c3fb27SDimitry Andric /*NotColdCold*/ 2}; 250706c3fb27SDimitry Andric std::stable_sort(Node->CallerEdges.begin(), Node->CallerEdges.end(), 250806c3fb27SDimitry Andric [&](const std::shared_ptr<ContextEdge> &A, 250906c3fb27SDimitry Andric const std::shared_ptr<ContextEdge> &B) { 251006c3fb27SDimitry Andric assert(checkColdOrNotCold(A->AllocTypes) && 251106c3fb27SDimitry Andric checkColdOrNotCold(B->AllocTypes)); 251206c3fb27SDimitry Andric 251306c3fb27SDimitry Andric if (A->AllocTypes == B->AllocTypes) 251406c3fb27SDimitry Andric // Use the first context id for each edge as a 251506c3fb27SDimitry Andric // tie-breaker. 251606c3fb27SDimitry Andric return *A->ContextIds.begin() < *B->ContextIds.begin(); 251706c3fb27SDimitry Andric return AllocTypeCloningPriority[A->AllocTypes] < 251806c3fb27SDimitry Andric AllocTypeCloningPriority[B->AllocTypes]; 251906c3fb27SDimitry Andric }); 252006c3fb27SDimitry Andric 252106c3fb27SDimitry Andric assert(Node->AllocTypes != (uint8_t)AllocationType::None); 252206c3fb27SDimitry Andric 252306c3fb27SDimitry Andric // Iterate until we find no more opportunities for disambiguating the alloc 252406c3fb27SDimitry Andric // types via cloning. In most cases this loop will terminate once the Node 252506c3fb27SDimitry Andric // has a single allocation type, in which case no more cloning is needed. 252606c3fb27SDimitry Andric // We need to be able to remove Edge from CallerEdges, so need to adjust 252706c3fb27SDimitry Andric // iterator inside the loop. 252806c3fb27SDimitry Andric for (auto EI = Node->CallerEdges.begin(); EI != Node->CallerEdges.end();) { 252906c3fb27SDimitry Andric auto CallerEdge = *EI; 253006c3fb27SDimitry Andric 253106c3fb27SDimitry Andric // See if cloning the prior caller edge left this node with a single alloc 253206c3fb27SDimitry Andric // type or a single caller. In that case no more cloning of Node is needed. 253306c3fb27SDimitry Andric if (hasSingleAllocType(Node->AllocTypes) || Node->CallerEdges.size() <= 1) 253406c3fb27SDimitry Andric break; 253506c3fb27SDimitry Andric 253606c3fb27SDimitry Andric // Compute the node callee edge alloc types corresponding to the context ids 253706c3fb27SDimitry Andric // for this caller edge. 253806c3fb27SDimitry Andric std::vector<uint8_t> CalleeEdgeAllocTypesForCallerEdge; 253906c3fb27SDimitry Andric CalleeEdgeAllocTypesForCallerEdge.reserve(Node->CalleeEdges.size()); 254006c3fb27SDimitry Andric for (auto &CalleeEdge : Node->CalleeEdges) 254106c3fb27SDimitry Andric CalleeEdgeAllocTypesForCallerEdge.push_back(intersectAllocTypes( 254206c3fb27SDimitry Andric CalleeEdge->getContextIds(), CallerEdge->getContextIds())); 254306c3fb27SDimitry Andric 254406c3fb27SDimitry Andric // Don't clone if doing so will not disambiguate any alloc types amongst 254506c3fb27SDimitry Andric // caller edges (including the callee edges that would be cloned). 254606c3fb27SDimitry Andric // Otherwise we will simply move all edges to the clone. 254706c3fb27SDimitry Andric // 254806c3fb27SDimitry Andric // First check if by cloning we will disambiguate the caller allocation 254906c3fb27SDimitry Andric // type from node's allocation type. Query allocTypeToUse so that we don't 255006c3fb27SDimitry Andric // bother cloning to distinguish NotCold+Cold from NotCold. Note that 255106c3fb27SDimitry Andric // neither of these should be None type. 255206c3fb27SDimitry Andric // 255306c3fb27SDimitry Andric // Then check if by cloning node at least one of the callee edges will be 255406c3fb27SDimitry Andric // disambiguated by splitting out different context ids. 255506c3fb27SDimitry Andric assert(CallerEdge->AllocTypes != (uint8_t)AllocationType::None); 255606c3fb27SDimitry Andric assert(Node->AllocTypes != (uint8_t)AllocationType::None); 255706c3fb27SDimitry Andric if (allocTypeToUse(CallerEdge->AllocTypes) == 255806c3fb27SDimitry Andric allocTypeToUse(Node->AllocTypes) && 255906c3fb27SDimitry Andric allocTypesMatch<DerivedCCG, FuncTy, CallTy>( 256006c3fb27SDimitry Andric CalleeEdgeAllocTypesForCallerEdge, Node->CalleeEdges)) { 256106c3fb27SDimitry Andric ++EI; 256206c3fb27SDimitry Andric continue; 256306c3fb27SDimitry Andric } 256406c3fb27SDimitry Andric 256506c3fb27SDimitry Andric // First see if we can use an existing clone. Check each clone and its 256606c3fb27SDimitry Andric // callee edges for matching alloc types. 256706c3fb27SDimitry Andric ContextNode *Clone = nullptr; 256806c3fb27SDimitry Andric for (auto *CurClone : Node->Clones) { 256906c3fb27SDimitry Andric if (allocTypeToUse(CurClone->AllocTypes) != 257006c3fb27SDimitry Andric allocTypeToUse(CallerEdge->AllocTypes)) 257106c3fb27SDimitry Andric continue; 257206c3fb27SDimitry Andric 257306c3fb27SDimitry Andric if (!allocTypesMatch<DerivedCCG, FuncTy, CallTy>( 257406c3fb27SDimitry Andric CalleeEdgeAllocTypesForCallerEdge, CurClone->CalleeEdges)) 257506c3fb27SDimitry Andric continue; 257606c3fb27SDimitry Andric Clone = CurClone; 257706c3fb27SDimitry Andric break; 257806c3fb27SDimitry Andric } 257906c3fb27SDimitry Andric 258006c3fb27SDimitry Andric // The edge iterator is adjusted when we move the CallerEdge to the clone. 258106c3fb27SDimitry Andric if (Clone) 258206c3fb27SDimitry Andric moveEdgeToExistingCalleeClone(CallerEdge, Clone, &EI); 258306c3fb27SDimitry Andric else 258406c3fb27SDimitry Andric Clone = moveEdgeToNewCalleeClone(CallerEdge, &EI); 258506c3fb27SDimitry Andric 258606c3fb27SDimitry Andric assert(EI == Node->CallerEdges.end() || 258706c3fb27SDimitry Andric Node->AllocTypes != (uint8_t)AllocationType::None); 258806c3fb27SDimitry Andric // Sanity check that no alloc types on clone or its edges are None. 258906c3fb27SDimitry Andric assert(Clone->AllocTypes != (uint8_t)AllocationType::None); 259006c3fb27SDimitry Andric assert(llvm::none_of( 259106c3fb27SDimitry Andric Clone->CallerEdges, [&](const std::shared_ptr<ContextEdge> &E) { 259206c3fb27SDimitry Andric return E->AllocTypes == (uint8_t)AllocationType::None; 259306c3fb27SDimitry Andric })); 259406c3fb27SDimitry Andric } 259506c3fb27SDimitry Andric 259606c3fb27SDimitry Andric // Cloning may have resulted in some cloned callee edges with type None, 259706c3fb27SDimitry Andric // because they aren't carrying any contexts. Remove those edges. 259806c3fb27SDimitry Andric for (auto *Clone : Node->Clones) { 259906c3fb27SDimitry Andric removeNoneTypeCalleeEdges(Clone); 260006c3fb27SDimitry Andric if (VerifyNodes) 260106c3fb27SDimitry Andric checkNode<DerivedCCG, FuncTy, CallTy>(Clone); 260206c3fb27SDimitry Andric } 260306c3fb27SDimitry Andric // We should still have some context ids on the original Node. 260406c3fb27SDimitry Andric assert(!Node->ContextIds.empty()); 260506c3fb27SDimitry Andric 260606c3fb27SDimitry Andric // Remove any callee edges that ended up with alloc type None after creating 260706c3fb27SDimitry Andric // clones and updating callee edges. 260806c3fb27SDimitry Andric removeNoneTypeCalleeEdges(Node); 260906c3fb27SDimitry Andric 261006c3fb27SDimitry Andric // Sanity check that no alloc types on node or edges are None. 261106c3fb27SDimitry Andric assert(Node->AllocTypes != (uint8_t)AllocationType::None); 261206c3fb27SDimitry Andric assert(llvm::none_of(Node->CalleeEdges, 261306c3fb27SDimitry Andric [&](const std::shared_ptr<ContextEdge> &E) { 261406c3fb27SDimitry Andric return E->AllocTypes == (uint8_t)AllocationType::None; 261506c3fb27SDimitry Andric })); 261606c3fb27SDimitry Andric assert(llvm::none_of(Node->CallerEdges, 261706c3fb27SDimitry Andric [&](const std::shared_ptr<ContextEdge> &E) { 261806c3fb27SDimitry Andric return E->AllocTypes == (uint8_t)AllocationType::None; 261906c3fb27SDimitry Andric })); 262006c3fb27SDimitry Andric 262106c3fb27SDimitry Andric if (VerifyNodes) 262206c3fb27SDimitry Andric checkNode<DerivedCCG, FuncTy, CallTy>(Node); 262306c3fb27SDimitry Andric } 262406c3fb27SDimitry Andric 262506c3fb27SDimitry Andric void ModuleCallsiteContextGraph::updateAllocationCall( 262606c3fb27SDimitry Andric CallInfo &Call, AllocationType AllocType) { 262706c3fb27SDimitry Andric std::string AllocTypeString = getAllocTypeAttributeString(AllocType); 262806c3fb27SDimitry Andric auto A = llvm::Attribute::get(Call.call()->getFunction()->getContext(), 262906c3fb27SDimitry Andric "memprof", AllocTypeString); 263006c3fb27SDimitry Andric cast<CallBase>(Call.call())->addFnAttr(A); 263106c3fb27SDimitry Andric OREGetter(Call.call()->getFunction()) 263206c3fb27SDimitry Andric .emit(OptimizationRemark(DEBUG_TYPE, "MemprofAttribute", Call.call()) 263306c3fb27SDimitry Andric << ore::NV("AllocationCall", Call.call()) << " in clone " 263406c3fb27SDimitry Andric << ore::NV("Caller", Call.call()->getFunction()) 263506c3fb27SDimitry Andric << " marked with memprof allocation attribute " 263606c3fb27SDimitry Andric << ore::NV("Attribute", AllocTypeString)); 263706c3fb27SDimitry Andric } 263806c3fb27SDimitry Andric 263906c3fb27SDimitry Andric void IndexCallsiteContextGraph::updateAllocationCall(CallInfo &Call, 264006c3fb27SDimitry Andric AllocationType AllocType) { 264106c3fb27SDimitry Andric auto *AI = Call.call().dyn_cast<AllocInfo *>(); 264206c3fb27SDimitry Andric assert(AI); 264306c3fb27SDimitry Andric assert(AI->Versions.size() > Call.cloneNo()); 264406c3fb27SDimitry Andric AI->Versions[Call.cloneNo()] = (uint8_t)AllocType; 264506c3fb27SDimitry Andric } 264606c3fb27SDimitry Andric 264706c3fb27SDimitry Andric void ModuleCallsiteContextGraph::updateCall(CallInfo &CallerCall, 264806c3fb27SDimitry Andric FuncInfo CalleeFunc) { 264906c3fb27SDimitry Andric if (CalleeFunc.cloneNo() > 0) 265006c3fb27SDimitry Andric cast<CallBase>(CallerCall.call())->setCalledFunction(CalleeFunc.func()); 265106c3fb27SDimitry Andric OREGetter(CallerCall.call()->getFunction()) 265206c3fb27SDimitry Andric .emit(OptimizationRemark(DEBUG_TYPE, "MemprofCall", CallerCall.call()) 265306c3fb27SDimitry Andric << ore::NV("Call", CallerCall.call()) << " in clone " 265406c3fb27SDimitry Andric << ore::NV("Caller", CallerCall.call()->getFunction()) 265506c3fb27SDimitry Andric << " assigned to call function clone " 265606c3fb27SDimitry Andric << ore::NV("Callee", CalleeFunc.func())); 265706c3fb27SDimitry Andric } 265806c3fb27SDimitry Andric 265906c3fb27SDimitry Andric void IndexCallsiteContextGraph::updateCall(CallInfo &CallerCall, 266006c3fb27SDimitry Andric FuncInfo CalleeFunc) { 266106c3fb27SDimitry Andric auto *CI = CallerCall.call().dyn_cast<CallsiteInfo *>(); 266206c3fb27SDimitry Andric assert(CI && 266306c3fb27SDimitry Andric "Caller cannot be an allocation which should not have profiled calls"); 266406c3fb27SDimitry Andric assert(CI->Clones.size() > CallerCall.cloneNo()); 266506c3fb27SDimitry Andric CI->Clones[CallerCall.cloneNo()] = CalleeFunc.cloneNo(); 266606c3fb27SDimitry Andric } 266706c3fb27SDimitry Andric 266806c3fb27SDimitry Andric CallsiteContextGraph<ModuleCallsiteContextGraph, Function, 266906c3fb27SDimitry Andric Instruction *>::FuncInfo 267006c3fb27SDimitry Andric ModuleCallsiteContextGraph::cloneFunctionForCallsite( 267106c3fb27SDimitry Andric FuncInfo &Func, CallInfo &Call, std::map<CallInfo, CallInfo> &CallMap, 267206c3fb27SDimitry Andric std::vector<CallInfo> &CallsWithMetadataInFunc, unsigned CloneNo) { 267306c3fb27SDimitry Andric // Use existing LLVM facilities for cloning and obtaining Call in clone 267406c3fb27SDimitry Andric ValueToValueMapTy VMap; 267506c3fb27SDimitry Andric auto *NewFunc = CloneFunction(Func.func(), VMap); 267606c3fb27SDimitry Andric std::string Name = getMemProfFuncName(Func.func()->getName(), CloneNo); 267706c3fb27SDimitry Andric assert(!Func.func()->getParent()->getFunction(Name)); 267806c3fb27SDimitry Andric NewFunc->setName(Name); 267906c3fb27SDimitry Andric for (auto &Inst : CallsWithMetadataInFunc) { 268006c3fb27SDimitry Andric // This map always has the initial version in it. 268106c3fb27SDimitry Andric assert(Inst.cloneNo() == 0); 268206c3fb27SDimitry Andric CallMap[Inst] = {cast<Instruction>(VMap[Inst.call()]), CloneNo}; 268306c3fb27SDimitry Andric } 268406c3fb27SDimitry Andric OREGetter(Func.func()) 268506c3fb27SDimitry Andric .emit(OptimizationRemark(DEBUG_TYPE, "MemprofClone", Func.func()) 268606c3fb27SDimitry Andric << "created clone " << ore::NV("NewFunction", NewFunc)); 268706c3fb27SDimitry Andric return {NewFunc, CloneNo}; 268806c3fb27SDimitry Andric } 268906c3fb27SDimitry Andric 269006c3fb27SDimitry Andric CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary, 269106c3fb27SDimitry Andric IndexCall>::FuncInfo 269206c3fb27SDimitry Andric IndexCallsiteContextGraph::cloneFunctionForCallsite( 269306c3fb27SDimitry Andric FuncInfo &Func, CallInfo &Call, std::map<CallInfo, CallInfo> &CallMap, 269406c3fb27SDimitry Andric std::vector<CallInfo> &CallsWithMetadataInFunc, unsigned CloneNo) { 269506c3fb27SDimitry Andric // Check how many clones we have of Call (and therefore function). 269606c3fb27SDimitry Andric // The next clone number is the current size of versions array. 269706c3fb27SDimitry Andric // Confirm this matches the CloneNo provided by the caller, which is based on 269806c3fb27SDimitry Andric // the number of function clones we have. 269906c3fb27SDimitry Andric assert(CloneNo == 270006c3fb27SDimitry Andric (Call.call().is<AllocInfo *>() 270106c3fb27SDimitry Andric ? Call.call().dyn_cast<AllocInfo *>()->Versions.size() 270206c3fb27SDimitry Andric : Call.call().dyn_cast<CallsiteInfo *>()->Clones.size())); 270306c3fb27SDimitry Andric // Walk all the instructions in this function. Create a new version for 270406c3fb27SDimitry Andric // each (by adding an entry to the Versions/Clones summary array), and copy 270506c3fb27SDimitry Andric // over the version being called for the function clone being cloned here. 270606c3fb27SDimitry Andric // Additionally, add an entry to the CallMap for the new function clone, 270706c3fb27SDimitry Andric // mapping the original call (clone 0, what is in CallsWithMetadataInFunc) 270806c3fb27SDimitry Andric // to the new call clone. 270906c3fb27SDimitry Andric for (auto &Inst : CallsWithMetadataInFunc) { 271006c3fb27SDimitry Andric // This map always has the initial version in it. 271106c3fb27SDimitry Andric assert(Inst.cloneNo() == 0); 271206c3fb27SDimitry Andric if (auto *AI = Inst.call().dyn_cast<AllocInfo *>()) { 271306c3fb27SDimitry Andric assert(AI->Versions.size() == CloneNo); 271406c3fb27SDimitry Andric // We assign the allocation type later (in updateAllocationCall), just add 271506c3fb27SDimitry Andric // an entry for it here. 271606c3fb27SDimitry Andric AI->Versions.push_back(0); 271706c3fb27SDimitry Andric } else { 271806c3fb27SDimitry Andric auto *CI = Inst.call().dyn_cast<CallsiteInfo *>(); 271906c3fb27SDimitry Andric assert(CI && CI->Clones.size() == CloneNo); 272006c3fb27SDimitry Andric // We assign the clone number later (in updateCall), just add an entry for 272106c3fb27SDimitry Andric // it here. 272206c3fb27SDimitry Andric CI->Clones.push_back(0); 272306c3fb27SDimitry Andric } 272406c3fb27SDimitry Andric CallMap[Inst] = {Inst.call(), CloneNo}; 272506c3fb27SDimitry Andric } 272606c3fb27SDimitry Andric return {Func.func(), CloneNo}; 272706c3fb27SDimitry Andric } 272806c3fb27SDimitry Andric 272906c3fb27SDimitry Andric // This method assigns cloned callsites to functions, cloning the functions as 273006c3fb27SDimitry Andric // needed. The assignment is greedy and proceeds roughly as follows: 273106c3fb27SDimitry Andric // 273206c3fb27SDimitry Andric // For each function Func: 273306c3fb27SDimitry Andric // For each call with graph Node having clones: 273406c3fb27SDimitry Andric // Initialize ClonesWorklist to Node and its clones 273506c3fb27SDimitry Andric // Initialize NodeCloneCount to 0 273606c3fb27SDimitry Andric // While ClonesWorklist is not empty: 273706c3fb27SDimitry Andric // Clone = pop front ClonesWorklist 273806c3fb27SDimitry Andric // NodeCloneCount++ 273906c3fb27SDimitry Andric // If Func has been cloned less than NodeCloneCount times: 274006c3fb27SDimitry Andric // If NodeCloneCount is 1: 274106c3fb27SDimitry Andric // Assign Clone to original Func 274206c3fb27SDimitry Andric // Continue 274306c3fb27SDimitry Andric // Create a new function clone 274406c3fb27SDimitry Andric // If other callers not assigned to call a function clone yet: 274506c3fb27SDimitry Andric // Assign them to call new function clone 274606c3fb27SDimitry Andric // Continue 274706c3fb27SDimitry Andric // Assign any other caller calling the cloned version to new clone 274806c3fb27SDimitry Andric // 274906c3fb27SDimitry Andric // For each caller of Clone: 275006c3fb27SDimitry Andric // If caller is assigned to call a specific function clone: 275106c3fb27SDimitry Andric // If we cannot assign Clone to that function clone: 275206c3fb27SDimitry Andric // Create new callsite Clone NewClone 275306c3fb27SDimitry Andric // Add NewClone to ClonesWorklist 275406c3fb27SDimitry Andric // Continue 275506c3fb27SDimitry Andric // Assign Clone to existing caller's called function clone 275606c3fb27SDimitry Andric // Else: 275706c3fb27SDimitry Andric // If Clone not already assigned to a function clone: 275806c3fb27SDimitry Andric // Assign to first function clone without assignment 275906c3fb27SDimitry Andric // Assign caller to selected function clone 276006c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 276106c3fb27SDimitry Andric bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() { 276206c3fb27SDimitry Andric bool Changed = false; 276306c3fb27SDimitry Andric 276406c3fb27SDimitry Andric // Keep track of the assignment of nodes (callsites) to function clones they 276506c3fb27SDimitry Andric // call. 276606c3fb27SDimitry Andric DenseMap<ContextNode *, FuncInfo> CallsiteToCalleeFuncCloneMap; 276706c3fb27SDimitry Andric 276806c3fb27SDimitry Andric // Update caller node to call function version CalleeFunc, by recording the 276906c3fb27SDimitry Andric // assignment in CallsiteToCalleeFuncCloneMap. 277006c3fb27SDimitry Andric auto RecordCalleeFuncOfCallsite = [&](ContextNode *Caller, 277106c3fb27SDimitry Andric const FuncInfo &CalleeFunc) { 277206c3fb27SDimitry Andric assert(Caller->hasCall()); 277306c3fb27SDimitry Andric CallsiteToCalleeFuncCloneMap[Caller] = CalleeFunc; 277406c3fb27SDimitry Andric }; 277506c3fb27SDimitry Andric 277606c3fb27SDimitry Andric // Walk all functions for which we saw calls with memprof metadata, and handle 277706c3fb27SDimitry Andric // cloning for each of its calls. 277806c3fb27SDimitry Andric for (auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) { 277906c3fb27SDimitry Andric FuncInfo OrigFunc(Func); 278006c3fb27SDimitry Andric // Map from each clone of OrigFunc to a map of remappings of each call of 278106c3fb27SDimitry Andric // interest (from original uncloned call to the corresponding cloned call in 278206c3fb27SDimitry Andric // that function clone). 278306c3fb27SDimitry Andric std::map<FuncInfo, std::map<CallInfo, CallInfo>> FuncClonesToCallMap; 278406c3fb27SDimitry Andric for (auto &Call : CallsWithMetadata) { 278506c3fb27SDimitry Andric ContextNode *Node = getNodeForInst(Call); 278606c3fb27SDimitry Andric // Skip call if we do not have a node for it (all uses of its stack ids 278706c3fb27SDimitry Andric // were either on inlined chains or pruned from the MIBs), or if we did 278806c3fb27SDimitry Andric // not create any clones for it. 278906c3fb27SDimitry Andric if (!Node || Node->Clones.empty()) 279006c3fb27SDimitry Andric continue; 279106c3fb27SDimitry Andric assert(Node->hasCall() && 279206c3fb27SDimitry Andric "Not having a call should have prevented cloning"); 279306c3fb27SDimitry Andric 279406c3fb27SDimitry Andric // Track the assignment of function clones to clones of the current 279506c3fb27SDimitry Andric // callsite Node being handled. 279606c3fb27SDimitry Andric std::map<FuncInfo, ContextNode *> FuncCloneToCurNodeCloneMap; 279706c3fb27SDimitry Andric 279806c3fb27SDimitry Andric // Assign callsite version CallsiteClone to function version FuncClone, 279906c3fb27SDimitry Andric // and also assign (possibly cloned) Call to CallsiteClone. 280006c3fb27SDimitry Andric auto AssignCallsiteCloneToFuncClone = [&](const FuncInfo &FuncClone, 280106c3fb27SDimitry Andric CallInfo &Call, 280206c3fb27SDimitry Andric ContextNode *CallsiteClone, 280306c3fb27SDimitry Andric bool IsAlloc) { 280406c3fb27SDimitry Andric // Record the clone of callsite node assigned to this function clone. 280506c3fb27SDimitry Andric FuncCloneToCurNodeCloneMap[FuncClone] = CallsiteClone; 280606c3fb27SDimitry Andric 280706c3fb27SDimitry Andric assert(FuncClonesToCallMap.count(FuncClone)); 280806c3fb27SDimitry Andric std::map<CallInfo, CallInfo> &CallMap = FuncClonesToCallMap[FuncClone]; 280906c3fb27SDimitry Andric CallInfo CallClone(Call); 281006c3fb27SDimitry Andric if (CallMap.count(Call)) 281106c3fb27SDimitry Andric CallClone = CallMap[Call]; 281206c3fb27SDimitry Andric CallsiteClone->setCall(CallClone); 281306c3fb27SDimitry Andric }; 281406c3fb27SDimitry Andric 281506c3fb27SDimitry Andric // Keep track of the clones of callsite Node that need to be assigned to 281606c3fb27SDimitry Andric // function clones. This list may be expanded in the loop body below if we 281706c3fb27SDimitry Andric // find additional cloning is required. 281806c3fb27SDimitry Andric std::deque<ContextNode *> ClonesWorklist; 281906c3fb27SDimitry Andric // Ignore original Node if we moved all of its contexts to clones. 282006c3fb27SDimitry Andric if (!Node->ContextIds.empty()) 282106c3fb27SDimitry Andric ClonesWorklist.push_back(Node); 282206c3fb27SDimitry Andric ClonesWorklist.insert(ClonesWorklist.end(), Node->Clones.begin(), 282306c3fb27SDimitry Andric Node->Clones.end()); 282406c3fb27SDimitry Andric 282506c3fb27SDimitry Andric // Now walk through all of the clones of this callsite Node that we need, 282606c3fb27SDimitry Andric // and determine the assignment to a corresponding clone of the current 282706c3fb27SDimitry Andric // function (creating new function clones as needed). 282806c3fb27SDimitry Andric unsigned NodeCloneCount = 0; 282906c3fb27SDimitry Andric while (!ClonesWorklist.empty()) { 283006c3fb27SDimitry Andric ContextNode *Clone = ClonesWorklist.front(); 283106c3fb27SDimitry Andric ClonesWorklist.pop_front(); 283206c3fb27SDimitry Andric NodeCloneCount++; 283306c3fb27SDimitry Andric if (VerifyNodes) 283406c3fb27SDimitry Andric checkNode<DerivedCCG, FuncTy, CallTy>(Clone); 283506c3fb27SDimitry Andric 283606c3fb27SDimitry Andric // Need to create a new function clone if we have more callsite clones 283706c3fb27SDimitry Andric // than existing function clones, which would have been assigned to an 283806c3fb27SDimitry Andric // earlier clone in the list (we assign callsite clones to function 283906c3fb27SDimitry Andric // clones greedily). 284006c3fb27SDimitry Andric if (FuncClonesToCallMap.size() < NodeCloneCount) { 284106c3fb27SDimitry Andric // If this is the first callsite copy, assign to original function. 284206c3fb27SDimitry Andric if (NodeCloneCount == 1) { 284306c3fb27SDimitry Andric // Since FuncClonesToCallMap is empty in this case, no clones have 284406c3fb27SDimitry Andric // been created for this function yet, and no callers should have 284506c3fb27SDimitry Andric // been assigned a function clone for this callee node yet. 284606c3fb27SDimitry Andric assert(llvm::none_of( 284706c3fb27SDimitry Andric Clone->CallerEdges, [&](const std::shared_ptr<ContextEdge> &E) { 284806c3fb27SDimitry Andric return CallsiteToCalleeFuncCloneMap.count(E->Caller); 284906c3fb27SDimitry Andric })); 285006c3fb27SDimitry Andric // Initialize with empty call map, assign Clone to original function 285106c3fb27SDimitry Andric // and its callers, and skip to the next clone. 285206c3fb27SDimitry Andric FuncClonesToCallMap[OrigFunc] = {}; 285306c3fb27SDimitry Andric AssignCallsiteCloneToFuncClone( 285406c3fb27SDimitry Andric OrigFunc, Call, Clone, 285506c3fb27SDimitry Andric AllocationCallToContextNodeMap.count(Call)); 285606c3fb27SDimitry Andric for (auto &CE : Clone->CallerEdges) { 285706c3fb27SDimitry Andric // Ignore any caller that does not have a recorded callsite Call. 285806c3fb27SDimitry Andric if (!CE->Caller->hasCall()) 285906c3fb27SDimitry Andric continue; 286006c3fb27SDimitry Andric RecordCalleeFuncOfCallsite(CE->Caller, OrigFunc); 286106c3fb27SDimitry Andric } 286206c3fb27SDimitry Andric continue; 286306c3fb27SDimitry Andric } 286406c3fb27SDimitry Andric 286506c3fb27SDimitry Andric // First locate which copy of OrigFunc to clone again. If a caller 286606c3fb27SDimitry Andric // of this callsite clone was already assigned to call a particular 286706c3fb27SDimitry Andric // function clone, we need to redirect all of those callers to the 286806c3fb27SDimitry Andric // new function clone, and update their other callees within this 286906c3fb27SDimitry Andric // function. 287006c3fb27SDimitry Andric FuncInfo PreviousAssignedFuncClone; 287106c3fb27SDimitry Andric auto EI = llvm::find_if( 287206c3fb27SDimitry Andric Clone->CallerEdges, [&](const std::shared_ptr<ContextEdge> &E) { 287306c3fb27SDimitry Andric return CallsiteToCalleeFuncCloneMap.count(E->Caller); 287406c3fb27SDimitry Andric }); 287506c3fb27SDimitry Andric bool CallerAssignedToCloneOfFunc = false; 287606c3fb27SDimitry Andric if (EI != Clone->CallerEdges.end()) { 287706c3fb27SDimitry Andric const std::shared_ptr<ContextEdge> &Edge = *EI; 287806c3fb27SDimitry Andric PreviousAssignedFuncClone = 287906c3fb27SDimitry Andric CallsiteToCalleeFuncCloneMap[Edge->Caller]; 288006c3fb27SDimitry Andric CallerAssignedToCloneOfFunc = true; 288106c3fb27SDimitry Andric } 288206c3fb27SDimitry Andric 288306c3fb27SDimitry Andric // Clone function and save it along with the CallInfo map created 288406c3fb27SDimitry Andric // during cloning in the FuncClonesToCallMap. 288506c3fb27SDimitry Andric std::map<CallInfo, CallInfo> NewCallMap; 288606c3fb27SDimitry Andric unsigned CloneNo = FuncClonesToCallMap.size(); 288706c3fb27SDimitry Andric assert(CloneNo > 0 && "Clone 0 is the original function, which " 288806c3fb27SDimitry Andric "should already exist in the map"); 288906c3fb27SDimitry Andric FuncInfo NewFuncClone = cloneFunctionForCallsite( 289006c3fb27SDimitry Andric OrigFunc, Call, NewCallMap, CallsWithMetadata, CloneNo); 289106c3fb27SDimitry Andric FuncClonesToCallMap.emplace(NewFuncClone, std::move(NewCallMap)); 289206c3fb27SDimitry Andric FunctionClonesAnalysis++; 289306c3fb27SDimitry Andric Changed = true; 289406c3fb27SDimitry Andric 289506c3fb27SDimitry Andric // If no caller callsites were already assigned to a clone of this 289606c3fb27SDimitry Andric // function, we can simply assign this clone to the new func clone 289706c3fb27SDimitry Andric // and update all callers to it, then skip to the next clone. 289806c3fb27SDimitry Andric if (!CallerAssignedToCloneOfFunc) { 289906c3fb27SDimitry Andric AssignCallsiteCloneToFuncClone( 290006c3fb27SDimitry Andric NewFuncClone, Call, Clone, 290106c3fb27SDimitry Andric AllocationCallToContextNodeMap.count(Call)); 290206c3fb27SDimitry Andric for (auto &CE : Clone->CallerEdges) { 290306c3fb27SDimitry Andric // Ignore any caller that does not have a recorded callsite Call. 290406c3fb27SDimitry Andric if (!CE->Caller->hasCall()) 290506c3fb27SDimitry Andric continue; 290606c3fb27SDimitry Andric RecordCalleeFuncOfCallsite(CE->Caller, NewFuncClone); 290706c3fb27SDimitry Andric } 290806c3fb27SDimitry Andric continue; 290906c3fb27SDimitry Andric } 291006c3fb27SDimitry Andric 291106c3fb27SDimitry Andric // We may need to do additional node cloning in this case. 291206c3fb27SDimitry Andric // Reset the CallsiteToCalleeFuncCloneMap entry for any callers 291306c3fb27SDimitry Andric // that were previously assigned to call PreviousAssignedFuncClone, 291406c3fb27SDimitry Andric // to record that they now call NewFuncClone. 291506c3fb27SDimitry Andric for (auto CE : Clone->CallerEdges) { 2916297eecfbSDimitry Andric // Skip any that have been removed on an earlier iteration. 2917297eecfbSDimitry Andric if (!CE) 2918297eecfbSDimitry Andric continue; 291906c3fb27SDimitry Andric // Ignore any caller that does not have a recorded callsite Call. 292006c3fb27SDimitry Andric if (!CE->Caller->hasCall()) 292106c3fb27SDimitry Andric continue; 292206c3fb27SDimitry Andric 292306c3fb27SDimitry Andric if (!CallsiteToCalleeFuncCloneMap.count(CE->Caller) || 292406c3fb27SDimitry Andric // We subsequently fall through to later handling that 292506c3fb27SDimitry Andric // will perform any additional cloning required for 292606c3fb27SDimitry Andric // callers that were calling other function clones. 292706c3fb27SDimitry Andric CallsiteToCalleeFuncCloneMap[CE->Caller] != 292806c3fb27SDimitry Andric PreviousAssignedFuncClone) 292906c3fb27SDimitry Andric continue; 293006c3fb27SDimitry Andric 293106c3fb27SDimitry Andric RecordCalleeFuncOfCallsite(CE->Caller, NewFuncClone); 293206c3fb27SDimitry Andric 293306c3fb27SDimitry Andric // If we are cloning a function that was already assigned to some 293406c3fb27SDimitry Andric // callers, then essentially we are creating new callsite clones 293506c3fb27SDimitry Andric // of the other callsites in that function that are reached by those 293606c3fb27SDimitry Andric // callers. Clone the other callees of the current callsite's caller 293706c3fb27SDimitry Andric // that were already assigned to PreviousAssignedFuncClone 293806c3fb27SDimitry Andric // accordingly. This is important since we subsequently update the 293906c3fb27SDimitry Andric // calls from the nodes in the graph and their assignments to callee 294006c3fb27SDimitry Andric // functions recorded in CallsiteToCalleeFuncCloneMap. 294106c3fb27SDimitry Andric for (auto CalleeEdge : CE->Caller->CalleeEdges) { 294206c3fb27SDimitry Andric // Skip any that have been removed on an earlier iteration when 294306c3fb27SDimitry Andric // cleaning up newly None type callee edges. 294406c3fb27SDimitry Andric if (!CalleeEdge) 294506c3fb27SDimitry Andric continue; 294606c3fb27SDimitry Andric ContextNode *Callee = CalleeEdge->Callee; 294706c3fb27SDimitry Andric // Skip the current callsite, we are looking for other 294806c3fb27SDimitry Andric // callsites Caller calls, as well as any that does not have a 294906c3fb27SDimitry Andric // recorded callsite Call. 295006c3fb27SDimitry Andric if (Callee == Clone || !Callee->hasCall()) 295106c3fb27SDimitry Andric continue; 295206c3fb27SDimitry Andric ContextNode *NewClone = moveEdgeToNewCalleeClone(CalleeEdge); 295306c3fb27SDimitry Andric removeNoneTypeCalleeEdges(NewClone); 295406c3fb27SDimitry Andric // Moving the edge may have resulted in some none type 295506c3fb27SDimitry Andric // callee edges on the original Callee. 295606c3fb27SDimitry Andric removeNoneTypeCalleeEdges(Callee); 295706c3fb27SDimitry Andric assert(NewClone->AllocTypes != (uint8_t)AllocationType::None); 295806c3fb27SDimitry Andric // If the Callee node was already assigned to call a specific 295906c3fb27SDimitry Andric // function version, make sure its new clone is assigned to call 296006c3fb27SDimitry Andric // that same function clone. 296106c3fb27SDimitry Andric if (CallsiteToCalleeFuncCloneMap.count(Callee)) 296206c3fb27SDimitry Andric RecordCalleeFuncOfCallsite( 296306c3fb27SDimitry Andric NewClone, CallsiteToCalleeFuncCloneMap[Callee]); 296406c3fb27SDimitry Andric // Update NewClone with the new Call clone of this callsite's Call 296506c3fb27SDimitry Andric // created for the new function clone created earlier. 296606c3fb27SDimitry Andric // Recall that we have already ensured when building the graph 296706c3fb27SDimitry Andric // that each caller can only call callsites within the same 296806c3fb27SDimitry Andric // function, so we are guaranteed that Callee Call is in the 296906c3fb27SDimitry Andric // current OrigFunc. 297006c3fb27SDimitry Andric // CallMap is set up as indexed by original Call at clone 0. 297106c3fb27SDimitry Andric CallInfo OrigCall(Callee->getOrigNode()->Call); 297206c3fb27SDimitry Andric OrigCall.setCloneNo(0); 297306c3fb27SDimitry Andric std::map<CallInfo, CallInfo> &CallMap = 297406c3fb27SDimitry Andric FuncClonesToCallMap[NewFuncClone]; 297506c3fb27SDimitry Andric assert(CallMap.count(OrigCall)); 297606c3fb27SDimitry Andric CallInfo NewCall(CallMap[OrigCall]); 297706c3fb27SDimitry Andric assert(NewCall); 297806c3fb27SDimitry Andric NewClone->setCall(NewCall); 297906c3fb27SDimitry Andric } 298006c3fb27SDimitry Andric } 298106c3fb27SDimitry Andric // Fall through to handling below to perform the recording of the 298206c3fb27SDimitry Andric // function for this callsite clone. This enables handling of cases 298306c3fb27SDimitry Andric // where the callers were assigned to different clones of a function. 298406c3fb27SDimitry Andric } 298506c3fb27SDimitry Andric 298606c3fb27SDimitry Andric // See if we can use existing function clone. Walk through 298706c3fb27SDimitry Andric // all caller edges to see if any have already been assigned to 298806c3fb27SDimitry Andric // a clone of this callsite's function. If we can use it, do so. If not, 298906c3fb27SDimitry Andric // because that function clone is already assigned to a different clone 299006c3fb27SDimitry Andric // of this callsite, then we need to clone again. 299106c3fb27SDimitry Andric // Basically, this checking is needed to handle the case where different 299206c3fb27SDimitry Andric // caller functions/callsites may need versions of this function 299306c3fb27SDimitry Andric // containing different mixes of callsite clones across the different 299406c3fb27SDimitry Andric // callsites within the function. If that happens, we need to create 299506c3fb27SDimitry Andric // additional function clones to handle the various combinations. 299606c3fb27SDimitry Andric // 299706c3fb27SDimitry Andric // Keep track of any new clones of this callsite created by the 299806c3fb27SDimitry Andric // following loop, as well as any existing clone that we decided to 299906c3fb27SDimitry Andric // assign this clone to. 300006c3fb27SDimitry Andric std::map<FuncInfo, ContextNode *> FuncCloneToNewCallsiteCloneMap; 300106c3fb27SDimitry Andric FuncInfo FuncCloneAssignedToCurCallsiteClone; 300206c3fb27SDimitry Andric // We need to be able to remove Edge from CallerEdges, so need to adjust 300306c3fb27SDimitry Andric // iterator in the loop. 300406c3fb27SDimitry Andric for (auto EI = Clone->CallerEdges.begin(); 300506c3fb27SDimitry Andric EI != Clone->CallerEdges.end();) { 300606c3fb27SDimitry Andric auto Edge = *EI; 300706c3fb27SDimitry Andric // Ignore any caller that does not have a recorded callsite Call. 300806c3fb27SDimitry Andric if (!Edge->Caller->hasCall()) { 300906c3fb27SDimitry Andric EI++; 301006c3fb27SDimitry Andric continue; 301106c3fb27SDimitry Andric } 301206c3fb27SDimitry Andric // If this caller already assigned to call a version of OrigFunc, need 301306c3fb27SDimitry Andric // to ensure we can assign this callsite clone to that function clone. 301406c3fb27SDimitry Andric if (CallsiteToCalleeFuncCloneMap.count(Edge->Caller)) { 301506c3fb27SDimitry Andric FuncInfo FuncCloneCalledByCaller = 301606c3fb27SDimitry Andric CallsiteToCalleeFuncCloneMap[Edge->Caller]; 301706c3fb27SDimitry Andric // First we need to confirm that this function clone is available 301806c3fb27SDimitry Andric // for use by this callsite node clone. 301906c3fb27SDimitry Andric // 302006c3fb27SDimitry Andric // While FuncCloneToCurNodeCloneMap is built only for this Node and 302106c3fb27SDimitry Andric // its callsite clones, one of those callsite clones X could have 302206c3fb27SDimitry Andric // been assigned to the same function clone called by Edge's caller 302306c3fb27SDimitry Andric // - if Edge's caller calls another callsite within Node's original 302406c3fb27SDimitry Andric // function, and that callsite has another caller reaching clone X. 302506c3fb27SDimitry Andric // We need to clone Node again in this case. 302606c3fb27SDimitry Andric if ((FuncCloneToCurNodeCloneMap.count(FuncCloneCalledByCaller) && 302706c3fb27SDimitry Andric FuncCloneToCurNodeCloneMap[FuncCloneCalledByCaller] != 302806c3fb27SDimitry Andric Clone) || 302906c3fb27SDimitry Andric // Detect when we have multiple callers of this callsite that 303006c3fb27SDimitry Andric // have already been assigned to specific, and different, clones 303106c3fb27SDimitry Andric // of OrigFunc (due to other unrelated callsites in Func they 303206c3fb27SDimitry Andric // reach via call contexts). Is this Clone of callsite Node 303306c3fb27SDimitry Andric // assigned to a different clone of OrigFunc? If so, clone Node 303406c3fb27SDimitry Andric // again. 303506c3fb27SDimitry Andric (FuncCloneAssignedToCurCallsiteClone && 303606c3fb27SDimitry Andric FuncCloneAssignedToCurCallsiteClone != 303706c3fb27SDimitry Andric FuncCloneCalledByCaller)) { 303806c3fb27SDimitry Andric // We need to use a different newly created callsite clone, in 303906c3fb27SDimitry Andric // order to assign it to another new function clone on a 304006c3fb27SDimitry Andric // subsequent iteration over the Clones array (adjusted below). 304106c3fb27SDimitry Andric // Note we specifically do not reset the 304206c3fb27SDimitry Andric // CallsiteToCalleeFuncCloneMap entry for this caller, so that 304306c3fb27SDimitry Andric // when this new clone is processed later we know which version of 304406c3fb27SDimitry Andric // the function to copy (so that other callsite clones we have 304506c3fb27SDimitry Andric // assigned to that function clone are properly cloned over). See 304606c3fb27SDimitry Andric // comments in the function cloning handling earlier. 304706c3fb27SDimitry Andric 304806c3fb27SDimitry Andric // Check if we already have cloned this callsite again while 304906c3fb27SDimitry Andric // walking through caller edges, for a caller calling the same 305006c3fb27SDimitry Andric // function clone. If so, we can move this edge to that new clone 305106c3fb27SDimitry Andric // rather than creating yet another new clone. 305206c3fb27SDimitry Andric if (FuncCloneToNewCallsiteCloneMap.count( 305306c3fb27SDimitry Andric FuncCloneCalledByCaller)) { 305406c3fb27SDimitry Andric ContextNode *NewClone = 305506c3fb27SDimitry Andric FuncCloneToNewCallsiteCloneMap[FuncCloneCalledByCaller]; 305606c3fb27SDimitry Andric moveEdgeToExistingCalleeClone(Edge, NewClone, &EI); 305706c3fb27SDimitry Andric // Cleanup any none type edges cloned over. 305806c3fb27SDimitry Andric removeNoneTypeCalleeEdges(NewClone); 305906c3fb27SDimitry Andric } else { 306006c3fb27SDimitry Andric // Create a new callsite clone. 306106c3fb27SDimitry Andric ContextNode *NewClone = moveEdgeToNewCalleeClone(Edge, &EI); 306206c3fb27SDimitry Andric removeNoneTypeCalleeEdges(NewClone); 306306c3fb27SDimitry Andric FuncCloneToNewCallsiteCloneMap[FuncCloneCalledByCaller] = 306406c3fb27SDimitry Andric NewClone; 306506c3fb27SDimitry Andric // Add to list of clones and process later. 306606c3fb27SDimitry Andric ClonesWorklist.push_back(NewClone); 306706c3fb27SDimitry Andric assert(EI == Clone->CallerEdges.end() || 306806c3fb27SDimitry Andric Clone->AllocTypes != (uint8_t)AllocationType::None); 306906c3fb27SDimitry Andric assert(NewClone->AllocTypes != (uint8_t)AllocationType::None); 307006c3fb27SDimitry Andric } 307106c3fb27SDimitry Andric // Moving the caller edge may have resulted in some none type 307206c3fb27SDimitry Andric // callee edges. 307306c3fb27SDimitry Andric removeNoneTypeCalleeEdges(Clone); 307406c3fb27SDimitry Andric // We will handle the newly created callsite clone in a subsequent 307506c3fb27SDimitry Andric // iteration over this Node's Clones. Continue here since we 307606c3fb27SDimitry Andric // already adjusted iterator EI while moving the edge. 307706c3fb27SDimitry Andric continue; 307806c3fb27SDimitry Andric } 307906c3fb27SDimitry Andric 308006c3fb27SDimitry Andric // Otherwise, we can use the function clone already assigned to this 308106c3fb27SDimitry Andric // caller. 308206c3fb27SDimitry Andric if (!FuncCloneAssignedToCurCallsiteClone) { 308306c3fb27SDimitry Andric FuncCloneAssignedToCurCallsiteClone = FuncCloneCalledByCaller; 308406c3fb27SDimitry Andric // Assign Clone to FuncCloneCalledByCaller 308506c3fb27SDimitry Andric AssignCallsiteCloneToFuncClone( 308606c3fb27SDimitry Andric FuncCloneCalledByCaller, Call, Clone, 308706c3fb27SDimitry Andric AllocationCallToContextNodeMap.count(Call)); 308806c3fb27SDimitry Andric } else 308906c3fb27SDimitry Andric // Don't need to do anything - callsite is already calling this 309006c3fb27SDimitry Andric // function clone. 309106c3fb27SDimitry Andric assert(FuncCloneAssignedToCurCallsiteClone == 309206c3fb27SDimitry Andric FuncCloneCalledByCaller); 309306c3fb27SDimitry Andric 309406c3fb27SDimitry Andric } else { 309506c3fb27SDimitry Andric // We have not already assigned this caller to a version of 309606c3fb27SDimitry Andric // OrigFunc. Do the assignment now. 309706c3fb27SDimitry Andric 309806c3fb27SDimitry Andric // First check if we have already assigned this callsite clone to a 309906c3fb27SDimitry Andric // clone of OrigFunc for another caller during this iteration over 310006c3fb27SDimitry Andric // its caller edges. 310106c3fb27SDimitry Andric if (!FuncCloneAssignedToCurCallsiteClone) { 310206c3fb27SDimitry Andric // Find first function in FuncClonesToCallMap without an assigned 310306c3fb27SDimitry Andric // clone of this callsite Node. We should always have one 310406c3fb27SDimitry Andric // available at this point due to the earlier cloning when the 310506c3fb27SDimitry Andric // FuncClonesToCallMap size was smaller than the clone number. 310606c3fb27SDimitry Andric for (auto &CF : FuncClonesToCallMap) { 310706c3fb27SDimitry Andric if (!FuncCloneToCurNodeCloneMap.count(CF.first)) { 310806c3fb27SDimitry Andric FuncCloneAssignedToCurCallsiteClone = CF.first; 310906c3fb27SDimitry Andric break; 311006c3fb27SDimitry Andric } 311106c3fb27SDimitry Andric } 311206c3fb27SDimitry Andric assert(FuncCloneAssignedToCurCallsiteClone); 311306c3fb27SDimitry Andric // Assign Clone to FuncCloneAssignedToCurCallsiteClone 311406c3fb27SDimitry Andric AssignCallsiteCloneToFuncClone( 311506c3fb27SDimitry Andric FuncCloneAssignedToCurCallsiteClone, Call, Clone, 311606c3fb27SDimitry Andric AllocationCallToContextNodeMap.count(Call)); 311706c3fb27SDimitry Andric } else 311806c3fb27SDimitry Andric assert(FuncCloneToCurNodeCloneMap 311906c3fb27SDimitry Andric [FuncCloneAssignedToCurCallsiteClone] == Clone); 312006c3fb27SDimitry Andric // Update callers to record function version called. 312106c3fb27SDimitry Andric RecordCalleeFuncOfCallsite(Edge->Caller, 312206c3fb27SDimitry Andric FuncCloneAssignedToCurCallsiteClone); 312306c3fb27SDimitry Andric } 312406c3fb27SDimitry Andric 312506c3fb27SDimitry Andric EI++; 312606c3fb27SDimitry Andric } 312706c3fb27SDimitry Andric } 312806c3fb27SDimitry Andric if (VerifyCCG) { 312906c3fb27SDimitry Andric checkNode<DerivedCCG, FuncTy, CallTy>(Node); 313006c3fb27SDimitry Andric for (const auto &PE : Node->CalleeEdges) 313106c3fb27SDimitry Andric checkNode<DerivedCCG, FuncTy, CallTy>(PE->Callee); 313206c3fb27SDimitry Andric for (const auto &CE : Node->CallerEdges) 313306c3fb27SDimitry Andric checkNode<DerivedCCG, FuncTy, CallTy>(CE->Caller); 313406c3fb27SDimitry Andric for (auto *Clone : Node->Clones) { 313506c3fb27SDimitry Andric checkNode<DerivedCCG, FuncTy, CallTy>(Clone); 313606c3fb27SDimitry Andric for (const auto &PE : Clone->CalleeEdges) 313706c3fb27SDimitry Andric checkNode<DerivedCCG, FuncTy, CallTy>(PE->Callee); 313806c3fb27SDimitry Andric for (const auto &CE : Clone->CallerEdges) 313906c3fb27SDimitry Andric checkNode<DerivedCCG, FuncTy, CallTy>(CE->Caller); 314006c3fb27SDimitry Andric } 314106c3fb27SDimitry Andric } 314206c3fb27SDimitry Andric } 314306c3fb27SDimitry Andric } 314406c3fb27SDimitry Andric 314506c3fb27SDimitry Andric auto UpdateCalls = [&](ContextNode *Node, 314606c3fb27SDimitry Andric DenseSet<const ContextNode *> &Visited, 314706c3fb27SDimitry Andric auto &&UpdateCalls) { 314806c3fb27SDimitry Andric auto Inserted = Visited.insert(Node); 314906c3fb27SDimitry Andric if (!Inserted.second) 315006c3fb27SDimitry Andric return; 315106c3fb27SDimitry Andric 315206c3fb27SDimitry Andric for (auto *Clone : Node->Clones) 315306c3fb27SDimitry Andric UpdateCalls(Clone, Visited, UpdateCalls); 315406c3fb27SDimitry Andric 315506c3fb27SDimitry Andric for (auto &Edge : Node->CallerEdges) 315606c3fb27SDimitry Andric UpdateCalls(Edge->Caller, Visited, UpdateCalls); 315706c3fb27SDimitry Andric 315806c3fb27SDimitry Andric // Skip if either no call to update, or if we ended up with no context ids 315906c3fb27SDimitry Andric // (we moved all edges onto other clones). 316006c3fb27SDimitry Andric if (!Node->hasCall() || Node->ContextIds.empty()) 316106c3fb27SDimitry Andric return; 316206c3fb27SDimitry Andric 316306c3fb27SDimitry Andric if (Node->IsAllocation) { 316406c3fb27SDimitry Andric updateAllocationCall(Node->Call, allocTypeToUse(Node->AllocTypes)); 316506c3fb27SDimitry Andric return; 316606c3fb27SDimitry Andric } 316706c3fb27SDimitry Andric 316806c3fb27SDimitry Andric if (!CallsiteToCalleeFuncCloneMap.count(Node)) 316906c3fb27SDimitry Andric return; 317006c3fb27SDimitry Andric 317106c3fb27SDimitry Andric auto CalleeFunc = CallsiteToCalleeFuncCloneMap[Node]; 317206c3fb27SDimitry Andric updateCall(Node->Call, CalleeFunc); 317306c3fb27SDimitry Andric }; 317406c3fb27SDimitry Andric 317506c3fb27SDimitry Andric // Performs DFS traversal starting from allocation nodes to update calls to 317606c3fb27SDimitry Andric // reflect cloning decisions recorded earlier. For regular LTO this will 317706c3fb27SDimitry Andric // update the actual calls in the IR to call the appropriate function clone 317806c3fb27SDimitry Andric // (and add attributes to allocation calls), whereas for ThinLTO the decisions 317906c3fb27SDimitry Andric // are recorded in the summary entries. 318006c3fb27SDimitry Andric DenseSet<const ContextNode *> Visited; 318106c3fb27SDimitry Andric for (auto &Entry : AllocationCallToContextNodeMap) 318206c3fb27SDimitry Andric UpdateCalls(Entry.second, Visited, UpdateCalls); 318306c3fb27SDimitry Andric 318406c3fb27SDimitry Andric return Changed; 318506c3fb27SDimitry Andric } 318606c3fb27SDimitry Andric 318706c3fb27SDimitry Andric static SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> createFunctionClones( 318806c3fb27SDimitry Andric Function &F, unsigned NumClones, Module &M, OptimizationRemarkEmitter &ORE, 318906c3fb27SDimitry Andric std::map<const Function *, SmallPtrSet<const GlobalAlias *, 1>> 319006c3fb27SDimitry Andric &FuncToAliasMap) { 319106c3fb27SDimitry Andric // The first "clone" is the original copy, we should only call this if we 319206c3fb27SDimitry Andric // needed to create new clones. 319306c3fb27SDimitry Andric assert(NumClones > 1); 319406c3fb27SDimitry Andric SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> VMaps; 319506c3fb27SDimitry Andric VMaps.reserve(NumClones - 1); 319606c3fb27SDimitry Andric FunctionsClonedThinBackend++; 319706c3fb27SDimitry Andric for (unsigned I = 1; I < NumClones; I++) { 319806c3fb27SDimitry Andric VMaps.emplace_back(std::make_unique<ValueToValueMapTy>()); 319906c3fb27SDimitry Andric auto *NewF = CloneFunction(&F, *VMaps.back()); 320006c3fb27SDimitry Andric FunctionClonesThinBackend++; 320106c3fb27SDimitry Andric // Strip memprof and callsite metadata from clone as they are no longer 320206c3fb27SDimitry Andric // needed. 320306c3fb27SDimitry Andric for (auto &BB : *NewF) { 320406c3fb27SDimitry Andric for (auto &Inst : BB) { 320506c3fb27SDimitry Andric Inst.setMetadata(LLVMContext::MD_memprof, nullptr); 320606c3fb27SDimitry Andric Inst.setMetadata(LLVMContext::MD_callsite, nullptr); 320706c3fb27SDimitry Andric } 320806c3fb27SDimitry Andric } 320906c3fb27SDimitry Andric std::string Name = getMemProfFuncName(F.getName(), I); 321006c3fb27SDimitry Andric auto *PrevF = M.getFunction(Name); 321106c3fb27SDimitry Andric if (PrevF) { 321206c3fb27SDimitry Andric // We might have created this when adjusting callsite in another 321306c3fb27SDimitry Andric // function. It should be a declaration. 321406c3fb27SDimitry Andric assert(PrevF->isDeclaration()); 321506c3fb27SDimitry Andric NewF->takeName(PrevF); 321606c3fb27SDimitry Andric PrevF->replaceAllUsesWith(NewF); 321706c3fb27SDimitry Andric PrevF->eraseFromParent(); 321806c3fb27SDimitry Andric } else 321906c3fb27SDimitry Andric NewF->setName(Name); 322006c3fb27SDimitry Andric ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofClone", &F) 322106c3fb27SDimitry Andric << "created clone " << ore::NV("NewFunction", NewF)); 322206c3fb27SDimitry Andric 322306c3fb27SDimitry Andric // Now handle aliases to this function, and clone those as well. 322406c3fb27SDimitry Andric if (!FuncToAliasMap.count(&F)) 322506c3fb27SDimitry Andric continue; 322606c3fb27SDimitry Andric for (auto *A : FuncToAliasMap[&F]) { 322706c3fb27SDimitry Andric std::string Name = getMemProfFuncName(A->getName(), I); 322806c3fb27SDimitry Andric auto *PrevA = M.getNamedAlias(Name); 322906c3fb27SDimitry Andric auto *NewA = GlobalAlias::create(A->getValueType(), 323006c3fb27SDimitry Andric A->getType()->getPointerAddressSpace(), 323106c3fb27SDimitry Andric A->getLinkage(), Name, NewF); 323206c3fb27SDimitry Andric NewA->copyAttributesFrom(A); 323306c3fb27SDimitry Andric if (PrevA) { 323406c3fb27SDimitry Andric // We might have created this when adjusting callsite in another 323506c3fb27SDimitry Andric // function. It should be a declaration. 323606c3fb27SDimitry Andric assert(PrevA->isDeclaration()); 323706c3fb27SDimitry Andric NewA->takeName(PrevA); 323806c3fb27SDimitry Andric PrevA->replaceAllUsesWith(NewA); 323906c3fb27SDimitry Andric PrevA->eraseFromParent(); 324006c3fb27SDimitry Andric } 324106c3fb27SDimitry Andric } 324206c3fb27SDimitry Andric } 324306c3fb27SDimitry Andric return VMaps; 324406c3fb27SDimitry Andric } 324506c3fb27SDimitry Andric 324606c3fb27SDimitry Andric // Locate the summary for F. This is complicated by the fact that it might 324706c3fb27SDimitry Andric // have been internalized or promoted. 324806c3fb27SDimitry Andric static ValueInfo findValueInfoForFunc(const Function &F, const Module &M, 324906c3fb27SDimitry Andric const ModuleSummaryIndex *ImportSummary) { 325006c3fb27SDimitry Andric // FIXME: Ideally we would retain the original GUID in some fashion on the 325106c3fb27SDimitry Andric // function (e.g. as metadata), but for now do our best to locate the 325206c3fb27SDimitry Andric // summary without that information. 325306c3fb27SDimitry Andric ValueInfo TheFnVI = ImportSummary->getValueInfo(F.getGUID()); 325406c3fb27SDimitry Andric if (!TheFnVI) 325506c3fb27SDimitry Andric // See if theFn was internalized, by checking index directly with 325606c3fb27SDimitry Andric // original name (this avoids the name adjustment done by getGUID() for 325706c3fb27SDimitry Andric // internal symbols). 325806c3fb27SDimitry Andric TheFnVI = ImportSummary->getValueInfo(GlobalValue::getGUID(F.getName())); 325906c3fb27SDimitry Andric if (TheFnVI) 326006c3fb27SDimitry Andric return TheFnVI; 326106c3fb27SDimitry Andric // Now query with the original name before any promotion was performed. 326206c3fb27SDimitry Andric StringRef OrigName = 326306c3fb27SDimitry Andric ModuleSummaryIndex::getOriginalNameBeforePromote(F.getName()); 326406c3fb27SDimitry Andric std::string OrigId = GlobalValue::getGlobalIdentifier( 326506c3fb27SDimitry Andric OrigName, GlobalValue::InternalLinkage, M.getSourceFileName()); 326606c3fb27SDimitry Andric TheFnVI = ImportSummary->getValueInfo(GlobalValue::getGUID(OrigId)); 326706c3fb27SDimitry Andric if (TheFnVI) 326806c3fb27SDimitry Andric return TheFnVI; 326906c3fb27SDimitry Andric // Could be a promoted local imported from another module. We need to pass 327006c3fb27SDimitry Andric // down more info here to find the original module id. For now, try with 327106c3fb27SDimitry Andric // the OrigName which might have been stored in the OidGuidMap in the 327206c3fb27SDimitry Andric // index. This would not work if there were same-named locals in multiple 327306c3fb27SDimitry Andric // modules, however. 327406c3fb27SDimitry Andric auto OrigGUID = 327506c3fb27SDimitry Andric ImportSummary->getGUIDFromOriginalID(GlobalValue::getGUID(OrigName)); 327606c3fb27SDimitry Andric if (OrigGUID) 327706c3fb27SDimitry Andric TheFnVI = ImportSummary->getValueInfo(OrigGUID); 327806c3fb27SDimitry Andric return TheFnVI; 327906c3fb27SDimitry Andric } 328006c3fb27SDimitry Andric 328106c3fb27SDimitry Andric bool MemProfContextDisambiguation::applyImport(Module &M) { 328206c3fb27SDimitry Andric assert(ImportSummary); 328306c3fb27SDimitry Andric bool Changed = false; 328406c3fb27SDimitry Andric 328506c3fb27SDimitry Andric auto IsMemProfClone = [](const Function &F) { 328606c3fb27SDimitry Andric return F.getName().contains(MemProfCloneSuffix); 328706c3fb27SDimitry Andric }; 328806c3fb27SDimitry Andric 328906c3fb27SDimitry Andric // We also need to clone any aliases that reference cloned functions, because 329006c3fb27SDimitry Andric // the modified callsites may invoke via the alias. Keep track of the aliases 329106c3fb27SDimitry Andric // for each function. 329206c3fb27SDimitry Andric std::map<const Function *, SmallPtrSet<const GlobalAlias *, 1>> 329306c3fb27SDimitry Andric FuncToAliasMap; 329406c3fb27SDimitry Andric for (auto &A : M.aliases()) { 329506c3fb27SDimitry Andric auto *Aliasee = A.getAliaseeObject(); 329606c3fb27SDimitry Andric if (auto *F = dyn_cast<Function>(Aliasee)) 329706c3fb27SDimitry Andric FuncToAliasMap[F].insert(&A); 329806c3fb27SDimitry Andric } 329906c3fb27SDimitry Andric 330006c3fb27SDimitry Andric for (auto &F : M) { 330106c3fb27SDimitry Andric if (F.isDeclaration() || IsMemProfClone(F)) 330206c3fb27SDimitry Andric continue; 330306c3fb27SDimitry Andric 330406c3fb27SDimitry Andric OptimizationRemarkEmitter ORE(&F); 330506c3fb27SDimitry Andric 330606c3fb27SDimitry Andric SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> VMaps; 330706c3fb27SDimitry Andric bool ClonesCreated = false; 330806c3fb27SDimitry Andric unsigned NumClonesCreated = 0; 330906c3fb27SDimitry Andric auto CloneFuncIfNeeded = [&](unsigned NumClones) { 331006c3fb27SDimitry Andric // We should at least have version 0 which is the original copy. 331106c3fb27SDimitry Andric assert(NumClones > 0); 331206c3fb27SDimitry Andric // If only one copy needed use original. 331306c3fb27SDimitry Andric if (NumClones == 1) 331406c3fb27SDimitry Andric return; 331506c3fb27SDimitry Andric // If we already performed cloning of this function, confirm that the 331606c3fb27SDimitry Andric // requested number of clones matches (the thin link should ensure the 331706c3fb27SDimitry Andric // number of clones for each constituent callsite is consistent within 331806c3fb27SDimitry Andric // each function), before returning. 331906c3fb27SDimitry Andric if (ClonesCreated) { 332006c3fb27SDimitry Andric assert(NumClonesCreated == NumClones); 332106c3fb27SDimitry Andric return; 332206c3fb27SDimitry Andric } 332306c3fb27SDimitry Andric VMaps = createFunctionClones(F, NumClones, M, ORE, FuncToAliasMap); 332406c3fb27SDimitry Andric // The first "clone" is the original copy, which doesn't have a VMap. 332506c3fb27SDimitry Andric assert(VMaps.size() == NumClones - 1); 332606c3fb27SDimitry Andric Changed = true; 332706c3fb27SDimitry Andric ClonesCreated = true; 332806c3fb27SDimitry Andric NumClonesCreated = NumClones; 332906c3fb27SDimitry Andric }; 333006c3fb27SDimitry Andric 3331297eecfbSDimitry Andric auto CloneCallsite = [&](const CallsiteInfo &StackNode, CallBase *CB, 3332297eecfbSDimitry Andric Function *CalledFunction) { 3333297eecfbSDimitry Andric // Perform cloning if not yet done. 3334297eecfbSDimitry Andric CloneFuncIfNeeded(/*NumClones=*/StackNode.Clones.size()); 3335297eecfbSDimitry Andric 3336297eecfbSDimitry Andric // Should have skipped indirect calls via mayHaveMemprofSummary. 3337297eecfbSDimitry Andric assert(CalledFunction); 3338297eecfbSDimitry Andric assert(!IsMemProfClone(*CalledFunction)); 3339297eecfbSDimitry Andric 3340297eecfbSDimitry Andric // Update the calls per the summary info. 3341297eecfbSDimitry Andric // Save orig name since it gets updated in the first iteration 3342297eecfbSDimitry Andric // below. 3343297eecfbSDimitry Andric auto CalleeOrigName = CalledFunction->getName(); 3344297eecfbSDimitry Andric for (unsigned J = 0; J < StackNode.Clones.size(); J++) { 3345297eecfbSDimitry Andric // Do nothing if this version calls the original version of its 3346297eecfbSDimitry Andric // callee. 3347297eecfbSDimitry Andric if (!StackNode.Clones[J]) 3348297eecfbSDimitry Andric continue; 3349297eecfbSDimitry Andric auto NewF = M.getOrInsertFunction( 3350297eecfbSDimitry Andric getMemProfFuncName(CalleeOrigName, StackNode.Clones[J]), 3351297eecfbSDimitry Andric CalledFunction->getFunctionType()); 3352297eecfbSDimitry Andric CallBase *CBClone; 3353297eecfbSDimitry Andric // Copy 0 is the original function. 3354297eecfbSDimitry Andric if (!J) 3355297eecfbSDimitry Andric CBClone = CB; 3356297eecfbSDimitry Andric else 3357297eecfbSDimitry Andric CBClone = cast<CallBase>((*VMaps[J - 1])[CB]); 3358297eecfbSDimitry Andric CBClone->setCalledFunction(NewF); 3359297eecfbSDimitry Andric ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofCall", CBClone) 3360297eecfbSDimitry Andric << ore::NV("Call", CBClone) << " in clone " 3361297eecfbSDimitry Andric << ore::NV("Caller", CBClone->getFunction()) 3362297eecfbSDimitry Andric << " assigned to call function clone " 3363297eecfbSDimitry Andric << ore::NV("Callee", NewF.getCallee())); 3364297eecfbSDimitry Andric } 3365297eecfbSDimitry Andric }; 3366297eecfbSDimitry Andric 336706c3fb27SDimitry Andric // Locate the summary for F. 336806c3fb27SDimitry Andric ValueInfo TheFnVI = findValueInfoForFunc(F, M, ImportSummary); 336906c3fb27SDimitry Andric // If not found, this could be an imported local (see comment in 337006c3fb27SDimitry Andric // findValueInfoForFunc). Skip for now as it will be cloned in its original 337106c3fb27SDimitry Andric // module (where it would have been promoted to global scope so should 337206c3fb27SDimitry Andric // satisfy any reference in this module). 337306c3fb27SDimitry Andric if (!TheFnVI) 337406c3fb27SDimitry Andric continue; 337506c3fb27SDimitry Andric 337606c3fb27SDimitry Andric auto *GVSummary = 337706c3fb27SDimitry Andric ImportSummary->findSummaryInModule(TheFnVI, M.getModuleIdentifier()); 337806c3fb27SDimitry Andric if (!GVSummary) 337906c3fb27SDimitry Andric // Must have been imported, use the first summary (might be multiple if 338006c3fb27SDimitry Andric // this was a linkonce_odr). 338106c3fb27SDimitry Andric GVSummary = TheFnVI.getSummaryList().front().get(); 338206c3fb27SDimitry Andric 338306c3fb27SDimitry Andric // If this was an imported alias skip it as we won't have the function 338406c3fb27SDimitry Andric // summary, and it should be cloned in the original module. 338506c3fb27SDimitry Andric if (isa<AliasSummary>(GVSummary)) 338606c3fb27SDimitry Andric continue; 338706c3fb27SDimitry Andric 338806c3fb27SDimitry Andric auto *FS = cast<FunctionSummary>(GVSummary->getBaseObject()); 338906c3fb27SDimitry Andric 339006c3fb27SDimitry Andric if (FS->allocs().empty() && FS->callsites().empty()) 339106c3fb27SDimitry Andric continue; 339206c3fb27SDimitry Andric 339306c3fb27SDimitry Andric auto SI = FS->callsites().begin(); 339406c3fb27SDimitry Andric auto AI = FS->allocs().begin(); 339506c3fb27SDimitry Andric 3396297eecfbSDimitry Andric // To handle callsite infos synthesized for tail calls which have missing 3397297eecfbSDimitry Andric // frames in the profiled context, map callee VI to the synthesized callsite 3398297eecfbSDimitry Andric // info. 3399297eecfbSDimitry Andric DenseMap<ValueInfo, CallsiteInfo> MapTailCallCalleeVIToCallsite; 3400297eecfbSDimitry Andric // Iterate the callsites for this function in reverse, since we place all 3401297eecfbSDimitry Andric // those synthesized for tail calls at the end. 3402297eecfbSDimitry Andric for (auto CallsiteIt = FS->callsites().rbegin(); 3403297eecfbSDimitry Andric CallsiteIt != FS->callsites().rend(); CallsiteIt++) { 3404297eecfbSDimitry Andric auto &Callsite = *CallsiteIt; 3405297eecfbSDimitry Andric // Stop as soon as we see a non-synthesized callsite info (see comment 3406297eecfbSDimitry Andric // above loop). All the entries added for discovered tail calls have empty 3407297eecfbSDimitry Andric // stack ids. 3408297eecfbSDimitry Andric if (!Callsite.StackIdIndices.empty()) 3409297eecfbSDimitry Andric break; 3410297eecfbSDimitry Andric MapTailCallCalleeVIToCallsite.insert({Callsite.Callee, Callsite}); 3411297eecfbSDimitry Andric } 3412297eecfbSDimitry Andric 341306c3fb27SDimitry Andric // Assume for now that the instructions are in the exact same order 341406c3fb27SDimitry Andric // as when the summary was created, but confirm this is correct by 341506c3fb27SDimitry Andric // matching the stack ids. 341606c3fb27SDimitry Andric for (auto &BB : F) { 341706c3fb27SDimitry Andric for (auto &I : BB) { 341806c3fb27SDimitry Andric auto *CB = dyn_cast<CallBase>(&I); 341906c3fb27SDimitry Andric // Same handling as when creating module summary. 342006c3fb27SDimitry Andric if (!mayHaveMemprofSummary(CB)) 342106c3fb27SDimitry Andric continue; 342206c3fb27SDimitry Andric 34235f757f3fSDimitry Andric auto *CalledValue = CB->getCalledOperand(); 34245f757f3fSDimitry Andric auto *CalledFunction = CB->getCalledFunction(); 34255f757f3fSDimitry Andric if (CalledValue && !CalledFunction) { 34265f757f3fSDimitry Andric CalledValue = CalledValue->stripPointerCasts(); 34275f757f3fSDimitry Andric // Stripping pointer casts can reveal a called function. 34285f757f3fSDimitry Andric CalledFunction = dyn_cast<Function>(CalledValue); 34295f757f3fSDimitry Andric } 34305f757f3fSDimitry Andric // Check if this is an alias to a function. If so, get the 34315f757f3fSDimitry Andric // called aliasee for the checks below. 34325f757f3fSDimitry Andric if (auto *GA = dyn_cast<GlobalAlias>(CalledValue)) { 34335f757f3fSDimitry Andric assert(!CalledFunction && 34345f757f3fSDimitry Andric "Expected null called function in callsite for alias"); 34355f757f3fSDimitry Andric CalledFunction = dyn_cast<Function>(GA->getAliaseeObject()); 34365f757f3fSDimitry Andric } 34375f757f3fSDimitry Andric 343806c3fb27SDimitry Andric CallStack<MDNode, MDNode::op_iterator> CallsiteContext( 343906c3fb27SDimitry Andric I.getMetadata(LLVMContext::MD_callsite)); 344006c3fb27SDimitry Andric auto *MemProfMD = I.getMetadata(LLVMContext::MD_memprof); 344106c3fb27SDimitry Andric 344206c3fb27SDimitry Andric // Include allocs that were already assigned a memprof function 344306c3fb27SDimitry Andric // attribute in the statistics. 344406c3fb27SDimitry Andric if (CB->getAttributes().hasFnAttr("memprof")) { 344506c3fb27SDimitry Andric assert(!MemProfMD); 344606c3fb27SDimitry Andric CB->getAttributes().getFnAttr("memprof").getValueAsString() == "cold" 344706c3fb27SDimitry Andric ? AllocTypeColdThinBackend++ 344806c3fb27SDimitry Andric : AllocTypeNotColdThinBackend++; 344906c3fb27SDimitry Andric OrigAllocsThinBackend++; 345006c3fb27SDimitry Andric AllocVersionsThinBackend++; 345106c3fb27SDimitry Andric if (!MaxAllocVersionsThinBackend) 345206c3fb27SDimitry Andric MaxAllocVersionsThinBackend = 1; 345306c3fb27SDimitry Andric // Remove any remaining callsite metadata and we can skip the rest of 345406c3fb27SDimitry Andric // the handling for this instruction, since no cloning needed. 345506c3fb27SDimitry Andric I.setMetadata(LLVMContext::MD_callsite, nullptr); 345606c3fb27SDimitry Andric continue; 345706c3fb27SDimitry Andric } 345806c3fb27SDimitry Andric 345906c3fb27SDimitry Andric if (MemProfMD) { 346006c3fb27SDimitry Andric // Consult the next alloc node. 346106c3fb27SDimitry Andric assert(AI != FS->allocs().end()); 346206c3fb27SDimitry Andric auto &AllocNode = *(AI++); 346306c3fb27SDimitry Andric 346406c3fb27SDimitry Andric // Sanity check that the MIB stack ids match between the summary and 346506c3fb27SDimitry Andric // instruction metadata. 346606c3fb27SDimitry Andric auto MIBIter = AllocNode.MIBs.begin(); 346706c3fb27SDimitry Andric for (auto &MDOp : MemProfMD->operands()) { 346806c3fb27SDimitry Andric assert(MIBIter != AllocNode.MIBs.end()); 346906c3fb27SDimitry Andric LLVM_ATTRIBUTE_UNUSED auto StackIdIndexIter = 347006c3fb27SDimitry Andric MIBIter->StackIdIndices.begin(); 347106c3fb27SDimitry Andric auto *MIBMD = cast<const MDNode>(MDOp); 347206c3fb27SDimitry Andric MDNode *StackMDNode = getMIBStackNode(MIBMD); 347306c3fb27SDimitry Andric assert(StackMDNode); 347406c3fb27SDimitry Andric SmallVector<unsigned> StackIdsFromMetadata; 347506c3fb27SDimitry Andric CallStack<MDNode, MDNode::op_iterator> StackContext(StackMDNode); 347606c3fb27SDimitry Andric for (auto ContextIter = 347706c3fb27SDimitry Andric StackContext.beginAfterSharedPrefix(CallsiteContext); 347806c3fb27SDimitry Andric ContextIter != StackContext.end(); ++ContextIter) { 347906c3fb27SDimitry Andric // If this is a direct recursion, simply skip the duplicate 348006c3fb27SDimitry Andric // entries, to be consistent with how the summary ids were 348106c3fb27SDimitry Andric // generated during ModuleSummaryAnalysis. 348206c3fb27SDimitry Andric if (!StackIdsFromMetadata.empty() && 348306c3fb27SDimitry Andric StackIdsFromMetadata.back() == *ContextIter) 348406c3fb27SDimitry Andric continue; 348506c3fb27SDimitry Andric assert(StackIdIndexIter != MIBIter->StackIdIndices.end()); 348606c3fb27SDimitry Andric assert(ImportSummary->getStackIdAtIndex(*StackIdIndexIter) == 348706c3fb27SDimitry Andric *ContextIter); 348806c3fb27SDimitry Andric StackIdIndexIter++; 348906c3fb27SDimitry Andric } 349006c3fb27SDimitry Andric MIBIter++; 349106c3fb27SDimitry Andric } 349206c3fb27SDimitry Andric 349306c3fb27SDimitry Andric // Perform cloning if not yet done. 349406c3fb27SDimitry Andric CloneFuncIfNeeded(/*NumClones=*/AllocNode.Versions.size()); 349506c3fb27SDimitry Andric 349606c3fb27SDimitry Andric OrigAllocsThinBackend++; 349706c3fb27SDimitry Andric AllocVersionsThinBackend += AllocNode.Versions.size(); 349806c3fb27SDimitry Andric if (MaxAllocVersionsThinBackend < AllocNode.Versions.size()) 349906c3fb27SDimitry Andric MaxAllocVersionsThinBackend = AllocNode.Versions.size(); 350006c3fb27SDimitry Andric 350106c3fb27SDimitry Andric // If there is only one version that means we didn't end up 350206c3fb27SDimitry Andric // considering this function for cloning, and in that case the alloc 350306c3fb27SDimitry Andric // will still be none type or should have gotten the default NotCold. 350406c3fb27SDimitry Andric // Skip that after calling clone helper since that does some sanity 350506c3fb27SDimitry Andric // checks that confirm we haven't decided yet that we need cloning. 350606c3fb27SDimitry Andric if (AllocNode.Versions.size() == 1) { 350706c3fb27SDimitry Andric assert((AllocationType)AllocNode.Versions[0] == 350806c3fb27SDimitry Andric AllocationType::NotCold || 350906c3fb27SDimitry Andric (AllocationType)AllocNode.Versions[0] == 351006c3fb27SDimitry Andric AllocationType::None); 351106c3fb27SDimitry Andric UnclonableAllocsThinBackend++; 351206c3fb27SDimitry Andric continue; 351306c3fb27SDimitry Andric } 351406c3fb27SDimitry Andric 351506c3fb27SDimitry Andric // All versions should have a singular allocation type. 351606c3fb27SDimitry Andric assert(llvm::none_of(AllocNode.Versions, [](uint8_t Type) { 351706c3fb27SDimitry Andric return Type == ((uint8_t)AllocationType::NotCold | 351806c3fb27SDimitry Andric (uint8_t)AllocationType::Cold); 351906c3fb27SDimitry Andric })); 352006c3fb27SDimitry Andric 352106c3fb27SDimitry Andric // Update the allocation types per the summary info. 352206c3fb27SDimitry Andric for (unsigned J = 0; J < AllocNode.Versions.size(); J++) { 352306c3fb27SDimitry Andric // Ignore any that didn't get an assigned allocation type. 352406c3fb27SDimitry Andric if (AllocNode.Versions[J] == (uint8_t)AllocationType::None) 352506c3fb27SDimitry Andric continue; 352606c3fb27SDimitry Andric AllocationType AllocTy = (AllocationType)AllocNode.Versions[J]; 352706c3fb27SDimitry Andric AllocTy == AllocationType::Cold ? AllocTypeColdThinBackend++ 352806c3fb27SDimitry Andric : AllocTypeNotColdThinBackend++; 352906c3fb27SDimitry Andric std::string AllocTypeString = getAllocTypeAttributeString(AllocTy); 353006c3fb27SDimitry Andric auto A = llvm::Attribute::get(F.getContext(), "memprof", 353106c3fb27SDimitry Andric AllocTypeString); 353206c3fb27SDimitry Andric CallBase *CBClone; 353306c3fb27SDimitry Andric // Copy 0 is the original function. 353406c3fb27SDimitry Andric if (!J) 353506c3fb27SDimitry Andric CBClone = CB; 353606c3fb27SDimitry Andric else 353706c3fb27SDimitry Andric // Since VMaps are only created for new clones, we index with 353806c3fb27SDimitry Andric // clone J-1 (J==0 is the original clone and does not have a VMaps 353906c3fb27SDimitry Andric // entry). 354006c3fb27SDimitry Andric CBClone = cast<CallBase>((*VMaps[J - 1])[CB]); 354106c3fb27SDimitry Andric CBClone->addFnAttr(A); 354206c3fb27SDimitry Andric ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofAttribute", CBClone) 354306c3fb27SDimitry Andric << ore::NV("AllocationCall", CBClone) << " in clone " 354406c3fb27SDimitry Andric << ore::NV("Caller", CBClone->getFunction()) 354506c3fb27SDimitry Andric << " marked with memprof allocation attribute " 354606c3fb27SDimitry Andric << ore::NV("Attribute", AllocTypeString)); 354706c3fb27SDimitry Andric } 354806c3fb27SDimitry Andric } else if (!CallsiteContext.empty()) { 354906c3fb27SDimitry Andric // Consult the next callsite node. 355006c3fb27SDimitry Andric assert(SI != FS->callsites().end()); 355106c3fb27SDimitry Andric auto &StackNode = *(SI++); 355206c3fb27SDimitry Andric 355306c3fb27SDimitry Andric #ifndef NDEBUG 355406c3fb27SDimitry Andric // Sanity check that the stack ids match between the summary and 355506c3fb27SDimitry Andric // instruction metadata. 355606c3fb27SDimitry Andric auto StackIdIndexIter = StackNode.StackIdIndices.begin(); 355706c3fb27SDimitry Andric for (auto StackId : CallsiteContext) { 355806c3fb27SDimitry Andric assert(StackIdIndexIter != StackNode.StackIdIndices.end()); 355906c3fb27SDimitry Andric assert(ImportSummary->getStackIdAtIndex(*StackIdIndexIter) == 356006c3fb27SDimitry Andric StackId); 356106c3fb27SDimitry Andric StackIdIndexIter++; 356206c3fb27SDimitry Andric } 356306c3fb27SDimitry Andric #endif 356406c3fb27SDimitry Andric 3565297eecfbSDimitry Andric CloneCallsite(StackNode, CB, CalledFunction); 3566297eecfbSDimitry Andric } else if (CB->isTailCall()) { 3567297eecfbSDimitry Andric // Locate the synthesized callsite info for the callee VI, if any was 3568297eecfbSDimitry Andric // created, and use that for cloning. 3569297eecfbSDimitry Andric ValueInfo CalleeVI = 3570297eecfbSDimitry Andric findValueInfoForFunc(*CalledFunction, M, ImportSummary); 3571297eecfbSDimitry Andric if (CalleeVI && MapTailCallCalleeVIToCallsite.count(CalleeVI)) { 3572297eecfbSDimitry Andric auto Callsite = MapTailCallCalleeVIToCallsite.find(CalleeVI); 3573297eecfbSDimitry Andric assert(Callsite != MapTailCallCalleeVIToCallsite.end()); 3574297eecfbSDimitry Andric CloneCallsite(Callsite->second, CB, CalledFunction); 357506c3fb27SDimitry Andric } 357606c3fb27SDimitry Andric } 357706c3fb27SDimitry Andric // Memprof and callsite metadata on memory allocations no longer needed. 357806c3fb27SDimitry Andric I.setMetadata(LLVMContext::MD_memprof, nullptr); 357906c3fb27SDimitry Andric I.setMetadata(LLVMContext::MD_callsite, nullptr); 358006c3fb27SDimitry Andric } 358106c3fb27SDimitry Andric } 358206c3fb27SDimitry Andric } 358306c3fb27SDimitry Andric 358406c3fb27SDimitry Andric return Changed; 358506c3fb27SDimitry Andric } 358606c3fb27SDimitry Andric 358706c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 358806c3fb27SDimitry Andric bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::process() { 358906c3fb27SDimitry Andric if (DumpCCG) { 359006c3fb27SDimitry Andric dbgs() << "CCG before cloning:\n"; 359106c3fb27SDimitry Andric dbgs() << *this; 359206c3fb27SDimitry Andric } 359306c3fb27SDimitry Andric if (ExportToDot) 359406c3fb27SDimitry Andric exportToDot("postbuild"); 359506c3fb27SDimitry Andric 359606c3fb27SDimitry Andric if (VerifyCCG) { 359706c3fb27SDimitry Andric check(); 359806c3fb27SDimitry Andric } 359906c3fb27SDimitry Andric 360006c3fb27SDimitry Andric identifyClones(); 360106c3fb27SDimitry Andric 360206c3fb27SDimitry Andric if (VerifyCCG) { 360306c3fb27SDimitry Andric check(); 360406c3fb27SDimitry Andric } 360506c3fb27SDimitry Andric 360606c3fb27SDimitry Andric if (DumpCCG) { 360706c3fb27SDimitry Andric dbgs() << "CCG after cloning:\n"; 360806c3fb27SDimitry Andric dbgs() << *this; 360906c3fb27SDimitry Andric } 361006c3fb27SDimitry Andric if (ExportToDot) 361106c3fb27SDimitry Andric exportToDot("cloned"); 361206c3fb27SDimitry Andric 361306c3fb27SDimitry Andric bool Changed = assignFunctions(); 361406c3fb27SDimitry Andric 361506c3fb27SDimitry Andric if (DumpCCG) { 361606c3fb27SDimitry Andric dbgs() << "CCG after assigning function clones:\n"; 361706c3fb27SDimitry Andric dbgs() << *this; 361806c3fb27SDimitry Andric } 361906c3fb27SDimitry Andric if (ExportToDot) 362006c3fb27SDimitry Andric exportToDot("clonefuncassign"); 362106c3fb27SDimitry Andric 362206c3fb27SDimitry Andric return Changed; 362306c3fb27SDimitry Andric } 362406c3fb27SDimitry Andric 362506c3fb27SDimitry Andric bool MemProfContextDisambiguation::processModule( 362606c3fb27SDimitry Andric Module &M, 3627*7a6dacacSDimitry Andric llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter) { 362806c3fb27SDimitry Andric 362906c3fb27SDimitry Andric // If we have an import summary, then the cloning decisions were made during 363006c3fb27SDimitry Andric // the thin link on the index. Apply them and return. 363106c3fb27SDimitry Andric if (ImportSummary) 363206c3fb27SDimitry Andric return applyImport(M); 363306c3fb27SDimitry Andric 363406c3fb27SDimitry Andric // TODO: If/when other types of memprof cloning are enabled beyond just for 363506c3fb27SDimitry Andric // hot and cold, we will need to change this to individually control the 363606c3fb27SDimitry Andric // AllocationType passed to addStackNodesForMIB during CCG construction. 363706c3fb27SDimitry Andric // Note that we specifically check this after applying imports above, so that 363806c3fb27SDimitry Andric // the option isn't needed to be passed to distributed ThinLTO backend 363906c3fb27SDimitry Andric // clang processes, which won't necessarily have visibility into the linker 364006c3fb27SDimitry Andric // dependences. Instead the information is communicated from the LTO link to 364106c3fb27SDimitry Andric // the backends via the combined summary index. 364206c3fb27SDimitry Andric if (!SupportsHotColdNew) 364306c3fb27SDimitry Andric return false; 364406c3fb27SDimitry Andric 364506c3fb27SDimitry Andric ModuleCallsiteContextGraph CCG(M, OREGetter); 364606c3fb27SDimitry Andric return CCG.process(); 364706c3fb27SDimitry Andric } 364806c3fb27SDimitry Andric 364906c3fb27SDimitry Andric MemProfContextDisambiguation::MemProfContextDisambiguation( 365006c3fb27SDimitry Andric const ModuleSummaryIndex *Summary) 365106c3fb27SDimitry Andric : ImportSummary(Summary) { 365206c3fb27SDimitry Andric if (ImportSummary) { 365306c3fb27SDimitry Andric // The MemProfImportSummary should only be used for testing ThinLTO 365406c3fb27SDimitry Andric // distributed backend handling via opt, in which case we don't have a 365506c3fb27SDimitry Andric // summary from the pass pipeline. 365606c3fb27SDimitry Andric assert(MemProfImportSummary.empty()); 365706c3fb27SDimitry Andric return; 365806c3fb27SDimitry Andric } 365906c3fb27SDimitry Andric if (MemProfImportSummary.empty()) 366006c3fb27SDimitry Andric return; 366106c3fb27SDimitry Andric 366206c3fb27SDimitry Andric auto ReadSummaryFile = 366306c3fb27SDimitry Andric errorOrToExpected(MemoryBuffer::getFile(MemProfImportSummary)); 366406c3fb27SDimitry Andric if (!ReadSummaryFile) { 366506c3fb27SDimitry Andric logAllUnhandledErrors(ReadSummaryFile.takeError(), errs(), 366606c3fb27SDimitry Andric "Error loading file '" + MemProfImportSummary + 366706c3fb27SDimitry Andric "': "); 366806c3fb27SDimitry Andric return; 366906c3fb27SDimitry Andric } 367006c3fb27SDimitry Andric auto ImportSummaryForTestingOrErr = getModuleSummaryIndex(**ReadSummaryFile); 367106c3fb27SDimitry Andric if (!ImportSummaryForTestingOrErr) { 367206c3fb27SDimitry Andric logAllUnhandledErrors(ImportSummaryForTestingOrErr.takeError(), errs(), 367306c3fb27SDimitry Andric "Error parsing file '" + MemProfImportSummary + 367406c3fb27SDimitry Andric "': "); 367506c3fb27SDimitry Andric return; 367606c3fb27SDimitry Andric } 367706c3fb27SDimitry Andric ImportSummaryForTesting = std::move(*ImportSummaryForTestingOrErr); 367806c3fb27SDimitry Andric ImportSummary = ImportSummaryForTesting.get(); 367906c3fb27SDimitry Andric } 368006c3fb27SDimitry Andric 368106c3fb27SDimitry Andric PreservedAnalyses MemProfContextDisambiguation::run(Module &M, 368206c3fb27SDimitry Andric ModuleAnalysisManager &AM) { 368306c3fb27SDimitry Andric auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 368406c3fb27SDimitry Andric auto OREGetter = [&](Function *F) -> OptimizationRemarkEmitter & { 368506c3fb27SDimitry Andric return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*F); 368606c3fb27SDimitry Andric }; 368706c3fb27SDimitry Andric if (!processModule(M, OREGetter)) 368806c3fb27SDimitry Andric return PreservedAnalyses::all(); 368906c3fb27SDimitry Andric return PreservedAnalyses::none(); 369006c3fb27SDimitry Andric } 369106c3fb27SDimitry Andric 369206c3fb27SDimitry Andric void MemProfContextDisambiguation::run( 369306c3fb27SDimitry Andric ModuleSummaryIndex &Index, 3694*7a6dacacSDimitry Andric llvm::function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)> 369506c3fb27SDimitry Andric isPrevailing) { 369606c3fb27SDimitry Andric // TODO: If/when other types of memprof cloning are enabled beyond just for 369706c3fb27SDimitry Andric // hot and cold, we will need to change this to individually control the 369806c3fb27SDimitry Andric // AllocationType passed to addStackNodesForMIB during CCG construction. 369906c3fb27SDimitry Andric // The index was set from the option, so these should be in sync. 370006c3fb27SDimitry Andric assert(Index.withSupportsHotColdNew() == SupportsHotColdNew); 370106c3fb27SDimitry Andric if (!SupportsHotColdNew) 370206c3fb27SDimitry Andric return; 370306c3fb27SDimitry Andric 370406c3fb27SDimitry Andric IndexCallsiteContextGraph CCG(Index, isPrevailing); 370506c3fb27SDimitry Andric CCG.process(); 370606c3fb27SDimitry Andric } 3707