106c3fb27SDimitry Andric //==-- MemProfContextDisambiguation.cpp - Disambiguate contexts -------------=// 206c3fb27SDimitry Andric // 306c3fb27SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 406c3fb27SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 506c3fb27SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 606c3fb27SDimitry Andric // 706c3fb27SDimitry Andric //===----------------------------------------------------------------------===// 806c3fb27SDimitry Andric // 906c3fb27SDimitry Andric // This file implements support for context disambiguation of allocation 1006c3fb27SDimitry Andric // calls for profile guided heap optimization. Specifically, it uses Memprof 1106c3fb27SDimitry Andric // profiles which indicate context specific allocation behavior (currently 1206c3fb27SDimitry Andric // distinguishing cold vs hot memory allocations). Cloning is performed to 1306c3fb27SDimitry Andric // expose the cold allocation call contexts, and the allocation calls are 1406c3fb27SDimitry Andric // subsequently annotated with an attribute for later transformation. 1506c3fb27SDimitry Andric // 1606c3fb27SDimitry Andric // The transformations can be performed either directly on IR (regular LTO), or 1706c3fb27SDimitry Andric // on a ThinLTO index (and later applied to the IR during the ThinLTO backend). 1806c3fb27SDimitry Andric // Both types of LTO operate on a the same base graph representation, which 1906c3fb27SDimitry Andric // uses CRTP to support either IR or Index formats. 2006c3fb27SDimitry Andric // 2106c3fb27SDimitry Andric //===----------------------------------------------------------------------===// 2206c3fb27SDimitry Andric 2306c3fb27SDimitry Andric #include "llvm/Transforms/IPO/MemProfContextDisambiguation.h" 2406c3fb27SDimitry Andric #include "llvm/ADT/DenseMap.h" 2506c3fb27SDimitry Andric #include "llvm/ADT/DenseSet.h" 2606c3fb27SDimitry Andric #include "llvm/ADT/MapVector.h" 2706c3fb27SDimitry Andric #include "llvm/ADT/SetOperations.h" 2806c3fb27SDimitry Andric #include "llvm/ADT/SmallPtrSet.h" 2906c3fb27SDimitry Andric #include "llvm/ADT/SmallSet.h" 3006c3fb27SDimitry Andric #include "llvm/ADT/SmallVector.h" 3106c3fb27SDimitry Andric #include "llvm/ADT/Statistic.h" 3206c3fb27SDimitry Andric #include "llvm/Analysis/MemoryProfileInfo.h" 3306c3fb27SDimitry Andric #include "llvm/Analysis/ModuleSummaryAnalysis.h" 3406c3fb27SDimitry Andric #include "llvm/Analysis/OptimizationRemarkEmitter.h" 3506c3fb27SDimitry Andric #include "llvm/Bitcode/BitcodeReader.h" 3606c3fb27SDimitry Andric #include "llvm/IR/Constants.h" 3706c3fb27SDimitry Andric #include "llvm/IR/Instructions.h" 3806c3fb27SDimitry Andric #include "llvm/IR/Module.h" 3906c3fb27SDimitry Andric #include "llvm/IR/ModuleSummaryIndex.h" 4006c3fb27SDimitry Andric #include "llvm/Pass.h" 4106c3fb27SDimitry Andric #include "llvm/Support/CommandLine.h" 4206c3fb27SDimitry Andric #include "llvm/Support/FileSystem.h" 4306c3fb27SDimitry Andric #include "llvm/Support/GraphWriter.h" 4406c3fb27SDimitry Andric #include "llvm/Support/raw_ostream.h" 4506c3fb27SDimitry Andric #include "llvm/Transforms/IPO.h" 4606c3fb27SDimitry Andric #include "llvm/Transforms/Utils/Cloning.h" 4706c3fb27SDimitry Andric #include <sstream> 4806c3fb27SDimitry Andric #include <vector> 4906c3fb27SDimitry Andric using namespace llvm; 5006c3fb27SDimitry Andric using namespace llvm::memprof; 5106c3fb27SDimitry Andric 5206c3fb27SDimitry Andric #define DEBUG_TYPE "memprof-context-disambiguation" 5306c3fb27SDimitry Andric 5406c3fb27SDimitry Andric STATISTIC(FunctionClonesAnalysis, 5506c3fb27SDimitry Andric "Number of function clones created during whole program analysis"); 5606c3fb27SDimitry Andric STATISTIC(FunctionClonesThinBackend, 5706c3fb27SDimitry Andric "Number of function clones created during ThinLTO backend"); 5806c3fb27SDimitry Andric STATISTIC(FunctionsClonedThinBackend, 5906c3fb27SDimitry Andric "Number of functions that had clones created during ThinLTO backend"); 6006c3fb27SDimitry Andric STATISTIC(AllocTypeNotCold, "Number of not cold static allocations (possibly " 6106c3fb27SDimitry Andric "cloned) during whole program analysis"); 6206c3fb27SDimitry Andric STATISTIC(AllocTypeCold, "Number of cold static allocations (possibly cloned) " 6306c3fb27SDimitry Andric "during whole program analysis"); 6406c3fb27SDimitry Andric STATISTIC(AllocTypeNotColdThinBackend, 6506c3fb27SDimitry Andric "Number of not cold static allocations (possibly cloned) during " 6606c3fb27SDimitry Andric "ThinLTO backend"); 6706c3fb27SDimitry Andric STATISTIC(AllocTypeColdThinBackend, "Number of cold static allocations " 6806c3fb27SDimitry Andric "(possibly cloned) during ThinLTO backend"); 6906c3fb27SDimitry Andric STATISTIC(OrigAllocsThinBackend, 7006c3fb27SDimitry Andric "Number of original (not cloned) allocations with memprof profiles " 7106c3fb27SDimitry Andric "during ThinLTO backend"); 7206c3fb27SDimitry Andric STATISTIC( 7306c3fb27SDimitry Andric AllocVersionsThinBackend, 7406c3fb27SDimitry Andric "Number of allocation versions (including clones) during ThinLTO backend"); 7506c3fb27SDimitry Andric STATISTIC(MaxAllocVersionsThinBackend, 7606c3fb27SDimitry Andric "Maximum number of allocation versions created for an original " 7706c3fb27SDimitry Andric "allocation during ThinLTO backend"); 7806c3fb27SDimitry Andric STATISTIC(UnclonableAllocsThinBackend, 7906c3fb27SDimitry Andric "Number of unclonable ambigous allocations during ThinLTO backend"); 8006c3fb27SDimitry Andric 8106c3fb27SDimitry Andric static cl::opt<std::string> DotFilePathPrefix( 8206c3fb27SDimitry Andric "memprof-dot-file-path-prefix", cl::init(""), cl::Hidden, 8306c3fb27SDimitry Andric cl::value_desc("filename"), 8406c3fb27SDimitry Andric cl::desc("Specify the path prefix of the MemProf dot files.")); 8506c3fb27SDimitry Andric 8606c3fb27SDimitry Andric static cl::opt<bool> ExportToDot("memprof-export-to-dot", cl::init(false), 8706c3fb27SDimitry Andric cl::Hidden, 8806c3fb27SDimitry Andric cl::desc("Export graph to dot files.")); 8906c3fb27SDimitry Andric 9006c3fb27SDimitry Andric static cl::opt<bool> 9106c3fb27SDimitry Andric DumpCCG("memprof-dump-ccg", cl::init(false), cl::Hidden, 9206c3fb27SDimitry Andric cl::desc("Dump CallingContextGraph to stdout after each stage.")); 9306c3fb27SDimitry Andric 9406c3fb27SDimitry Andric static cl::opt<bool> 9506c3fb27SDimitry Andric VerifyCCG("memprof-verify-ccg", cl::init(false), cl::Hidden, 9606c3fb27SDimitry Andric cl::desc("Perform verification checks on CallingContextGraph.")); 9706c3fb27SDimitry Andric 9806c3fb27SDimitry Andric static cl::opt<bool> 9906c3fb27SDimitry Andric VerifyNodes("memprof-verify-nodes", cl::init(false), cl::Hidden, 10006c3fb27SDimitry Andric cl::desc("Perform frequent verification checks on nodes.")); 10106c3fb27SDimitry Andric 10206c3fb27SDimitry Andric static cl::opt<std::string> MemProfImportSummary( 10306c3fb27SDimitry Andric "memprof-import-summary", 10406c3fb27SDimitry Andric cl::desc("Import summary to use for testing the ThinLTO backend via opt"), 10506c3fb27SDimitry Andric cl::Hidden); 10606c3fb27SDimitry Andric 107*5f757f3fSDimitry Andric namespace llvm { 10806c3fb27SDimitry Andric // Indicate we are linking with an allocator that supports hot/cold operator 10906c3fb27SDimitry Andric // new interfaces. 11006c3fb27SDimitry Andric cl::opt<bool> SupportsHotColdNew( 11106c3fb27SDimitry Andric "supports-hot-cold-new", cl::init(false), cl::Hidden, 11206c3fb27SDimitry Andric cl::desc("Linking with hot/cold operator new interfaces")); 113*5f757f3fSDimitry Andric } // namespace llvm 11406c3fb27SDimitry Andric 11506c3fb27SDimitry Andric namespace { 11606c3fb27SDimitry Andric /// CRTP base for graphs built from either IR or ThinLTO summary index. 11706c3fb27SDimitry Andric /// 11806c3fb27SDimitry Andric /// The graph represents the call contexts in all memprof metadata on allocation 11906c3fb27SDimitry Andric /// calls, with nodes for the allocations themselves, as well as for the calls 12006c3fb27SDimitry Andric /// in each context. The graph is initially built from the allocation memprof 12106c3fb27SDimitry Andric /// metadata (or summary) MIBs. It is then updated to match calls with callsite 12206c3fb27SDimitry Andric /// metadata onto the nodes, updating it to reflect any inlining performed on 12306c3fb27SDimitry Andric /// those calls. 12406c3fb27SDimitry Andric /// 12506c3fb27SDimitry Andric /// Each MIB (representing an allocation's call context with allocation 12606c3fb27SDimitry Andric /// behavior) is assigned a unique context id during the graph build. The edges 12706c3fb27SDimitry Andric /// and nodes in the graph are decorated with the context ids they carry. This 12806c3fb27SDimitry Andric /// is used to correctly update the graph when cloning is performed so that we 12906c3fb27SDimitry Andric /// can uniquify the context for a single (possibly cloned) allocation. 13006c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 13106c3fb27SDimitry Andric class CallsiteContextGraph { 13206c3fb27SDimitry Andric public: 13306c3fb27SDimitry Andric CallsiteContextGraph() = default; 13406c3fb27SDimitry Andric CallsiteContextGraph(const CallsiteContextGraph &) = default; 13506c3fb27SDimitry Andric CallsiteContextGraph(CallsiteContextGraph &&) = default; 13606c3fb27SDimitry Andric 13706c3fb27SDimitry Andric /// Main entry point to perform analysis and transformations on graph. 13806c3fb27SDimitry Andric bool process(); 13906c3fb27SDimitry Andric 14006c3fb27SDimitry Andric /// Perform cloning on the graph necessary to uniquely identify the allocation 14106c3fb27SDimitry Andric /// behavior of an allocation based on its context. 14206c3fb27SDimitry Andric void identifyClones(); 14306c3fb27SDimitry Andric 14406c3fb27SDimitry Andric /// Assign callsite clones to functions, cloning functions as needed to 14506c3fb27SDimitry Andric /// accommodate the combinations of their callsite clones reached by callers. 14606c3fb27SDimitry Andric /// For regular LTO this clones functions and callsites in the IR, but for 14706c3fb27SDimitry Andric /// ThinLTO the cloning decisions are noted in the summaries and later applied 14806c3fb27SDimitry Andric /// in applyImport. 14906c3fb27SDimitry Andric bool assignFunctions(); 15006c3fb27SDimitry Andric 15106c3fb27SDimitry Andric void dump() const; 15206c3fb27SDimitry Andric void print(raw_ostream &OS) const; 15306c3fb27SDimitry Andric 15406c3fb27SDimitry Andric friend raw_ostream &operator<<(raw_ostream &OS, 15506c3fb27SDimitry Andric const CallsiteContextGraph &CCG) { 15606c3fb27SDimitry Andric CCG.print(OS); 15706c3fb27SDimitry Andric return OS; 15806c3fb27SDimitry Andric } 15906c3fb27SDimitry Andric 16006c3fb27SDimitry Andric friend struct GraphTraits< 16106c3fb27SDimitry Andric const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>; 16206c3fb27SDimitry Andric friend struct DOTGraphTraits< 16306c3fb27SDimitry Andric const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>; 16406c3fb27SDimitry Andric 16506c3fb27SDimitry Andric void exportToDot(std::string Label) const; 16606c3fb27SDimitry Andric 16706c3fb27SDimitry Andric /// Represents a function clone via FuncTy pointer and clone number pair. 16806c3fb27SDimitry Andric struct FuncInfo final 16906c3fb27SDimitry Andric : public std::pair<FuncTy *, unsigned /*Clone number*/> { 17006c3fb27SDimitry Andric using Base = std::pair<FuncTy *, unsigned>; 17106c3fb27SDimitry Andric FuncInfo(const Base &B) : Base(B) {} 17206c3fb27SDimitry Andric FuncInfo(FuncTy *F = nullptr, unsigned CloneNo = 0) : Base(F, CloneNo) {} 17306c3fb27SDimitry Andric explicit operator bool() const { return this->first != nullptr; } 17406c3fb27SDimitry Andric FuncTy *func() const { return this->first; } 17506c3fb27SDimitry Andric unsigned cloneNo() const { return this->second; } 17606c3fb27SDimitry Andric }; 17706c3fb27SDimitry Andric 17806c3fb27SDimitry Andric /// Represents a callsite clone via CallTy and clone number pair. 17906c3fb27SDimitry Andric struct CallInfo final : public std::pair<CallTy, unsigned /*Clone number*/> { 18006c3fb27SDimitry Andric using Base = std::pair<CallTy, unsigned>; 18106c3fb27SDimitry Andric CallInfo(const Base &B) : Base(B) {} 18206c3fb27SDimitry Andric CallInfo(CallTy Call = nullptr, unsigned CloneNo = 0) 18306c3fb27SDimitry Andric : Base(Call, CloneNo) {} 18406c3fb27SDimitry Andric explicit operator bool() const { return (bool)this->first; } 18506c3fb27SDimitry Andric CallTy call() const { return this->first; } 18606c3fb27SDimitry Andric unsigned cloneNo() const { return this->second; } 18706c3fb27SDimitry Andric void setCloneNo(unsigned N) { this->second = N; } 18806c3fb27SDimitry Andric void print(raw_ostream &OS) const { 18906c3fb27SDimitry Andric if (!operator bool()) { 19006c3fb27SDimitry Andric assert(!cloneNo()); 19106c3fb27SDimitry Andric OS << "null Call"; 19206c3fb27SDimitry Andric return; 19306c3fb27SDimitry Andric } 19406c3fb27SDimitry Andric call()->print(OS); 19506c3fb27SDimitry Andric OS << "\t(clone " << cloneNo() << ")"; 19606c3fb27SDimitry Andric } 19706c3fb27SDimitry Andric void dump() const { 19806c3fb27SDimitry Andric print(dbgs()); 19906c3fb27SDimitry Andric dbgs() << "\n"; 20006c3fb27SDimitry Andric } 20106c3fb27SDimitry Andric friend raw_ostream &operator<<(raw_ostream &OS, const CallInfo &Call) { 20206c3fb27SDimitry Andric Call.print(OS); 20306c3fb27SDimitry Andric return OS; 20406c3fb27SDimitry Andric } 20506c3fb27SDimitry Andric }; 20606c3fb27SDimitry Andric 20706c3fb27SDimitry Andric struct ContextEdge; 20806c3fb27SDimitry Andric 20906c3fb27SDimitry Andric /// Node in the Callsite Context Graph 21006c3fb27SDimitry Andric struct ContextNode { 21106c3fb27SDimitry Andric // Keep this for now since in the IR case where we have an Instruction* it 21206c3fb27SDimitry Andric // is not as immediately discoverable. Used for printing richer information 21306c3fb27SDimitry Andric // when dumping graph. 21406c3fb27SDimitry Andric bool IsAllocation; 21506c3fb27SDimitry Andric 21606c3fb27SDimitry Andric // Keeps track of when the Call was reset to null because there was 21706c3fb27SDimitry Andric // recursion. 21806c3fb27SDimitry Andric bool Recursive = false; 21906c3fb27SDimitry Andric 22006c3fb27SDimitry Andric // The corresponding allocation or interior call. 22106c3fb27SDimitry Andric CallInfo Call; 22206c3fb27SDimitry Andric 22306c3fb27SDimitry Andric // For alloc nodes this is a unique id assigned when constructed, and for 22406c3fb27SDimitry Andric // callsite stack nodes it is the original stack id when the node is 22506c3fb27SDimitry Andric // constructed from the memprof MIB metadata on the alloc nodes. Note that 22606c3fb27SDimitry Andric // this is only used when matching callsite metadata onto the stack nodes 22706c3fb27SDimitry Andric // created when processing the allocation memprof MIBs, and for labeling 22806c3fb27SDimitry Andric // nodes in the dot graph. Therefore we don't bother to assign a value for 22906c3fb27SDimitry Andric // clones. 23006c3fb27SDimitry Andric uint64_t OrigStackOrAllocId = 0; 23106c3fb27SDimitry Andric 23206c3fb27SDimitry Andric // This will be formed by ORing together the AllocationType enum values 23306c3fb27SDimitry Andric // for contexts including this node. 23406c3fb27SDimitry Andric uint8_t AllocTypes = 0; 23506c3fb27SDimitry Andric 23606c3fb27SDimitry Andric // Edges to all callees in the profiled call stacks. 23706c3fb27SDimitry Andric // TODO: Should this be a map (from Callee node) for more efficient lookup? 23806c3fb27SDimitry Andric std::vector<std::shared_ptr<ContextEdge>> CalleeEdges; 23906c3fb27SDimitry Andric 24006c3fb27SDimitry Andric // Edges to all callers in the profiled call stacks. 24106c3fb27SDimitry Andric // TODO: Should this be a map (from Caller node) for more efficient lookup? 24206c3fb27SDimitry Andric std::vector<std::shared_ptr<ContextEdge>> CallerEdges; 24306c3fb27SDimitry Andric 24406c3fb27SDimitry Andric // The set of IDs for contexts including this node. 24506c3fb27SDimitry Andric DenseSet<uint32_t> ContextIds; 24606c3fb27SDimitry Andric 24706c3fb27SDimitry Andric // List of clones of this ContextNode, initially empty. 24806c3fb27SDimitry Andric std::vector<ContextNode *> Clones; 24906c3fb27SDimitry Andric 25006c3fb27SDimitry Andric // If a clone, points to the original uncloned node. 25106c3fb27SDimitry Andric ContextNode *CloneOf = nullptr; 25206c3fb27SDimitry Andric 25306c3fb27SDimitry Andric ContextNode(bool IsAllocation) : IsAllocation(IsAllocation), Call() {} 25406c3fb27SDimitry Andric 25506c3fb27SDimitry Andric ContextNode(bool IsAllocation, CallInfo C) 25606c3fb27SDimitry Andric : IsAllocation(IsAllocation), Call(C) {} 25706c3fb27SDimitry Andric 25806c3fb27SDimitry Andric void addClone(ContextNode *Clone) { 25906c3fb27SDimitry Andric if (CloneOf) { 26006c3fb27SDimitry Andric CloneOf->Clones.push_back(Clone); 26106c3fb27SDimitry Andric Clone->CloneOf = CloneOf; 26206c3fb27SDimitry Andric } else { 26306c3fb27SDimitry Andric Clones.push_back(Clone); 26406c3fb27SDimitry Andric assert(!Clone->CloneOf); 26506c3fb27SDimitry Andric Clone->CloneOf = this; 26606c3fb27SDimitry Andric } 26706c3fb27SDimitry Andric } 26806c3fb27SDimitry Andric 26906c3fb27SDimitry Andric ContextNode *getOrigNode() { 27006c3fb27SDimitry Andric if (!CloneOf) 27106c3fb27SDimitry Andric return this; 27206c3fb27SDimitry Andric return CloneOf; 27306c3fb27SDimitry Andric } 27406c3fb27SDimitry Andric 27506c3fb27SDimitry Andric void addOrUpdateCallerEdge(ContextNode *Caller, AllocationType AllocType, 27606c3fb27SDimitry Andric unsigned int ContextId); 27706c3fb27SDimitry Andric 27806c3fb27SDimitry Andric ContextEdge *findEdgeFromCallee(const ContextNode *Callee); 27906c3fb27SDimitry Andric ContextEdge *findEdgeFromCaller(const ContextNode *Caller); 28006c3fb27SDimitry Andric void eraseCalleeEdge(const ContextEdge *Edge); 28106c3fb27SDimitry Andric void eraseCallerEdge(const ContextEdge *Edge); 28206c3fb27SDimitry Andric 28306c3fb27SDimitry Andric void setCall(CallInfo C) { Call = C; } 28406c3fb27SDimitry Andric 28506c3fb27SDimitry Andric bool hasCall() const { return (bool)Call.call(); } 28606c3fb27SDimitry Andric 28706c3fb27SDimitry Andric void printCall(raw_ostream &OS) const { Call.print(OS); } 28806c3fb27SDimitry Andric 28906c3fb27SDimitry Andric // True if this node was effectively removed from the graph, in which case 29006c3fb27SDimitry Andric // its context id set, caller edges, and callee edges should all be empty. 29106c3fb27SDimitry Andric bool isRemoved() const { 29206c3fb27SDimitry Andric assert(ContextIds.empty() == 29306c3fb27SDimitry Andric (CalleeEdges.empty() && CallerEdges.empty())); 29406c3fb27SDimitry Andric return ContextIds.empty(); 29506c3fb27SDimitry Andric } 29606c3fb27SDimitry Andric 29706c3fb27SDimitry Andric void dump() const; 29806c3fb27SDimitry Andric void print(raw_ostream &OS) const; 29906c3fb27SDimitry Andric 30006c3fb27SDimitry Andric friend raw_ostream &operator<<(raw_ostream &OS, const ContextNode &Node) { 30106c3fb27SDimitry Andric Node.print(OS); 30206c3fb27SDimitry Andric return OS; 30306c3fb27SDimitry Andric } 30406c3fb27SDimitry Andric }; 30506c3fb27SDimitry Andric 30606c3fb27SDimitry Andric /// Edge in the Callsite Context Graph from a ContextNode N to a caller or 30706c3fb27SDimitry Andric /// callee. 30806c3fb27SDimitry Andric struct ContextEdge { 30906c3fb27SDimitry Andric ContextNode *Callee; 31006c3fb27SDimitry Andric ContextNode *Caller; 31106c3fb27SDimitry Andric 31206c3fb27SDimitry Andric // This will be formed by ORing together the AllocationType enum values 31306c3fb27SDimitry Andric // for contexts including this edge. 31406c3fb27SDimitry Andric uint8_t AllocTypes = 0; 31506c3fb27SDimitry Andric 31606c3fb27SDimitry Andric // The set of IDs for contexts including this edge. 31706c3fb27SDimitry Andric DenseSet<uint32_t> ContextIds; 31806c3fb27SDimitry Andric 31906c3fb27SDimitry Andric ContextEdge(ContextNode *Callee, ContextNode *Caller, uint8_t AllocType, 32006c3fb27SDimitry Andric DenseSet<uint32_t> ContextIds) 32106c3fb27SDimitry Andric : Callee(Callee), Caller(Caller), AllocTypes(AllocType), 32206c3fb27SDimitry Andric ContextIds(ContextIds) {} 32306c3fb27SDimitry Andric 32406c3fb27SDimitry Andric DenseSet<uint32_t> &getContextIds() { return ContextIds; } 32506c3fb27SDimitry Andric 32606c3fb27SDimitry Andric void dump() const; 32706c3fb27SDimitry Andric void print(raw_ostream &OS) const; 32806c3fb27SDimitry Andric 32906c3fb27SDimitry Andric friend raw_ostream &operator<<(raw_ostream &OS, const ContextEdge &Edge) { 33006c3fb27SDimitry Andric Edge.print(OS); 33106c3fb27SDimitry Andric return OS; 33206c3fb27SDimitry Andric } 33306c3fb27SDimitry Andric }; 33406c3fb27SDimitry Andric 33506c3fb27SDimitry Andric /// Helper to remove callee edges that have allocation type None (due to not 33606c3fb27SDimitry Andric /// carrying any context ids) after transformations. 33706c3fb27SDimitry Andric void removeNoneTypeCalleeEdges(ContextNode *Node); 33806c3fb27SDimitry Andric 33906c3fb27SDimitry Andric protected: 34006c3fb27SDimitry Andric /// Get a list of nodes corresponding to the stack ids in the given callsite 34106c3fb27SDimitry Andric /// context. 34206c3fb27SDimitry Andric template <class NodeT, class IteratorT> 34306c3fb27SDimitry Andric std::vector<uint64_t> 34406c3fb27SDimitry Andric getStackIdsWithContextNodes(CallStack<NodeT, IteratorT> &CallsiteContext); 34506c3fb27SDimitry Andric 34606c3fb27SDimitry Andric /// Adds nodes for the given allocation and any stack ids on its memprof MIB 34706c3fb27SDimitry Andric /// metadata (or summary). 34806c3fb27SDimitry Andric ContextNode *addAllocNode(CallInfo Call, const FuncTy *F); 34906c3fb27SDimitry Andric 35006c3fb27SDimitry Andric /// Adds nodes for the given MIB stack ids. 35106c3fb27SDimitry Andric template <class NodeT, class IteratorT> 35206c3fb27SDimitry Andric void addStackNodesForMIB(ContextNode *AllocNode, 35306c3fb27SDimitry Andric CallStack<NodeT, IteratorT> &StackContext, 35406c3fb27SDimitry Andric CallStack<NodeT, IteratorT> &CallsiteContext, 35506c3fb27SDimitry Andric AllocationType AllocType); 35606c3fb27SDimitry Andric 35706c3fb27SDimitry Andric /// Matches all callsite metadata (or summary) to the nodes created for 35806c3fb27SDimitry Andric /// allocation memprof MIB metadata, synthesizing new nodes to reflect any 35906c3fb27SDimitry Andric /// inlining performed on those callsite instructions. 36006c3fb27SDimitry Andric void updateStackNodes(); 36106c3fb27SDimitry Andric 36206c3fb27SDimitry Andric /// Update graph to conservatively handle any callsite stack nodes that target 36306c3fb27SDimitry Andric /// multiple different callee target functions. 36406c3fb27SDimitry Andric void handleCallsitesWithMultipleTargets(); 36506c3fb27SDimitry Andric 36606c3fb27SDimitry Andric /// Save lists of calls with MemProf metadata in each function, for faster 36706c3fb27SDimitry Andric /// iteration. 36806c3fb27SDimitry Andric std::vector<std::pair<FuncTy *, std::vector<CallInfo>>> 36906c3fb27SDimitry Andric FuncToCallsWithMetadata; 37006c3fb27SDimitry Andric 37106c3fb27SDimitry Andric /// Map from callsite node to the enclosing caller function. 37206c3fb27SDimitry Andric std::map<const ContextNode *, const FuncTy *> NodeToCallingFunc; 37306c3fb27SDimitry Andric 37406c3fb27SDimitry Andric private: 37506c3fb27SDimitry Andric using EdgeIter = typename std::vector<std::shared_ptr<ContextEdge>>::iterator; 37606c3fb27SDimitry Andric 37706c3fb27SDimitry Andric using CallContextInfo = std::tuple<CallTy, std::vector<uint64_t>, 37806c3fb27SDimitry Andric const FuncTy *, DenseSet<uint32_t>>; 37906c3fb27SDimitry Andric 38006c3fb27SDimitry Andric /// Assigns the given Node to calls at or inlined into the location with 38106c3fb27SDimitry Andric /// the Node's stack id, after post order traversing and processing its 38206c3fb27SDimitry Andric /// caller nodes. Uses the call information recorded in the given 38306c3fb27SDimitry Andric /// StackIdToMatchingCalls map, and creates new nodes for inlined sequences 38406c3fb27SDimitry Andric /// as needed. Called by updateStackNodes which sets up the given 38506c3fb27SDimitry Andric /// StackIdToMatchingCalls map. 38606c3fb27SDimitry Andric void assignStackNodesPostOrder( 38706c3fb27SDimitry Andric ContextNode *Node, DenseSet<const ContextNode *> &Visited, 38806c3fb27SDimitry Andric DenseMap<uint64_t, std::vector<CallContextInfo>> &StackIdToMatchingCalls); 38906c3fb27SDimitry Andric 39006c3fb27SDimitry Andric /// Duplicates the given set of context ids, updating the provided 39106c3fb27SDimitry Andric /// map from each original id with the newly generated context ids, 39206c3fb27SDimitry Andric /// and returning the new duplicated id set. 39306c3fb27SDimitry Andric DenseSet<uint32_t> duplicateContextIds( 39406c3fb27SDimitry Andric const DenseSet<uint32_t> &StackSequenceContextIds, 39506c3fb27SDimitry Andric DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds); 39606c3fb27SDimitry Andric 39706c3fb27SDimitry Andric /// Propagates all duplicated context ids across the graph. 39806c3fb27SDimitry Andric void propagateDuplicateContextIds( 39906c3fb27SDimitry Andric const DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds); 40006c3fb27SDimitry Andric 40106c3fb27SDimitry Andric /// Connect the NewNode to OrigNode's callees if TowardsCallee is true, 40206c3fb27SDimitry Andric /// else to its callers. Also updates OrigNode's edges to remove any context 40306c3fb27SDimitry Andric /// ids moved to the newly created edge. 40406c3fb27SDimitry Andric void connectNewNode(ContextNode *NewNode, ContextNode *OrigNode, 40506c3fb27SDimitry Andric bool TowardsCallee); 40606c3fb27SDimitry Andric 40706c3fb27SDimitry Andric /// Get the stack id corresponding to the given Id or Index (for IR this will 40806c3fb27SDimitry Andric /// return itself, for a summary index this will return the id recorded in the 40906c3fb27SDimitry Andric /// index for that stack id index value). 41006c3fb27SDimitry Andric uint64_t getStackId(uint64_t IdOrIndex) const { 41106c3fb27SDimitry Andric return static_cast<const DerivedCCG *>(this)->getStackId(IdOrIndex); 41206c3fb27SDimitry Andric } 41306c3fb27SDimitry Andric 41406c3fb27SDimitry Andric /// Returns true if the given call targets the given function. 41506c3fb27SDimitry Andric bool calleeMatchesFunc(CallTy Call, const FuncTy *Func) { 41606c3fb27SDimitry Andric return static_cast<DerivedCCG *>(this)->calleeMatchesFunc(Call, Func); 41706c3fb27SDimitry Andric } 41806c3fb27SDimitry Andric 41906c3fb27SDimitry Andric /// Get a list of nodes corresponding to the stack ids in the given 42006c3fb27SDimitry Andric /// callsite's context. 42106c3fb27SDimitry Andric std::vector<uint64_t> getStackIdsWithContextNodesForCall(CallTy Call) { 42206c3fb27SDimitry Andric return static_cast<DerivedCCG *>(this)->getStackIdsWithContextNodesForCall( 42306c3fb27SDimitry Andric Call); 42406c3fb27SDimitry Andric } 42506c3fb27SDimitry Andric 42606c3fb27SDimitry Andric /// Get the last stack id in the context for callsite. 42706c3fb27SDimitry Andric uint64_t getLastStackId(CallTy Call) { 42806c3fb27SDimitry Andric return static_cast<DerivedCCG *>(this)->getLastStackId(Call); 42906c3fb27SDimitry Andric } 43006c3fb27SDimitry Andric 43106c3fb27SDimitry Andric /// Update the allocation call to record type of allocated memory. 43206c3fb27SDimitry Andric void updateAllocationCall(CallInfo &Call, AllocationType AllocType) { 43306c3fb27SDimitry Andric AllocType == AllocationType::Cold ? AllocTypeCold++ : AllocTypeNotCold++; 43406c3fb27SDimitry Andric static_cast<DerivedCCG *>(this)->updateAllocationCall(Call, AllocType); 43506c3fb27SDimitry Andric } 43606c3fb27SDimitry Andric 43706c3fb27SDimitry Andric /// Update non-allocation call to invoke (possibly cloned) function 43806c3fb27SDimitry Andric /// CalleeFunc. 43906c3fb27SDimitry Andric void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc) { 44006c3fb27SDimitry Andric static_cast<DerivedCCG *>(this)->updateCall(CallerCall, CalleeFunc); 44106c3fb27SDimitry Andric } 44206c3fb27SDimitry Andric 44306c3fb27SDimitry Andric /// Clone the given function for the given callsite, recording mapping of all 44406c3fb27SDimitry Andric /// of the functions tracked calls to their new versions in the CallMap. 44506c3fb27SDimitry Andric /// Assigns new clones to clone number CloneNo. 44606c3fb27SDimitry Andric FuncInfo cloneFunctionForCallsite( 44706c3fb27SDimitry Andric FuncInfo &Func, CallInfo &Call, std::map<CallInfo, CallInfo> &CallMap, 44806c3fb27SDimitry Andric std::vector<CallInfo> &CallsWithMetadataInFunc, unsigned CloneNo) { 44906c3fb27SDimitry Andric return static_cast<DerivedCCG *>(this)->cloneFunctionForCallsite( 45006c3fb27SDimitry Andric Func, Call, CallMap, CallsWithMetadataInFunc, CloneNo); 45106c3fb27SDimitry Andric } 45206c3fb27SDimitry Andric 45306c3fb27SDimitry Andric /// Gets a label to use in the dot graph for the given call clone in the given 45406c3fb27SDimitry Andric /// function. 45506c3fb27SDimitry Andric std::string getLabel(const FuncTy *Func, const CallTy Call, 45606c3fb27SDimitry Andric unsigned CloneNo) const { 45706c3fb27SDimitry Andric return static_cast<const DerivedCCG *>(this)->getLabel(Func, Call, CloneNo); 45806c3fb27SDimitry Andric } 45906c3fb27SDimitry Andric 46006c3fb27SDimitry Andric /// Helpers to find the node corresponding to the given call or stackid. 46106c3fb27SDimitry Andric ContextNode *getNodeForInst(const CallInfo &C); 46206c3fb27SDimitry Andric ContextNode *getNodeForAlloc(const CallInfo &C); 46306c3fb27SDimitry Andric ContextNode *getNodeForStackId(uint64_t StackId); 46406c3fb27SDimitry Andric 46506c3fb27SDimitry Andric /// Removes the node information recorded for the given call. 46606c3fb27SDimitry Andric void unsetNodeForInst(const CallInfo &C); 46706c3fb27SDimitry Andric 46806c3fb27SDimitry Andric /// Computes the alloc type corresponding to the given context ids, by 46906c3fb27SDimitry Andric /// unioning their recorded alloc types. 47006c3fb27SDimitry Andric uint8_t computeAllocType(DenseSet<uint32_t> &ContextIds); 47106c3fb27SDimitry Andric 47206c3fb27SDimitry Andric /// Returns the alloction type of the intersection of the contexts of two 47306c3fb27SDimitry Andric /// nodes (based on their provided context id sets), optimized for the case 47406c3fb27SDimitry Andric /// when Node1Ids is smaller than Node2Ids. 47506c3fb27SDimitry Andric uint8_t intersectAllocTypesImpl(const DenseSet<uint32_t> &Node1Ids, 47606c3fb27SDimitry Andric const DenseSet<uint32_t> &Node2Ids); 47706c3fb27SDimitry Andric 47806c3fb27SDimitry Andric /// Returns the alloction type of the intersection of the contexts of two 47906c3fb27SDimitry Andric /// nodes (based on their provided context id sets). 48006c3fb27SDimitry Andric uint8_t intersectAllocTypes(const DenseSet<uint32_t> &Node1Ids, 48106c3fb27SDimitry Andric const DenseSet<uint32_t> &Node2Ids); 48206c3fb27SDimitry Andric 48306c3fb27SDimitry Andric /// Create a clone of Edge's callee and move Edge to that new callee node, 48406c3fb27SDimitry Andric /// performing the necessary context id and allocation type updates. 48506c3fb27SDimitry Andric /// If callee's caller edge iterator is supplied, it is updated when removing 48606c3fb27SDimitry Andric /// the edge from that list. 48706c3fb27SDimitry Andric ContextNode * 48806c3fb27SDimitry Andric moveEdgeToNewCalleeClone(const std::shared_ptr<ContextEdge> &Edge, 48906c3fb27SDimitry Andric EdgeIter *CallerEdgeI = nullptr); 49006c3fb27SDimitry Andric 49106c3fb27SDimitry Andric /// Change the callee of Edge to existing callee clone NewCallee, performing 49206c3fb27SDimitry Andric /// the necessary context id and allocation type updates. 49306c3fb27SDimitry Andric /// If callee's caller edge iterator is supplied, it is updated when removing 49406c3fb27SDimitry Andric /// the edge from that list. 49506c3fb27SDimitry Andric void moveEdgeToExistingCalleeClone(const std::shared_ptr<ContextEdge> &Edge, 49606c3fb27SDimitry Andric ContextNode *NewCallee, 49706c3fb27SDimitry Andric EdgeIter *CallerEdgeI = nullptr, 49806c3fb27SDimitry Andric bool NewClone = false); 49906c3fb27SDimitry Andric 50006c3fb27SDimitry Andric /// Recursively perform cloning on the graph for the given Node and its 50106c3fb27SDimitry Andric /// callers, in order to uniquely identify the allocation behavior of an 50206c3fb27SDimitry Andric /// allocation given its context. 50306c3fb27SDimitry Andric void identifyClones(ContextNode *Node, 50406c3fb27SDimitry Andric DenseSet<const ContextNode *> &Visited); 50506c3fb27SDimitry Andric 50606c3fb27SDimitry Andric /// Map from each context ID to the AllocationType assigned to that context. 50706c3fb27SDimitry Andric std::map<uint32_t, AllocationType> ContextIdToAllocationType; 50806c3fb27SDimitry Andric 50906c3fb27SDimitry Andric /// Identifies the context node created for a stack id when adding the MIB 51006c3fb27SDimitry Andric /// contexts to the graph. This is used to locate the context nodes when 51106c3fb27SDimitry Andric /// trying to assign the corresponding callsites with those stack ids to these 51206c3fb27SDimitry Andric /// nodes. 51306c3fb27SDimitry Andric std::map<uint64_t, ContextNode *> StackEntryIdToContextNodeMap; 51406c3fb27SDimitry Andric 51506c3fb27SDimitry Andric /// Maps to track the calls to their corresponding nodes in the graph. 51606c3fb27SDimitry Andric MapVector<CallInfo, ContextNode *> AllocationCallToContextNodeMap; 51706c3fb27SDimitry Andric MapVector<CallInfo, ContextNode *> NonAllocationCallToContextNodeMap; 51806c3fb27SDimitry Andric 51906c3fb27SDimitry Andric /// Owner of all ContextNode unique_ptrs. 52006c3fb27SDimitry Andric std::vector<std::unique_ptr<ContextNode>> NodeOwner; 52106c3fb27SDimitry Andric 52206c3fb27SDimitry Andric /// Perform sanity checks on graph when requested. 52306c3fb27SDimitry Andric void check() const; 52406c3fb27SDimitry Andric 52506c3fb27SDimitry Andric /// Keeps track of the last unique context id assigned. 52606c3fb27SDimitry Andric unsigned int LastContextId = 0; 52706c3fb27SDimitry Andric }; 52806c3fb27SDimitry Andric 52906c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 53006c3fb27SDimitry Andric using ContextNode = 53106c3fb27SDimitry Andric typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode; 53206c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 53306c3fb27SDimitry Andric using ContextEdge = 53406c3fb27SDimitry Andric typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge; 53506c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 53606c3fb27SDimitry Andric using FuncInfo = 53706c3fb27SDimitry Andric typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::FuncInfo; 53806c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 53906c3fb27SDimitry Andric using CallInfo = 54006c3fb27SDimitry Andric typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::CallInfo; 54106c3fb27SDimitry Andric 54206c3fb27SDimitry Andric /// CRTP derived class for graphs built from IR (regular LTO). 54306c3fb27SDimitry Andric class ModuleCallsiteContextGraph 54406c3fb27SDimitry Andric : public CallsiteContextGraph<ModuleCallsiteContextGraph, Function, 54506c3fb27SDimitry Andric Instruction *> { 54606c3fb27SDimitry Andric public: 54706c3fb27SDimitry Andric ModuleCallsiteContextGraph( 54806c3fb27SDimitry Andric Module &M, 54906c3fb27SDimitry Andric function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter); 55006c3fb27SDimitry Andric 55106c3fb27SDimitry Andric private: 55206c3fb27SDimitry Andric friend CallsiteContextGraph<ModuleCallsiteContextGraph, Function, 55306c3fb27SDimitry Andric Instruction *>; 55406c3fb27SDimitry Andric 55506c3fb27SDimitry Andric uint64_t getStackId(uint64_t IdOrIndex) const; 55606c3fb27SDimitry Andric bool calleeMatchesFunc(Instruction *Call, const Function *Func); 55706c3fb27SDimitry Andric uint64_t getLastStackId(Instruction *Call); 55806c3fb27SDimitry Andric std::vector<uint64_t> getStackIdsWithContextNodesForCall(Instruction *Call); 55906c3fb27SDimitry Andric void updateAllocationCall(CallInfo &Call, AllocationType AllocType); 56006c3fb27SDimitry Andric void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc); 56106c3fb27SDimitry Andric CallsiteContextGraph<ModuleCallsiteContextGraph, Function, 56206c3fb27SDimitry Andric Instruction *>::FuncInfo 56306c3fb27SDimitry Andric cloneFunctionForCallsite(FuncInfo &Func, CallInfo &Call, 56406c3fb27SDimitry Andric std::map<CallInfo, CallInfo> &CallMap, 56506c3fb27SDimitry Andric std::vector<CallInfo> &CallsWithMetadataInFunc, 56606c3fb27SDimitry Andric unsigned CloneNo); 56706c3fb27SDimitry Andric std::string getLabel(const Function *Func, const Instruction *Call, 56806c3fb27SDimitry Andric unsigned CloneNo) const; 56906c3fb27SDimitry Andric 57006c3fb27SDimitry Andric const Module &Mod; 57106c3fb27SDimitry Andric function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter; 57206c3fb27SDimitry Andric }; 57306c3fb27SDimitry Andric 57406c3fb27SDimitry Andric /// Represents a call in the summary index graph, which can either be an 57506c3fb27SDimitry Andric /// allocation or an interior callsite node in an allocation's context. 57606c3fb27SDimitry Andric /// Holds a pointer to the corresponding data structure in the index. 57706c3fb27SDimitry Andric struct IndexCall : public PointerUnion<CallsiteInfo *, AllocInfo *> { 57806c3fb27SDimitry Andric IndexCall() : PointerUnion() {} 57906c3fb27SDimitry Andric IndexCall(std::nullptr_t) : IndexCall() {} 58006c3fb27SDimitry Andric IndexCall(CallsiteInfo *StackNode) : PointerUnion(StackNode) {} 58106c3fb27SDimitry Andric IndexCall(AllocInfo *AllocNode) : PointerUnion(AllocNode) {} 58206c3fb27SDimitry Andric IndexCall(PointerUnion PT) : PointerUnion(PT) {} 58306c3fb27SDimitry Andric 58406c3fb27SDimitry Andric IndexCall *operator->() { return this; } 58506c3fb27SDimitry Andric 58606c3fb27SDimitry Andric PointerUnion<CallsiteInfo *, AllocInfo *> getBase() const { return *this; } 58706c3fb27SDimitry Andric 58806c3fb27SDimitry Andric void print(raw_ostream &OS) const { 58906c3fb27SDimitry Andric if (auto *AI = llvm::dyn_cast_if_present<AllocInfo *>(getBase())) { 59006c3fb27SDimitry Andric OS << *AI; 59106c3fb27SDimitry Andric } else { 59206c3fb27SDimitry Andric auto *CI = llvm::dyn_cast_if_present<CallsiteInfo *>(getBase()); 59306c3fb27SDimitry Andric assert(CI); 59406c3fb27SDimitry Andric OS << *CI; 59506c3fb27SDimitry Andric } 59606c3fb27SDimitry Andric } 59706c3fb27SDimitry Andric }; 59806c3fb27SDimitry Andric 59906c3fb27SDimitry Andric /// CRTP derived class for graphs built from summary index (ThinLTO). 60006c3fb27SDimitry Andric class IndexCallsiteContextGraph 60106c3fb27SDimitry Andric : public CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary, 60206c3fb27SDimitry Andric IndexCall> { 60306c3fb27SDimitry Andric public: 60406c3fb27SDimitry Andric IndexCallsiteContextGraph( 60506c3fb27SDimitry Andric ModuleSummaryIndex &Index, 60606c3fb27SDimitry Andric function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)> 60706c3fb27SDimitry Andric isPrevailing); 60806c3fb27SDimitry Andric 60906c3fb27SDimitry Andric private: 61006c3fb27SDimitry Andric friend CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary, 61106c3fb27SDimitry Andric IndexCall>; 61206c3fb27SDimitry Andric 61306c3fb27SDimitry Andric uint64_t getStackId(uint64_t IdOrIndex) const; 61406c3fb27SDimitry Andric bool calleeMatchesFunc(IndexCall &Call, const FunctionSummary *Func); 61506c3fb27SDimitry Andric uint64_t getLastStackId(IndexCall &Call); 61606c3fb27SDimitry Andric std::vector<uint64_t> getStackIdsWithContextNodesForCall(IndexCall &Call); 61706c3fb27SDimitry Andric void updateAllocationCall(CallInfo &Call, AllocationType AllocType); 61806c3fb27SDimitry Andric void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc); 61906c3fb27SDimitry Andric CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary, 62006c3fb27SDimitry Andric IndexCall>::FuncInfo 62106c3fb27SDimitry Andric cloneFunctionForCallsite(FuncInfo &Func, CallInfo &Call, 62206c3fb27SDimitry Andric std::map<CallInfo, CallInfo> &CallMap, 62306c3fb27SDimitry Andric std::vector<CallInfo> &CallsWithMetadataInFunc, 62406c3fb27SDimitry Andric unsigned CloneNo); 62506c3fb27SDimitry Andric std::string getLabel(const FunctionSummary *Func, const IndexCall &Call, 62606c3fb27SDimitry Andric unsigned CloneNo) const; 62706c3fb27SDimitry Andric 62806c3fb27SDimitry Andric // Saves mapping from function summaries containing memprof records back to 62906c3fb27SDimitry Andric // its VI, for use in checking and debugging. 63006c3fb27SDimitry Andric std::map<const FunctionSummary *, ValueInfo> FSToVIMap; 63106c3fb27SDimitry Andric 63206c3fb27SDimitry Andric const ModuleSummaryIndex &Index; 63306c3fb27SDimitry Andric }; 63406c3fb27SDimitry Andric } // namespace 63506c3fb27SDimitry Andric 63606c3fb27SDimitry Andric namespace llvm { 63706c3fb27SDimitry Andric template <> 63806c3fb27SDimitry Andric struct DenseMapInfo<typename CallsiteContextGraph< 63906c3fb27SDimitry Andric ModuleCallsiteContextGraph, Function, Instruction *>::CallInfo> 64006c3fb27SDimitry Andric : public DenseMapInfo<std::pair<Instruction *, unsigned>> {}; 64106c3fb27SDimitry Andric template <> 64206c3fb27SDimitry Andric struct DenseMapInfo<typename CallsiteContextGraph< 64306c3fb27SDimitry Andric IndexCallsiteContextGraph, FunctionSummary, IndexCall>::CallInfo> 64406c3fb27SDimitry Andric : public DenseMapInfo<std::pair<IndexCall, unsigned>> {}; 64506c3fb27SDimitry Andric template <> 64606c3fb27SDimitry Andric struct DenseMapInfo<IndexCall> 64706c3fb27SDimitry Andric : public DenseMapInfo<PointerUnion<CallsiteInfo *, AllocInfo *>> {}; 64806c3fb27SDimitry Andric } // end namespace llvm 64906c3fb27SDimitry Andric 65006c3fb27SDimitry Andric namespace { 65106c3fb27SDimitry Andric 65206c3fb27SDimitry Andric struct FieldSeparator { 65306c3fb27SDimitry Andric bool Skip = true; 65406c3fb27SDimitry Andric const char *Sep; 65506c3fb27SDimitry Andric 65606c3fb27SDimitry Andric FieldSeparator(const char *Sep = ", ") : Sep(Sep) {} 65706c3fb27SDimitry Andric }; 65806c3fb27SDimitry Andric 65906c3fb27SDimitry Andric raw_ostream &operator<<(raw_ostream &OS, FieldSeparator &FS) { 66006c3fb27SDimitry Andric if (FS.Skip) { 66106c3fb27SDimitry Andric FS.Skip = false; 66206c3fb27SDimitry Andric return OS; 66306c3fb27SDimitry Andric } 66406c3fb27SDimitry Andric return OS << FS.Sep; 66506c3fb27SDimitry Andric } 66606c3fb27SDimitry Andric 66706c3fb27SDimitry Andric // Map the uint8_t alloc types (which may contain NotCold|Cold) to the alloc 66806c3fb27SDimitry Andric // type we should actually use on the corresponding allocation. 66906c3fb27SDimitry Andric // If we can't clone a node that has NotCold+Cold alloc type, we will fall 67006c3fb27SDimitry Andric // back to using NotCold. So don't bother cloning to distinguish NotCold+Cold 67106c3fb27SDimitry Andric // from NotCold. 67206c3fb27SDimitry Andric AllocationType allocTypeToUse(uint8_t AllocTypes) { 67306c3fb27SDimitry Andric assert(AllocTypes != (uint8_t)AllocationType::None); 67406c3fb27SDimitry Andric if (AllocTypes == 67506c3fb27SDimitry Andric ((uint8_t)AllocationType::NotCold | (uint8_t)AllocationType::Cold)) 67606c3fb27SDimitry Andric return AllocationType::NotCold; 67706c3fb27SDimitry Andric else 67806c3fb27SDimitry Andric return (AllocationType)AllocTypes; 67906c3fb27SDimitry Andric } 68006c3fb27SDimitry Andric 68106c3fb27SDimitry Andric // Helper to check if the alloc types for all edges recorded in the 68206c3fb27SDimitry Andric // InAllocTypes vector match the alloc types for all edges in the Edges 68306c3fb27SDimitry Andric // vector. 68406c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 68506c3fb27SDimitry Andric bool allocTypesMatch( 68606c3fb27SDimitry Andric const std::vector<uint8_t> &InAllocTypes, 68706c3fb27SDimitry Andric const std::vector<std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>> 68806c3fb27SDimitry Andric &Edges) { 68906c3fb27SDimitry Andric return std::equal( 69006c3fb27SDimitry Andric InAllocTypes.begin(), InAllocTypes.end(), Edges.begin(), 69106c3fb27SDimitry Andric [](const uint8_t &l, 69206c3fb27SDimitry Andric const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &r) { 69306c3fb27SDimitry Andric // Can share if one of the edges is None type - don't 69406c3fb27SDimitry Andric // care about the type along that edge as it doesn't 69506c3fb27SDimitry Andric // exist for those context ids. 69606c3fb27SDimitry Andric if (l == (uint8_t)AllocationType::None || 69706c3fb27SDimitry Andric r->AllocTypes == (uint8_t)AllocationType::None) 69806c3fb27SDimitry Andric return true; 69906c3fb27SDimitry Andric return allocTypeToUse(l) == allocTypeToUse(r->AllocTypes); 70006c3fb27SDimitry Andric }); 70106c3fb27SDimitry Andric } 70206c3fb27SDimitry Andric 70306c3fb27SDimitry Andric } // end anonymous namespace 70406c3fb27SDimitry Andric 70506c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 70606c3fb27SDimitry Andric typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode * 70706c3fb27SDimitry Andric CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForInst( 70806c3fb27SDimitry Andric const CallInfo &C) { 70906c3fb27SDimitry Andric ContextNode *Node = getNodeForAlloc(C); 71006c3fb27SDimitry Andric if (Node) 71106c3fb27SDimitry Andric return Node; 71206c3fb27SDimitry Andric 71306c3fb27SDimitry Andric return NonAllocationCallToContextNodeMap.lookup(C); 71406c3fb27SDimitry Andric } 71506c3fb27SDimitry Andric 71606c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 71706c3fb27SDimitry Andric typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode * 71806c3fb27SDimitry Andric CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForAlloc( 71906c3fb27SDimitry Andric const CallInfo &C) { 72006c3fb27SDimitry Andric return AllocationCallToContextNodeMap.lookup(C); 72106c3fb27SDimitry Andric } 72206c3fb27SDimitry Andric 72306c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 72406c3fb27SDimitry Andric typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode * 72506c3fb27SDimitry Andric CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForStackId( 72606c3fb27SDimitry Andric uint64_t StackId) { 72706c3fb27SDimitry Andric auto StackEntryNode = StackEntryIdToContextNodeMap.find(StackId); 72806c3fb27SDimitry Andric if (StackEntryNode != StackEntryIdToContextNodeMap.end()) 72906c3fb27SDimitry Andric return StackEntryNode->second; 73006c3fb27SDimitry Andric return nullptr; 73106c3fb27SDimitry Andric } 73206c3fb27SDimitry Andric 73306c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 73406c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::unsetNodeForInst( 73506c3fb27SDimitry Andric const CallInfo &C) { 73606c3fb27SDimitry Andric AllocationCallToContextNodeMap.erase(C) || 73706c3fb27SDimitry Andric NonAllocationCallToContextNodeMap.erase(C); 73806c3fb27SDimitry Andric assert(!AllocationCallToContextNodeMap.count(C) && 73906c3fb27SDimitry Andric !NonAllocationCallToContextNodeMap.count(C)); 74006c3fb27SDimitry Andric } 74106c3fb27SDimitry Andric 74206c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 74306c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode:: 74406c3fb27SDimitry Andric addOrUpdateCallerEdge(ContextNode *Caller, AllocationType AllocType, 74506c3fb27SDimitry Andric unsigned int ContextId) { 74606c3fb27SDimitry Andric for (auto &Edge : CallerEdges) { 74706c3fb27SDimitry Andric if (Edge->Caller == Caller) { 74806c3fb27SDimitry Andric Edge->AllocTypes |= (uint8_t)AllocType; 74906c3fb27SDimitry Andric Edge->getContextIds().insert(ContextId); 75006c3fb27SDimitry Andric return; 75106c3fb27SDimitry Andric } 75206c3fb27SDimitry Andric } 75306c3fb27SDimitry Andric std::shared_ptr<ContextEdge> Edge = std::make_shared<ContextEdge>( 75406c3fb27SDimitry Andric this, Caller, (uint8_t)AllocType, DenseSet<uint32_t>({ContextId})); 75506c3fb27SDimitry Andric CallerEdges.push_back(Edge); 75606c3fb27SDimitry Andric Caller->CalleeEdges.push_back(Edge); 75706c3fb27SDimitry Andric } 75806c3fb27SDimitry Andric 75906c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 76006c3fb27SDimitry Andric void CallsiteContextGraph< 76106c3fb27SDimitry Andric DerivedCCG, FuncTy, CallTy>::removeNoneTypeCalleeEdges(ContextNode *Node) { 76206c3fb27SDimitry Andric for (auto EI = Node->CalleeEdges.begin(); EI != Node->CalleeEdges.end();) { 76306c3fb27SDimitry Andric auto Edge = *EI; 76406c3fb27SDimitry Andric if (Edge->AllocTypes == (uint8_t)AllocationType::None) { 76506c3fb27SDimitry Andric assert(Edge->ContextIds.empty()); 76606c3fb27SDimitry Andric Edge->Callee->eraseCallerEdge(Edge.get()); 76706c3fb27SDimitry Andric EI = Node->CalleeEdges.erase(EI); 76806c3fb27SDimitry Andric } else 76906c3fb27SDimitry Andric ++EI; 77006c3fb27SDimitry Andric } 77106c3fb27SDimitry Andric } 77206c3fb27SDimitry Andric 77306c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 77406c3fb27SDimitry Andric typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge * 77506c3fb27SDimitry Andric CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode:: 77606c3fb27SDimitry Andric findEdgeFromCallee(const ContextNode *Callee) { 77706c3fb27SDimitry Andric for (const auto &Edge : CalleeEdges) 77806c3fb27SDimitry Andric if (Edge->Callee == Callee) 77906c3fb27SDimitry Andric return Edge.get(); 78006c3fb27SDimitry Andric return nullptr; 78106c3fb27SDimitry Andric } 78206c3fb27SDimitry Andric 78306c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 78406c3fb27SDimitry Andric typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge * 78506c3fb27SDimitry Andric CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode:: 78606c3fb27SDimitry Andric findEdgeFromCaller(const ContextNode *Caller) { 78706c3fb27SDimitry Andric for (const auto &Edge : CallerEdges) 78806c3fb27SDimitry Andric if (Edge->Caller == Caller) 78906c3fb27SDimitry Andric return Edge.get(); 79006c3fb27SDimitry Andric return nullptr; 79106c3fb27SDimitry Andric } 79206c3fb27SDimitry Andric 79306c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 79406c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode:: 79506c3fb27SDimitry Andric eraseCalleeEdge(const ContextEdge *Edge) { 796*5f757f3fSDimitry Andric auto EI = llvm::find_if( 797*5f757f3fSDimitry Andric CalleeEdges, [Edge](const std::shared_ptr<ContextEdge> &CalleeEdge) { 79806c3fb27SDimitry Andric return CalleeEdge.get() == Edge; 79906c3fb27SDimitry Andric }); 80006c3fb27SDimitry Andric assert(EI != CalleeEdges.end()); 80106c3fb27SDimitry Andric CalleeEdges.erase(EI); 80206c3fb27SDimitry Andric } 80306c3fb27SDimitry Andric 80406c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 80506c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode:: 80606c3fb27SDimitry Andric eraseCallerEdge(const ContextEdge *Edge) { 807*5f757f3fSDimitry Andric auto EI = llvm::find_if( 808*5f757f3fSDimitry Andric CallerEdges, [Edge](const std::shared_ptr<ContextEdge> &CallerEdge) { 80906c3fb27SDimitry Andric return CallerEdge.get() == Edge; 81006c3fb27SDimitry Andric }); 81106c3fb27SDimitry Andric assert(EI != CallerEdges.end()); 81206c3fb27SDimitry Andric CallerEdges.erase(EI); 81306c3fb27SDimitry Andric } 81406c3fb27SDimitry Andric 81506c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 81606c3fb27SDimitry Andric uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::computeAllocType( 81706c3fb27SDimitry Andric DenseSet<uint32_t> &ContextIds) { 81806c3fb27SDimitry Andric uint8_t BothTypes = 81906c3fb27SDimitry Andric (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold; 82006c3fb27SDimitry Andric uint8_t AllocType = (uint8_t)AllocationType::None; 82106c3fb27SDimitry Andric for (auto Id : ContextIds) { 82206c3fb27SDimitry Andric AllocType |= (uint8_t)ContextIdToAllocationType[Id]; 82306c3fb27SDimitry Andric // Bail early if alloc type reached both, no further refinement. 82406c3fb27SDimitry Andric if (AllocType == BothTypes) 82506c3fb27SDimitry Andric return AllocType; 82606c3fb27SDimitry Andric } 82706c3fb27SDimitry Andric return AllocType; 82806c3fb27SDimitry Andric } 82906c3fb27SDimitry Andric 83006c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 83106c3fb27SDimitry Andric uint8_t 83206c3fb27SDimitry Andric CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypesImpl( 83306c3fb27SDimitry Andric const DenseSet<uint32_t> &Node1Ids, const DenseSet<uint32_t> &Node2Ids) { 83406c3fb27SDimitry Andric uint8_t BothTypes = 83506c3fb27SDimitry Andric (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold; 83606c3fb27SDimitry Andric uint8_t AllocType = (uint8_t)AllocationType::None; 83706c3fb27SDimitry Andric for (auto Id : Node1Ids) { 83806c3fb27SDimitry Andric if (!Node2Ids.count(Id)) 83906c3fb27SDimitry Andric continue; 84006c3fb27SDimitry Andric AllocType |= (uint8_t)ContextIdToAllocationType[Id]; 84106c3fb27SDimitry Andric // Bail early if alloc type reached both, no further refinement. 84206c3fb27SDimitry Andric if (AllocType == BothTypes) 84306c3fb27SDimitry Andric return AllocType; 84406c3fb27SDimitry Andric } 84506c3fb27SDimitry Andric return AllocType; 84606c3fb27SDimitry Andric } 84706c3fb27SDimitry Andric 84806c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 84906c3fb27SDimitry Andric uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypes( 85006c3fb27SDimitry Andric const DenseSet<uint32_t> &Node1Ids, const DenseSet<uint32_t> &Node2Ids) { 85106c3fb27SDimitry Andric if (Node1Ids.size() < Node2Ids.size()) 85206c3fb27SDimitry Andric return intersectAllocTypesImpl(Node1Ids, Node2Ids); 85306c3fb27SDimitry Andric else 85406c3fb27SDimitry Andric return intersectAllocTypesImpl(Node2Ids, Node1Ids); 85506c3fb27SDimitry Andric } 85606c3fb27SDimitry Andric 85706c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 85806c3fb27SDimitry Andric typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode * 85906c3fb27SDimitry Andric CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addAllocNode( 86006c3fb27SDimitry Andric CallInfo Call, const FuncTy *F) { 86106c3fb27SDimitry Andric assert(!getNodeForAlloc(Call)); 86206c3fb27SDimitry Andric NodeOwner.push_back( 86306c3fb27SDimitry Andric std::make_unique<ContextNode>(/*IsAllocation=*/true, Call)); 86406c3fb27SDimitry Andric ContextNode *AllocNode = NodeOwner.back().get(); 86506c3fb27SDimitry Andric AllocationCallToContextNodeMap[Call] = AllocNode; 86606c3fb27SDimitry Andric NodeToCallingFunc[AllocNode] = F; 86706c3fb27SDimitry Andric // Use LastContextId as a uniq id for MIB allocation nodes. 86806c3fb27SDimitry Andric AllocNode->OrigStackOrAllocId = LastContextId; 86906c3fb27SDimitry Andric // Alloc type should be updated as we add in the MIBs. We should assert 87006c3fb27SDimitry Andric // afterwards that it is not still None. 87106c3fb27SDimitry Andric AllocNode->AllocTypes = (uint8_t)AllocationType::None; 87206c3fb27SDimitry Andric 87306c3fb27SDimitry Andric return AllocNode; 87406c3fb27SDimitry Andric } 87506c3fb27SDimitry Andric 87606c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 87706c3fb27SDimitry Andric template <class NodeT, class IteratorT> 87806c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addStackNodesForMIB( 87906c3fb27SDimitry Andric ContextNode *AllocNode, CallStack<NodeT, IteratorT> &StackContext, 88006c3fb27SDimitry Andric CallStack<NodeT, IteratorT> &CallsiteContext, AllocationType AllocType) { 88106c3fb27SDimitry Andric // Treating the hot alloc type as NotCold before the disambiguation for "hot" 88206c3fb27SDimitry Andric // is done. 88306c3fb27SDimitry Andric if (AllocType == AllocationType::Hot) 88406c3fb27SDimitry Andric AllocType = AllocationType::NotCold; 88506c3fb27SDimitry Andric 88606c3fb27SDimitry Andric ContextIdToAllocationType[++LastContextId] = AllocType; 88706c3fb27SDimitry Andric 88806c3fb27SDimitry Andric // Update alloc type and context ids for this MIB. 88906c3fb27SDimitry Andric AllocNode->AllocTypes |= (uint8_t)AllocType; 89006c3fb27SDimitry Andric AllocNode->ContextIds.insert(LastContextId); 89106c3fb27SDimitry Andric 89206c3fb27SDimitry Andric // Now add or update nodes for each stack id in alloc's context. 89306c3fb27SDimitry Andric // Later when processing the stack ids on non-alloc callsites we will adjust 89406c3fb27SDimitry Andric // for any inlining in the context. 89506c3fb27SDimitry Andric ContextNode *PrevNode = AllocNode; 89606c3fb27SDimitry Andric // Look for recursion (direct recursion should have been collapsed by 89706c3fb27SDimitry Andric // module summary analysis, here we should just be detecting mutual 89806c3fb27SDimitry Andric // recursion). Mark these nodes so we don't try to clone. 89906c3fb27SDimitry Andric SmallSet<uint64_t, 8> StackIdSet; 90006c3fb27SDimitry Andric // Skip any on the allocation call (inlining). 90106c3fb27SDimitry Andric for (auto ContextIter = StackContext.beginAfterSharedPrefix(CallsiteContext); 90206c3fb27SDimitry Andric ContextIter != StackContext.end(); ++ContextIter) { 90306c3fb27SDimitry Andric auto StackId = getStackId(*ContextIter); 90406c3fb27SDimitry Andric ContextNode *StackNode = getNodeForStackId(StackId); 90506c3fb27SDimitry Andric if (!StackNode) { 90606c3fb27SDimitry Andric NodeOwner.push_back( 90706c3fb27SDimitry Andric std::make_unique<ContextNode>(/*IsAllocation=*/false)); 90806c3fb27SDimitry Andric StackNode = NodeOwner.back().get(); 90906c3fb27SDimitry Andric StackEntryIdToContextNodeMap[StackId] = StackNode; 91006c3fb27SDimitry Andric StackNode->OrigStackOrAllocId = StackId; 91106c3fb27SDimitry Andric } 91206c3fb27SDimitry Andric auto Ins = StackIdSet.insert(StackId); 91306c3fb27SDimitry Andric if (!Ins.second) 91406c3fb27SDimitry Andric StackNode->Recursive = true; 91506c3fb27SDimitry Andric StackNode->ContextIds.insert(LastContextId); 91606c3fb27SDimitry Andric StackNode->AllocTypes |= (uint8_t)AllocType; 91706c3fb27SDimitry Andric PrevNode->addOrUpdateCallerEdge(StackNode, AllocType, LastContextId); 91806c3fb27SDimitry Andric PrevNode = StackNode; 91906c3fb27SDimitry Andric } 92006c3fb27SDimitry Andric } 92106c3fb27SDimitry Andric 92206c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 92306c3fb27SDimitry Andric DenseSet<uint32_t> 92406c3fb27SDimitry Andric CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::duplicateContextIds( 92506c3fb27SDimitry Andric const DenseSet<uint32_t> &StackSequenceContextIds, 92606c3fb27SDimitry Andric DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds) { 92706c3fb27SDimitry Andric DenseSet<uint32_t> NewContextIds; 92806c3fb27SDimitry Andric for (auto OldId : StackSequenceContextIds) { 92906c3fb27SDimitry Andric NewContextIds.insert(++LastContextId); 93006c3fb27SDimitry Andric OldToNewContextIds[OldId].insert(LastContextId); 93106c3fb27SDimitry Andric assert(ContextIdToAllocationType.count(OldId)); 93206c3fb27SDimitry Andric // The new context has the same allocation type as original. 93306c3fb27SDimitry Andric ContextIdToAllocationType[LastContextId] = ContextIdToAllocationType[OldId]; 93406c3fb27SDimitry Andric } 93506c3fb27SDimitry Andric return NewContextIds; 93606c3fb27SDimitry Andric } 93706c3fb27SDimitry Andric 93806c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 93906c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>:: 94006c3fb27SDimitry Andric propagateDuplicateContextIds( 94106c3fb27SDimitry Andric const DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds) { 94206c3fb27SDimitry Andric // Build a set of duplicated context ids corresponding to the input id set. 94306c3fb27SDimitry Andric auto GetNewIds = [&OldToNewContextIds](const DenseSet<uint32_t> &ContextIds) { 94406c3fb27SDimitry Andric DenseSet<uint32_t> NewIds; 94506c3fb27SDimitry Andric for (auto Id : ContextIds) 94606c3fb27SDimitry Andric if (auto NewId = OldToNewContextIds.find(Id); 94706c3fb27SDimitry Andric NewId != OldToNewContextIds.end()) 94806c3fb27SDimitry Andric NewIds.insert(NewId->second.begin(), NewId->second.end()); 94906c3fb27SDimitry Andric return NewIds; 95006c3fb27SDimitry Andric }; 95106c3fb27SDimitry Andric 95206c3fb27SDimitry Andric // Recursively update context ids sets along caller edges. 95306c3fb27SDimitry Andric auto UpdateCallers = [&](ContextNode *Node, 95406c3fb27SDimitry Andric DenseSet<const ContextEdge *> &Visited, 95506c3fb27SDimitry Andric auto &&UpdateCallers) -> void { 95606c3fb27SDimitry Andric for (const auto &Edge : Node->CallerEdges) { 95706c3fb27SDimitry Andric auto Inserted = Visited.insert(Edge.get()); 95806c3fb27SDimitry Andric if (!Inserted.second) 95906c3fb27SDimitry Andric continue; 96006c3fb27SDimitry Andric ContextNode *NextNode = Edge->Caller; 96106c3fb27SDimitry Andric DenseSet<uint32_t> NewIdsToAdd = GetNewIds(Edge->getContextIds()); 96206c3fb27SDimitry Andric // Only need to recursively iterate to NextNode via this caller edge if 96306c3fb27SDimitry Andric // it resulted in any added ids to NextNode. 96406c3fb27SDimitry Andric if (!NewIdsToAdd.empty()) { 96506c3fb27SDimitry Andric Edge->getContextIds().insert(NewIdsToAdd.begin(), NewIdsToAdd.end()); 96606c3fb27SDimitry Andric NextNode->ContextIds.insert(NewIdsToAdd.begin(), NewIdsToAdd.end()); 96706c3fb27SDimitry Andric UpdateCallers(NextNode, Visited, UpdateCallers); 96806c3fb27SDimitry Andric } 96906c3fb27SDimitry Andric } 97006c3fb27SDimitry Andric }; 97106c3fb27SDimitry Andric 97206c3fb27SDimitry Andric DenseSet<const ContextEdge *> Visited; 97306c3fb27SDimitry Andric for (auto &Entry : AllocationCallToContextNodeMap) { 97406c3fb27SDimitry Andric auto *Node = Entry.second; 97506c3fb27SDimitry Andric // Update ids on the allocation nodes before calling the recursive 97606c3fb27SDimitry Andric // update along caller edges, since this simplifies the logic during 97706c3fb27SDimitry Andric // that traversal. 97806c3fb27SDimitry Andric DenseSet<uint32_t> NewIdsToAdd = GetNewIds(Node->ContextIds); 97906c3fb27SDimitry Andric Node->ContextIds.insert(NewIdsToAdd.begin(), NewIdsToAdd.end()); 98006c3fb27SDimitry Andric UpdateCallers(Node, Visited, UpdateCallers); 98106c3fb27SDimitry Andric } 98206c3fb27SDimitry Andric } 98306c3fb27SDimitry Andric 98406c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 98506c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::connectNewNode( 98606c3fb27SDimitry Andric ContextNode *NewNode, ContextNode *OrigNode, bool TowardsCallee) { 98706c3fb27SDimitry Andric // Make a copy of the context ids, since this will be adjusted below as they 98806c3fb27SDimitry Andric // are moved. 98906c3fb27SDimitry Andric DenseSet<uint32_t> RemainingContextIds = NewNode->ContextIds; 99006c3fb27SDimitry Andric auto &OrigEdges = 99106c3fb27SDimitry Andric TowardsCallee ? OrigNode->CalleeEdges : OrigNode->CallerEdges; 99206c3fb27SDimitry Andric // Increment iterator in loop so that we can remove edges as needed. 99306c3fb27SDimitry Andric for (auto EI = OrigEdges.begin(); EI != OrigEdges.end();) { 99406c3fb27SDimitry Andric auto Edge = *EI; 99506c3fb27SDimitry Andric // Remove any matching context ids from Edge, return set that were found and 99606c3fb27SDimitry Andric // removed, these are the new edge's context ids. Also update the remaining 99706c3fb27SDimitry Andric // (not found ids). 99806c3fb27SDimitry Andric DenseSet<uint32_t> NewEdgeContextIds, NotFoundContextIds; 99906c3fb27SDimitry Andric set_subtract(Edge->getContextIds(), RemainingContextIds, NewEdgeContextIds, 100006c3fb27SDimitry Andric NotFoundContextIds); 100106c3fb27SDimitry Andric RemainingContextIds.swap(NotFoundContextIds); 100206c3fb27SDimitry Andric // If no matching context ids for this edge, skip it. 100306c3fb27SDimitry Andric if (NewEdgeContextIds.empty()) { 100406c3fb27SDimitry Andric ++EI; 100506c3fb27SDimitry Andric continue; 100606c3fb27SDimitry Andric } 100706c3fb27SDimitry Andric if (TowardsCallee) { 100806c3fb27SDimitry Andric auto NewEdge = std::make_shared<ContextEdge>( 100906c3fb27SDimitry Andric Edge->Callee, NewNode, computeAllocType(NewEdgeContextIds), 101006c3fb27SDimitry Andric NewEdgeContextIds); 101106c3fb27SDimitry Andric NewNode->CalleeEdges.push_back(NewEdge); 101206c3fb27SDimitry Andric NewEdge->Callee->CallerEdges.push_back(NewEdge); 101306c3fb27SDimitry Andric } else { 101406c3fb27SDimitry Andric auto NewEdge = std::make_shared<ContextEdge>( 101506c3fb27SDimitry Andric NewNode, Edge->Caller, computeAllocType(NewEdgeContextIds), 101606c3fb27SDimitry Andric NewEdgeContextIds); 101706c3fb27SDimitry Andric NewNode->CallerEdges.push_back(NewEdge); 101806c3fb27SDimitry Andric NewEdge->Caller->CalleeEdges.push_back(NewEdge); 101906c3fb27SDimitry Andric } 102006c3fb27SDimitry Andric // Remove old edge if context ids empty. 102106c3fb27SDimitry Andric if (Edge->getContextIds().empty()) { 102206c3fb27SDimitry Andric if (TowardsCallee) { 102306c3fb27SDimitry Andric Edge->Callee->eraseCallerEdge(Edge.get()); 102406c3fb27SDimitry Andric EI = OrigNode->CalleeEdges.erase(EI); 102506c3fb27SDimitry Andric } else { 102606c3fb27SDimitry Andric Edge->Caller->eraseCalleeEdge(Edge.get()); 102706c3fb27SDimitry Andric EI = OrigNode->CallerEdges.erase(EI); 102806c3fb27SDimitry Andric } 102906c3fb27SDimitry Andric continue; 103006c3fb27SDimitry Andric } 103106c3fb27SDimitry Andric ++EI; 103206c3fb27SDimitry Andric } 103306c3fb27SDimitry Andric } 103406c3fb27SDimitry Andric 103506c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 103606c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>:: 103706c3fb27SDimitry Andric assignStackNodesPostOrder(ContextNode *Node, 103806c3fb27SDimitry Andric DenseSet<const ContextNode *> &Visited, 103906c3fb27SDimitry Andric DenseMap<uint64_t, std::vector<CallContextInfo>> 104006c3fb27SDimitry Andric &StackIdToMatchingCalls) { 104106c3fb27SDimitry Andric auto Inserted = Visited.insert(Node); 104206c3fb27SDimitry Andric if (!Inserted.second) 104306c3fb27SDimitry Andric return; 104406c3fb27SDimitry Andric // Post order traversal. Iterate over a copy since we may add nodes and 104506c3fb27SDimitry Andric // therefore new callers during the recursive call, invalidating any 104606c3fb27SDimitry Andric // iterator over the original edge vector. We don't need to process these 104706c3fb27SDimitry Andric // new nodes as they were already processed on creation. 104806c3fb27SDimitry Andric auto CallerEdges = Node->CallerEdges; 104906c3fb27SDimitry Andric for (auto &Edge : CallerEdges) { 105006c3fb27SDimitry Andric // Skip any that have been removed during the recursion. 105106c3fb27SDimitry Andric if (!Edge) 105206c3fb27SDimitry Andric continue; 105306c3fb27SDimitry Andric assignStackNodesPostOrder(Edge->Caller, Visited, StackIdToMatchingCalls); 105406c3fb27SDimitry Andric } 105506c3fb27SDimitry Andric 105606c3fb27SDimitry Andric // If this node's stack id is in the map, update the graph to contain new 105706c3fb27SDimitry Andric // nodes representing any inlining at interior callsites. Note we move the 105806c3fb27SDimitry Andric // associated context ids over to the new nodes. 105906c3fb27SDimitry Andric 106006c3fb27SDimitry Andric // Ignore this node if it is for an allocation or we didn't record any 106106c3fb27SDimitry Andric // stack id lists ending at it. 106206c3fb27SDimitry Andric if (Node->IsAllocation || 106306c3fb27SDimitry Andric !StackIdToMatchingCalls.count(Node->OrigStackOrAllocId)) 106406c3fb27SDimitry Andric return; 106506c3fb27SDimitry Andric 106606c3fb27SDimitry Andric auto &Calls = StackIdToMatchingCalls[Node->OrigStackOrAllocId]; 106706c3fb27SDimitry Andric // Handle the simple case first. A single call with a single stack id. 106806c3fb27SDimitry Andric // In this case there is no need to create any new context nodes, simply 106906c3fb27SDimitry Andric // assign the context node for stack id to this Call. 107006c3fb27SDimitry Andric if (Calls.size() == 1) { 107106c3fb27SDimitry Andric auto &[Call, Ids, Func, SavedContextIds] = Calls[0]; 107206c3fb27SDimitry Andric if (Ids.size() == 1) { 107306c3fb27SDimitry Andric assert(SavedContextIds.empty()); 107406c3fb27SDimitry Andric // It should be this Node 107506c3fb27SDimitry Andric assert(Node == getNodeForStackId(Ids[0])); 107606c3fb27SDimitry Andric if (Node->Recursive) 107706c3fb27SDimitry Andric return; 107806c3fb27SDimitry Andric Node->setCall(Call); 107906c3fb27SDimitry Andric NonAllocationCallToContextNodeMap[Call] = Node; 108006c3fb27SDimitry Andric NodeToCallingFunc[Node] = Func; 108106c3fb27SDimitry Andric return; 108206c3fb27SDimitry Andric } 108306c3fb27SDimitry Andric } 108406c3fb27SDimitry Andric 108506c3fb27SDimitry Andric // Find the node for the last stack id, which should be the same 108606c3fb27SDimitry Andric // across all calls recorded for this id, and is this node's id. 108706c3fb27SDimitry Andric uint64_t LastId = Node->OrigStackOrAllocId; 108806c3fb27SDimitry Andric ContextNode *LastNode = getNodeForStackId(LastId); 108906c3fb27SDimitry Andric // We should only have kept stack ids that had nodes. 109006c3fb27SDimitry Andric assert(LastNode); 109106c3fb27SDimitry Andric 109206c3fb27SDimitry Andric for (unsigned I = 0; I < Calls.size(); I++) { 109306c3fb27SDimitry Andric auto &[Call, Ids, Func, SavedContextIds] = Calls[I]; 109406c3fb27SDimitry Andric // Skip any for which we didn't assign any ids, these don't get a node in 109506c3fb27SDimitry Andric // the graph. 109606c3fb27SDimitry Andric if (SavedContextIds.empty()) 109706c3fb27SDimitry Andric continue; 109806c3fb27SDimitry Andric 109906c3fb27SDimitry Andric assert(LastId == Ids.back()); 110006c3fb27SDimitry Andric 110106c3fb27SDimitry Andric ContextNode *FirstNode = getNodeForStackId(Ids[0]); 110206c3fb27SDimitry Andric assert(FirstNode); 110306c3fb27SDimitry Andric 110406c3fb27SDimitry Andric // Recompute the context ids for this stack id sequence (the 110506c3fb27SDimitry Andric // intersection of the context ids of the corresponding nodes). 110606c3fb27SDimitry Andric // Start with the ids we saved in the map for this call, which could be 110706c3fb27SDimitry Andric // duplicated context ids. We have to recompute as we might have overlap 110806c3fb27SDimitry Andric // overlap between the saved context ids for different last nodes, and 110906c3fb27SDimitry Andric // removed them already during the post order traversal. 111006c3fb27SDimitry Andric set_intersect(SavedContextIds, FirstNode->ContextIds); 111106c3fb27SDimitry Andric ContextNode *PrevNode = nullptr; 111206c3fb27SDimitry Andric for (auto Id : Ids) { 111306c3fb27SDimitry Andric ContextNode *CurNode = getNodeForStackId(Id); 111406c3fb27SDimitry Andric // We should only have kept stack ids that had nodes and weren't 111506c3fb27SDimitry Andric // recursive. 111606c3fb27SDimitry Andric assert(CurNode); 111706c3fb27SDimitry Andric assert(!CurNode->Recursive); 111806c3fb27SDimitry Andric if (!PrevNode) { 111906c3fb27SDimitry Andric PrevNode = CurNode; 112006c3fb27SDimitry Andric continue; 112106c3fb27SDimitry Andric } 112206c3fb27SDimitry Andric auto *Edge = CurNode->findEdgeFromCallee(PrevNode); 112306c3fb27SDimitry Andric if (!Edge) { 112406c3fb27SDimitry Andric SavedContextIds.clear(); 112506c3fb27SDimitry Andric break; 112606c3fb27SDimitry Andric } 112706c3fb27SDimitry Andric PrevNode = CurNode; 112806c3fb27SDimitry Andric set_intersect(SavedContextIds, Edge->getContextIds()); 112906c3fb27SDimitry Andric 113006c3fb27SDimitry Andric // If we now have no context ids for clone, skip this call. 113106c3fb27SDimitry Andric if (SavedContextIds.empty()) 113206c3fb27SDimitry Andric break; 113306c3fb27SDimitry Andric } 113406c3fb27SDimitry Andric if (SavedContextIds.empty()) 113506c3fb27SDimitry Andric continue; 113606c3fb27SDimitry Andric 113706c3fb27SDimitry Andric // Create new context node. 113806c3fb27SDimitry Andric NodeOwner.push_back( 113906c3fb27SDimitry Andric std::make_unique<ContextNode>(/*IsAllocation=*/false, Call)); 114006c3fb27SDimitry Andric ContextNode *NewNode = NodeOwner.back().get(); 114106c3fb27SDimitry Andric NodeToCallingFunc[NewNode] = Func; 114206c3fb27SDimitry Andric NonAllocationCallToContextNodeMap[Call] = NewNode; 114306c3fb27SDimitry Andric NewNode->ContextIds = SavedContextIds; 114406c3fb27SDimitry Andric NewNode->AllocTypes = computeAllocType(NewNode->ContextIds); 114506c3fb27SDimitry Andric 114606c3fb27SDimitry Andric // Connect to callees of innermost stack frame in inlined call chain. 114706c3fb27SDimitry Andric // This updates context ids for FirstNode's callee's to reflect those 114806c3fb27SDimitry Andric // moved to NewNode. 114906c3fb27SDimitry Andric connectNewNode(NewNode, FirstNode, /*TowardsCallee=*/true); 115006c3fb27SDimitry Andric 115106c3fb27SDimitry Andric // Connect to callers of outermost stack frame in inlined call chain. 115206c3fb27SDimitry Andric // This updates context ids for FirstNode's caller's to reflect those 115306c3fb27SDimitry Andric // moved to NewNode. 115406c3fb27SDimitry Andric connectNewNode(NewNode, LastNode, /*TowardsCallee=*/false); 115506c3fb27SDimitry Andric 115606c3fb27SDimitry Andric // Now we need to remove context ids from edges/nodes between First and 115706c3fb27SDimitry Andric // Last Node. 115806c3fb27SDimitry Andric PrevNode = nullptr; 115906c3fb27SDimitry Andric for (auto Id : Ids) { 116006c3fb27SDimitry Andric ContextNode *CurNode = getNodeForStackId(Id); 116106c3fb27SDimitry Andric // We should only have kept stack ids that had nodes. 116206c3fb27SDimitry Andric assert(CurNode); 116306c3fb27SDimitry Andric 116406c3fb27SDimitry Andric // Remove the context ids moved to NewNode from CurNode, and the 116506c3fb27SDimitry Andric // edge from the prior node. 116606c3fb27SDimitry Andric set_subtract(CurNode->ContextIds, NewNode->ContextIds); 116706c3fb27SDimitry Andric if (PrevNode) { 116806c3fb27SDimitry Andric auto *PrevEdge = CurNode->findEdgeFromCallee(PrevNode); 116906c3fb27SDimitry Andric assert(PrevEdge); 117006c3fb27SDimitry Andric set_subtract(PrevEdge->getContextIds(), NewNode->ContextIds); 117106c3fb27SDimitry Andric if (PrevEdge->getContextIds().empty()) { 117206c3fb27SDimitry Andric PrevNode->eraseCallerEdge(PrevEdge); 117306c3fb27SDimitry Andric CurNode->eraseCalleeEdge(PrevEdge); 117406c3fb27SDimitry Andric } 117506c3fb27SDimitry Andric } 117606c3fb27SDimitry Andric PrevNode = CurNode; 117706c3fb27SDimitry Andric } 117806c3fb27SDimitry Andric } 117906c3fb27SDimitry Andric } 118006c3fb27SDimitry Andric 118106c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 118206c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() { 118306c3fb27SDimitry Andric // Map of stack id to all calls with that as the last (outermost caller) 118406c3fb27SDimitry Andric // callsite id that has a context node (some might not due to pruning 118506c3fb27SDimitry Andric // performed during matching of the allocation profile contexts). 118606c3fb27SDimitry Andric // The CallContextInfo contains the Call and a list of its stack ids with 118706c3fb27SDimitry Andric // ContextNodes, the function containing Call, and the set of context ids 118806c3fb27SDimitry Andric // the analysis will eventually identify for use in any new node created 118906c3fb27SDimitry Andric // for that callsite. 119006c3fb27SDimitry Andric DenseMap<uint64_t, std::vector<CallContextInfo>> StackIdToMatchingCalls; 119106c3fb27SDimitry Andric for (auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) { 119206c3fb27SDimitry Andric for (auto &Call : CallsWithMetadata) { 119306c3fb27SDimitry Andric // Ignore allocations, already handled. 119406c3fb27SDimitry Andric if (AllocationCallToContextNodeMap.count(Call)) 119506c3fb27SDimitry Andric continue; 119606c3fb27SDimitry Andric auto StackIdsWithContextNodes = 119706c3fb27SDimitry Andric getStackIdsWithContextNodesForCall(Call.call()); 119806c3fb27SDimitry Andric // If there were no nodes created for MIBs on allocs (maybe this was in 119906c3fb27SDimitry Andric // the unambiguous part of the MIB stack that was pruned), ignore. 120006c3fb27SDimitry Andric if (StackIdsWithContextNodes.empty()) 120106c3fb27SDimitry Andric continue; 120206c3fb27SDimitry Andric // Otherwise, record this Call along with the list of ids for the last 120306c3fb27SDimitry Andric // (outermost caller) stack id with a node. 120406c3fb27SDimitry Andric StackIdToMatchingCalls[StackIdsWithContextNodes.back()].push_back( 120506c3fb27SDimitry Andric {Call.call(), StackIdsWithContextNodes, Func, {}}); 120606c3fb27SDimitry Andric } 120706c3fb27SDimitry Andric } 120806c3fb27SDimitry Andric 120906c3fb27SDimitry Andric // First make a pass through all stack ids that correspond to a call, 121006c3fb27SDimitry Andric // as identified in the above loop. Compute the context ids corresponding to 121106c3fb27SDimitry Andric // each of these calls when they correspond to multiple stack ids due to 121206c3fb27SDimitry Andric // due to inlining. Perform any duplication of context ids required when 121306c3fb27SDimitry Andric // there is more than one call with the same stack ids. Their (possibly newly 121406c3fb27SDimitry Andric // duplicated) context ids are saved in the StackIdToMatchingCalls map. 121506c3fb27SDimitry Andric DenseMap<uint32_t, DenseSet<uint32_t>> OldToNewContextIds; 121606c3fb27SDimitry Andric for (auto &It : StackIdToMatchingCalls) { 121706c3fb27SDimitry Andric auto &Calls = It.getSecond(); 121806c3fb27SDimitry Andric // Skip single calls with a single stack id. These don't need a new node. 121906c3fb27SDimitry Andric if (Calls.size() == 1) { 122006c3fb27SDimitry Andric auto &Ids = std::get<1>(Calls[0]); 122106c3fb27SDimitry Andric if (Ids.size() == 1) 122206c3fb27SDimitry Andric continue; 122306c3fb27SDimitry Andric } 122406c3fb27SDimitry Andric // In order to do the best and maximal matching of inlined calls to context 122506c3fb27SDimitry Andric // node sequences we will sort the vectors of stack ids in descending order 122606c3fb27SDimitry Andric // of length, and within each length, lexicographically by stack id. The 122706c3fb27SDimitry Andric // latter is so that we can specially handle calls that have identical stack 122806c3fb27SDimitry Andric // id sequences (either due to cloning or artificially because of the MIB 122906c3fb27SDimitry Andric // context pruning). 123006c3fb27SDimitry Andric std::stable_sort(Calls.begin(), Calls.end(), 123106c3fb27SDimitry Andric [](const CallContextInfo &A, const CallContextInfo &B) { 123206c3fb27SDimitry Andric auto &IdsA = std::get<1>(A); 123306c3fb27SDimitry Andric auto &IdsB = std::get<1>(B); 123406c3fb27SDimitry Andric return IdsA.size() > IdsB.size() || 123506c3fb27SDimitry Andric (IdsA.size() == IdsB.size() && IdsA < IdsB); 123606c3fb27SDimitry Andric }); 123706c3fb27SDimitry Andric 123806c3fb27SDimitry Andric // Find the node for the last stack id, which should be the same 123906c3fb27SDimitry Andric // across all calls recorded for this id, and is the id for this 124006c3fb27SDimitry Andric // entry in the StackIdToMatchingCalls map. 124106c3fb27SDimitry Andric uint64_t LastId = It.getFirst(); 124206c3fb27SDimitry Andric ContextNode *LastNode = getNodeForStackId(LastId); 124306c3fb27SDimitry Andric // We should only have kept stack ids that had nodes. 124406c3fb27SDimitry Andric assert(LastNode); 124506c3fb27SDimitry Andric 124606c3fb27SDimitry Andric if (LastNode->Recursive) 124706c3fb27SDimitry Andric continue; 124806c3fb27SDimitry Andric 124906c3fb27SDimitry Andric // Initialize the context ids with the last node's. We will subsequently 125006c3fb27SDimitry Andric // refine the context ids by computing the intersection along all edges. 125106c3fb27SDimitry Andric DenseSet<uint32_t> LastNodeContextIds = LastNode->ContextIds; 125206c3fb27SDimitry Andric assert(!LastNodeContextIds.empty()); 125306c3fb27SDimitry Andric 125406c3fb27SDimitry Andric for (unsigned I = 0; I < Calls.size(); I++) { 125506c3fb27SDimitry Andric auto &[Call, Ids, Func, SavedContextIds] = Calls[I]; 125606c3fb27SDimitry Andric assert(SavedContextIds.empty()); 125706c3fb27SDimitry Andric assert(LastId == Ids.back()); 125806c3fb27SDimitry Andric 125906c3fb27SDimitry Andric // First compute the context ids for this stack id sequence (the 126006c3fb27SDimitry Andric // intersection of the context ids of the corresponding nodes). 126106c3fb27SDimitry Andric // Start with the remaining saved ids for the last node. 126206c3fb27SDimitry Andric assert(!LastNodeContextIds.empty()); 126306c3fb27SDimitry Andric DenseSet<uint32_t> StackSequenceContextIds = LastNodeContextIds; 126406c3fb27SDimitry Andric 126506c3fb27SDimitry Andric ContextNode *PrevNode = LastNode; 126606c3fb27SDimitry Andric ContextNode *CurNode = LastNode; 126706c3fb27SDimitry Andric bool Skip = false; 126806c3fb27SDimitry Andric 126906c3fb27SDimitry Andric // Iterate backwards through the stack Ids, starting after the last Id 127006c3fb27SDimitry Andric // in the list, which was handled once outside for all Calls. 127106c3fb27SDimitry Andric for (auto IdIter = Ids.rbegin() + 1; IdIter != Ids.rend(); IdIter++) { 127206c3fb27SDimitry Andric auto Id = *IdIter; 127306c3fb27SDimitry Andric CurNode = getNodeForStackId(Id); 127406c3fb27SDimitry Andric // We should only have kept stack ids that had nodes. 127506c3fb27SDimitry Andric assert(CurNode); 127606c3fb27SDimitry Andric 127706c3fb27SDimitry Andric if (CurNode->Recursive) { 127806c3fb27SDimitry Andric Skip = true; 127906c3fb27SDimitry Andric break; 128006c3fb27SDimitry Andric } 128106c3fb27SDimitry Andric 128206c3fb27SDimitry Andric auto *Edge = CurNode->findEdgeFromCaller(PrevNode); 128306c3fb27SDimitry Andric // If there is no edge then the nodes belong to different MIB contexts, 128406c3fb27SDimitry Andric // and we should skip this inlined context sequence. For example, this 128506c3fb27SDimitry Andric // particular inlined context may include stack ids A->B, and we may 128606c3fb27SDimitry Andric // indeed have nodes for both A and B, but it is possible that they were 128706c3fb27SDimitry Andric // never profiled in sequence in a single MIB for any allocation (i.e. 128806c3fb27SDimitry Andric // we might have profiled an allocation that involves the callsite A, 128906c3fb27SDimitry Andric // but through a different one of its callee callsites, and we might 129006c3fb27SDimitry Andric // have profiled an allocation that involves callsite B, but reached 129106c3fb27SDimitry Andric // from a different caller callsite). 129206c3fb27SDimitry Andric if (!Edge) { 129306c3fb27SDimitry Andric Skip = true; 129406c3fb27SDimitry Andric break; 129506c3fb27SDimitry Andric } 129606c3fb27SDimitry Andric PrevNode = CurNode; 129706c3fb27SDimitry Andric 129806c3fb27SDimitry Andric // Update the context ids, which is the intersection of the ids along 129906c3fb27SDimitry Andric // all edges in the sequence. 130006c3fb27SDimitry Andric set_intersect(StackSequenceContextIds, Edge->getContextIds()); 130106c3fb27SDimitry Andric 130206c3fb27SDimitry Andric // If we now have no context ids for clone, skip this call. 130306c3fb27SDimitry Andric if (StackSequenceContextIds.empty()) { 130406c3fb27SDimitry Andric Skip = true; 130506c3fb27SDimitry Andric break; 130606c3fb27SDimitry Andric } 130706c3fb27SDimitry Andric } 130806c3fb27SDimitry Andric if (Skip) 130906c3fb27SDimitry Andric continue; 131006c3fb27SDimitry Andric 131106c3fb27SDimitry Andric // If some of this call's stack ids did not have corresponding nodes (due 131206c3fb27SDimitry Andric // to pruning), don't include any context ids for contexts that extend 131306c3fb27SDimitry Andric // beyond these nodes. Otherwise we would be matching part of unrelated / 131406c3fb27SDimitry Andric // not fully matching stack contexts. To do this, subtract any context ids 131506c3fb27SDimitry Andric // found in caller nodes of the last node found above. 131606c3fb27SDimitry Andric if (Ids.back() != getLastStackId(Call)) { 131706c3fb27SDimitry Andric for (const auto &PE : CurNode->CallerEdges) { 131806c3fb27SDimitry Andric set_subtract(StackSequenceContextIds, PE->getContextIds()); 131906c3fb27SDimitry Andric if (StackSequenceContextIds.empty()) 132006c3fb27SDimitry Andric break; 132106c3fb27SDimitry Andric } 132206c3fb27SDimitry Andric // If we now have no context ids for clone, skip this call. 132306c3fb27SDimitry Andric if (StackSequenceContextIds.empty()) 132406c3fb27SDimitry Andric continue; 132506c3fb27SDimitry Andric } 132606c3fb27SDimitry Andric 132706c3fb27SDimitry Andric // Check if the next set of stack ids is the same (since the Calls vector 132806c3fb27SDimitry Andric // of tuples is sorted by the stack ids we can just look at the next one). 132906c3fb27SDimitry Andric bool DuplicateContextIds = false; 133006c3fb27SDimitry Andric if (I + 1 < Calls.size()) { 133106c3fb27SDimitry Andric auto NextIds = std::get<1>(Calls[I + 1]); 133206c3fb27SDimitry Andric DuplicateContextIds = Ids == NextIds; 133306c3fb27SDimitry Andric } 133406c3fb27SDimitry Andric 133506c3fb27SDimitry Andric // If we don't have duplicate context ids, then we can assign all the 133606c3fb27SDimitry Andric // context ids computed for the original node sequence to this call. 133706c3fb27SDimitry Andric // If there are duplicate calls with the same stack ids then we synthesize 133806c3fb27SDimitry Andric // new context ids that are duplicates of the originals. These are 133906c3fb27SDimitry Andric // assigned to SavedContextIds, which is a reference into the map entry 134006c3fb27SDimitry Andric // for this call, allowing us to access these ids later on. 134106c3fb27SDimitry Andric OldToNewContextIds.reserve(OldToNewContextIds.size() + 134206c3fb27SDimitry Andric StackSequenceContextIds.size()); 134306c3fb27SDimitry Andric SavedContextIds = 134406c3fb27SDimitry Andric DuplicateContextIds 134506c3fb27SDimitry Andric ? duplicateContextIds(StackSequenceContextIds, OldToNewContextIds) 134606c3fb27SDimitry Andric : StackSequenceContextIds; 134706c3fb27SDimitry Andric assert(!SavedContextIds.empty()); 134806c3fb27SDimitry Andric 134906c3fb27SDimitry Andric if (!DuplicateContextIds) { 135006c3fb27SDimitry Andric // Update saved last node's context ids to remove those that are 135106c3fb27SDimitry Andric // assigned to other calls, so that it is ready for the next call at 135206c3fb27SDimitry Andric // this stack id. 135306c3fb27SDimitry Andric set_subtract(LastNodeContextIds, StackSequenceContextIds); 135406c3fb27SDimitry Andric if (LastNodeContextIds.empty()) 135506c3fb27SDimitry Andric break; 135606c3fb27SDimitry Andric } 135706c3fb27SDimitry Andric } 135806c3fb27SDimitry Andric } 135906c3fb27SDimitry Andric 136006c3fb27SDimitry Andric // Propagate the duplicate context ids over the graph. 136106c3fb27SDimitry Andric propagateDuplicateContextIds(OldToNewContextIds); 136206c3fb27SDimitry Andric 136306c3fb27SDimitry Andric if (VerifyCCG) 136406c3fb27SDimitry Andric check(); 136506c3fb27SDimitry Andric 136606c3fb27SDimitry Andric // Now perform a post-order traversal over the graph, starting with the 136706c3fb27SDimitry Andric // allocation nodes, essentially processing nodes from callers to callees. 136806c3fb27SDimitry Andric // For any that contains an id in the map, update the graph to contain new 136906c3fb27SDimitry Andric // nodes representing any inlining at interior callsites. Note we move the 137006c3fb27SDimitry Andric // associated context ids over to the new nodes. 137106c3fb27SDimitry Andric DenseSet<const ContextNode *> Visited; 137206c3fb27SDimitry Andric for (auto &Entry : AllocationCallToContextNodeMap) 137306c3fb27SDimitry Andric assignStackNodesPostOrder(Entry.second, Visited, StackIdToMatchingCalls); 137406c3fb27SDimitry Andric } 137506c3fb27SDimitry Andric 137606c3fb27SDimitry Andric uint64_t ModuleCallsiteContextGraph::getLastStackId(Instruction *Call) { 137706c3fb27SDimitry Andric CallStack<MDNode, MDNode::op_iterator> CallsiteContext( 137806c3fb27SDimitry Andric Call->getMetadata(LLVMContext::MD_callsite)); 137906c3fb27SDimitry Andric return CallsiteContext.back(); 138006c3fb27SDimitry Andric } 138106c3fb27SDimitry Andric 138206c3fb27SDimitry Andric uint64_t IndexCallsiteContextGraph::getLastStackId(IndexCall &Call) { 138306c3fb27SDimitry Andric assert(isa<CallsiteInfo *>(Call.getBase())); 138406c3fb27SDimitry Andric CallStack<CallsiteInfo, SmallVector<unsigned>::const_iterator> 138506c3fb27SDimitry Andric CallsiteContext(dyn_cast_if_present<CallsiteInfo *>(Call.getBase())); 138606c3fb27SDimitry Andric // Need to convert index into stack id. 138706c3fb27SDimitry Andric return Index.getStackIdAtIndex(CallsiteContext.back()); 138806c3fb27SDimitry Andric } 138906c3fb27SDimitry Andric 139006c3fb27SDimitry Andric static const std::string MemProfCloneSuffix = ".memprof."; 139106c3fb27SDimitry Andric 139206c3fb27SDimitry Andric static std::string getMemProfFuncName(Twine Base, unsigned CloneNo) { 139306c3fb27SDimitry Andric // We use CloneNo == 0 to refer to the original version, which doesn't get 139406c3fb27SDimitry Andric // renamed with a suffix. 139506c3fb27SDimitry Andric if (!CloneNo) 139606c3fb27SDimitry Andric return Base.str(); 139706c3fb27SDimitry Andric return (Base + MemProfCloneSuffix + Twine(CloneNo)).str(); 139806c3fb27SDimitry Andric } 139906c3fb27SDimitry Andric 140006c3fb27SDimitry Andric std::string ModuleCallsiteContextGraph::getLabel(const Function *Func, 140106c3fb27SDimitry Andric const Instruction *Call, 140206c3fb27SDimitry Andric unsigned CloneNo) const { 140306c3fb27SDimitry Andric return (Twine(Call->getFunction()->getName()) + " -> " + 140406c3fb27SDimitry Andric cast<CallBase>(Call)->getCalledFunction()->getName()) 140506c3fb27SDimitry Andric .str(); 140606c3fb27SDimitry Andric } 140706c3fb27SDimitry Andric 140806c3fb27SDimitry Andric std::string IndexCallsiteContextGraph::getLabel(const FunctionSummary *Func, 140906c3fb27SDimitry Andric const IndexCall &Call, 141006c3fb27SDimitry Andric unsigned CloneNo) const { 141106c3fb27SDimitry Andric auto VI = FSToVIMap.find(Func); 141206c3fb27SDimitry Andric assert(VI != FSToVIMap.end()); 141306c3fb27SDimitry Andric if (isa<AllocInfo *>(Call.getBase())) 141406c3fb27SDimitry Andric return (VI->second.name() + " -> alloc").str(); 141506c3fb27SDimitry Andric else { 141606c3fb27SDimitry Andric auto *Callsite = dyn_cast_if_present<CallsiteInfo *>(Call.getBase()); 141706c3fb27SDimitry Andric return (VI->second.name() + " -> " + 141806c3fb27SDimitry Andric getMemProfFuncName(Callsite->Callee.name(), 141906c3fb27SDimitry Andric Callsite->Clones[CloneNo])) 142006c3fb27SDimitry Andric .str(); 142106c3fb27SDimitry Andric } 142206c3fb27SDimitry Andric } 142306c3fb27SDimitry Andric 142406c3fb27SDimitry Andric std::vector<uint64_t> 142506c3fb27SDimitry Andric ModuleCallsiteContextGraph::getStackIdsWithContextNodesForCall( 142606c3fb27SDimitry Andric Instruction *Call) { 142706c3fb27SDimitry Andric CallStack<MDNode, MDNode::op_iterator> CallsiteContext( 142806c3fb27SDimitry Andric Call->getMetadata(LLVMContext::MD_callsite)); 142906c3fb27SDimitry Andric return getStackIdsWithContextNodes<MDNode, MDNode::op_iterator>( 143006c3fb27SDimitry Andric CallsiteContext); 143106c3fb27SDimitry Andric } 143206c3fb27SDimitry Andric 143306c3fb27SDimitry Andric std::vector<uint64_t> 143406c3fb27SDimitry Andric IndexCallsiteContextGraph::getStackIdsWithContextNodesForCall(IndexCall &Call) { 143506c3fb27SDimitry Andric assert(isa<CallsiteInfo *>(Call.getBase())); 143606c3fb27SDimitry Andric CallStack<CallsiteInfo, SmallVector<unsigned>::const_iterator> 143706c3fb27SDimitry Andric CallsiteContext(dyn_cast_if_present<CallsiteInfo *>(Call.getBase())); 143806c3fb27SDimitry Andric return getStackIdsWithContextNodes<CallsiteInfo, 143906c3fb27SDimitry Andric SmallVector<unsigned>::const_iterator>( 144006c3fb27SDimitry Andric CallsiteContext); 144106c3fb27SDimitry Andric } 144206c3fb27SDimitry Andric 144306c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 144406c3fb27SDimitry Andric template <class NodeT, class IteratorT> 144506c3fb27SDimitry Andric std::vector<uint64_t> 144606c3fb27SDimitry Andric CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getStackIdsWithContextNodes( 144706c3fb27SDimitry Andric CallStack<NodeT, IteratorT> &CallsiteContext) { 144806c3fb27SDimitry Andric std::vector<uint64_t> StackIds; 144906c3fb27SDimitry Andric for (auto IdOrIndex : CallsiteContext) { 145006c3fb27SDimitry Andric auto StackId = getStackId(IdOrIndex); 145106c3fb27SDimitry Andric ContextNode *Node = getNodeForStackId(StackId); 145206c3fb27SDimitry Andric if (!Node) 145306c3fb27SDimitry Andric break; 145406c3fb27SDimitry Andric StackIds.push_back(StackId); 145506c3fb27SDimitry Andric } 145606c3fb27SDimitry Andric return StackIds; 145706c3fb27SDimitry Andric } 145806c3fb27SDimitry Andric 145906c3fb27SDimitry Andric ModuleCallsiteContextGraph::ModuleCallsiteContextGraph( 146006c3fb27SDimitry Andric Module &M, function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter) 146106c3fb27SDimitry Andric : Mod(M), OREGetter(OREGetter) { 146206c3fb27SDimitry Andric for (auto &F : M) { 146306c3fb27SDimitry Andric std::vector<CallInfo> CallsWithMetadata; 146406c3fb27SDimitry Andric for (auto &BB : F) { 146506c3fb27SDimitry Andric for (auto &I : BB) { 146606c3fb27SDimitry Andric if (!isa<CallBase>(I)) 146706c3fb27SDimitry Andric continue; 146806c3fb27SDimitry Andric if (auto *MemProfMD = I.getMetadata(LLVMContext::MD_memprof)) { 146906c3fb27SDimitry Andric CallsWithMetadata.push_back(&I); 147006c3fb27SDimitry Andric auto *AllocNode = addAllocNode(&I, &F); 147106c3fb27SDimitry Andric auto *CallsiteMD = I.getMetadata(LLVMContext::MD_callsite); 147206c3fb27SDimitry Andric assert(CallsiteMD); 147306c3fb27SDimitry Andric CallStack<MDNode, MDNode::op_iterator> CallsiteContext(CallsiteMD); 147406c3fb27SDimitry Andric // Add all of the MIBs and their stack nodes. 147506c3fb27SDimitry Andric for (auto &MDOp : MemProfMD->operands()) { 147606c3fb27SDimitry Andric auto *MIBMD = cast<const MDNode>(MDOp); 147706c3fb27SDimitry Andric MDNode *StackNode = getMIBStackNode(MIBMD); 147806c3fb27SDimitry Andric assert(StackNode); 147906c3fb27SDimitry Andric CallStack<MDNode, MDNode::op_iterator> StackContext(StackNode); 148006c3fb27SDimitry Andric addStackNodesForMIB<MDNode, MDNode::op_iterator>( 148106c3fb27SDimitry Andric AllocNode, StackContext, CallsiteContext, 148206c3fb27SDimitry Andric getMIBAllocType(MIBMD)); 148306c3fb27SDimitry Andric } 148406c3fb27SDimitry Andric assert(AllocNode->AllocTypes != (uint8_t)AllocationType::None); 148506c3fb27SDimitry Andric // Memprof and callsite metadata on memory allocations no longer 148606c3fb27SDimitry Andric // needed. 148706c3fb27SDimitry Andric I.setMetadata(LLVMContext::MD_memprof, nullptr); 148806c3fb27SDimitry Andric I.setMetadata(LLVMContext::MD_callsite, nullptr); 148906c3fb27SDimitry Andric } 149006c3fb27SDimitry Andric // For callsite metadata, add to list for this function for later use. 149106c3fb27SDimitry Andric else if (I.getMetadata(LLVMContext::MD_callsite)) 149206c3fb27SDimitry Andric CallsWithMetadata.push_back(&I); 149306c3fb27SDimitry Andric } 149406c3fb27SDimitry Andric } 149506c3fb27SDimitry Andric if (!CallsWithMetadata.empty()) 149606c3fb27SDimitry Andric FuncToCallsWithMetadata.push_back({&F, CallsWithMetadata}); 149706c3fb27SDimitry Andric } 149806c3fb27SDimitry Andric 149906c3fb27SDimitry Andric if (DumpCCG) { 150006c3fb27SDimitry Andric dbgs() << "CCG before updating call stack chains:\n"; 150106c3fb27SDimitry Andric dbgs() << *this; 150206c3fb27SDimitry Andric } 150306c3fb27SDimitry Andric 150406c3fb27SDimitry Andric if (ExportToDot) 150506c3fb27SDimitry Andric exportToDot("prestackupdate"); 150606c3fb27SDimitry Andric 150706c3fb27SDimitry Andric updateStackNodes(); 150806c3fb27SDimitry Andric 150906c3fb27SDimitry Andric handleCallsitesWithMultipleTargets(); 151006c3fb27SDimitry Andric 151106c3fb27SDimitry Andric // Strip off remaining callsite metadata, no longer needed. 151206c3fb27SDimitry Andric for (auto &FuncEntry : FuncToCallsWithMetadata) 151306c3fb27SDimitry Andric for (auto &Call : FuncEntry.second) 151406c3fb27SDimitry Andric Call.call()->setMetadata(LLVMContext::MD_callsite, nullptr); 151506c3fb27SDimitry Andric } 151606c3fb27SDimitry Andric 151706c3fb27SDimitry Andric IndexCallsiteContextGraph::IndexCallsiteContextGraph( 151806c3fb27SDimitry Andric ModuleSummaryIndex &Index, 151906c3fb27SDimitry Andric function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)> 152006c3fb27SDimitry Andric isPrevailing) 152106c3fb27SDimitry Andric : Index(Index) { 152206c3fb27SDimitry Andric for (auto &I : Index) { 152306c3fb27SDimitry Andric auto VI = Index.getValueInfo(I); 152406c3fb27SDimitry Andric for (auto &S : VI.getSummaryList()) { 152506c3fb27SDimitry Andric // We should only add the prevailing nodes. Otherwise we may try to clone 152606c3fb27SDimitry Andric // in a weak copy that won't be linked (and may be different than the 152706c3fb27SDimitry Andric // prevailing version). 152806c3fb27SDimitry Andric // We only keep the memprof summary on the prevailing copy now when 152906c3fb27SDimitry Andric // building the combined index, as a space optimization, however don't 153006c3fb27SDimitry Andric // rely on this optimization. The linker doesn't resolve local linkage 153106c3fb27SDimitry Andric // values so don't check whether those are prevailing. 153206c3fb27SDimitry Andric if (!GlobalValue::isLocalLinkage(S->linkage()) && 153306c3fb27SDimitry Andric !isPrevailing(VI.getGUID(), S.get())) 153406c3fb27SDimitry Andric continue; 153506c3fb27SDimitry Andric auto *FS = dyn_cast<FunctionSummary>(S.get()); 153606c3fb27SDimitry Andric if (!FS) 153706c3fb27SDimitry Andric continue; 153806c3fb27SDimitry Andric std::vector<CallInfo> CallsWithMetadata; 153906c3fb27SDimitry Andric if (!FS->allocs().empty()) { 154006c3fb27SDimitry Andric for (auto &AN : FS->mutableAllocs()) { 154106c3fb27SDimitry Andric // This can happen because of recursion elimination handling that 154206c3fb27SDimitry Andric // currently exists in ModuleSummaryAnalysis. Skip these for now. 154306c3fb27SDimitry Andric // We still added them to the summary because we need to be able to 154406c3fb27SDimitry Andric // correlate properly in applyImport in the backends. 154506c3fb27SDimitry Andric if (AN.MIBs.empty()) 154606c3fb27SDimitry Andric continue; 154706c3fb27SDimitry Andric CallsWithMetadata.push_back({&AN}); 154806c3fb27SDimitry Andric auto *AllocNode = addAllocNode({&AN}, FS); 154906c3fb27SDimitry Andric // Pass an empty CallStack to the CallsiteContext (second) 155006c3fb27SDimitry Andric // parameter, since for ThinLTO we already collapsed out the inlined 155106c3fb27SDimitry Andric // stack ids on the allocation call during ModuleSummaryAnalysis. 155206c3fb27SDimitry Andric CallStack<MIBInfo, SmallVector<unsigned>::const_iterator> 155306c3fb27SDimitry Andric EmptyContext; 155406c3fb27SDimitry Andric // Now add all of the MIBs and their stack nodes. 155506c3fb27SDimitry Andric for (auto &MIB : AN.MIBs) { 155606c3fb27SDimitry Andric CallStack<MIBInfo, SmallVector<unsigned>::const_iterator> 155706c3fb27SDimitry Andric StackContext(&MIB); 155806c3fb27SDimitry Andric addStackNodesForMIB<MIBInfo, SmallVector<unsigned>::const_iterator>( 155906c3fb27SDimitry Andric AllocNode, StackContext, EmptyContext, MIB.AllocType); 156006c3fb27SDimitry Andric } 156106c3fb27SDimitry Andric assert(AllocNode->AllocTypes != (uint8_t)AllocationType::None); 156206c3fb27SDimitry Andric // Initialize version 0 on the summary alloc node to the current alloc 156306c3fb27SDimitry Andric // type, unless it has both types in which case make it default, so 156406c3fb27SDimitry Andric // that in the case where we aren't able to clone the original version 156506c3fb27SDimitry Andric // always ends up with the default allocation behavior. 156606c3fb27SDimitry Andric AN.Versions[0] = (uint8_t)allocTypeToUse(AllocNode->AllocTypes); 156706c3fb27SDimitry Andric } 156806c3fb27SDimitry Andric } 156906c3fb27SDimitry Andric // For callsite metadata, add to list for this function for later use. 157006c3fb27SDimitry Andric if (!FS->callsites().empty()) 157106c3fb27SDimitry Andric for (auto &SN : FS->mutableCallsites()) 157206c3fb27SDimitry Andric CallsWithMetadata.push_back({&SN}); 157306c3fb27SDimitry Andric 157406c3fb27SDimitry Andric if (!CallsWithMetadata.empty()) 157506c3fb27SDimitry Andric FuncToCallsWithMetadata.push_back({FS, CallsWithMetadata}); 157606c3fb27SDimitry Andric 157706c3fb27SDimitry Andric if (!FS->allocs().empty() || !FS->callsites().empty()) 157806c3fb27SDimitry Andric FSToVIMap[FS] = VI; 157906c3fb27SDimitry Andric } 158006c3fb27SDimitry Andric } 158106c3fb27SDimitry Andric 158206c3fb27SDimitry Andric if (DumpCCG) { 158306c3fb27SDimitry Andric dbgs() << "CCG before updating call stack chains:\n"; 158406c3fb27SDimitry Andric dbgs() << *this; 158506c3fb27SDimitry Andric } 158606c3fb27SDimitry Andric 158706c3fb27SDimitry Andric if (ExportToDot) 158806c3fb27SDimitry Andric exportToDot("prestackupdate"); 158906c3fb27SDimitry Andric 159006c3fb27SDimitry Andric updateStackNodes(); 159106c3fb27SDimitry Andric 159206c3fb27SDimitry Andric handleCallsitesWithMultipleTargets(); 159306c3fb27SDimitry Andric } 159406c3fb27SDimitry Andric 159506c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 159606c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, 159706c3fb27SDimitry Andric CallTy>::handleCallsitesWithMultipleTargets() { 159806c3fb27SDimitry Andric // Look for and workaround callsites that call multiple functions. 159906c3fb27SDimitry Andric // This can happen for indirect calls, which needs better handling, and in 160006c3fb27SDimitry Andric // more rare cases (e.g. macro expansion). 160106c3fb27SDimitry Andric // TODO: To fix this for indirect calls we will want to perform speculative 160206c3fb27SDimitry Andric // devirtualization using either the normal PGO info with ICP, or using the 160306c3fb27SDimitry Andric // information in the profiled MemProf contexts. We can do this prior to 160406c3fb27SDimitry Andric // this transformation for regular LTO, and for ThinLTO we can simulate that 160506c3fb27SDimitry Andric // effect in the summary and perform the actual speculative devirtualization 160606c3fb27SDimitry Andric // while cloning in the ThinLTO backend. 160706c3fb27SDimitry Andric for (auto Entry = NonAllocationCallToContextNodeMap.begin(); 160806c3fb27SDimitry Andric Entry != NonAllocationCallToContextNodeMap.end();) { 160906c3fb27SDimitry Andric auto *Node = Entry->second; 161006c3fb27SDimitry Andric assert(Node->Clones.empty()); 161106c3fb27SDimitry Andric // Check all node callees and see if in the same function. 161206c3fb27SDimitry Andric bool Removed = false; 161306c3fb27SDimitry Andric auto Call = Node->Call.call(); 161406c3fb27SDimitry Andric for (auto &Edge : Node->CalleeEdges) { 161506c3fb27SDimitry Andric if (!Edge->Callee->hasCall()) 161606c3fb27SDimitry Andric continue; 161706c3fb27SDimitry Andric assert(NodeToCallingFunc.count(Edge->Callee)); 161806c3fb27SDimitry Andric // Check if the called function matches that of the callee node. 161906c3fb27SDimitry Andric if (calleeMatchesFunc(Call, NodeToCallingFunc[Edge->Callee])) 162006c3fb27SDimitry Andric continue; 162106c3fb27SDimitry Andric // Work around by setting Node to have a null call, so it gets 162206c3fb27SDimitry Andric // skipped during cloning. Otherwise assignFunctions will assert 162306c3fb27SDimitry Andric // because its data structures are not designed to handle this case. 162406c3fb27SDimitry Andric Entry = NonAllocationCallToContextNodeMap.erase(Entry); 162506c3fb27SDimitry Andric Node->setCall(CallInfo()); 162606c3fb27SDimitry Andric Removed = true; 162706c3fb27SDimitry Andric break; 162806c3fb27SDimitry Andric } 162906c3fb27SDimitry Andric if (!Removed) 163006c3fb27SDimitry Andric Entry++; 163106c3fb27SDimitry Andric } 163206c3fb27SDimitry Andric } 163306c3fb27SDimitry Andric 163406c3fb27SDimitry Andric uint64_t ModuleCallsiteContextGraph::getStackId(uint64_t IdOrIndex) const { 163506c3fb27SDimitry Andric // In the Module (IR) case this is already the Id. 163606c3fb27SDimitry Andric return IdOrIndex; 163706c3fb27SDimitry Andric } 163806c3fb27SDimitry Andric 163906c3fb27SDimitry Andric uint64_t IndexCallsiteContextGraph::getStackId(uint64_t IdOrIndex) const { 164006c3fb27SDimitry Andric // In the Index case this is an index into the stack id list in the summary 164106c3fb27SDimitry Andric // index, convert it to an Id. 164206c3fb27SDimitry Andric return Index.getStackIdAtIndex(IdOrIndex); 164306c3fb27SDimitry Andric } 164406c3fb27SDimitry Andric 164506c3fb27SDimitry Andric bool ModuleCallsiteContextGraph::calleeMatchesFunc(Instruction *Call, 164606c3fb27SDimitry Andric const Function *Func) { 164706c3fb27SDimitry Andric auto *CB = dyn_cast<CallBase>(Call); 164806c3fb27SDimitry Andric if (!CB->getCalledOperand()) 164906c3fb27SDimitry Andric return false; 165006c3fb27SDimitry Andric auto *CalleeVal = CB->getCalledOperand()->stripPointerCasts(); 165106c3fb27SDimitry Andric auto *CalleeFunc = dyn_cast<Function>(CalleeVal); 165206c3fb27SDimitry Andric if (CalleeFunc == Func) 165306c3fb27SDimitry Andric return true; 165406c3fb27SDimitry Andric auto *Alias = dyn_cast<GlobalAlias>(CalleeVal); 165506c3fb27SDimitry Andric return Alias && Alias->getAliasee() == Func; 165606c3fb27SDimitry Andric } 165706c3fb27SDimitry Andric 165806c3fb27SDimitry Andric bool IndexCallsiteContextGraph::calleeMatchesFunc(IndexCall &Call, 165906c3fb27SDimitry Andric const FunctionSummary *Func) { 166006c3fb27SDimitry Andric ValueInfo Callee = 166106c3fb27SDimitry Andric dyn_cast_if_present<CallsiteInfo *>(Call.getBase())->Callee; 166206c3fb27SDimitry Andric // If there is no summary list then this is a call to an externally defined 166306c3fb27SDimitry Andric // symbol. 166406c3fb27SDimitry Andric AliasSummary *Alias = 166506c3fb27SDimitry Andric Callee.getSummaryList().empty() 166606c3fb27SDimitry Andric ? nullptr 166706c3fb27SDimitry Andric : dyn_cast<AliasSummary>(Callee.getSummaryList()[0].get()); 166806c3fb27SDimitry Andric assert(FSToVIMap.count(Func)); 166906c3fb27SDimitry Andric return Callee == FSToVIMap[Func] || 167006c3fb27SDimitry Andric // If callee is an alias, check the aliasee, since only function 167106c3fb27SDimitry Andric // summary base objects will contain the stack node summaries and thus 167206c3fb27SDimitry Andric // get a context node. 167306c3fb27SDimitry Andric (Alias && Alias->getAliaseeVI() == FSToVIMap[Func]); 167406c3fb27SDimitry Andric } 167506c3fb27SDimitry Andric 167606c3fb27SDimitry Andric static std::string getAllocTypeString(uint8_t AllocTypes) { 167706c3fb27SDimitry Andric if (!AllocTypes) 167806c3fb27SDimitry Andric return "None"; 167906c3fb27SDimitry Andric std::string Str; 168006c3fb27SDimitry Andric if (AllocTypes & (uint8_t)AllocationType::NotCold) 168106c3fb27SDimitry Andric Str += "NotCold"; 168206c3fb27SDimitry Andric if (AllocTypes & (uint8_t)AllocationType::Cold) 168306c3fb27SDimitry Andric Str += "Cold"; 168406c3fb27SDimitry Andric return Str; 168506c3fb27SDimitry Andric } 168606c3fb27SDimitry Andric 168706c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 168806c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::dump() 168906c3fb27SDimitry Andric const { 169006c3fb27SDimitry Andric print(dbgs()); 169106c3fb27SDimitry Andric dbgs() << "\n"; 169206c3fb27SDimitry Andric } 169306c3fb27SDimitry Andric 169406c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 169506c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::print( 169606c3fb27SDimitry Andric raw_ostream &OS) const { 169706c3fb27SDimitry Andric OS << "Node " << this << "\n"; 169806c3fb27SDimitry Andric OS << "\t"; 169906c3fb27SDimitry Andric printCall(OS); 170006c3fb27SDimitry Andric if (Recursive) 170106c3fb27SDimitry Andric OS << " (recursive)"; 170206c3fb27SDimitry Andric OS << "\n"; 170306c3fb27SDimitry Andric OS << "\tAllocTypes: " << getAllocTypeString(AllocTypes) << "\n"; 170406c3fb27SDimitry Andric OS << "\tContextIds:"; 170506c3fb27SDimitry Andric std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end()); 170606c3fb27SDimitry Andric std::sort(SortedIds.begin(), SortedIds.end()); 170706c3fb27SDimitry Andric for (auto Id : SortedIds) 170806c3fb27SDimitry Andric OS << " " << Id; 170906c3fb27SDimitry Andric OS << "\n"; 171006c3fb27SDimitry Andric OS << "\tCalleeEdges:\n"; 171106c3fb27SDimitry Andric for (auto &Edge : CalleeEdges) 171206c3fb27SDimitry Andric OS << "\t\t" << *Edge << "\n"; 171306c3fb27SDimitry Andric OS << "\tCallerEdges:\n"; 171406c3fb27SDimitry Andric for (auto &Edge : CallerEdges) 171506c3fb27SDimitry Andric OS << "\t\t" << *Edge << "\n"; 171606c3fb27SDimitry Andric if (!Clones.empty()) { 171706c3fb27SDimitry Andric OS << "\tClones: "; 171806c3fb27SDimitry Andric FieldSeparator FS; 171906c3fb27SDimitry Andric for (auto *Clone : Clones) 172006c3fb27SDimitry Andric OS << FS << Clone; 172106c3fb27SDimitry Andric OS << "\n"; 172206c3fb27SDimitry Andric } else if (CloneOf) { 172306c3fb27SDimitry Andric OS << "\tClone of " << CloneOf << "\n"; 172406c3fb27SDimitry Andric } 172506c3fb27SDimitry Andric } 172606c3fb27SDimitry Andric 172706c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 172806c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::dump() 172906c3fb27SDimitry Andric const { 173006c3fb27SDimitry Andric print(dbgs()); 173106c3fb27SDimitry Andric dbgs() << "\n"; 173206c3fb27SDimitry Andric } 173306c3fb27SDimitry Andric 173406c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 173506c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::print( 173606c3fb27SDimitry Andric raw_ostream &OS) const { 173706c3fb27SDimitry Andric OS << "Edge from Callee " << Callee << " to Caller: " << Caller 173806c3fb27SDimitry Andric << " AllocTypes: " << getAllocTypeString(AllocTypes); 173906c3fb27SDimitry Andric OS << " ContextIds:"; 174006c3fb27SDimitry Andric std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end()); 174106c3fb27SDimitry Andric std::sort(SortedIds.begin(), SortedIds.end()); 174206c3fb27SDimitry Andric for (auto Id : SortedIds) 174306c3fb27SDimitry Andric OS << " " << Id; 174406c3fb27SDimitry Andric } 174506c3fb27SDimitry Andric 174606c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 174706c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::dump() const { 174806c3fb27SDimitry Andric print(dbgs()); 174906c3fb27SDimitry Andric } 175006c3fb27SDimitry Andric 175106c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 175206c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::print( 175306c3fb27SDimitry Andric raw_ostream &OS) const { 175406c3fb27SDimitry Andric OS << "Callsite Context Graph:\n"; 175506c3fb27SDimitry Andric using GraphType = const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *; 175606c3fb27SDimitry Andric for (const auto Node : nodes<GraphType>(this)) { 175706c3fb27SDimitry Andric if (Node->isRemoved()) 175806c3fb27SDimitry Andric continue; 175906c3fb27SDimitry Andric Node->print(OS); 176006c3fb27SDimitry Andric OS << "\n"; 176106c3fb27SDimitry Andric } 176206c3fb27SDimitry Andric } 176306c3fb27SDimitry Andric 176406c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 176506c3fb27SDimitry Andric static void checkEdge( 176606c3fb27SDimitry Andric const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &Edge) { 176706c3fb27SDimitry Andric // Confirm that alloc type is not None and that we have at least one context 176806c3fb27SDimitry Andric // id. 176906c3fb27SDimitry Andric assert(Edge->AllocTypes != (uint8_t)AllocationType::None); 177006c3fb27SDimitry Andric assert(!Edge->ContextIds.empty()); 177106c3fb27SDimitry Andric } 177206c3fb27SDimitry Andric 177306c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 177406c3fb27SDimitry Andric static void checkNode(const ContextNode<DerivedCCG, FuncTy, CallTy> *Node, 177506c3fb27SDimitry Andric bool CheckEdges = true) { 177606c3fb27SDimitry Andric if (Node->isRemoved()) 177706c3fb27SDimitry Andric return; 177806c3fb27SDimitry Andric // Node's context ids should be the union of both its callee and caller edge 177906c3fb27SDimitry Andric // context ids. 178006c3fb27SDimitry Andric if (Node->CallerEdges.size()) { 178106c3fb27SDimitry Andric auto EI = Node->CallerEdges.begin(); 178206c3fb27SDimitry Andric auto &FirstEdge = *EI; 178306c3fb27SDimitry Andric EI++; 178406c3fb27SDimitry Andric DenseSet<uint32_t> CallerEdgeContextIds(FirstEdge->ContextIds); 178506c3fb27SDimitry Andric for (; EI != Node->CallerEdges.end(); EI++) { 178606c3fb27SDimitry Andric const auto &Edge = *EI; 178706c3fb27SDimitry Andric if (CheckEdges) 178806c3fb27SDimitry Andric checkEdge<DerivedCCG, FuncTy, CallTy>(Edge); 178906c3fb27SDimitry Andric set_union(CallerEdgeContextIds, Edge->ContextIds); 179006c3fb27SDimitry Andric } 179106c3fb27SDimitry Andric // Node can have more context ids than callers if some contexts terminate at 179206c3fb27SDimitry Andric // node and some are longer. 179306c3fb27SDimitry Andric assert(Node->ContextIds == CallerEdgeContextIds || 179406c3fb27SDimitry Andric set_is_subset(CallerEdgeContextIds, Node->ContextIds)); 179506c3fb27SDimitry Andric } 179606c3fb27SDimitry Andric if (Node->CalleeEdges.size()) { 179706c3fb27SDimitry Andric auto EI = Node->CalleeEdges.begin(); 179806c3fb27SDimitry Andric auto &FirstEdge = *EI; 179906c3fb27SDimitry Andric EI++; 180006c3fb27SDimitry Andric DenseSet<uint32_t> CalleeEdgeContextIds(FirstEdge->ContextIds); 180106c3fb27SDimitry Andric for (; EI != Node->CalleeEdges.end(); EI++) { 180206c3fb27SDimitry Andric const auto &Edge = *EI; 180306c3fb27SDimitry Andric if (CheckEdges) 180406c3fb27SDimitry Andric checkEdge<DerivedCCG, FuncTy, CallTy>(Edge); 180506c3fb27SDimitry Andric set_union(CalleeEdgeContextIds, Edge->ContextIds); 180606c3fb27SDimitry Andric } 180706c3fb27SDimitry Andric assert(Node->ContextIds == CalleeEdgeContextIds); 180806c3fb27SDimitry Andric } 180906c3fb27SDimitry Andric } 181006c3fb27SDimitry Andric 181106c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 181206c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::check() const { 181306c3fb27SDimitry Andric using GraphType = const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *; 181406c3fb27SDimitry Andric for (const auto Node : nodes<GraphType>(this)) { 181506c3fb27SDimitry Andric checkNode<DerivedCCG, FuncTy, CallTy>(Node, /*CheckEdges=*/false); 181606c3fb27SDimitry Andric for (auto &Edge : Node->CallerEdges) 181706c3fb27SDimitry Andric checkEdge<DerivedCCG, FuncTy, CallTy>(Edge); 181806c3fb27SDimitry Andric } 181906c3fb27SDimitry Andric } 182006c3fb27SDimitry Andric 182106c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 182206c3fb27SDimitry Andric struct GraphTraits<const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *> { 182306c3fb27SDimitry Andric using GraphType = const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *; 182406c3fb27SDimitry Andric using NodeRef = const ContextNode<DerivedCCG, FuncTy, CallTy> *; 182506c3fb27SDimitry Andric 182606c3fb27SDimitry Andric using NodePtrTy = std::unique_ptr<ContextNode<DerivedCCG, FuncTy, CallTy>>; 182706c3fb27SDimitry Andric static NodeRef getNode(const NodePtrTy &P) { return P.get(); } 182806c3fb27SDimitry Andric 182906c3fb27SDimitry Andric using nodes_iterator = 183006c3fb27SDimitry Andric mapped_iterator<typename std::vector<NodePtrTy>::const_iterator, 183106c3fb27SDimitry Andric decltype(&getNode)>; 183206c3fb27SDimitry Andric 183306c3fb27SDimitry Andric static nodes_iterator nodes_begin(GraphType G) { 183406c3fb27SDimitry Andric return nodes_iterator(G->NodeOwner.begin(), &getNode); 183506c3fb27SDimitry Andric } 183606c3fb27SDimitry Andric 183706c3fb27SDimitry Andric static nodes_iterator nodes_end(GraphType G) { 183806c3fb27SDimitry Andric return nodes_iterator(G->NodeOwner.end(), &getNode); 183906c3fb27SDimitry Andric } 184006c3fb27SDimitry Andric 184106c3fb27SDimitry Andric static NodeRef getEntryNode(GraphType G) { 184206c3fb27SDimitry Andric return G->NodeOwner.begin()->get(); 184306c3fb27SDimitry Andric } 184406c3fb27SDimitry Andric 184506c3fb27SDimitry Andric using EdgePtrTy = std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>; 184606c3fb27SDimitry Andric static const ContextNode<DerivedCCG, FuncTy, CallTy> * 184706c3fb27SDimitry Andric GetCallee(const EdgePtrTy &P) { 184806c3fb27SDimitry Andric return P->Callee; 184906c3fb27SDimitry Andric } 185006c3fb27SDimitry Andric 185106c3fb27SDimitry Andric using ChildIteratorType = 185206c3fb27SDimitry Andric mapped_iterator<typename std::vector<std::shared_ptr<ContextEdge< 185306c3fb27SDimitry Andric DerivedCCG, FuncTy, CallTy>>>::const_iterator, 185406c3fb27SDimitry Andric decltype(&GetCallee)>; 185506c3fb27SDimitry Andric 185606c3fb27SDimitry Andric static ChildIteratorType child_begin(NodeRef N) { 185706c3fb27SDimitry Andric return ChildIteratorType(N->CalleeEdges.begin(), &GetCallee); 185806c3fb27SDimitry Andric } 185906c3fb27SDimitry Andric 186006c3fb27SDimitry Andric static ChildIteratorType child_end(NodeRef N) { 186106c3fb27SDimitry Andric return ChildIteratorType(N->CalleeEdges.end(), &GetCallee); 186206c3fb27SDimitry Andric } 186306c3fb27SDimitry Andric }; 186406c3fb27SDimitry Andric 186506c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 186606c3fb27SDimitry Andric struct DOTGraphTraits<const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *> 186706c3fb27SDimitry Andric : public DefaultDOTGraphTraits { 186806c3fb27SDimitry Andric DOTGraphTraits(bool IsSimple = false) : DefaultDOTGraphTraits(IsSimple) {} 186906c3fb27SDimitry Andric 187006c3fb27SDimitry Andric using GraphType = const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *; 187106c3fb27SDimitry Andric using GTraits = GraphTraits<GraphType>; 187206c3fb27SDimitry Andric using NodeRef = typename GTraits::NodeRef; 187306c3fb27SDimitry Andric using ChildIteratorType = typename GTraits::ChildIteratorType; 187406c3fb27SDimitry Andric 187506c3fb27SDimitry Andric static std::string getNodeLabel(NodeRef Node, GraphType G) { 187606c3fb27SDimitry Andric std::string LabelString = 187706c3fb27SDimitry Andric (Twine("OrigId: ") + (Node->IsAllocation ? "Alloc" : "") + 187806c3fb27SDimitry Andric Twine(Node->OrigStackOrAllocId)) 187906c3fb27SDimitry Andric .str(); 188006c3fb27SDimitry Andric LabelString += "\n"; 188106c3fb27SDimitry Andric if (Node->hasCall()) { 188206c3fb27SDimitry Andric auto Func = G->NodeToCallingFunc.find(Node); 188306c3fb27SDimitry Andric assert(Func != G->NodeToCallingFunc.end()); 188406c3fb27SDimitry Andric LabelString += 188506c3fb27SDimitry Andric G->getLabel(Func->second, Node->Call.call(), Node->Call.cloneNo()); 188606c3fb27SDimitry Andric } else { 188706c3fb27SDimitry Andric LabelString += "null call"; 188806c3fb27SDimitry Andric if (Node->Recursive) 188906c3fb27SDimitry Andric LabelString += " (recursive)"; 189006c3fb27SDimitry Andric else 189106c3fb27SDimitry Andric LabelString += " (external)"; 189206c3fb27SDimitry Andric } 189306c3fb27SDimitry Andric return LabelString; 189406c3fb27SDimitry Andric } 189506c3fb27SDimitry Andric 189606c3fb27SDimitry Andric static std::string getNodeAttributes(NodeRef Node, GraphType) { 189706c3fb27SDimitry Andric std::string AttributeString = (Twine("tooltip=\"") + getNodeId(Node) + " " + 189806c3fb27SDimitry Andric getContextIds(Node->ContextIds) + "\"") 189906c3fb27SDimitry Andric .str(); 190006c3fb27SDimitry Andric AttributeString += 190106c3fb27SDimitry Andric (Twine(",fillcolor=\"") + getColor(Node->AllocTypes) + "\"").str(); 190206c3fb27SDimitry Andric AttributeString += ",style=\"filled\""; 190306c3fb27SDimitry Andric if (Node->CloneOf) { 190406c3fb27SDimitry Andric AttributeString += ",color=\"blue\""; 190506c3fb27SDimitry Andric AttributeString += ",style=\"filled,bold,dashed\""; 190606c3fb27SDimitry Andric } else 190706c3fb27SDimitry Andric AttributeString += ",style=\"filled\""; 190806c3fb27SDimitry Andric return AttributeString; 190906c3fb27SDimitry Andric } 191006c3fb27SDimitry Andric 191106c3fb27SDimitry Andric static std::string getEdgeAttributes(NodeRef, ChildIteratorType ChildIter, 191206c3fb27SDimitry Andric GraphType) { 191306c3fb27SDimitry Andric auto &Edge = *(ChildIter.getCurrent()); 191406c3fb27SDimitry Andric return (Twine("tooltip=\"") + getContextIds(Edge->ContextIds) + "\"" + 191506c3fb27SDimitry Andric Twine(",fillcolor=\"") + getColor(Edge->AllocTypes) + "\"") 191606c3fb27SDimitry Andric .str(); 191706c3fb27SDimitry Andric } 191806c3fb27SDimitry Andric 191906c3fb27SDimitry Andric // Since the NodeOwners list includes nodes that are no longer connected to 192006c3fb27SDimitry Andric // the graph, skip them here. 192106c3fb27SDimitry Andric static bool isNodeHidden(NodeRef Node, GraphType) { 192206c3fb27SDimitry Andric return Node->isRemoved(); 192306c3fb27SDimitry Andric } 192406c3fb27SDimitry Andric 192506c3fb27SDimitry Andric private: 192606c3fb27SDimitry Andric static std::string getContextIds(const DenseSet<uint32_t> &ContextIds) { 192706c3fb27SDimitry Andric std::string IdString = "ContextIds:"; 192806c3fb27SDimitry Andric if (ContextIds.size() < 100) { 192906c3fb27SDimitry Andric std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end()); 193006c3fb27SDimitry Andric std::sort(SortedIds.begin(), SortedIds.end()); 193106c3fb27SDimitry Andric for (auto Id : SortedIds) 193206c3fb27SDimitry Andric IdString += (" " + Twine(Id)).str(); 193306c3fb27SDimitry Andric } else { 193406c3fb27SDimitry Andric IdString += (" (" + Twine(ContextIds.size()) + " ids)").str(); 193506c3fb27SDimitry Andric } 193606c3fb27SDimitry Andric return IdString; 193706c3fb27SDimitry Andric } 193806c3fb27SDimitry Andric 193906c3fb27SDimitry Andric static std::string getColor(uint8_t AllocTypes) { 194006c3fb27SDimitry Andric if (AllocTypes == (uint8_t)AllocationType::NotCold) 194106c3fb27SDimitry Andric // Color "brown1" actually looks like a lighter red. 194206c3fb27SDimitry Andric return "brown1"; 194306c3fb27SDimitry Andric if (AllocTypes == (uint8_t)AllocationType::Cold) 194406c3fb27SDimitry Andric return "cyan"; 194506c3fb27SDimitry Andric if (AllocTypes == 194606c3fb27SDimitry Andric ((uint8_t)AllocationType::NotCold | (uint8_t)AllocationType::Cold)) 194706c3fb27SDimitry Andric // Lighter purple. 194806c3fb27SDimitry Andric return "mediumorchid1"; 194906c3fb27SDimitry Andric return "gray"; 195006c3fb27SDimitry Andric } 195106c3fb27SDimitry Andric 195206c3fb27SDimitry Andric static std::string getNodeId(NodeRef Node) { 195306c3fb27SDimitry Andric std::stringstream SStream; 195406c3fb27SDimitry Andric SStream << std::hex << "N0x" << (unsigned long long)Node; 195506c3fb27SDimitry Andric std::string Result = SStream.str(); 195606c3fb27SDimitry Andric return Result; 195706c3fb27SDimitry Andric } 195806c3fb27SDimitry Andric }; 195906c3fb27SDimitry Andric 196006c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 196106c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::exportToDot( 196206c3fb27SDimitry Andric std::string Label) const { 196306c3fb27SDimitry Andric WriteGraph(this, "", false, Label, 196406c3fb27SDimitry Andric DotFilePathPrefix + "ccg." + Label + ".dot"); 196506c3fb27SDimitry Andric } 196606c3fb27SDimitry Andric 196706c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 196806c3fb27SDimitry Andric typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode * 196906c3fb27SDimitry Andric CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::moveEdgeToNewCalleeClone( 197006c3fb27SDimitry Andric const std::shared_ptr<ContextEdge> &Edge, EdgeIter *CallerEdgeI) { 197106c3fb27SDimitry Andric ContextNode *Node = Edge->Callee; 197206c3fb27SDimitry Andric NodeOwner.push_back( 197306c3fb27SDimitry Andric std::make_unique<ContextNode>(Node->IsAllocation, Node->Call)); 197406c3fb27SDimitry Andric ContextNode *Clone = NodeOwner.back().get(); 197506c3fb27SDimitry Andric Node->addClone(Clone); 197606c3fb27SDimitry Andric assert(NodeToCallingFunc.count(Node)); 197706c3fb27SDimitry Andric NodeToCallingFunc[Clone] = NodeToCallingFunc[Node]; 197806c3fb27SDimitry Andric moveEdgeToExistingCalleeClone(Edge, Clone, CallerEdgeI, /*NewClone=*/true); 197906c3fb27SDimitry Andric return Clone; 198006c3fb27SDimitry Andric } 198106c3fb27SDimitry Andric 198206c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 198306c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>:: 198406c3fb27SDimitry Andric moveEdgeToExistingCalleeClone(const std::shared_ptr<ContextEdge> &Edge, 198506c3fb27SDimitry Andric ContextNode *NewCallee, EdgeIter *CallerEdgeI, 198606c3fb27SDimitry Andric bool NewClone) { 198706c3fb27SDimitry Andric // NewCallee and Edge's current callee must be clones of the same original 198806c3fb27SDimitry Andric // node (Edge's current callee may be the original node too). 198906c3fb27SDimitry Andric assert(NewCallee->getOrigNode() == Edge->Callee->getOrigNode()); 199006c3fb27SDimitry Andric auto &EdgeContextIds = Edge->getContextIds(); 199106c3fb27SDimitry Andric ContextNode *OldCallee = Edge->Callee; 199206c3fb27SDimitry Andric if (CallerEdgeI) 199306c3fb27SDimitry Andric *CallerEdgeI = OldCallee->CallerEdges.erase(*CallerEdgeI); 199406c3fb27SDimitry Andric else 199506c3fb27SDimitry Andric OldCallee->eraseCallerEdge(Edge.get()); 199606c3fb27SDimitry Andric Edge->Callee = NewCallee; 199706c3fb27SDimitry Andric NewCallee->CallerEdges.push_back(Edge); 199806c3fb27SDimitry Andric // Don't need to update Edge's context ids since we are simply reconnecting 199906c3fb27SDimitry Andric // it. 200006c3fb27SDimitry Andric set_subtract(OldCallee->ContextIds, EdgeContextIds); 200106c3fb27SDimitry Andric NewCallee->ContextIds.insert(EdgeContextIds.begin(), EdgeContextIds.end()); 200206c3fb27SDimitry Andric NewCallee->AllocTypes |= Edge->AllocTypes; 200306c3fb27SDimitry Andric OldCallee->AllocTypes = computeAllocType(OldCallee->ContextIds); 200406c3fb27SDimitry Andric // OldCallee alloc type should be None iff its context id set is now empty. 200506c3fb27SDimitry Andric assert((OldCallee->AllocTypes == (uint8_t)AllocationType::None) == 200606c3fb27SDimitry Andric OldCallee->ContextIds.empty()); 200706c3fb27SDimitry Andric // Now walk the old callee node's callee edges and move Edge's context ids 200806c3fb27SDimitry Andric // over to the corresponding edge into the clone (which is created here if 200906c3fb27SDimitry Andric // this is a newly created clone). 201006c3fb27SDimitry Andric for (auto &OldCalleeEdge : OldCallee->CalleeEdges) { 201106c3fb27SDimitry Andric // The context ids moving to the new callee are the subset of this edge's 201206c3fb27SDimitry Andric // context ids and the context ids on the caller edge being moved. 201306c3fb27SDimitry Andric DenseSet<uint32_t> EdgeContextIdsToMove = 201406c3fb27SDimitry Andric set_intersection(OldCalleeEdge->getContextIds(), EdgeContextIds); 201506c3fb27SDimitry Andric set_subtract(OldCalleeEdge->getContextIds(), EdgeContextIdsToMove); 201606c3fb27SDimitry Andric OldCalleeEdge->AllocTypes = 201706c3fb27SDimitry Andric computeAllocType(OldCalleeEdge->getContextIds()); 201806c3fb27SDimitry Andric if (!NewClone) { 201906c3fb27SDimitry Andric // Update context ids / alloc type on corresponding edge to NewCallee. 202006c3fb27SDimitry Andric // There is a chance this may not exist if we are reusing an existing 202106c3fb27SDimitry Andric // clone, specifically during function assignment, where we would have 202206c3fb27SDimitry Andric // removed none type edges after creating the clone. If we can't find 202306c3fb27SDimitry Andric // a corresponding edge there, fall through to the cloning below. 202406c3fb27SDimitry Andric if (auto *NewCalleeEdge = 202506c3fb27SDimitry Andric NewCallee->findEdgeFromCallee(OldCalleeEdge->Callee)) { 202606c3fb27SDimitry Andric NewCalleeEdge->getContextIds().insert(EdgeContextIdsToMove.begin(), 202706c3fb27SDimitry Andric EdgeContextIdsToMove.end()); 202806c3fb27SDimitry Andric NewCalleeEdge->AllocTypes |= computeAllocType(EdgeContextIdsToMove); 202906c3fb27SDimitry Andric continue; 203006c3fb27SDimitry Andric } 203106c3fb27SDimitry Andric } 203206c3fb27SDimitry Andric auto NewEdge = std::make_shared<ContextEdge>( 203306c3fb27SDimitry Andric OldCalleeEdge->Callee, NewCallee, 203406c3fb27SDimitry Andric computeAllocType(EdgeContextIdsToMove), EdgeContextIdsToMove); 203506c3fb27SDimitry Andric NewCallee->CalleeEdges.push_back(NewEdge); 203606c3fb27SDimitry Andric NewEdge->Callee->CallerEdges.push_back(NewEdge); 203706c3fb27SDimitry Andric } 203806c3fb27SDimitry Andric if (VerifyCCG) { 203906c3fb27SDimitry Andric checkNode<DerivedCCG, FuncTy, CallTy>(OldCallee, /*CheckEdges=*/false); 204006c3fb27SDimitry Andric checkNode<DerivedCCG, FuncTy, CallTy>(NewCallee, /*CheckEdges=*/false); 204106c3fb27SDimitry Andric for (const auto &OldCalleeEdge : OldCallee->CalleeEdges) 204206c3fb27SDimitry Andric checkNode<DerivedCCG, FuncTy, CallTy>(OldCalleeEdge->Callee, 204306c3fb27SDimitry Andric /*CheckEdges=*/false); 204406c3fb27SDimitry Andric for (const auto &NewCalleeEdge : NewCallee->CalleeEdges) 204506c3fb27SDimitry Andric checkNode<DerivedCCG, FuncTy, CallTy>(NewCalleeEdge->Callee, 204606c3fb27SDimitry Andric /*CheckEdges=*/false); 204706c3fb27SDimitry Andric } 204806c3fb27SDimitry Andric } 204906c3fb27SDimitry Andric 205006c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 205106c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones() { 205206c3fb27SDimitry Andric DenseSet<const ContextNode *> Visited; 205306c3fb27SDimitry Andric for (auto &Entry : AllocationCallToContextNodeMap) 205406c3fb27SDimitry Andric identifyClones(Entry.second, Visited); 205506c3fb27SDimitry Andric } 205606c3fb27SDimitry Andric 205706c3fb27SDimitry Andric // helper function to check an AllocType is cold or notcold or both. 205806c3fb27SDimitry Andric bool checkColdOrNotCold(uint8_t AllocType) { 205906c3fb27SDimitry Andric return (AllocType == (uint8_t)AllocationType::Cold) || 206006c3fb27SDimitry Andric (AllocType == (uint8_t)AllocationType::NotCold) || 206106c3fb27SDimitry Andric (AllocType == 206206c3fb27SDimitry Andric ((uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold)); 206306c3fb27SDimitry Andric } 206406c3fb27SDimitry Andric 206506c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 206606c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones( 206706c3fb27SDimitry Andric ContextNode *Node, DenseSet<const ContextNode *> &Visited) { 206806c3fb27SDimitry Andric if (VerifyNodes) 206906c3fb27SDimitry Andric checkNode<DerivedCCG, FuncTy, CallTy>(Node); 207006c3fb27SDimitry Andric assert(!Node->CloneOf); 207106c3fb27SDimitry Andric 207206c3fb27SDimitry Andric // If Node as a null call, then either it wasn't found in the module (regular 207306c3fb27SDimitry Andric // LTO) or summary index (ThinLTO), or there were other conditions blocking 207406c3fb27SDimitry Andric // cloning (e.g. recursion, calls multiple targets, etc). 207506c3fb27SDimitry Andric // Do this here so that we don't try to recursively clone callers below, which 207606c3fb27SDimitry Andric // isn't useful at least for this node. 207706c3fb27SDimitry Andric if (!Node->hasCall()) 207806c3fb27SDimitry Andric return; 207906c3fb27SDimitry Andric 208006c3fb27SDimitry Andric #ifndef NDEBUG 208106c3fb27SDimitry Andric auto Insert = 208206c3fb27SDimitry Andric #endif 208306c3fb27SDimitry Andric Visited.insert(Node); 208406c3fb27SDimitry Andric // We should not have visited this node yet. 208506c3fb27SDimitry Andric assert(Insert.second); 208606c3fb27SDimitry Andric // The recursive call to identifyClones may delete the current edge from the 208706c3fb27SDimitry Andric // CallerEdges vector. Make a copy and iterate on that, simpler than passing 208806c3fb27SDimitry Andric // in an iterator and having recursive call erase from it. Other edges may 208906c3fb27SDimitry Andric // also get removed during the recursion, which will have null Callee and 209006c3fb27SDimitry Andric // Caller pointers (and are deleted later), so we skip those below. 209106c3fb27SDimitry Andric { 209206c3fb27SDimitry Andric auto CallerEdges = Node->CallerEdges; 209306c3fb27SDimitry Andric for (auto &Edge : CallerEdges) { 209406c3fb27SDimitry Andric // Skip any that have been removed by an earlier recursive call. 209506c3fb27SDimitry Andric if (Edge->Callee == nullptr && Edge->Caller == nullptr) { 2096*5f757f3fSDimitry Andric assert(!llvm::count(Node->CallerEdges, Edge)); 209706c3fb27SDimitry Andric continue; 209806c3fb27SDimitry Andric } 209906c3fb27SDimitry Andric // Ignore any caller we previously visited via another edge. 210006c3fb27SDimitry Andric if (!Visited.count(Edge->Caller) && !Edge->Caller->CloneOf) { 210106c3fb27SDimitry Andric identifyClones(Edge->Caller, Visited); 210206c3fb27SDimitry Andric } 210306c3fb27SDimitry Andric } 210406c3fb27SDimitry Andric } 210506c3fb27SDimitry Andric 210606c3fb27SDimitry Andric // Check if we reached an unambiguous call or have have only a single caller. 210706c3fb27SDimitry Andric if (hasSingleAllocType(Node->AllocTypes) || Node->CallerEdges.size() <= 1) 210806c3fb27SDimitry Andric return; 210906c3fb27SDimitry Andric 211006c3fb27SDimitry Andric // We need to clone. 211106c3fb27SDimitry Andric 211206c3fb27SDimitry Andric // Try to keep the original version as alloc type NotCold. This will make 211306c3fb27SDimitry Andric // cases with indirect calls or any other situation with an unknown call to 211406c3fb27SDimitry Andric // the original function get the default behavior. We do this by sorting the 211506c3fb27SDimitry Andric // CallerEdges of the Node we will clone by alloc type. 211606c3fb27SDimitry Andric // 211706c3fb27SDimitry Andric // Give NotCold edge the lowest sort priority so those edges are at the end of 211806c3fb27SDimitry Andric // the caller edges vector, and stay on the original version (since the below 211906c3fb27SDimitry Andric // code clones greedily until it finds all remaining edges have the same type 212006c3fb27SDimitry Andric // and leaves the remaining ones on the original Node). 212106c3fb27SDimitry Andric // 212206c3fb27SDimitry Andric // We shouldn't actually have any None type edges, so the sorting priority for 212306c3fb27SDimitry Andric // that is arbitrary, and we assert in that case below. 212406c3fb27SDimitry Andric const unsigned AllocTypeCloningPriority[] = {/*None*/ 3, /*NotCold*/ 4, 212506c3fb27SDimitry Andric /*Cold*/ 1, 212606c3fb27SDimitry Andric /*NotColdCold*/ 2}; 212706c3fb27SDimitry Andric std::stable_sort(Node->CallerEdges.begin(), Node->CallerEdges.end(), 212806c3fb27SDimitry Andric [&](const std::shared_ptr<ContextEdge> &A, 212906c3fb27SDimitry Andric const std::shared_ptr<ContextEdge> &B) { 213006c3fb27SDimitry Andric assert(checkColdOrNotCold(A->AllocTypes) && 213106c3fb27SDimitry Andric checkColdOrNotCold(B->AllocTypes)); 213206c3fb27SDimitry Andric 213306c3fb27SDimitry Andric if (A->AllocTypes == B->AllocTypes) 213406c3fb27SDimitry Andric // Use the first context id for each edge as a 213506c3fb27SDimitry Andric // tie-breaker. 213606c3fb27SDimitry Andric return *A->ContextIds.begin() < *B->ContextIds.begin(); 213706c3fb27SDimitry Andric return AllocTypeCloningPriority[A->AllocTypes] < 213806c3fb27SDimitry Andric AllocTypeCloningPriority[B->AllocTypes]; 213906c3fb27SDimitry Andric }); 214006c3fb27SDimitry Andric 214106c3fb27SDimitry Andric assert(Node->AllocTypes != (uint8_t)AllocationType::None); 214206c3fb27SDimitry Andric 214306c3fb27SDimitry Andric // Iterate until we find no more opportunities for disambiguating the alloc 214406c3fb27SDimitry Andric // types via cloning. In most cases this loop will terminate once the Node 214506c3fb27SDimitry Andric // has a single allocation type, in which case no more cloning is needed. 214606c3fb27SDimitry Andric // We need to be able to remove Edge from CallerEdges, so need to adjust 214706c3fb27SDimitry Andric // iterator inside the loop. 214806c3fb27SDimitry Andric for (auto EI = Node->CallerEdges.begin(); EI != Node->CallerEdges.end();) { 214906c3fb27SDimitry Andric auto CallerEdge = *EI; 215006c3fb27SDimitry Andric 215106c3fb27SDimitry Andric // See if cloning the prior caller edge left this node with a single alloc 215206c3fb27SDimitry Andric // type or a single caller. In that case no more cloning of Node is needed. 215306c3fb27SDimitry Andric if (hasSingleAllocType(Node->AllocTypes) || Node->CallerEdges.size() <= 1) 215406c3fb27SDimitry Andric break; 215506c3fb27SDimitry Andric 215606c3fb27SDimitry Andric // Compute the node callee edge alloc types corresponding to the context ids 215706c3fb27SDimitry Andric // for this caller edge. 215806c3fb27SDimitry Andric std::vector<uint8_t> CalleeEdgeAllocTypesForCallerEdge; 215906c3fb27SDimitry Andric CalleeEdgeAllocTypesForCallerEdge.reserve(Node->CalleeEdges.size()); 216006c3fb27SDimitry Andric for (auto &CalleeEdge : Node->CalleeEdges) 216106c3fb27SDimitry Andric CalleeEdgeAllocTypesForCallerEdge.push_back(intersectAllocTypes( 216206c3fb27SDimitry Andric CalleeEdge->getContextIds(), CallerEdge->getContextIds())); 216306c3fb27SDimitry Andric 216406c3fb27SDimitry Andric // Don't clone if doing so will not disambiguate any alloc types amongst 216506c3fb27SDimitry Andric // caller edges (including the callee edges that would be cloned). 216606c3fb27SDimitry Andric // Otherwise we will simply move all edges to the clone. 216706c3fb27SDimitry Andric // 216806c3fb27SDimitry Andric // First check if by cloning we will disambiguate the caller allocation 216906c3fb27SDimitry Andric // type from node's allocation type. Query allocTypeToUse so that we don't 217006c3fb27SDimitry Andric // bother cloning to distinguish NotCold+Cold from NotCold. Note that 217106c3fb27SDimitry Andric // neither of these should be None type. 217206c3fb27SDimitry Andric // 217306c3fb27SDimitry Andric // Then check if by cloning node at least one of the callee edges will be 217406c3fb27SDimitry Andric // disambiguated by splitting out different context ids. 217506c3fb27SDimitry Andric assert(CallerEdge->AllocTypes != (uint8_t)AllocationType::None); 217606c3fb27SDimitry Andric assert(Node->AllocTypes != (uint8_t)AllocationType::None); 217706c3fb27SDimitry Andric if (allocTypeToUse(CallerEdge->AllocTypes) == 217806c3fb27SDimitry Andric allocTypeToUse(Node->AllocTypes) && 217906c3fb27SDimitry Andric allocTypesMatch<DerivedCCG, FuncTy, CallTy>( 218006c3fb27SDimitry Andric CalleeEdgeAllocTypesForCallerEdge, Node->CalleeEdges)) { 218106c3fb27SDimitry Andric ++EI; 218206c3fb27SDimitry Andric continue; 218306c3fb27SDimitry Andric } 218406c3fb27SDimitry Andric 218506c3fb27SDimitry Andric // First see if we can use an existing clone. Check each clone and its 218606c3fb27SDimitry Andric // callee edges for matching alloc types. 218706c3fb27SDimitry Andric ContextNode *Clone = nullptr; 218806c3fb27SDimitry Andric for (auto *CurClone : Node->Clones) { 218906c3fb27SDimitry Andric if (allocTypeToUse(CurClone->AllocTypes) != 219006c3fb27SDimitry Andric allocTypeToUse(CallerEdge->AllocTypes)) 219106c3fb27SDimitry Andric continue; 219206c3fb27SDimitry Andric 219306c3fb27SDimitry Andric if (!allocTypesMatch<DerivedCCG, FuncTy, CallTy>( 219406c3fb27SDimitry Andric CalleeEdgeAllocTypesForCallerEdge, CurClone->CalleeEdges)) 219506c3fb27SDimitry Andric continue; 219606c3fb27SDimitry Andric Clone = CurClone; 219706c3fb27SDimitry Andric break; 219806c3fb27SDimitry Andric } 219906c3fb27SDimitry Andric 220006c3fb27SDimitry Andric // The edge iterator is adjusted when we move the CallerEdge to the clone. 220106c3fb27SDimitry Andric if (Clone) 220206c3fb27SDimitry Andric moveEdgeToExistingCalleeClone(CallerEdge, Clone, &EI); 220306c3fb27SDimitry Andric else 220406c3fb27SDimitry Andric Clone = moveEdgeToNewCalleeClone(CallerEdge, &EI); 220506c3fb27SDimitry Andric 220606c3fb27SDimitry Andric assert(EI == Node->CallerEdges.end() || 220706c3fb27SDimitry Andric Node->AllocTypes != (uint8_t)AllocationType::None); 220806c3fb27SDimitry Andric // Sanity check that no alloc types on clone or its edges are None. 220906c3fb27SDimitry Andric assert(Clone->AllocTypes != (uint8_t)AllocationType::None); 221006c3fb27SDimitry Andric assert(llvm::none_of( 221106c3fb27SDimitry Andric Clone->CallerEdges, [&](const std::shared_ptr<ContextEdge> &E) { 221206c3fb27SDimitry Andric return E->AllocTypes == (uint8_t)AllocationType::None; 221306c3fb27SDimitry Andric })); 221406c3fb27SDimitry Andric } 221506c3fb27SDimitry Andric 221606c3fb27SDimitry Andric // Cloning may have resulted in some cloned callee edges with type None, 221706c3fb27SDimitry Andric // because they aren't carrying any contexts. Remove those edges. 221806c3fb27SDimitry Andric for (auto *Clone : Node->Clones) { 221906c3fb27SDimitry Andric removeNoneTypeCalleeEdges(Clone); 222006c3fb27SDimitry Andric if (VerifyNodes) 222106c3fb27SDimitry Andric checkNode<DerivedCCG, FuncTy, CallTy>(Clone); 222206c3fb27SDimitry Andric } 222306c3fb27SDimitry Andric // We should still have some context ids on the original Node. 222406c3fb27SDimitry Andric assert(!Node->ContextIds.empty()); 222506c3fb27SDimitry Andric 222606c3fb27SDimitry Andric // Remove any callee edges that ended up with alloc type None after creating 222706c3fb27SDimitry Andric // clones and updating callee edges. 222806c3fb27SDimitry Andric removeNoneTypeCalleeEdges(Node); 222906c3fb27SDimitry Andric 223006c3fb27SDimitry Andric // Sanity check that no alloc types on node or edges are None. 223106c3fb27SDimitry Andric assert(Node->AllocTypes != (uint8_t)AllocationType::None); 223206c3fb27SDimitry Andric assert(llvm::none_of(Node->CalleeEdges, 223306c3fb27SDimitry Andric [&](const std::shared_ptr<ContextEdge> &E) { 223406c3fb27SDimitry Andric return E->AllocTypes == (uint8_t)AllocationType::None; 223506c3fb27SDimitry Andric })); 223606c3fb27SDimitry Andric assert(llvm::none_of(Node->CallerEdges, 223706c3fb27SDimitry Andric [&](const std::shared_ptr<ContextEdge> &E) { 223806c3fb27SDimitry Andric return E->AllocTypes == (uint8_t)AllocationType::None; 223906c3fb27SDimitry Andric })); 224006c3fb27SDimitry Andric 224106c3fb27SDimitry Andric if (VerifyNodes) 224206c3fb27SDimitry Andric checkNode<DerivedCCG, FuncTy, CallTy>(Node); 224306c3fb27SDimitry Andric } 224406c3fb27SDimitry Andric 224506c3fb27SDimitry Andric void ModuleCallsiteContextGraph::updateAllocationCall( 224606c3fb27SDimitry Andric CallInfo &Call, AllocationType AllocType) { 224706c3fb27SDimitry Andric std::string AllocTypeString = getAllocTypeAttributeString(AllocType); 224806c3fb27SDimitry Andric auto A = llvm::Attribute::get(Call.call()->getFunction()->getContext(), 224906c3fb27SDimitry Andric "memprof", AllocTypeString); 225006c3fb27SDimitry Andric cast<CallBase>(Call.call())->addFnAttr(A); 225106c3fb27SDimitry Andric OREGetter(Call.call()->getFunction()) 225206c3fb27SDimitry Andric .emit(OptimizationRemark(DEBUG_TYPE, "MemprofAttribute", Call.call()) 225306c3fb27SDimitry Andric << ore::NV("AllocationCall", Call.call()) << " in clone " 225406c3fb27SDimitry Andric << ore::NV("Caller", Call.call()->getFunction()) 225506c3fb27SDimitry Andric << " marked with memprof allocation attribute " 225606c3fb27SDimitry Andric << ore::NV("Attribute", AllocTypeString)); 225706c3fb27SDimitry Andric } 225806c3fb27SDimitry Andric 225906c3fb27SDimitry Andric void IndexCallsiteContextGraph::updateAllocationCall(CallInfo &Call, 226006c3fb27SDimitry Andric AllocationType AllocType) { 226106c3fb27SDimitry Andric auto *AI = Call.call().dyn_cast<AllocInfo *>(); 226206c3fb27SDimitry Andric assert(AI); 226306c3fb27SDimitry Andric assert(AI->Versions.size() > Call.cloneNo()); 226406c3fb27SDimitry Andric AI->Versions[Call.cloneNo()] = (uint8_t)AllocType; 226506c3fb27SDimitry Andric } 226606c3fb27SDimitry Andric 226706c3fb27SDimitry Andric void ModuleCallsiteContextGraph::updateCall(CallInfo &CallerCall, 226806c3fb27SDimitry Andric FuncInfo CalleeFunc) { 226906c3fb27SDimitry Andric if (CalleeFunc.cloneNo() > 0) 227006c3fb27SDimitry Andric cast<CallBase>(CallerCall.call())->setCalledFunction(CalleeFunc.func()); 227106c3fb27SDimitry Andric OREGetter(CallerCall.call()->getFunction()) 227206c3fb27SDimitry Andric .emit(OptimizationRemark(DEBUG_TYPE, "MemprofCall", CallerCall.call()) 227306c3fb27SDimitry Andric << ore::NV("Call", CallerCall.call()) << " in clone " 227406c3fb27SDimitry Andric << ore::NV("Caller", CallerCall.call()->getFunction()) 227506c3fb27SDimitry Andric << " assigned to call function clone " 227606c3fb27SDimitry Andric << ore::NV("Callee", CalleeFunc.func())); 227706c3fb27SDimitry Andric } 227806c3fb27SDimitry Andric 227906c3fb27SDimitry Andric void IndexCallsiteContextGraph::updateCall(CallInfo &CallerCall, 228006c3fb27SDimitry Andric FuncInfo CalleeFunc) { 228106c3fb27SDimitry Andric auto *CI = CallerCall.call().dyn_cast<CallsiteInfo *>(); 228206c3fb27SDimitry Andric assert(CI && 228306c3fb27SDimitry Andric "Caller cannot be an allocation which should not have profiled calls"); 228406c3fb27SDimitry Andric assert(CI->Clones.size() > CallerCall.cloneNo()); 228506c3fb27SDimitry Andric CI->Clones[CallerCall.cloneNo()] = CalleeFunc.cloneNo(); 228606c3fb27SDimitry Andric } 228706c3fb27SDimitry Andric 228806c3fb27SDimitry Andric CallsiteContextGraph<ModuleCallsiteContextGraph, Function, 228906c3fb27SDimitry Andric Instruction *>::FuncInfo 229006c3fb27SDimitry Andric ModuleCallsiteContextGraph::cloneFunctionForCallsite( 229106c3fb27SDimitry Andric FuncInfo &Func, CallInfo &Call, std::map<CallInfo, CallInfo> &CallMap, 229206c3fb27SDimitry Andric std::vector<CallInfo> &CallsWithMetadataInFunc, unsigned CloneNo) { 229306c3fb27SDimitry Andric // Use existing LLVM facilities for cloning and obtaining Call in clone 229406c3fb27SDimitry Andric ValueToValueMapTy VMap; 229506c3fb27SDimitry Andric auto *NewFunc = CloneFunction(Func.func(), VMap); 229606c3fb27SDimitry Andric std::string Name = getMemProfFuncName(Func.func()->getName(), CloneNo); 229706c3fb27SDimitry Andric assert(!Func.func()->getParent()->getFunction(Name)); 229806c3fb27SDimitry Andric NewFunc->setName(Name); 229906c3fb27SDimitry Andric for (auto &Inst : CallsWithMetadataInFunc) { 230006c3fb27SDimitry Andric // This map always has the initial version in it. 230106c3fb27SDimitry Andric assert(Inst.cloneNo() == 0); 230206c3fb27SDimitry Andric CallMap[Inst] = {cast<Instruction>(VMap[Inst.call()]), CloneNo}; 230306c3fb27SDimitry Andric } 230406c3fb27SDimitry Andric OREGetter(Func.func()) 230506c3fb27SDimitry Andric .emit(OptimizationRemark(DEBUG_TYPE, "MemprofClone", Func.func()) 230606c3fb27SDimitry Andric << "created clone " << ore::NV("NewFunction", NewFunc)); 230706c3fb27SDimitry Andric return {NewFunc, CloneNo}; 230806c3fb27SDimitry Andric } 230906c3fb27SDimitry Andric 231006c3fb27SDimitry Andric CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary, 231106c3fb27SDimitry Andric IndexCall>::FuncInfo 231206c3fb27SDimitry Andric IndexCallsiteContextGraph::cloneFunctionForCallsite( 231306c3fb27SDimitry Andric FuncInfo &Func, CallInfo &Call, std::map<CallInfo, CallInfo> &CallMap, 231406c3fb27SDimitry Andric std::vector<CallInfo> &CallsWithMetadataInFunc, unsigned CloneNo) { 231506c3fb27SDimitry Andric // Check how many clones we have of Call (and therefore function). 231606c3fb27SDimitry Andric // The next clone number is the current size of versions array. 231706c3fb27SDimitry Andric // Confirm this matches the CloneNo provided by the caller, which is based on 231806c3fb27SDimitry Andric // the number of function clones we have. 231906c3fb27SDimitry Andric assert(CloneNo == 232006c3fb27SDimitry Andric (Call.call().is<AllocInfo *>() 232106c3fb27SDimitry Andric ? Call.call().dyn_cast<AllocInfo *>()->Versions.size() 232206c3fb27SDimitry Andric : Call.call().dyn_cast<CallsiteInfo *>()->Clones.size())); 232306c3fb27SDimitry Andric // Walk all the instructions in this function. Create a new version for 232406c3fb27SDimitry Andric // each (by adding an entry to the Versions/Clones summary array), and copy 232506c3fb27SDimitry Andric // over the version being called for the function clone being cloned here. 232606c3fb27SDimitry Andric // Additionally, add an entry to the CallMap for the new function clone, 232706c3fb27SDimitry Andric // mapping the original call (clone 0, what is in CallsWithMetadataInFunc) 232806c3fb27SDimitry Andric // to the new call clone. 232906c3fb27SDimitry Andric for (auto &Inst : CallsWithMetadataInFunc) { 233006c3fb27SDimitry Andric // This map always has the initial version in it. 233106c3fb27SDimitry Andric assert(Inst.cloneNo() == 0); 233206c3fb27SDimitry Andric if (auto *AI = Inst.call().dyn_cast<AllocInfo *>()) { 233306c3fb27SDimitry Andric assert(AI->Versions.size() == CloneNo); 233406c3fb27SDimitry Andric // We assign the allocation type later (in updateAllocationCall), just add 233506c3fb27SDimitry Andric // an entry for it here. 233606c3fb27SDimitry Andric AI->Versions.push_back(0); 233706c3fb27SDimitry Andric } else { 233806c3fb27SDimitry Andric auto *CI = Inst.call().dyn_cast<CallsiteInfo *>(); 233906c3fb27SDimitry Andric assert(CI && CI->Clones.size() == CloneNo); 234006c3fb27SDimitry Andric // We assign the clone number later (in updateCall), just add an entry for 234106c3fb27SDimitry Andric // it here. 234206c3fb27SDimitry Andric CI->Clones.push_back(0); 234306c3fb27SDimitry Andric } 234406c3fb27SDimitry Andric CallMap[Inst] = {Inst.call(), CloneNo}; 234506c3fb27SDimitry Andric } 234606c3fb27SDimitry Andric return {Func.func(), CloneNo}; 234706c3fb27SDimitry Andric } 234806c3fb27SDimitry Andric 234906c3fb27SDimitry Andric // This method assigns cloned callsites to functions, cloning the functions as 235006c3fb27SDimitry Andric // needed. The assignment is greedy and proceeds roughly as follows: 235106c3fb27SDimitry Andric // 235206c3fb27SDimitry Andric // For each function Func: 235306c3fb27SDimitry Andric // For each call with graph Node having clones: 235406c3fb27SDimitry Andric // Initialize ClonesWorklist to Node and its clones 235506c3fb27SDimitry Andric // Initialize NodeCloneCount to 0 235606c3fb27SDimitry Andric // While ClonesWorklist is not empty: 235706c3fb27SDimitry Andric // Clone = pop front ClonesWorklist 235806c3fb27SDimitry Andric // NodeCloneCount++ 235906c3fb27SDimitry Andric // If Func has been cloned less than NodeCloneCount times: 236006c3fb27SDimitry Andric // If NodeCloneCount is 1: 236106c3fb27SDimitry Andric // Assign Clone to original Func 236206c3fb27SDimitry Andric // Continue 236306c3fb27SDimitry Andric // Create a new function clone 236406c3fb27SDimitry Andric // If other callers not assigned to call a function clone yet: 236506c3fb27SDimitry Andric // Assign them to call new function clone 236606c3fb27SDimitry Andric // Continue 236706c3fb27SDimitry Andric // Assign any other caller calling the cloned version to new clone 236806c3fb27SDimitry Andric // 236906c3fb27SDimitry Andric // For each caller of Clone: 237006c3fb27SDimitry Andric // If caller is assigned to call a specific function clone: 237106c3fb27SDimitry Andric // If we cannot assign Clone to that function clone: 237206c3fb27SDimitry Andric // Create new callsite Clone NewClone 237306c3fb27SDimitry Andric // Add NewClone to ClonesWorklist 237406c3fb27SDimitry Andric // Continue 237506c3fb27SDimitry Andric // Assign Clone to existing caller's called function clone 237606c3fb27SDimitry Andric // Else: 237706c3fb27SDimitry Andric // If Clone not already assigned to a function clone: 237806c3fb27SDimitry Andric // Assign to first function clone without assignment 237906c3fb27SDimitry Andric // Assign caller to selected function clone 238006c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 238106c3fb27SDimitry Andric bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() { 238206c3fb27SDimitry Andric bool Changed = false; 238306c3fb27SDimitry Andric 238406c3fb27SDimitry Andric // Keep track of the assignment of nodes (callsites) to function clones they 238506c3fb27SDimitry Andric // call. 238606c3fb27SDimitry Andric DenseMap<ContextNode *, FuncInfo> CallsiteToCalleeFuncCloneMap; 238706c3fb27SDimitry Andric 238806c3fb27SDimitry Andric // Update caller node to call function version CalleeFunc, by recording the 238906c3fb27SDimitry Andric // assignment in CallsiteToCalleeFuncCloneMap. 239006c3fb27SDimitry Andric auto RecordCalleeFuncOfCallsite = [&](ContextNode *Caller, 239106c3fb27SDimitry Andric const FuncInfo &CalleeFunc) { 239206c3fb27SDimitry Andric assert(Caller->hasCall()); 239306c3fb27SDimitry Andric CallsiteToCalleeFuncCloneMap[Caller] = CalleeFunc; 239406c3fb27SDimitry Andric }; 239506c3fb27SDimitry Andric 239606c3fb27SDimitry Andric // Walk all functions for which we saw calls with memprof metadata, and handle 239706c3fb27SDimitry Andric // cloning for each of its calls. 239806c3fb27SDimitry Andric for (auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) { 239906c3fb27SDimitry Andric FuncInfo OrigFunc(Func); 240006c3fb27SDimitry Andric // Map from each clone of OrigFunc to a map of remappings of each call of 240106c3fb27SDimitry Andric // interest (from original uncloned call to the corresponding cloned call in 240206c3fb27SDimitry Andric // that function clone). 240306c3fb27SDimitry Andric std::map<FuncInfo, std::map<CallInfo, CallInfo>> FuncClonesToCallMap; 240406c3fb27SDimitry Andric for (auto &Call : CallsWithMetadata) { 240506c3fb27SDimitry Andric ContextNode *Node = getNodeForInst(Call); 240606c3fb27SDimitry Andric // Skip call if we do not have a node for it (all uses of its stack ids 240706c3fb27SDimitry Andric // were either on inlined chains or pruned from the MIBs), or if we did 240806c3fb27SDimitry Andric // not create any clones for it. 240906c3fb27SDimitry Andric if (!Node || Node->Clones.empty()) 241006c3fb27SDimitry Andric continue; 241106c3fb27SDimitry Andric assert(Node->hasCall() && 241206c3fb27SDimitry Andric "Not having a call should have prevented cloning"); 241306c3fb27SDimitry Andric 241406c3fb27SDimitry Andric // Track the assignment of function clones to clones of the current 241506c3fb27SDimitry Andric // callsite Node being handled. 241606c3fb27SDimitry Andric std::map<FuncInfo, ContextNode *> FuncCloneToCurNodeCloneMap; 241706c3fb27SDimitry Andric 241806c3fb27SDimitry Andric // Assign callsite version CallsiteClone to function version FuncClone, 241906c3fb27SDimitry Andric // and also assign (possibly cloned) Call to CallsiteClone. 242006c3fb27SDimitry Andric auto AssignCallsiteCloneToFuncClone = [&](const FuncInfo &FuncClone, 242106c3fb27SDimitry Andric CallInfo &Call, 242206c3fb27SDimitry Andric ContextNode *CallsiteClone, 242306c3fb27SDimitry Andric bool IsAlloc) { 242406c3fb27SDimitry Andric // Record the clone of callsite node assigned to this function clone. 242506c3fb27SDimitry Andric FuncCloneToCurNodeCloneMap[FuncClone] = CallsiteClone; 242606c3fb27SDimitry Andric 242706c3fb27SDimitry Andric assert(FuncClonesToCallMap.count(FuncClone)); 242806c3fb27SDimitry Andric std::map<CallInfo, CallInfo> &CallMap = FuncClonesToCallMap[FuncClone]; 242906c3fb27SDimitry Andric CallInfo CallClone(Call); 243006c3fb27SDimitry Andric if (CallMap.count(Call)) 243106c3fb27SDimitry Andric CallClone = CallMap[Call]; 243206c3fb27SDimitry Andric CallsiteClone->setCall(CallClone); 243306c3fb27SDimitry Andric }; 243406c3fb27SDimitry Andric 243506c3fb27SDimitry Andric // Keep track of the clones of callsite Node that need to be assigned to 243606c3fb27SDimitry Andric // function clones. This list may be expanded in the loop body below if we 243706c3fb27SDimitry Andric // find additional cloning is required. 243806c3fb27SDimitry Andric std::deque<ContextNode *> ClonesWorklist; 243906c3fb27SDimitry Andric // Ignore original Node if we moved all of its contexts to clones. 244006c3fb27SDimitry Andric if (!Node->ContextIds.empty()) 244106c3fb27SDimitry Andric ClonesWorklist.push_back(Node); 244206c3fb27SDimitry Andric ClonesWorklist.insert(ClonesWorklist.end(), Node->Clones.begin(), 244306c3fb27SDimitry Andric Node->Clones.end()); 244406c3fb27SDimitry Andric 244506c3fb27SDimitry Andric // Now walk through all of the clones of this callsite Node that we need, 244606c3fb27SDimitry Andric // and determine the assignment to a corresponding clone of the current 244706c3fb27SDimitry Andric // function (creating new function clones as needed). 244806c3fb27SDimitry Andric unsigned NodeCloneCount = 0; 244906c3fb27SDimitry Andric while (!ClonesWorklist.empty()) { 245006c3fb27SDimitry Andric ContextNode *Clone = ClonesWorklist.front(); 245106c3fb27SDimitry Andric ClonesWorklist.pop_front(); 245206c3fb27SDimitry Andric NodeCloneCount++; 245306c3fb27SDimitry Andric if (VerifyNodes) 245406c3fb27SDimitry Andric checkNode<DerivedCCG, FuncTy, CallTy>(Clone); 245506c3fb27SDimitry Andric 245606c3fb27SDimitry Andric // Need to create a new function clone if we have more callsite clones 245706c3fb27SDimitry Andric // than existing function clones, which would have been assigned to an 245806c3fb27SDimitry Andric // earlier clone in the list (we assign callsite clones to function 245906c3fb27SDimitry Andric // clones greedily). 246006c3fb27SDimitry Andric if (FuncClonesToCallMap.size() < NodeCloneCount) { 246106c3fb27SDimitry Andric // If this is the first callsite copy, assign to original function. 246206c3fb27SDimitry Andric if (NodeCloneCount == 1) { 246306c3fb27SDimitry Andric // Since FuncClonesToCallMap is empty in this case, no clones have 246406c3fb27SDimitry Andric // been created for this function yet, and no callers should have 246506c3fb27SDimitry Andric // been assigned a function clone for this callee node yet. 246606c3fb27SDimitry Andric assert(llvm::none_of( 246706c3fb27SDimitry Andric Clone->CallerEdges, [&](const std::shared_ptr<ContextEdge> &E) { 246806c3fb27SDimitry Andric return CallsiteToCalleeFuncCloneMap.count(E->Caller); 246906c3fb27SDimitry Andric })); 247006c3fb27SDimitry Andric // Initialize with empty call map, assign Clone to original function 247106c3fb27SDimitry Andric // and its callers, and skip to the next clone. 247206c3fb27SDimitry Andric FuncClonesToCallMap[OrigFunc] = {}; 247306c3fb27SDimitry Andric AssignCallsiteCloneToFuncClone( 247406c3fb27SDimitry Andric OrigFunc, Call, Clone, 247506c3fb27SDimitry Andric AllocationCallToContextNodeMap.count(Call)); 247606c3fb27SDimitry Andric for (auto &CE : Clone->CallerEdges) { 247706c3fb27SDimitry Andric // Ignore any caller that does not have a recorded callsite Call. 247806c3fb27SDimitry Andric if (!CE->Caller->hasCall()) 247906c3fb27SDimitry Andric continue; 248006c3fb27SDimitry Andric RecordCalleeFuncOfCallsite(CE->Caller, OrigFunc); 248106c3fb27SDimitry Andric } 248206c3fb27SDimitry Andric continue; 248306c3fb27SDimitry Andric } 248406c3fb27SDimitry Andric 248506c3fb27SDimitry Andric // First locate which copy of OrigFunc to clone again. If a caller 248606c3fb27SDimitry Andric // of this callsite clone was already assigned to call a particular 248706c3fb27SDimitry Andric // function clone, we need to redirect all of those callers to the 248806c3fb27SDimitry Andric // new function clone, and update their other callees within this 248906c3fb27SDimitry Andric // function. 249006c3fb27SDimitry Andric FuncInfo PreviousAssignedFuncClone; 249106c3fb27SDimitry Andric auto EI = llvm::find_if( 249206c3fb27SDimitry Andric Clone->CallerEdges, [&](const std::shared_ptr<ContextEdge> &E) { 249306c3fb27SDimitry Andric return CallsiteToCalleeFuncCloneMap.count(E->Caller); 249406c3fb27SDimitry Andric }); 249506c3fb27SDimitry Andric bool CallerAssignedToCloneOfFunc = false; 249606c3fb27SDimitry Andric if (EI != Clone->CallerEdges.end()) { 249706c3fb27SDimitry Andric const std::shared_ptr<ContextEdge> &Edge = *EI; 249806c3fb27SDimitry Andric PreviousAssignedFuncClone = 249906c3fb27SDimitry Andric CallsiteToCalleeFuncCloneMap[Edge->Caller]; 250006c3fb27SDimitry Andric CallerAssignedToCloneOfFunc = true; 250106c3fb27SDimitry Andric } 250206c3fb27SDimitry Andric 250306c3fb27SDimitry Andric // Clone function and save it along with the CallInfo map created 250406c3fb27SDimitry Andric // during cloning in the FuncClonesToCallMap. 250506c3fb27SDimitry Andric std::map<CallInfo, CallInfo> NewCallMap; 250606c3fb27SDimitry Andric unsigned CloneNo = FuncClonesToCallMap.size(); 250706c3fb27SDimitry Andric assert(CloneNo > 0 && "Clone 0 is the original function, which " 250806c3fb27SDimitry Andric "should already exist in the map"); 250906c3fb27SDimitry Andric FuncInfo NewFuncClone = cloneFunctionForCallsite( 251006c3fb27SDimitry Andric OrigFunc, Call, NewCallMap, CallsWithMetadata, CloneNo); 251106c3fb27SDimitry Andric FuncClonesToCallMap.emplace(NewFuncClone, std::move(NewCallMap)); 251206c3fb27SDimitry Andric FunctionClonesAnalysis++; 251306c3fb27SDimitry Andric Changed = true; 251406c3fb27SDimitry Andric 251506c3fb27SDimitry Andric // If no caller callsites were already assigned to a clone of this 251606c3fb27SDimitry Andric // function, we can simply assign this clone to the new func clone 251706c3fb27SDimitry Andric // and update all callers to it, then skip to the next clone. 251806c3fb27SDimitry Andric if (!CallerAssignedToCloneOfFunc) { 251906c3fb27SDimitry Andric AssignCallsiteCloneToFuncClone( 252006c3fb27SDimitry Andric NewFuncClone, Call, Clone, 252106c3fb27SDimitry Andric AllocationCallToContextNodeMap.count(Call)); 252206c3fb27SDimitry Andric for (auto &CE : Clone->CallerEdges) { 252306c3fb27SDimitry Andric // Ignore any caller that does not have a recorded callsite Call. 252406c3fb27SDimitry Andric if (!CE->Caller->hasCall()) 252506c3fb27SDimitry Andric continue; 252606c3fb27SDimitry Andric RecordCalleeFuncOfCallsite(CE->Caller, NewFuncClone); 252706c3fb27SDimitry Andric } 252806c3fb27SDimitry Andric continue; 252906c3fb27SDimitry Andric } 253006c3fb27SDimitry Andric 253106c3fb27SDimitry Andric // We may need to do additional node cloning in this case. 253206c3fb27SDimitry Andric // Reset the CallsiteToCalleeFuncCloneMap entry for any callers 253306c3fb27SDimitry Andric // that were previously assigned to call PreviousAssignedFuncClone, 253406c3fb27SDimitry Andric // to record that they now call NewFuncClone. 253506c3fb27SDimitry Andric for (auto CE : Clone->CallerEdges) { 253606c3fb27SDimitry Andric // Ignore any caller that does not have a recorded callsite Call. 253706c3fb27SDimitry Andric if (!CE->Caller->hasCall()) 253806c3fb27SDimitry Andric continue; 253906c3fb27SDimitry Andric 254006c3fb27SDimitry Andric if (!CallsiteToCalleeFuncCloneMap.count(CE->Caller) || 254106c3fb27SDimitry Andric // We subsequently fall through to later handling that 254206c3fb27SDimitry Andric // will perform any additional cloning required for 254306c3fb27SDimitry Andric // callers that were calling other function clones. 254406c3fb27SDimitry Andric CallsiteToCalleeFuncCloneMap[CE->Caller] != 254506c3fb27SDimitry Andric PreviousAssignedFuncClone) 254606c3fb27SDimitry Andric continue; 254706c3fb27SDimitry Andric 254806c3fb27SDimitry Andric RecordCalleeFuncOfCallsite(CE->Caller, NewFuncClone); 254906c3fb27SDimitry Andric 255006c3fb27SDimitry Andric // If we are cloning a function that was already assigned to some 255106c3fb27SDimitry Andric // callers, then essentially we are creating new callsite clones 255206c3fb27SDimitry Andric // of the other callsites in that function that are reached by those 255306c3fb27SDimitry Andric // callers. Clone the other callees of the current callsite's caller 255406c3fb27SDimitry Andric // that were already assigned to PreviousAssignedFuncClone 255506c3fb27SDimitry Andric // accordingly. This is important since we subsequently update the 255606c3fb27SDimitry Andric // calls from the nodes in the graph and their assignments to callee 255706c3fb27SDimitry Andric // functions recorded in CallsiteToCalleeFuncCloneMap. 255806c3fb27SDimitry Andric for (auto CalleeEdge : CE->Caller->CalleeEdges) { 255906c3fb27SDimitry Andric // Skip any that have been removed on an earlier iteration when 256006c3fb27SDimitry Andric // cleaning up newly None type callee edges. 256106c3fb27SDimitry Andric if (!CalleeEdge) 256206c3fb27SDimitry Andric continue; 256306c3fb27SDimitry Andric ContextNode *Callee = CalleeEdge->Callee; 256406c3fb27SDimitry Andric // Skip the current callsite, we are looking for other 256506c3fb27SDimitry Andric // callsites Caller calls, as well as any that does not have a 256606c3fb27SDimitry Andric // recorded callsite Call. 256706c3fb27SDimitry Andric if (Callee == Clone || !Callee->hasCall()) 256806c3fb27SDimitry Andric continue; 256906c3fb27SDimitry Andric ContextNode *NewClone = moveEdgeToNewCalleeClone(CalleeEdge); 257006c3fb27SDimitry Andric removeNoneTypeCalleeEdges(NewClone); 257106c3fb27SDimitry Andric // Moving the edge may have resulted in some none type 257206c3fb27SDimitry Andric // callee edges on the original Callee. 257306c3fb27SDimitry Andric removeNoneTypeCalleeEdges(Callee); 257406c3fb27SDimitry Andric assert(NewClone->AllocTypes != (uint8_t)AllocationType::None); 257506c3fb27SDimitry Andric // If the Callee node was already assigned to call a specific 257606c3fb27SDimitry Andric // function version, make sure its new clone is assigned to call 257706c3fb27SDimitry Andric // that same function clone. 257806c3fb27SDimitry Andric if (CallsiteToCalleeFuncCloneMap.count(Callee)) 257906c3fb27SDimitry Andric RecordCalleeFuncOfCallsite( 258006c3fb27SDimitry Andric NewClone, CallsiteToCalleeFuncCloneMap[Callee]); 258106c3fb27SDimitry Andric // Update NewClone with the new Call clone of this callsite's Call 258206c3fb27SDimitry Andric // created for the new function clone created earlier. 258306c3fb27SDimitry Andric // Recall that we have already ensured when building the graph 258406c3fb27SDimitry Andric // that each caller can only call callsites within the same 258506c3fb27SDimitry Andric // function, so we are guaranteed that Callee Call is in the 258606c3fb27SDimitry Andric // current OrigFunc. 258706c3fb27SDimitry Andric // CallMap is set up as indexed by original Call at clone 0. 258806c3fb27SDimitry Andric CallInfo OrigCall(Callee->getOrigNode()->Call); 258906c3fb27SDimitry Andric OrigCall.setCloneNo(0); 259006c3fb27SDimitry Andric std::map<CallInfo, CallInfo> &CallMap = 259106c3fb27SDimitry Andric FuncClonesToCallMap[NewFuncClone]; 259206c3fb27SDimitry Andric assert(CallMap.count(OrigCall)); 259306c3fb27SDimitry Andric CallInfo NewCall(CallMap[OrigCall]); 259406c3fb27SDimitry Andric assert(NewCall); 259506c3fb27SDimitry Andric NewClone->setCall(NewCall); 259606c3fb27SDimitry Andric } 259706c3fb27SDimitry Andric } 259806c3fb27SDimitry Andric // Fall through to handling below to perform the recording of the 259906c3fb27SDimitry Andric // function for this callsite clone. This enables handling of cases 260006c3fb27SDimitry Andric // where the callers were assigned to different clones of a function. 260106c3fb27SDimitry Andric } 260206c3fb27SDimitry Andric 260306c3fb27SDimitry Andric // See if we can use existing function clone. Walk through 260406c3fb27SDimitry Andric // all caller edges to see if any have already been assigned to 260506c3fb27SDimitry Andric // a clone of this callsite's function. If we can use it, do so. If not, 260606c3fb27SDimitry Andric // because that function clone is already assigned to a different clone 260706c3fb27SDimitry Andric // of this callsite, then we need to clone again. 260806c3fb27SDimitry Andric // Basically, this checking is needed to handle the case where different 260906c3fb27SDimitry Andric // caller functions/callsites may need versions of this function 261006c3fb27SDimitry Andric // containing different mixes of callsite clones across the different 261106c3fb27SDimitry Andric // callsites within the function. If that happens, we need to create 261206c3fb27SDimitry Andric // additional function clones to handle the various combinations. 261306c3fb27SDimitry Andric // 261406c3fb27SDimitry Andric // Keep track of any new clones of this callsite created by the 261506c3fb27SDimitry Andric // following loop, as well as any existing clone that we decided to 261606c3fb27SDimitry Andric // assign this clone to. 261706c3fb27SDimitry Andric std::map<FuncInfo, ContextNode *> FuncCloneToNewCallsiteCloneMap; 261806c3fb27SDimitry Andric FuncInfo FuncCloneAssignedToCurCallsiteClone; 261906c3fb27SDimitry Andric // We need to be able to remove Edge from CallerEdges, so need to adjust 262006c3fb27SDimitry Andric // iterator in the loop. 262106c3fb27SDimitry Andric for (auto EI = Clone->CallerEdges.begin(); 262206c3fb27SDimitry Andric EI != Clone->CallerEdges.end();) { 262306c3fb27SDimitry Andric auto Edge = *EI; 262406c3fb27SDimitry Andric // Ignore any caller that does not have a recorded callsite Call. 262506c3fb27SDimitry Andric if (!Edge->Caller->hasCall()) { 262606c3fb27SDimitry Andric EI++; 262706c3fb27SDimitry Andric continue; 262806c3fb27SDimitry Andric } 262906c3fb27SDimitry Andric // If this caller already assigned to call a version of OrigFunc, need 263006c3fb27SDimitry Andric // to ensure we can assign this callsite clone to that function clone. 263106c3fb27SDimitry Andric if (CallsiteToCalleeFuncCloneMap.count(Edge->Caller)) { 263206c3fb27SDimitry Andric FuncInfo FuncCloneCalledByCaller = 263306c3fb27SDimitry Andric CallsiteToCalleeFuncCloneMap[Edge->Caller]; 263406c3fb27SDimitry Andric // First we need to confirm that this function clone is available 263506c3fb27SDimitry Andric // for use by this callsite node clone. 263606c3fb27SDimitry Andric // 263706c3fb27SDimitry Andric // While FuncCloneToCurNodeCloneMap is built only for this Node and 263806c3fb27SDimitry Andric // its callsite clones, one of those callsite clones X could have 263906c3fb27SDimitry Andric // been assigned to the same function clone called by Edge's caller 264006c3fb27SDimitry Andric // - if Edge's caller calls another callsite within Node's original 264106c3fb27SDimitry Andric // function, and that callsite has another caller reaching clone X. 264206c3fb27SDimitry Andric // We need to clone Node again in this case. 264306c3fb27SDimitry Andric if ((FuncCloneToCurNodeCloneMap.count(FuncCloneCalledByCaller) && 264406c3fb27SDimitry Andric FuncCloneToCurNodeCloneMap[FuncCloneCalledByCaller] != 264506c3fb27SDimitry Andric Clone) || 264606c3fb27SDimitry Andric // Detect when we have multiple callers of this callsite that 264706c3fb27SDimitry Andric // have already been assigned to specific, and different, clones 264806c3fb27SDimitry Andric // of OrigFunc (due to other unrelated callsites in Func they 264906c3fb27SDimitry Andric // reach via call contexts). Is this Clone of callsite Node 265006c3fb27SDimitry Andric // assigned to a different clone of OrigFunc? If so, clone Node 265106c3fb27SDimitry Andric // again. 265206c3fb27SDimitry Andric (FuncCloneAssignedToCurCallsiteClone && 265306c3fb27SDimitry Andric FuncCloneAssignedToCurCallsiteClone != 265406c3fb27SDimitry Andric FuncCloneCalledByCaller)) { 265506c3fb27SDimitry Andric // We need to use a different newly created callsite clone, in 265606c3fb27SDimitry Andric // order to assign it to another new function clone on a 265706c3fb27SDimitry Andric // subsequent iteration over the Clones array (adjusted below). 265806c3fb27SDimitry Andric // Note we specifically do not reset the 265906c3fb27SDimitry Andric // CallsiteToCalleeFuncCloneMap entry for this caller, so that 266006c3fb27SDimitry Andric // when this new clone is processed later we know which version of 266106c3fb27SDimitry Andric // the function to copy (so that other callsite clones we have 266206c3fb27SDimitry Andric // assigned to that function clone are properly cloned over). See 266306c3fb27SDimitry Andric // comments in the function cloning handling earlier. 266406c3fb27SDimitry Andric 266506c3fb27SDimitry Andric // Check if we already have cloned this callsite again while 266606c3fb27SDimitry Andric // walking through caller edges, for a caller calling the same 266706c3fb27SDimitry Andric // function clone. If so, we can move this edge to that new clone 266806c3fb27SDimitry Andric // rather than creating yet another new clone. 266906c3fb27SDimitry Andric if (FuncCloneToNewCallsiteCloneMap.count( 267006c3fb27SDimitry Andric FuncCloneCalledByCaller)) { 267106c3fb27SDimitry Andric ContextNode *NewClone = 267206c3fb27SDimitry Andric FuncCloneToNewCallsiteCloneMap[FuncCloneCalledByCaller]; 267306c3fb27SDimitry Andric moveEdgeToExistingCalleeClone(Edge, NewClone, &EI); 267406c3fb27SDimitry Andric // Cleanup any none type edges cloned over. 267506c3fb27SDimitry Andric removeNoneTypeCalleeEdges(NewClone); 267606c3fb27SDimitry Andric } else { 267706c3fb27SDimitry Andric // Create a new callsite clone. 267806c3fb27SDimitry Andric ContextNode *NewClone = moveEdgeToNewCalleeClone(Edge, &EI); 267906c3fb27SDimitry Andric removeNoneTypeCalleeEdges(NewClone); 268006c3fb27SDimitry Andric FuncCloneToNewCallsiteCloneMap[FuncCloneCalledByCaller] = 268106c3fb27SDimitry Andric NewClone; 268206c3fb27SDimitry Andric // Add to list of clones and process later. 268306c3fb27SDimitry Andric ClonesWorklist.push_back(NewClone); 268406c3fb27SDimitry Andric assert(EI == Clone->CallerEdges.end() || 268506c3fb27SDimitry Andric Clone->AllocTypes != (uint8_t)AllocationType::None); 268606c3fb27SDimitry Andric assert(NewClone->AllocTypes != (uint8_t)AllocationType::None); 268706c3fb27SDimitry Andric } 268806c3fb27SDimitry Andric // Moving the caller edge may have resulted in some none type 268906c3fb27SDimitry Andric // callee edges. 269006c3fb27SDimitry Andric removeNoneTypeCalleeEdges(Clone); 269106c3fb27SDimitry Andric // We will handle the newly created callsite clone in a subsequent 269206c3fb27SDimitry Andric // iteration over this Node's Clones. Continue here since we 269306c3fb27SDimitry Andric // already adjusted iterator EI while moving the edge. 269406c3fb27SDimitry Andric continue; 269506c3fb27SDimitry Andric } 269606c3fb27SDimitry Andric 269706c3fb27SDimitry Andric // Otherwise, we can use the function clone already assigned to this 269806c3fb27SDimitry Andric // caller. 269906c3fb27SDimitry Andric if (!FuncCloneAssignedToCurCallsiteClone) { 270006c3fb27SDimitry Andric FuncCloneAssignedToCurCallsiteClone = FuncCloneCalledByCaller; 270106c3fb27SDimitry Andric // Assign Clone to FuncCloneCalledByCaller 270206c3fb27SDimitry Andric AssignCallsiteCloneToFuncClone( 270306c3fb27SDimitry Andric FuncCloneCalledByCaller, Call, Clone, 270406c3fb27SDimitry Andric AllocationCallToContextNodeMap.count(Call)); 270506c3fb27SDimitry Andric } else 270606c3fb27SDimitry Andric // Don't need to do anything - callsite is already calling this 270706c3fb27SDimitry Andric // function clone. 270806c3fb27SDimitry Andric assert(FuncCloneAssignedToCurCallsiteClone == 270906c3fb27SDimitry Andric FuncCloneCalledByCaller); 271006c3fb27SDimitry Andric 271106c3fb27SDimitry Andric } else { 271206c3fb27SDimitry Andric // We have not already assigned this caller to a version of 271306c3fb27SDimitry Andric // OrigFunc. Do the assignment now. 271406c3fb27SDimitry Andric 271506c3fb27SDimitry Andric // First check if we have already assigned this callsite clone to a 271606c3fb27SDimitry Andric // clone of OrigFunc for another caller during this iteration over 271706c3fb27SDimitry Andric // its caller edges. 271806c3fb27SDimitry Andric if (!FuncCloneAssignedToCurCallsiteClone) { 271906c3fb27SDimitry Andric // Find first function in FuncClonesToCallMap without an assigned 272006c3fb27SDimitry Andric // clone of this callsite Node. We should always have one 272106c3fb27SDimitry Andric // available at this point due to the earlier cloning when the 272206c3fb27SDimitry Andric // FuncClonesToCallMap size was smaller than the clone number. 272306c3fb27SDimitry Andric for (auto &CF : FuncClonesToCallMap) { 272406c3fb27SDimitry Andric if (!FuncCloneToCurNodeCloneMap.count(CF.first)) { 272506c3fb27SDimitry Andric FuncCloneAssignedToCurCallsiteClone = CF.first; 272606c3fb27SDimitry Andric break; 272706c3fb27SDimitry Andric } 272806c3fb27SDimitry Andric } 272906c3fb27SDimitry Andric assert(FuncCloneAssignedToCurCallsiteClone); 273006c3fb27SDimitry Andric // Assign Clone to FuncCloneAssignedToCurCallsiteClone 273106c3fb27SDimitry Andric AssignCallsiteCloneToFuncClone( 273206c3fb27SDimitry Andric FuncCloneAssignedToCurCallsiteClone, Call, Clone, 273306c3fb27SDimitry Andric AllocationCallToContextNodeMap.count(Call)); 273406c3fb27SDimitry Andric } else 273506c3fb27SDimitry Andric assert(FuncCloneToCurNodeCloneMap 273606c3fb27SDimitry Andric [FuncCloneAssignedToCurCallsiteClone] == Clone); 273706c3fb27SDimitry Andric // Update callers to record function version called. 273806c3fb27SDimitry Andric RecordCalleeFuncOfCallsite(Edge->Caller, 273906c3fb27SDimitry Andric FuncCloneAssignedToCurCallsiteClone); 274006c3fb27SDimitry Andric } 274106c3fb27SDimitry Andric 274206c3fb27SDimitry Andric EI++; 274306c3fb27SDimitry Andric } 274406c3fb27SDimitry Andric } 274506c3fb27SDimitry Andric if (VerifyCCG) { 274606c3fb27SDimitry Andric checkNode<DerivedCCG, FuncTy, CallTy>(Node); 274706c3fb27SDimitry Andric for (const auto &PE : Node->CalleeEdges) 274806c3fb27SDimitry Andric checkNode<DerivedCCG, FuncTy, CallTy>(PE->Callee); 274906c3fb27SDimitry Andric for (const auto &CE : Node->CallerEdges) 275006c3fb27SDimitry Andric checkNode<DerivedCCG, FuncTy, CallTy>(CE->Caller); 275106c3fb27SDimitry Andric for (auto *Clone : Node->Clones) { 275206c3fb27SDimitry Andric checkNode<DerivedCCG, FuncTy, CallTy>(Clone); 275306c3fb27SDimitry Andric for (const auto &PE : Clone->CalleeEdges) 275406c3fb27SDimitry Andric checkNode<DerivedCCG, FuncTy, CallTy>(PE->Callee); 275506c3fb27SDimitry Andric for (const auto &CE : Clone->CallerEdges) 275606c3fb27SDimitry Andric checkNode<DerivedCCG, FuncTy, CallTy>(CE->Caller); 275706c3fb27SDimitry Andric } 275806c3fb27SDimitry Andric } 275906c3fb27SDimitry Andric } 276006c3fb27SDimitry Andric } 276106c3fb27SDimitry Andric 276206c3fb27SDimitry Andric auto UpdateCalls = [&](ContextNode *Node, 276306c3fb27SDimitry Andric DenseSet<const ContextNode *> &Visited, 276406c3fb27SDimitry Andric auto &&UpdateCalls) { 276506c3fb27SDimitry Andric auto Inserted = Visited.insert(Node); 276606c3fb27SDimitry Andric if (!Inserted.second) 276706c3fb27SDimitry Andric return; 276806c3fb27SDimitry Andric 276906c3fb27SDimitry Andric for (auto *Clone : Node->Clones) 277006c3fb27SDimitry Andric UpdateCalls(Clone, Visited, UpdateCalls); 277106c3fb27SDimitry Andric 277206c3fb27SDimitry Andric for (auto &Edge : Node->CallerEdges) 277306c3fb27SDimitry Andric UpdateCalls(Edge->Caller, Visited, UpdateCalls); 277406c3fb27SDimitry Andric 277506c3fb27SDimitry Andric // Skip if either no call to update, or if we ended up with no context ids 277606c3fb27SDimitry Andric // (we moved all edges onto other clones). 277706c3fb27SDimitry Andric if (!Node->hasCall() || Node->ContextIds.empty()) 277806c3fb27SDimitry Andric return; 277906c3fb27SDimitry Andric 278006c3fb27SDimitry Andric if (Node->IsAllocation) { 278106c3fb27SDimitry Andric updateAllocationCall(Node->Call, allocTypeToUse(Node->AllocTypes)); 278206c3fb27SDimitry Andric return; 278306c3fb27SDimitry Andric } 278406c3fb27SDimitry Andric 278506c3fb27SDimitry Andric if (!CallsiteToCalleeFuncCloneMap.count(Node)) 278606c3fb27SDimitry Andric return; 278706c3fb27SDimitry Andric 278806c3fb27SDimitry Andric auto CalleeFunc = CallsiteToCalleeFuncCloneMap[Node]; 278906c3fb27SDimitry Andric updateCall(Node->Call, CalleeFunc); 279006c3fb27SDimitry Andric }; 279106c3fb27SDimitry Andric 279206c3fb27SDimitry Andric // Performs DFS traversal starting from allocation nodes to update calls to 279306c3fb27SDimitry Andric // reflect cloning decisions recorded earlier. For regular LTO this will 279406c3fb27SDimitry Andric // update the actual calls in the IR to call the appropriate function clone 279506c3fb27SDimitry Andric // (and add attributes to allocation calls), whereas for ThinLTO the decisions 279606c3fb27SDimitry Andric // are recorded in the summary entries. 279706c3fb27SDimitry Andric DenseSet<const ContextNode *> Visited; 279806c3fb27SDimitry Andric for (auto &Entry : AllocationCallToContextNodeMap) 279906c3fb27SDimitry Andric UpdateCalls(Entry.second, Visited, UpdateCalls); 280006c3fb27SDimitry Andric 280106c3fb27SDimitry Andric return Changed; 280206c3fb27SDimitry Andric } 280306c3fb27SDimitry Andric 280406c3fb27SDimitry Andric static SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> createFunctionClones( 280506c3fb27SDimitry Andric Function &F, unsigned NumClones, Module &M, OptimizationRemarkEmitter &ORE, 280606c3fb27SDimitry Andric std::map<const Function *, SmallPtrSet<const GlobalAlias *, 1>> 280706c3fb27SDimitry Andric &FuncToAliasMap) { 280806c3fb27SDimitry Andric // The first "clone" is the original copy, we should only call this if we 280906c3fb27SDimitry Andric // needed to create new clones. 281006c3fb27SDimitry Andric assert(NumClones > 1); 281106c3fb27SDimitry Andric SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> VMaps; 281206c3fb27SDimitry Andric VMaps.reserve(NumClones - 1); 281306c3fb27SDimitry Andric FunctionsClonedThinBackend++; 281406c3fb27SDimitry Andric for (unsigned I = 1; I < NumClones; I++) { 281506c3fb27SDimitry Andric VMaps.emplace_back(std::make_unique<ValueToValueMapTy>()); 281606c3fb27SDimitry Andric auto *NewF = CloneFunction(&F, *VMaps.back()); 281706c3fb27SDimitry Andric FunctionClonesThinBackend++; 281806c3fb27SDimitry Andric // Strip memprof and callsite metadata from clone as they are no longer 281906c3fb27SDimitry Andric // needed. 282006c3fb27SDimitry Andric for (auto &BB : *NewF) { 282106c3fb27SDimitry Andric for (auto &Inst : BB) { 282206c3fb27SDimitry Andric Inst.setMetadata(LLVMContext::MD_memprof, nullptr); 282306c3fb27SDimitry Andric Inst.setMetadata(LLVMContext::MD_callsite, nullptr); 282406c3fb27SDimitry Andric } 282506c3fb27SDimitry Andric } 282606c3fb27SDimitry Andric std::string Name = getMemProfFuncName(F.getName(), I); 282706c3fb27SDimitry Andric auto *PrevF = M.getFunction(Name); 282806c3fb27SDimitry Andric if (PrevF) { 282906c3fb27SDimitry Andric // We might have created this when adjusting callsite in another 283006c3fb27SDimitry Andric // function. It should be a declaration. 283106c3fb27SDimitry Andric assert(PrevF->isDeclaration()); 283206c3fb27SDimitry Andric NewF->takeName(PrevF); 283306c3fb27SDimitry Andric PrevF->replaceAllUsesWith(NewF); 283406c3fb27SDimitry Andric PrevF->eraseFromParent(); 283506c3fb27SDimitry Andric } else 283606c3fb27SDimitry Andric NewF->setName(Name); 283706c3fb27SDimitry Andric ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofClone", &F) 283806c3fb27SDimitry Andric << "created clone " << ore::NV("NewFunction", NewF)); 283906c3fb27SDimitry Andric 284006c3fb27SDimitry Andric // Now handle aliases to this function, and clone those as well. 284106c3fb27SDimitry Andric if (!FuncToAliasMap.count(&F)) 284206c3fb27SDimitry Andric continue; 284306c3fb27SDimitry Andric for (auto *A : FuncToAliasMap[&F]) { 284406c3fb27SDimitry Andric std::string Name = getMemProfFuncName(A->getName(), I); 284506c3fb27SDimitry Andric auto *PrevA = M.getNamedAlias(Name); 284606c3fb27SDimitry Andric auto *NewA = GlobalAlias::create(A->getValueType(), 284706c3fb27SDimitry Andric A->getType()->getPointerAddressSpace(), 284806c3fb27SDimitry Andric A->getLinkage(), Name, NewF); 284906c3fb27SDimitry Andric NewA->copyAttributesFrom(A); 285006c3fb27SDimitry Andric if (PrevA) { 285106c3fb27SDimitry Andric // We might have created this when adjusting callsite in another 285206c3fb27SDimitry Andric // function. It should be a declaration. 285306c3fb27SDimitry Andric assert(PrevA->isDeclaration()); 285406c3fb27SDimitry Andric NewA->takeName(PrevA); 285506c3fb27SDimitry Andric PrevA->replaceAllUsesWith(NewA); 285606c3fb27SDimitry Andric PrevA->eraseFromParent(); 285706c3fb27SDimitry Andric } 285806c3fb27SDimitry Andric } 285906c3fb27SDimitry Andric } 286006c3fb27SDimitry Andric return VMaps; 286106c3fb27SDimitry Andric } 286206c3fb27SDimitry Andric 286306c3fb27SDimitry Andric // Locate the summary for F. This is complicated by the fact that it might 286406c3fb27SDimitry Andric // have been internalized or promoted. 286506c3fb27SDimitry Andric static ValueInfo findValueInfoForFunc(const Function &F, const Module &M, 286606c3fb27SDimitry Andric const ModuleSummaryIndex *ImportSummary) { 286706c3fb27SDimitry Andric // FIXME: Ideally we would retain the original GUID in some fashion on the 286806c3fb27SDimitry Andric // function (e.g. as metadata), but for now do our best to locate the 286906c3fb27SDimitry Andric // summary without that information. 287006c3fb27SDimitry Andric ValueInfo TheFnVI = ImportSummary->getValueInfo(F.getGUID()); 287106c3fb27SDimitry Andric if (!TheFnVI) 287206c3fb27SDimitry Andric // See if theFn was internalized, by checking index directly with 287306c3fb27SDimitry Andric // original name (this avoids the name adjustment done by getGUID() for 287406c3fb27SDimitry Andric // internal symbols). 287506c3fb27SDimitry Andric TheFnVI = ImportSummary->getValueInfo(GlobalValue::getGUID(F.getName())); 287606c3fb27SDimitry Andric if (TheFnVI) 287706c3fb27SDimitry Andric return TheFnVI; 287806c3fb27SDimitry Andric // Now query with the original name before any promotion was performed. 287906c3fb27SDimitry Andric StringRef OrigName = 288006c3fb27SDimitry Andric ModuleSummaryIndex::getOriginalNameBeforePromote(F.getName()); 288106c3fb27SDimitry Andric std::string OrigId = GlobalValue::getGlobalIdentifier( 288206c3fb27SDimitry Andric OrigName, GlobalValue::InternalLinkage, M.getSourceFileName()); 288306c3fb27SDimitry Andric TheFnVI = ImportSummary->getValueInfo(GlobalValue::getGUID(OrigId)); 288406c3fb27SDimitry Andric if (TheFnVI) 288506c3fb27SDimitry Andric return TheFnVI; 288606c3fb27SDimitry Andric // Could be a promoted local imported from another module. We need to pass 288706c3fb27SDimitry Andric // down more info here to find the original module id. For now, try with 288806c3fb27SDimitry Andric // the OrigName which might have been stored in the OidGuidMap in the 288906c3fb27SDimitry Andric // index. This would not work if there were same-named locals in multiple 289006c3fb27SDimitry Andric // modules, however. 289106c3fb27SDimitry Andric auto OrigGUID = 289206c3fb27SDimitry Andric ImportSummary->getGUIDFromOriginalID(GlobalValue::getGUID(OrigName)); 289306c3fb27SDimitry Andric if (OrigGUID) 289406c3fb27SDimitry Andric TheFnVI = ImportSummary->getValueInfo(OrigGUID); 289506c3fb27SDimitry Andric return TheFnVI; 289606c3fb27SDimitry Andric } 289706c3fb27SDimitry Andric 289806c3fb27SDimitry Andric bool MemProfContextDisambiguation::applyImport(Module &M) { 289906c3fb27SDimitry Andric assert(ImportSummary); 290006c3fb27SDimitry Andric bool Changed = false; 290106c3fb27SDimitry Andric 290206c3fb27SDimitry Andric auto IsMemProfClone = [](const Function &F) { 290306c3fb27SDimitry Andric return F.getName().contains(MemProfCloneSuffix); 290406c3fb27SDimitry Andric }; 290506c3fb27SDimitry Andric 290606c3fb27SDimitry Andric // We also need to clone any aliases that reference cloned functions, because 290706c3fb27SDimitry Andric // the modified callsites may invoke via the alias. Keep track of the aliases 290806c3fb27SDimitry Andric // for each function. 290906c3fb27SDimitry Andric std::map<const Function *, SmallPtrSet<const GlobalAlias *, 1>> 291006c3fb27SDimitry Andric FuncToAliasMap; 291106c3fb27SDimitry Andric for (auto &A : M.aliases()) { 291206c3fb27SDimitry Andric auto *Aliasee = A.getAliaseeObject(); 291306c3fb27SDimitry Andric if (auto *F = dyn_cast<Function>(Aliasee)) 291406c3fb27SDimitry Andric FuncToAliasMap[F].insert(&A); 291506c3fb27SDimitry Andric } 291606c3fb27SDimitry Andric 291706c3fb27SDimitry Andric for (auto &F : M) { 291806c3fb27SDimitry Andric if (F.isDeclaration() || IsMemProfClone(F)) 291906c3fb27SDimitry Andric continue; 292006c3fb27SDimitry Andric 292106c3fb27SDimitry Andric OptimizationRemarkEmitter ORE(&F); 292206c3fb27SDimitry Andric 292306c3fb27SDimitry Andric SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> VMaps; 292406c3fb27SDimitry Andric bool ClonesCreated = false; 292506c3fb27SDimitry Andric unsigned NumClonesCreated = 0; 292606c3fb27SDimitry Andric auto CloneFuncIfNeeded = [&](unsigned NumClones) { 292706c3fb27SDimitry Andric // We should at least have version 0 which is the original copy. 292806c3fb27SDimitry Andric assert(NumClones > 0); 292906c3fb27SDimitry Andric // If only one copy needed use original. 293006c3fb27SDimitry Andric if (NumClones == 1) 293106c3fb27SDimitry Andric return; 293206c3fb27SDimitry Andric // If we already performed cloning of this function, confirm that the 293306c3fb27SDimitry Andric // requested number of clones matches (the thin link should ensure the 293406c3fb27SDimitry Andric // number of clones for each constituent callsite is consistent within 293506c3fb27SDimitry Andric // each function), before returning. 293606c3fb27SDimitry Andric if (ClonesCreated) { 293706c3fb27SDimitry Andric assert(NumClonesCreated == NumClones); 293806c3fb27SDimitry Andric return; 293906c3fb27SDimitry Andric } 294006c3fb27SDimitry Andric VMaps = createFunctionClones(F, NumClones, M, ORE, FuncToAliasMap); 294106c3fb27SDimitry Andric // The first "clone" is the original copy, which doesn't have a VMap. 294206c3fb27SDimitry Andric assert(VMaps.size() == NumClones - 1); 294306c3fb27SDimitry Andric Changed = true; 294406c3fb27SDimitry Andric ClonesCreated = true; 294506c3fb27SDimitry Andric NumClonesCreated = NumClones; 294606c3fb27SDimitry Andric }; 294706c3fb27SDimitry Andric 294806c3fb27SDimitry Andric // Locate the summary for F. 294906c3fb27SDimitry Andric ValueInfo TheFnVI = findValueInfoForFunc(F, M, ImportSummary); 295006c3fb27SDimitry Andric // If not found, this could be an imported local (see comment in 295106c3fb27SDimitry Andric // findValueInfoForFunc). Skip for now as it will be cloned in its original 295206c3fb27SDimitry Andric // module (where it would have been promoted to global scope so should 295306c3fb27SDimitry Andric // satisfy any reference in this module). 295406c3fb27SDimitry Andric if (!TheFnVI) 295506c3fb27SDimitry Andric continue; 295606c3fb27SDimitry Andric 295706c3fb27SDimitry Andric auto *GVSummary = 295806c3fb27SDimitry Andric ImportSummary->findSummaryInModule(TheFnVI, M.getModuleIdentifier()); 295906c3fb27SDimitry Andric if (!GVSummary) 296006c3fb27SDimitry Andric // Must have been imported, use the first summary (might be multiple if 296106c3fb27SDimitry Andric // this was a linkonce_odr). 296206c3fb27SDimitry Andric GVSummary = TheFnVI.getSummaryList().front().get(); 296306c3fb27SDimitry Andric 296406c3fb27SDimitry Andric // If this was an imported alias skip it as we won't have the function 296506c3fb27SDimitry Andric // summary, and it should be cloned in the original module. 296606c3fb27SDimitry Andric if (isa<AliasSummary>(GVSummary)) 296706c3fb27SDimitry Andric continue; 296806c3fb27SDimitry Andric 296906c3fb27SDimitry Andric auto *FS = cast<FunctionSummary>(GVSummary->getBaseObject()); 297006c3fb27SDimitry Andric 297106c3fb27SDimitry Andric if (FS->allocs().empty() && FS->callsites().empty()) 297206c3fb27SDimitry Andric continue; 297306c3fb27SDimitry Andric 297406c3fb27SDimitry Andric auto SI = FS->callsites().begin(); 297506c3fb27SDimitry Andric auto AI = FS->allocs().begin(); 297606c3fb27SDimitry Andric 297706c3fb27SDimitry Andric // Assume for now that the instructions are in the exact same order 297806c3fb27SDimitry Andric // as when the summary was created, but confirm this is correct by 297906c3fb27SDimitry Andric // matching the stack ids. 298006c3fb27SDimitry Andric for (auto &BB : F) { 298106c3fb27SDimitry Andric for (auto &I : BB) { 298206c3fb27SDimitry Andric auto *CB = dyn_cast<CallBase>(&I); 298306c3fb27SDimitry Andric // Same handling as when creating module summary. 298406c3fb27SDimitry Andric if (!mayHaveMemprofSummary(CB)) 298506c3fb27SDimitry Andric continue; 298606c3fb27SDimitry Andric 2987*5f757f3fSDimitry Andric auto *CalledValue = CB->getCalledOperand(); 2988*5f757f3fSDimitry Andric auto *CalledFunction = CB->getCalledFunction(); 2989*5f757f3fSDimitry Andric if (CalledValue && !CalledFunction) { 2990*5f757f3fSDimitry Andric CalledValue = CalledValue->stripPointerCasts(); 2991*5f757f3fSDimitry Andric // Stripping pointer casts can reveal a called function. 2992*5f757f3fSDimitry Andric CalledFunction = dyn_cast<Function>(CalledValue); 2993*5f757f3fSDimitry Andric } 2994*5f757f3fSDimitry Andric // Check if this is an alias to a function. If so, get the 2995*5f757f3fSDimitry Andric // called aliasee for the checks below. 2996*5f757f3fSDimitry Andric if (auto *GA = dyn_cast<GlobalAlias>(CalledValue)) { 2997*5f757f3fSDimitry Andric assert(!CalledFunction && 2998*5f757f3fSDimitry Andric "Expected null called function in callsite for alias"); 2999*5f757f3fSDimitry Andric CalledFunction = dyn_cast<Function>(GA->getAliaseeObject()); 3000*5f757f3fSDimitry Andric } 3001*5f757f3fSDimitry Andric 300206c3fb27SDimitry Andric CallStack<MDNode, MDNode::op_iterator> CallsiteContext( 300306c3fb27SDimitry Andric I.getMetadata(LLVMContext::MD_callsite)); 300406c3fb27SDimitry Andric auto *MemProfMD = I.getMetadata(LLVMContext::MD_memprof); 300506c3fb27SDimitry Andric 300606c3fb27SDimitry Andric // Include allocs that were already assigned a memprof function 300706c3fb27SDimitry Andric // attribute in the statistics. 300806c3fb27SDimitry Andric if (CB->getAttributes().hasFnAttr("memprof")) { 300906c3fb27SDimitry Andric assert(!MemProfMD); 301006c3fb27SDimitry Andric CB->getAttributes().getFnAttr("memprof").getValueAsString() == "cold" 301106c3fb27SDimitry Andric ? AllocTypeColdThinBackend++ 301206c3fb27SDimitry Andric : AllocTypeNotColdThinBackend++; 301306c3fb27SDimitry Andric OrigAllocsThinBackend++; 301406c3fb27SDimitry Andric AllocVersionsThinBackend++; 301506c3fb27SDimitry Andric if (!MaxAllocVersionsThinBackend) 301606c3fb27SDimitry Andric MaxAllocVersionsThinBackend = 1; 301706c3fb27SDimitry Andric // Remove any remaining callsite metadata and we can skip the rest of 301806c3fb27SDimitry Andric // the handling for this instruction, since no cloning needed. 301906c3fb27SDimitry Andric I.setMetadata(LLVMContext::MD_callsite, nullptr); 302006c3fb27SDimitry Andric continue; 302106c3fb27SDimitry Andric } 302206c3fb27SDimitry Andric 302306c3fb27SDimitry Andric if (MemProfMD) { 302406c3fb27SDimitry Andric // Consult the next alloc node. 302506c3fb27SDimitry Andric assert(AI != FS->allocs().end()); 302606c3fb27SDimitry Andric auto &AllocNode = *(AI++); 302706c3fb27SDimitry Andric 302806c3fb27SDimitry Andric // Sanity check that the MIB stack ids match between the summary and 302906c3fb27SDimitry Andric // instruction metadata. 303006c3fb27SDimitry Andric auto MIBIter = AllocNode.MIBs.begin(); 303106c3fb27SDimitry Andric for (auto &MDOp : MemProfMD->operands()) { 303206c3fb27SDimitry Andric assert(MIBIter != AllocNode.MIBs.end()); 303306c3fb27SDimitry Andric LLVM_ATTRIBUTE_UNUSED auto StackIdIndexIter = 303406c3fb27SDimitry Andric MIBIter->StackIdIndices.begin(); 303506c3fb27SDimitry Andric auto *MIBMD = cast<const MDNode>(MDOp); 303606c3fb27SDimitry Andric MDNode *StackMDNode = getMIBStackNode(MIBMD); 303706c3fb27SDimitry Andric assert(StackMDNode); 303806c3fb27SDimitry Andric SmallVector<unsigned> StackIdsFromMetadata; 303906c3fb27SDimitry Andric CallStack<MDNode, MDNode::op_iterator> StackContext(StackMDNode); 304006c3fb27SDimitry Andric for (auto ContextIter = 304106c3fb27SDimitry Andric StackContext.beginAfterSharedPrefix(CallsiteContext); 304206c3fb27SDimitry Andric ContextIter != StackContext.end(); ++ContextIter) { 304306c3fb27SDimitry Andric // If this is a direct recursion, simply skip the duplicate 304406c3fb27SDimitry Andric // entries, to be consistent with how the summary ids were 304506c3fb27SDimitry Andric // generated during ModuleSummaryAnalysis. 304606c3fb27SDimitry Andric if (!StackIdsFromMetadata.empty() && 304706c3fb27SDimitry Andric StackIdsFromMetadata.back() == *ContextIter) 304806c3fb27SDimitry Andric continue; 304906c3fb27SDimitry Andric assert(StackIdIndexIter != MIBIter->StackIdIndices.end()); 305006c3fb27SDimitry Andric assert(ImportSummary->getStackIdAtIndex(*StackIdIndexIter) == 305106c3fb27SDimitry Andric *ContextIter); 305206c3fb27SDimitry Andric StackIdIndexIter++; 305306c3fb27SDimitry Andric } 305406c3fb27SDimitry Andric MIBIter++; 305506c3fb27SDimitry Andric } 305606c3fb27SDimitry Andric 305706c3fb27SDimitry Andric // Perform cloning if not yet done. 305806c3fb27SDimitry Andric CloneFuncIfNeeded(/*NumClones=*/AllocNode.Versions.size()); 305906c3fb27SDimitry Andric 306006c3fb27SDimitry Andric OrigAllocsThinBackend++; 306106c3fb27SDimitry Andric AllocVersionsThinBackend += AllocNode.Versions.size(); 306206c3fb27SDimitry Andric if (MaxAllocVersionsThinBackend < AllocNode.Versions.size()) 306306c3fb27SDimitry Andric MaxAllocVersionsThinBackend = AllocNode.Versions.size(); 306406c3fb27SDimitry Andric 306506c3fb27SDimitry Andric // If there is only one version that means we didn't end up 306606c3fb27SDimitry Andric // considering this function for cloning, and in that case the alloc 306706c3fb27SDimitry Andric // will still be none type or should have gotten the default NotCold. 306806c3fb27SDimitry Andric // Skip that after calling clone helper since that does some sanity 306906c3fb27SDimitry Andric // checks that confirm we haven't decided yet that we need cloning. 307006c3fb27SDimitry Andric if (AllocNode.Versions.size() == 1) { 307106c3fb27SDimitry Andric assert((AllocationType)AllocNode.Versions[0] == 307206c3fb27SDimitry Andric AllocationType::NotCold || 307306c3fb27SDimitry Andric (AllocationType)AllocNode.Versions[0] == 307406c3fb27SDimitry Andric AllocationType::None); 307506c3fb27SDimitry Andric UnclonableAllocsThinBackend++; 307606c3fb27SDimitry Andric continue; 307706c3fb27SDimitry Andric } 307806c3fb27SDimitry Andric 307906c3fb27SDimitry Andric // All versions should have a singular allocation type. 308006c3fb27SDimitry Andric assert(llvm::none_of(AllocNode.Versions, [](uint8_t Type) { 308106c3fb27SDimitry Andric return Type == ((uint8_t)AllocationType::NotCold | 308206c3fb27SDimitry Andric (uint8_t)AllocationType::Cold); 308306c3fb27SDimitry Andric })); 308406c3fb27SDimitry Andric 308506c3fb27SDimitry Andric // Update the allocation types per the summary info. 308606c3fb27SDimitry Andric for (unsigned J = 0; J < AllocNode.Versions.size(); J++) { 308706c3fb27SDimitry Andric // Ignore any that didn't get an assigned allocation type. 308806c3fb27SDimitry Andric if (AllocNode.Versions[J] == (uint8_t)AllocationType::None) 308906c3fb27SDimitry Andric continue; 309006c3fb27SDimitry Andric AllocationType AllocTy = (AllocationType)AllocNode.Versions[J]; 309106c3fb27SDimitry Andric AllocTy == AllocationType::Cold ? AllocTypeColdThinBackend++ 309206c3fb27SDimitry Andric : AllocTypeNotColdThinBackend++; 309306c3fb27SDimitry Andric std::string AllocTypeString = getAllocTypeAttributeString(AllocTy); 309406c3fb27SDimitry Andric auto A = llvm::Attribute::get(F.getContext(), "memprof", 309506c3fb27SDimitry Andric AllocTypeString); 309606c3fb27SDimitry Andric CallBase *CBClone; 309706c3fb27SDimitry Andric // Copy 0 is the original function. 309806c3fb27SDimitry Andric if (!J) 309906c3fb27SDimitry Andric CBClone = CB; 310006c3fb27SDimitry Andric else 310106c3fb27SDimitry Andric // Since VMaps are only created for new clones, we index with 310206c3fb27SDimitry Andric // clone J-1 (J==0 is the original clone and does not have a VMaps 310306c3fb27SDimitry Andric // entry). 310406c3fb27SDimitry Andric CBClone = cast<CallBase>((*VMaps[J - 1])[CB]); 310506c3fb27SDimitry Andric CBClone->addFnAttr(A); 310606c3fb27SDimitry Andric ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofAttribute", CBClone) 310706c3fb27SDimitry Andric << ore::NV("AllocationCall", CBClone) << " in clone " 310806c3fb27SDimitry Andric << ore::NV("Caller", CBClone->getFunction()) 310906c3fb27SDimitry Andric << " marked with memprof allocation attribute " 311006c3fb27SDimitry Andric << ore::NV("Attribute", AllocTypeString)); 311106c3fb27SDimitry Andric } 311206c3fb27SDimitry Andric } else if (!CallsiteContext.empty()) { 311306c3fb27SDimitry Andric // Consult the next callsite node. 311406c3fb27SDimitry Andric assert(SI != FS->callsites().end()); 311506c3fb27SDimitry Andric auto &StackNode = *(SI++); 311606c3fb27SDimitry Andric 311706c3fb27SDimitry Andric #ifndef NDEBUG 311806c3fb27SDimitry Andric // Sanity check that the stack ids match between the summary and 311906c3fb27SDimitry Andric // instruction metadata. 312006c3fb27SDimitry Andric auto StackIdIndexIter = StackNode.StackIdIndices.begin(); 312106c3fb27SDimitry Andric for (auto StackId : CallsiteContext) { 312206c3fb27SDimitry Andric assert(StackIdIndexIter != StackNode.StackIdIndices.end()); 312306c3fb27SDimitry Andric assert(ImportSummary->getStackIdAtIndex(*StackIdIndexIter) == 312406c3fb27SDimitry Andric StackId); 312506c3fb27SDimitry Andric StackIdIndexIter++; 312606c3fb27SDimitry Andric } 312706c3fb27SDimitry Andric #endif 312806c3fb27SDimitry Andric 312906c3fb27SDimitry Andric // Perform cloning if not yet done. 313006c3fb27SDimitry Andric CloneFuncIfNeeded(/*NumClones=*/StackNode.Clones.size()); 313106c3fb27SDimitry Andric 313206c3fb27SDimitry Andric // Should have skipped indirect calls via mayHaveMemprofSummary. 3133*5f757f3fSDimitry Andric assert(CalledFunction); 3134*5f757f3fSDimitry Andric assert(!IsMemProfClone(*CalledFunction)); 313506c3fb27SDimitry Andric 313606c3fb27SDimitry Andric // Update the calls per the summary info. 313706c3fb27SDimitry Andric // Save orig name since it gets updated in the first iteration 313806c3fb27SDimitry Andric // below. 3139*5f757f3fSDimitry Andric auto CalleeOrigName = CalledFunction->getName(); 314006c3fb27SDimitry Andric for (unsigned J = 0; J < StackNode.Clones.size(); J++) { 314106c3fb27SDimitry Andric // Do nothing if this version calls the original version of its 314206c3fb27SDimitry Andric // callee. 314306c3fb27SDimitry Andric if (!StackNode.Clones[J]) 314406c3fb27SDimitry Andric continue; 314506c3fb27SDimitry Andric auto NewF = M.getOrInsertFunction( 314606c3fb27SDimitry Andric getMemProfFuncName(CalleeOrigName, StackNode.Clones[J]), 3147*5f757f3fSDimitry Andric CalledFunction->getFunctionType()); 314806c3fb27SDimitry Andric CallBase *CBClone; 314906c3fb27SDimitry Andric // Copy 0 is the original function. 315006c3fb27SDimitry Andric if (!J) 315106c3fb27SDimitry Andric CBClone = CB; 315206c3fb27SDimitry Andric else 315306c3fb27SDimitry Andric CBClone = cast<CallBase>((*VMaps[J - 1])[CB]); 315406c3fb27SDimitry Andric CBClone->setCalledFunction(NewF); 315506c3fb27SDimitry Andric ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofCall", CBClone) 315606c3fb27SDimitry Andric << ore::NV("Call", CBClone) << " in clone " 315706c3fb27SDimitry Andric << ore::NV("Caller", CBClone->getFunction()) 315806c3fb27SDimitry Andric << " assigned to call function clone " 315906c3fb27SDimitry Andric << ore::NV("Callee", NewF.getCallee())); 316006c3fb27SDimitry Andric } 316106c3fb27SDimitry Andric } 316206c3fb27SDimitry Andric // Memprof and callsite metadata on memory allocations no longer needed. 316306c3fb27SDimitry Andric I.setMetadata(LLVMContext::MD_memprof, nullptr); 316406c3fb27SDimitry Andric I.setMetadata(LLVMContext::MD_callsite, nullptr); 316506c3fb27SDimitry Andric } 316606c3fb27SDimitry Andric } 316706c3fb27SDimitry Andric } 316806c3fb27SDimitry Andric 316906c3fb27SDimitry Andric return Changed; 317006c3fb27SDimitry Andric } 317106c3fb27SDimitry Andric 317206c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 317306c3fb27SDimitry Andric bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::process() { 317406c3fb27SDimitry Andric if (DumpCCG) { 317506c3fb27SDimitry Andric dbgs() << "CCG before cloning:\n"; 317606c3fb27SDimitry Andric dbgs() << *this; 317706c3fb27SDimitry Andric } 317806c3fb27SDimitry Andric if (ExportToDot) 317906c3fb27SDimitry Andric exportToDot("postbuild"); 318006c3fb27SDimitry Andric 318106c3fb27SDimitry Andric if (VerifyCCG) { 318206c3fb27SDimitry Andric check(); 318306c3fb27SDimitry Andric } 318406c3fb27SDimitry Andric 318506c3fb27SDimitry Andric identifyClones(); 318606c3fb27SDimitry Andric 318706c3fb27SDimitry Andric if (VerifyCCG) { 318806c3fb27SDimitry Andric check(); 318906c3fb27SDimitry Andric } 319006c3fb27SDimitry Andric 319106c3fb27SDimitry Andric if (DumpCCG) { 319206c3fb27SDimitry Andric dbgs() << "CCG after cloning:\n"; 319306c3fb27SDimitry Andric dbgs() << *this; 319406c3fb27SDimitry Andric } 319506c3fb27SDimitry Andric if (ExportToDot) 319606c3fb27SDimitry Andric exportToDot("cloned"); 319706c3fb27SDimitry Andric 319806c3fb27SDimitry Andric bool Changed = assignFunctions(); 319906c3fb27SDimitry Andric 320006c3fb27SDimitry Andric if (DumpCCG) { 320106c3fb27SDimitry Andric dbgs() << "CCG after assigning function clones:\n"; 320206c3fb27SDimitry Andric dbgs() << *this; 320306c3fb27SDimitry Andric } 320406c3fb27SDimitry Andric if (ExportToDot) 320506c3fb27SDimitry Andric exportToDot("clonefuncassign"); 320606c3fb27SDimitry Andric 320706c3fb27SDimitry Andric return Changed; 320806c3fb27SDimitry Andric } 320906c3fb27SDimitry Andric 321006c3fb27SDimitry Andric bool MemProfContextDisambiguation::processModule( 321106c3fb27SDimitry Andric Module &M, 321206c3fb27SDimitry Andric function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter) { 321306c3fb27SDimitry Andric 321406c3fb27SDimitry Andric // If we have an import summary, then the cloning decisions were made during 321506c3fb27SDimitry Andric // the thin link on the index. Apply them and return. 321606c3fb27SDimitry Andric if (ImportSummary) 321706c3fb27SDimitry Andric return applyImport(M); 321806c3fb27SDimitry Andric 321906c3fb27SDimitry Andric // TODO: If/when other types of memprof cloning are enabled beyond just for 322006c3fb27SDimitry Andric // hot and cold, we will need to change this to individually control the 322106c3fb27SDimitry Andric // AllocationType passed to addStackNodesForMIB during CCG construction. 322206c3fb27SDimitry Andric // Note that we specifically check this after applying imports above, so that 322306c3fb27SDimitry Andric // the option isn't needed to be passed to distributed ThinLTO backend 322406c3fb27SDimitry Andric // clang processes, which won't necessarily have visibility into the linker 322506c3fb27SDimitry Andric // dependences. Instead the information is communicated from the LTO link to 322606c3fb27SDimitry Andric // the backends via the combined summary index. 322706c3fb27SDimitry Andric if (!SupportsHotColdNew) 322806c3fb27SDimitry Andric return false; 322906c3fb27SDimitry Andric 323006c3fb27SDimitry Andric ModuleCallsiteContextGraph CCG(M, OREGetter); 323106c3fb27SDimitry Andric return CCG.process(); 323206c3fb27SDimitry Andric } 323306c3fb27SDimitry Andric 323406c3fb27SDimitry Andric MemProfContextDisambiguation::MemProfContextDisambiguation( 323506c3fb27SDimitry Andric const ModuleSummaryIndex *Summary) 323606c3fb27SDimitry Andric : ImportSummary(Summary) { 323706c3fb27SDimitry Andric if (ImportSummary) { 323806c3fb27SDimitry Andric // The MemProfImportSummary should only be used for testing ThinLTO 323906c3fb27SDimitry Andric // distributed backend handling via opt, in which case we don't have a 324006c3fb27SDimitry Andric // summary from the pass pipeline. 324106c3fb27SDimitry Andric assert(MemProfImportSummary.empty()); 324206c3fb27SDimitry Andric return; 324306c3fb27SDimitry Andric } 324406c3fb27SDimitry Andric if (MemProfImportSummary.empty()) 324506c3fb27SDimitry Andric return; 324606c3fb27SDimitry Andric 324706c3fb27SDimitry Andric auto ReadSummaryFile = 324806c3fb27SDimitry Andric errorOrToExpected(MemoryBuffer::getFile(MemProfImportSummary)); 324906c3fb27SDimitry Andric if (!ReadSummaryFile) { 325006c3fb27SDimitry Andric logAllUnhandledErrors(ReadSummaryFile.takeError(), errs(), 325106c3fb27SDimitry Andric "Error loading file '" + MemProfImportSummary + 325206c3fb27SDimitry Andric "': "); 325306c3fb27SDimitry Andric return; 325406c3fb27SDimitry Andric } 325506c3fb27SDimitry Andric auto ImportSummaryForTestingOrErr = getModuleSummaryIndex(**ReadSummaryFile); 325606c3fb27SDimitry Andric if (!ImportSummaryForTestingOrErr) { 325706c3fb27SDimitry Andric logAllUnhandledErrors(ImportSummaryForTestingOrErr.takeError(), errs(), 325806c3fb27SDimitry Andric "Error parsing file '" + MemProfImportSummary + 325906c3fb27SDimitry Andric "': "); 326006c3fb27SDimitry Andric return; 326106c3fb27SDimitry Andric } 326206c3fb27SDimitry Andric ImportSummaryForTesting = std::move(*ImportSummaryForTestingOrErr); 326306c3fb27SDimitry Andric ImportSummary = ImportSummaryForTesting.get(); 326406c3fb27SDimitry Andric } 326506c3fb27SDimitry Andric 326606c3fb27SDimitry Andric PreservedAnalyses MemProfContextDisambiguation::run(Module &M, 326706c3fb27SDimitry Andric ModuleAnalysisManager &AM) { 326806c3fb27SDimitry Andric auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 326906c3fb27SDimitry Andric auto OREGetter = [&](Function *F) -> OptimizationRemarkEmitter & { 327006c3fb27SDimitry Andric return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*F); 327106c3fb27SDimitry Andric }; 327206c3fb27SDimitry Andric if (!processModule(M, OREGetter)) 327306c3fb27SDimitry Andric return PreservedAnalyses::all(); 327406c3fb27SDimitry Andric return PreservedAnalyses::none(); 327506c3fb27SDimitry Andric } 327606c3fb27SDimitry Andric 327706c3fb27SDimitry Andric void MemProfContextDisambiguation::run( 327806c3fb27SDimitry Andric ModuleSummaryIndex &Index, 327906c3fb27SDimitry Andric function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)> 328006c3fb27SDimitry Andric isPrevailing) { 328106c3fb27SDimitry Andric // TODO: If/when other types of memprof cloning are enabled beyond just for 328206c3fb27SDimitry Andric // hot and cold, we will need to change this to individually control the 328306c3fb27SDimitry Andric // AllocationType passed to addStackNodesForMIB during CCG construction. 328406c3fb27SDimitry Andric // The index was set from the option, so these should be in sync. 328506c3fb27SDimitry Andric assert(Index.withSupportsHotColdNew() == SupportsHotColdNew); 328606c3fb27SDimitry Andric if (!SupportsHotColdNew) 328706c3fb27SDimitry Andric return; 328806c3fb27SDimitry Andric 328906c3fb27SDimitry Andric IndexCallsiteContextGraph CCG(Index, isPrevailing); 329006c3fb27SDimitry Andric CCG.process(); 329106c3fb27SDimitry Andric } 3292