106c3fb27SDimitry Andric //==-- MemProfContextDisambiguation.cpp - Disambiguate contexts -------------=// 206c3fb27SDimitry Andric // 306c3fb27SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 406c3fb27SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 506c3fb27SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 606c3fb27SDimitry Andric // 706c3fb27SDimitry Andric //===----------------------------------------------------------------------===// 806c3fb27SDimitry Andric // 906c3fb27SDimitry Andric // This file implements support for context disambiguation of allocation 1006c3fb27SDimitry Andric // calls for profile guided heap optimization. Specifically, it uses Memprof 1106c3fb27SDimitry Andric // profiles which indicate context specific allocation behavior (currently 1206c3fb27SDimitry Andric // distinguishing cold vs hot memory allocations). Cloning is performed to 1306c3fb27SDimitry Andric // expose the cold allocation call contexts, and the allocation calls are 1406c3fb27SDimitry Andric // subsequently annotated with an attribute for later transformation. 1506c3fb27SDimitry Andric // 1606c3fb27SDimitry Andric // The transformations can be performed either directly on IR (regular LTO), or 1706c3fb27SDimitry Andric // on a ThinLTO index (and later applied to the IR during the ThinLTO backend). 1806c3fb27SDimitry Andric // Both types of LTO operate on a the same base graph representation, which 1906c3fb27SDimitry Andric // uses CRTP to support either IR or Index formats. 2006c3fb27SDimitry Andric // 2106c3fb27SDimitry Andric //===----------------------------------------------------------------------===// 2206c3fb27SDimitry Andric 2306c3fb27SDimitry Andric #include "llvm/Transforms/IPO/MemProfContextDisambiguation.h" 2406c3fb27SDimitry Andric #include "llvm/ADT/DenseMap.h" 2506c3fb27SDimitry Andric #include "llvm/ADT/DenseSet.h" 2606c3fb27SDimitry Andric #include "llvm/ADT/MapVector.h" 2706c3fb27SDimitry Andric #include "llvm/ADT/SetOperations.h" 2806c3fb27SDimitry Andric #include "llvm/ADT/SmallPtrSet.h" 2906c3fb27SDimitry Andric #include "llvm/ADT/SmallSet.h" 3006c3fb27SDimitry Andric #include "llvm/ADT/SmallVector.h" 3106c3fb27SDimitry Andric #include "llvm/ADT/Statistic.h" 3206c3fb27SDimitry Andric #include "llvm/Analysis/MemoryProfileInfo.h" 3306c3fb27SDimitry Andric #include "llvm/Analysis/ModuleSummaryAnalysis.h" 3406c3fb27SDimitry Andric #include "llvm/Analysis/OptimizationRemarkEmitter.h" 3506c3fb27SDimitry Andric #include "llvm/Bitcode/BitcodeReader.h" 3606c3fb27SDimitry Andric #include "llvm/IR/Constants.h" 3706c3fb27SDimitry Andric #include "llvm/IR/Instructions.h" 3806c3fb27SDimitry Andric #include "llvm/IR/Module.h" 3906c3fb27SDimitry Andric #include "llvm/IR/ModuleSummaryIndex.h" 4006c3fb27SDimitry Andric #include "llvm/Pass.h" 4106c3fb27SDimitry Andric #include "llvm/Support/CommandLine.h" 4206c3fb27SDimitry Andric #include "llvm/Support/FileSystem.h" 4306c3fb27SDimitry Andric #include "llvm/Support/GraphWriter.h" 4406c3fb27SDimitry Andric #include "llvm/Support/raw_ostream.h" 4506c3fb27SDimitry Andric #include "llvm/Transforms/IPO.h" 4606c3fb27SDimitry Andric #include "llvm/Transforms/Utils/Cloning.h" 47*0fca6ea1SDimitry Andric #include <deque> 4806c3fb27SDimitry Andric #include <sstream> 497a6dacacSDimitry Andric #include <unordered_map> 5006c3fb27SDimitry Andric #include <vector> 5106c3fb27SDimitry Andric using namespace llvm; 5206c3fb27SDimitry Andric using namespace llvm::memprof; 5306c3fb27SDimitry Andric 5406c3fb27SDimitry Andric #define DEBUG_TYPE "memprof-context-disambiguation" 5506c3fb27SDimitry Andric 5606c3fb27SDimitry Andric STATISTIC(FunctionClonesAnalysis, 5706c3fb27SDimitry Andric "Number of function clones created during whole program analysis"); 5806c3fb27SDimitry Andric STATISTIC(FunctionClonesThinBackend, 5906c3fb27SDimitry Andric "Number of function clones created during ThinLTO backend"); 6006c3fb27SDimitry Andric STATISTIC(FunctionsClonedThinBackend, 6106c3fb27SDimitry Andric "Number of functions that had clones created during ThinLTO backend"); 6206c3fb27SDimitry Andric STATISTIC(AllocTypeNotCold, "Number of not cold static allocations (possibly " 6306c3fb27SDimitry Andric "cloned) during whole program analysis"); 6406c3fb27SDimitry Andric STATISTIC(AllocTypeCold, "Number of cold static allocations (possibly cloned) " 6506c3fb27SDimitry Andric "during whole program analysis"); 6606c3fb27SDimitry Andric STATISTIC(AllocTypeNotColdThinBackend, 6706c3fb27SDimitry Andric "Number of not cold static allocations (possibly cloned) during " 6806c3fb27SDimitry Andric "ThinLTO backend"); 6906c3fb27SDimitry Andric STATISTIC(AllocTypeColdThinBackend, "Number of cold static allocations " 7006c3fb27SDimitry Andric "(possibly cloned) during ThinLTO backend"); 7106c3fb27SDimitry Andric STATISTIC(OrigAllocsThinBackend, 7206c3fb27SDimitry Andric "Number of original (not cloned) allocations with memprof profiles " 7306c3fb27SDimitry Andric "during ThinLTO backend"); 7406c3fb27SDimitry Andric STATISTIC( 7506c3fb27SDimitry Andric AllocVersionsThinBackend, 7606c3fb27SDimitry Andric "Number of allocation versions (including clones) during ThinLTO backend"); 7706c3fb27SDimitry Andric STATISTIC(MaxAllocVersionsThinBackend, 7806c3fb27SDimitry Andric "Maximum number of allocation versions created for an original " 7906c3fb27SDimitry Andric "allocation during ThinLTO backend"); 8006c3fb27SDimitry Andric STATISTIC(UnclonableAllocsThinBackend, 8106c3fb27SDimitry Andric "Number of unclonable ambigous allocations during ThinLTO backend"); 82297eecfbSDimitry Andric STATISTIC(RemovedEdgesWithMismatchedCallees, 83297eecfbSDimitry Andric "Number of edges removed due to mismatched callees (profiled vs IR)"); 84297eecfbSDimitry Andric STATISTIC(FoundProfiledCalleeCount, 85297eecfbSDimitry Andric "Number of profiled callees found via tail calls"); 86297eecfbSDimitry Andric STATISTIC(FoundProfiledCalleeDepth, 87297eecfbSDimitry Andric "Aggregate depth of profiled callees found via tail calls"); 88297eecfbSDimitry Andric STATISTIC(FoundProfiledCalleeMaxDepth, 89297eecfbSDimitry Andric "Maximum depth of profiled callees found via tail calls"); 90297eecfbSDimitry Andric STATISTIC(FoundProfiledCalleeNonUniquelyCount, 91297eecfbSDimitry Andric "Number of profiled callees found via multiple tail call chains"); 9206c3fb27SDimitry Andric 9306c3fb27SDimitry Andric static cl::opt<std::string> DotFilePathPrefix( 9406c3fb27SDimitry Andric "memprof-dot-file-path-prefix", cl::init(""), cl::Hidden, 9506c3fb27SDimitry Andric cl::value_desc("filename"), 9606c3fb27SDimitry Andric cl::desc("Specify the path prefix of the MemProf dot files.")); 9706c3fb27SDimitry Andric 9806c3fb27SDimitry Andric static cl::opt<bool> ExportToDot("memprof-export-to-dot", cl::init(false), 9906c3fb27SDimitry Andric cl::Hidden, 10006c3fb27SDimitry Andric cl::desc("Export graph to dot files.")); 10106c3fb27SDimitry Andric 10206c3fb27SDimitry Andric static cl::opt<bool> 10306c3fb27SDimitry Andric DumpCCG("memprof-dump-ccg", cl::init(false), cl::Hidden, 10406c3fb27SDimitry Andric cl::desc("Dump CallingContextGraph to stdout after each stage.")); 10506c3fb27SDimitry Andric 10606c3fb27SDimitry Andric static cl::opt<bool> 10706c3fb27SDimitry Andric VerifyCCG("memprof-verify-ccg", cl::init(false), cl::Hidden, 10806c3fb27SDimitry Andric cl::desc("Perform verification checks on CallingContextGraph.")); 10906c3fb27SDimitry Andric 11006c3fb27SDimitry Andric static cl::opt<bool> 11106c3fb27SDimitry Andric VerifyNodes("memprof-verify-nodes", cl::init(false), cl::Hidden, 11206c3fb27SDimitry Andric cl::desc("Perform frequent verification checks on nodes.")); 11306c3fb27SDimitry Andric 11406c3fb27SDimitry Andric static cl::opt<std::string> MemProfImportSummary( 11506c3fb27SDimitry Andric "memprof-import-summary", 11606c3fb27SDimitry Andric cl::desc("Import summary to use for testing the ThinLTO backend via opt"), 11706c3fb27SDimitry Andric cl::Hidden); 11806c3fb27SDimitry Andric 119297eecfbSDimitry Andric static cl::opt<unsigned> 120297eecfbSDimitry Andric TailCallSearchDepth("memprof-tail-call-search-depth", cl::init(5), 121297eecfbSDimitry Andric cl::Hidden, 122297eecfbSDimitry Andric cl::desc("Max depth to recursively search for missing " 123297eecfbSDimitry Andric "frames through tail calls.")); 124297eecfbSDimitry Andric 1255f757f3fSDimitry Andric namespace llvm { 126*0fca6ea1SDimitry Andric cl::opt<bool> EnableMemProfContextDisambiguation( 127*0fca6ea1SDimitry Andric "enable-memprof-context-disambiguation", cl::init(false), cl::Hidden, 128*0fca6ea1SDimitry Andric cl::ZeroOrMore, cl::desc("Enable MemProf context disambiguation")); 129*0fca6ea1SDimitry Andric 13006c3fb27SDimitry Andric // Indicate we are linking with an allocator that supports hot/cold operator 13106c3fb27SDimitry Andric // new interfaces. 13206c3fb27SDimitry Andric cl::opt<bool> SupportsHotColdNew( 13306c3fb27SDimitry Andric "supports-hot-cold-new", cl::init(false), cl::Hidden, 13406c3fb27SDimitry Andric cl::desc("Linking with hot/cold operator new interfaces")); 1355f757f3fSDimitry Andric } // namespace llvm 13606c3fb27SDimitry Andric 137*0fca6ea1SDimitry Andric extern cl::opt<bool> MemProfReportHintedSizes; 138*0fca6ea1SDimitry Andric 13906c3fb27SDimitry Andric namespace { 14006c3fb27SDimitry Andric /// CRTP base for graphs built from either IR or ThinLTO summary index. 14106c3fb27SDimitry Andric /// 14206c3fb27SDimitry Andric /// The graph represents the call contexts in all memprof metadata on allocation 14306c3fb27SDimitry Andric /// calls, with nodes for the allocations themselves, as well as for the calls 14406c3fb27SDimitry Andric /// in each context. The graph is initially built from the allocation memprof 14506c3fb27SDimitry Andric /// metadata (or summary) MIBs. It is then updated to match calls with callsite 14606c3fb27SDimitry Andric /// metadata onto the nodes, updating it to reflect any inlining performed on 14706c3fb27SDimitry Andric /// those calls. 14806c3fb27SDimitry Andric /// 14906c3fb27SDimitry Andric /// Each MIB (representing an allocation's call context with allocation 15006c3fb27SDimitry Andric /// behavior) is assigned a unique context id during the graph build. The edges 15106c3fb27SDimitry Andric /// and nodes in the graph are decorated with the context ids they carry. This 15206c3fb27SDimitry Andric /// is used to correctly update the graph when cloning is performed so that we 15306c3fb27SDimitry Andric /// can uniquify the context for a single (possibly cloned) allocation. 15406c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 15506c3fb27SDimitry Andric class CallsiteContextGraph { 15606c3fb27SDimitry Andric public: 15706c3fb27SDimitry Andric CallsiteContextGraph() = default; 15806c3fb27SDimitry Andric CallsiteContextGraph(const CallsiteContextGraph &) = default; 15906c3fb27SDimitry Andric CallsiteContextGraph(CallsiteContextGraph &&) = default; 16006c3fb27SDimitry Andric 16106c3fb27SDimitry Andric /// Main entry point to perform analysis and transformations on graph. 16206c3fb27SDimitry Andric bool process(); 16306c3fb27SDimitry Andric 16406c3fb27SDimitry Andric /// Perform cloning on the graph necessary to uniquely identify the allocation 16506c3fb27SDimitry Andric /// behavior of an allocation based on its context. 16606c3fb27SDimitry Andric void identifyClones(); 16706c3fb27SDimitry Andric 16806c3fb27SDimitry Andric /// Assign callsite clones to functions, cloning functions as needed to 16906c3fb27SDimitry Andric /// accommodate the combinations of their callsite clones reached by callers. 17006c3fb27SDimitry Andric /// For regular LTO this clones functions and callsites in the IR, but for 17106c3fb27SDimitry Andric /// ThinLTO the cloning decisions are noted in the summaries and later applied 17206c3fb27SDimitry Andric /// in applyImport. 17306c3fb27SDimitry Andric bool assignFunctions(); 17406c3fb27SDimitry Andric 17506c3fb27SDimitry Andric void dump() const; 17606c3fb27SDimitry Andric void print(raw_ostream &OS) const; 177*0fca6ea1SDimitry Andric void printTotalSizes(raw_ostream &OS) const; 17806c3fb27SDimitry Andric 17906c3fb27SDimitry Andric friend raw_ostream &operator<<(raw_ostream &OS, 18006c3fb27SDimitry Andric const CallsiteContextGraph &CCG) { 18106c3fb27SDimitry Andric CCG.print(OS); 18206c3fb27SDimitry Andric return OS; 18306c3fb27SDimitry Andric } 18406c3fb27SDimitry Andric 18506c3fb27SDimitry Andric friend struct GraphTraits< 18606c3fb27SDimitry Andric const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>; 18706c3fb27SDimitry Andric friend struct DOTGraphTraits< 18806c3fb27SDimitry Andric const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>; 18906c3fb27SDimitry Andric 19006c3fb27SDimitry Andric void exportToDot(std::string Label) const; 19106c3fb27SDimitry Andric 19206c3fb27SDimitry Andric /// Represents a function clone via FuncTy pointer and clone number pair. 19306c3fb27SDimitry Andric struct FuncInfo final 19406c3fb27SDimitry Andric : public std::pair<FuncTy *, unsigned /*Clone number*/> { 19506c3fb27SDimitry Andric using Base = std::pair<FuncTy *, unsigned>; 19606c3fb27SDimitry Andric FuncInfo(const Base &B) : Base(B) {} 19706c3fb27SDimitry Andric FuncInfo(FuncTy *F = nullptr, unsigned CloneNo = 0) : Base(F, CloneNo) {} 19806c3fb27SDimitry Andric explicit operator bool() const { return this->first != nullptr; } 19906c3fb27SDimitry Andric FuncTy *func() const { return this->first; } 20006c3fb27SDimitry Andric unsigned cloneNo() const { return this->second; } 20106c3fb27SDimitry Andric }; 20206c3fb27SDimitry Andric 20306c3fb27SDimitry Andric /// Represents a callsite clone via CallTy and clone number pair. 20406c3fb27SDimitry Andric struct CallInfo final : public std::pair<CallTy, unsigned /*Clone number*/> { 20506c3fb27SDimitry Andric using Base = std::pair<CallTy, unsigned>; 20606c3fb27SDimitry Andric CallInfo(const Base &B) : Base(B) {} 20706c3fb27SDimitry Andric CallInfo(CallTy Call = nullptr, unsigned CloneNo = 0) 20806c3fb27SDimitry Andric : Base(Call, CloneNo) {} 20906c3fb27SDimitry Andric explicit operator bool() const { return (bool)this->first; } 21006c3fb27SDimitry Andric CallTy call() const { return this->first; } 21106c3fb27SDimitry Andric unsigned cloneNo() const { return this->second; } 21206c3fb27SDimitry Andric void setCloneNo(unsigned N) { this->second = N; } 21306c3fb27SDimitry Andric void print(raw_ostream &OS) const { 21406c3fb27SDimitry Andric if (!operator bool()) { 21506c3fb27SDimitry Andric assert(!cloneNo()); 21606c3fb27SDimitry Andric OS << "null Call"; 21706c3fb27SDimitry Andric return; 21806c3fb27SDimitry Andric } 21906c3fb27SDimitry Andric call()->print(OS); 22006c3fb27SDimitry Andric OS << "\t(clone " << cloneNo() << ")"; 22106c3fb27SDimitry Andric } 22206c3fb27SDimitry Andric void dump() const { 22306c3fb27SDimitry Andric print(dbgs()); 22406c3fb27SDimitry Andric dbgs() << "\n"; 22506c3fb27SDimitry Andric } 22606c3fb27SDimitry Andric friend raw_ostream &operator<<(raw_ostream &OS, const CallInfo &Call) { 22706c3fb27SDimitry Andric Call.print(OS); 22806c3fb27SDimitry Andric return OS; 22906c3fb27SDimitry Andric } 23006c3fb27SDimitry Andric }; 23106c3fb27SDimitry Andric 23206c3fb27SDimitry Andric struct ContextEdge; 23306c3fb27SDimitry Andric 23406c3fb27SDimitry Andric /// Node in the Callsite Context Graph 23506c3fb27SDimitry Andric struct ContextNode { 23606c3fb27SDimitry Andric // Keep this for now since in the IR case where we have an Instruction* it 23706c3fb27SDimitry Andric // is not as immediately discoverable. Used for printing richer information 23806c3fb27SDimitry Andric // when dumping graph. 23906c3fb27SDimitry Andric bool IsAllocation; 24006c3fb27SDimitry Andric 24106c3fb27SDimitry Andric // Keeps track of when the Call was reset to null because there was 24206c3fb27SDimitry Andric // recursion. 24306c3fb27SDimitry Andric bool Recursive = false; 24406c3fb27SDimitry Andric 24506c3fb27SDimitry Andric // The corresponding allocation or interior call. 24606c3fb27SDimitry Andric CallInfo Call; 24706c3fb27SDimitry Andric 24806c3fb27SDimitry Andric // For alloc nodes this is a unique id assigned when constructed, and for 24906c3fb27SDimitry Andric // callsite stack nodes it is the original stack id when the node is 25006c3fb27SDimitry Andric // constructed from the memprof MIB metadata on the alloc nodes. Note that 25106c3fb27SDimitry Andric // this is only used when matching callsite metadata onto the stack nodes 25206c3fb27SDimitry Andric // created when processing the allocation memprof MIBs, and for labeling 25306c3fb27SDimitry Andric // nodes in the dot graph. Therefore we don't bother to assign a value for 25406c3fb27SDimitry Andric // clones. 25506c3fb27SDimitry Andric uint64_t OrigStackOrAllocId = 0; 25606c3fb27SDimitry Andric 25706c3fb27SDimitry Andric // This will be formed by ORing together the AllocationType enum values 25806c3fb27SDimitry Andric // for contexts including this node. 25906c3fb27SDimitry Andric uint8_t AllocTypes = 0; 26006c3fb27SDimitry Andric 26106c3fb27SDimitry Andric // Edges to all callees in the profiled call stacks. 26206c3fb27SDimitry Andric // TODO: Should this be a map (from Callee node) for more efficient lookup? 26306c3fb27SDimitry Andric std::vector<std::shared_ptr<ContextEdge>> CalleeEdges; 26406c3fb27SDimitry Andric 26506c3fb27SDimitry Andric // Edges to all callers in the profiled call stacks. 26606c3fb27SDimitry Andric // TODO: Should this be a map (from Caller node) for more efficient lookup? 26706c3fb27SDimitry Andric std::vector<std::shared_ptr<ContextEdge>> CallerEdges; 26806c3fb27SDimitry Andric 269*0fca6ea1SDimitry Andric // Get the list of edges from which we can compute allocation information 270*0fca6ea1SDimitry Andric // such as the context ids and allocation type of this node. 271*0fca6ea1SDimitry Andric const std::vector<std::shared_ptr<ContextEdge>> * 272*0fca6ea1SDimitry Andric getEdgesWithAllocInfo() const { 273*0fca6ea1SDimitry Andric // If node has any callees, compute from those, otherwise compute from 274*0fca6ea1SDimitry Andric // callers (i.e. if this is the leaf allocation node). 275*0fca6ea1SDimitry Andric if (!CalleeEdges.empty()) 276*0fca6ea1SDimitry Andric return &CalleeEdges; 277*0fca6ea1SDimitry Andric if (!CallerEdges.empty()) { 278*0fca6ea1SDimitry Andric // A node with caller edges but no callee edges must be the allocation 279*0fca6ea1SDimitry Andric // node. 280*0fca6ea1SDimitry Andric assert(IsAllocation); 281*0fca6ea1SDimitry Andric return &CallerEdges; 282*0fca6ea1SDimitry Andric } 283*0fca6ea1SDimitry Andric return nullptr; 284*0fca6ea1SDimitry Andric } 285*0fca6ea1SDimitry Andric 286*0fca6ea1SDimitry Andric // Compute the context ids for this node from the union of its edge context 287*0fca6ea1SDimitry Andric // ids. 288*0fca6ea1SDimitry Andric DenseSet<uint32_t> getContextIds() const { 28906c3fb27SDimitry Andric DenseSet<uint32_t> ContextIds; 290*0fca6ea1SDimitry Andric auto *Edges = getEdgesWithAllocInfo(); 291*0fca6ea1SDimitry Andric if (!Edges) 292*0fca6ea1SDimitry Andric return {}; 293*0fca6ea1SDimitry Andric unsigned Count = 0; 294*0fca6ea1SDimitry Andric for (auto &Edge : *Edges) 295*0fca6ea1SDimitry Andric Count += Edge->getContextIds().size(); 296*0fca6ea1SDimitry Andric ContextIds.reserve(Count); 297*0fca6ea1SDimitry Andric for (auto &Edge : *Edges) 298*0fca6ea1SDimitry Andric ContextIds.insert(Edge->getContextIds().begin(), 299*0fca6ea1SDimitry Andric Edge->getContextIds().end()); 300*0fca6ea1SDimitry Andric return ContextIds; 301*0fca6ea1SDimitry Andric } 302*0fca6ea1SDimitry Andric 303*0fca6ea1SDimitry Andric // Compute the allocation type for this node from the OR of its edge 304*0fca6ea1SDimitry Andric // allocation types. 305*0fca6ea1SDimitry Andric uint8_t computeAllocType() const { 306*0fca6ea1SDimitry Andric auto *Edges = getEdgesWithAllocInfo(); 307*0fca6ea1SDimitry Andric if (!Edges) 308*0fca6ea1SDimitry Andric return (uint8_t)AllocationType::None; 309*0fca6ea1SDimitry Andric uint8_t BothTypes = 310*0fca6ea1SDimitry Andric (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold; 311*0fca6ea1SDimitry Andric uint8_t AllocType = (uint8_t)AllocationType::None; 312*0fca6ea1SDimitry Andric for (auto &Edge : *Edges) { 313*0fca6ea1SDimitry Andric AllocType |= Edge->AllocTypes; 314*0fca6ea1SDimitry Andric // Bail early if alloc type reached both, no further refinement. 315*0fca6ea1SDimitry Andric if (AllocType == BothTypes) 316*0fca6ea1SDimitry Andric return AllocType; 317*0fca6ea1SDimitry Andric } 318*0fca6ea1SDimitry Andric return AllocType; 319*0fca6ea1SDimitry Andric } 320*0fca6ea1SDimitry Andric 321*0fca6ea1SDimitry Andric // The context ids set for this node is empty if its edge context ids are 322*0fca6ea1SDimitry Andric // also all empty. 323*0fca6ea1SDimitry Andric bool emptyContextIds() const { 324*0fca6ea1SDimitry Andric auto *Edges = getEdgesWithAllocInfo(); 325*0fca6ea1SDimitry Andric if (!Edges) 326*0fca6ea1SDimitry Andric return true; 327*0fca6ea1SDimitry Andric for (auto &Edge : *Edges) { 328*0fca6ea1SDimitry Andric if (!Edge->getContextIds().empty()) 329*0fca6ea1SDimitry Andric return false; 330*0fca6ea1SDimitry Andric } 331*0fca6ea1SDimitry Andric return true; 332*0fca6ea1SDimitry Andric } 33306c3fb27SDimitry Andric 33406c3fb27SDimitry Andric // List of clones of this ContextNode, initially empty. 33506c3fb27SDimitry Andric std::vector<ContextNode *> Clones; 33606c3fb27SDimitry Andric 33706c3fb27SDimitry Andric // If a clone, points to the original uncloned node. 33806c3fb27SDimitry Andric ContextNode *CloneOf = nullptr; 33906c3fb27SDimitry Andric 34006c3fb27SDimitry Andric ContextNode(bool IsAllocation) : IsAllocation(IsAllocation), Call() {} 34106c3fb27SDimitry Andric 34206c3fb27SDimitry Andric ContextNode(bool IsAllocation, CallInfo C) 34306c3fb27SDimitry Andric : IsAllocation(IsAllocation), Call(C) {} 34406c3fb27SDimitry Andric 34506c3fb27SDimitry Andric void addClone(ContextNode *Clone) { 34606c3fb27SDimitry Andric if (CloneOf) { 34706c3fb27SDimitry Andric CloneOf->Clones.push_back(Clone); 34806c3fb27SDimitry Andric Clone->CloneOf = CloneOf; 34906c3fb27SDimitry Andric } else { 35006c3fb27SDimitry Andric Clones.push_back(Clone); 35106c3fb27SDimitry Andric assert(!Clone->CloneOf); 35206c3fb27SDimitry Andric Clone->CloneOf = this; 35306c3fb27SDimitry Andric } 35406c3fb27SDimitry Andric } 35506c3fb27SDimitry Andric 35606c3fb27SDimitry Andric ContextNode *getOrigNode() { 35706c3fb27SDimitry Andric if (!CloneOf) 35806c3fb27SDimitry Andric return this; 35906c3fb27SDimitry Andric return CloneOf; 36006c3fb27SDimitry Andric } 36106c3fb27SDimitry Andric 36206c3fb27SDimitry Andric void addOrUpdateCallerEdge(ContextNode *Caller, AllocationType AllocType, 36306c3fb27SDimitry Andric unsigned int ContextId); 36406c3fb27SDimitry Andric 36506c3fb27SDimitry Andric ContextEdge *findEdgeFromCallee(const ContextNode *Callee); 36606c3fb27SDimitry Andric ContextEdge *findEdgeFromCaller(const ContextNode *Caller); 36706c3fb27SDimitry Andric void eraseCalleeEdge(const ContextEdge *Edge); 36806c3fb27SDimitry Andric void eraseCallerEdge(const ContextEdge *Edge); 36906c3fb27SDimitry Andric 37006c3fb27SDimitry Andric void setCall(CallInfo C) { Call = C; } 37106c3fb27SDimitry Andric 37206c3fb27SDimitry Andric bool hasCall() const { return (bool)Call.call(); } 37306c3fb27SDimitry Andric 37406c3fb27SDimitry Andric void printCall(raw_ostream &OS) const { Call.print(OS); } 37506c3fb27SDimitry Andric 37606c3fb27SDimitry Andric // True if this node was effectively removed from the graph, in which case 377*0fca6ea1SDimitry Andric // it should have an allocation type of None and empty context ids. 37806c3fb27SDimitry Andric bool isRemoved() const { 379*0fca6ea1SDimitry Andric assert((AllocTypes == (uint8_t)AllocationType::None) == 380*0fca6ea1SDimitry Andric emptyContextIds()); 381*0fca6ea1SDimitry Andric return AllocTypes == (uint8_t)AllocationType::None; 38206c3fb27SDimitry Andric } 38306c3fb27SDimitry Andric 38406c3fb27SDimitry Andric void dump() const; 38506c3fb27SDimitry Andric void print(raw_ostream &OS) const; 38606c3fb27SDimitry Andric 38706c3fb27SDimitry Andric friend raw_ostream &operator<<(raw_ostream &OS, const ContextNode &Node) { 38806c3fb27SDimitry Andric Node.print(OS); 38906c3fb27SDimitry Andric return OS; 39006c3fb27SDimitry Andric } 39106c3fb27SDimitry Andric }; 39206c3fb27SDimitry Andric 39306c3fb27SDimitry Andric /// Edge in the Callsite Context Graph from a ContextNode N to a caller or 39406c3fb27SDimitry Andric /// callee. 39506c3fb27SDimitry Andric struct ContextEdge { 39606c3fb27SDimitry Andric ContextNode *Callee; 39706c3fb27SDimitry Andric ContextNode *Caller; 39806c3fb27SDimitry Andric 39906c3fb27SDimitry Andric // This will be formed by ORing together the AllocationType enum values 40006c3fb27SDimitry Andric // for contexts including this edge. 40106c3fb27SDimitry Andric uint8_t AllocTypes = 0; 40206c3fb27SDimitry Andric 40306c3fb27SDimitry Andric // The set of IDs for contexts including this edge. 40406c3fb27SDimitry Andric DenseSet<uint32_t> ContextIds; 40506c3fb27SDimitry Andric 40606c3fb27SDimitry Andric ContextEdge(ContextNode *Callee, ContextNode *Caller, uint8_t AllocType, 40706c3fb27SDimitry Andric DenseSet<uint32_t> ContextIds) 40806c3fb27SDimitry Andric : Callee(Callee), Caller(Caller), AllocTypes(AllocType), 409*0fca6ea1SDimitry Andric ContextIds(std::move(ContextIds)) {} 41006c3fb27SDimitry Andric 41106c3fb27SDimitry Andric DenseSet<uint32_t> &getContextIds() { return ContextIds; } 41206c3fb27SDimitry Andric 41306c3fb27SDimitry Andric void dump() const; 41406c3fb27SDimitry Andric void print(raw_ostream &OS) const; 41506c3fb27SDimitry Andric 41606c3fb27SDimitry Andric friend raw_ostream &operator<<(raw_ostream &OS, const ContextEdge &Edge) { 41706c3fb27SDimitry Andric Edge.print(OS); 41806c3fb27SDimitry Andric return OS; 41906c3fb27SDimitry Andric } 42006c3fb27SDimitry Andric }; 42106c3fb27SDimitry Andric 422*0fca6ea1SDimitry Andric /// Helpers to remove callee edges that have allocation type None (due to not 42306c3fb27SDimitry Andric /// carrying any context ids) after transformations. 42406c3fb27SDimitry Andric void removeNoneTypeCalleeEdges(ContextNode *Node); 425*0fca6ea1SDimitry Andric void 426*0fca6ea1SDimitry Andric recursivelyRemoveNoneTypeCalleeEdges(ContextNode *Node, 427*0fca6ea1SDimitry Andric DenseSet<const ContextNode *> &Visited); 42806c3fb27SDimitry Andric 42906c3fb27SDimitry Andric protected: 43006c3fb27SDimitry Andric /// Get a list of nodes corresponding to the stack ids in the given callsite 43106c3fb27SDimitry Andric /// context. 43206c3fb27SDimitry Andric template <class NodeT, class IteratorT> 43306c3fb27SDimitry Andric std::vector<uint64_t> 43406c3fb27SDimitry Andric getStackIdsWithContextNodes(CallStack<NodeT, IteratorT> &CallsiteContext); 43506c3fb27SDimitry Andric 43606c3fb27SDimitry Andric /// Adds nodes for the given allocation and any stack ids on its memprof MIB 43706c3fb27SDimitry Andric /// metadata (or summary). 43806c3fb27SDimitry Andric ContextNode *addAllocNode(CallInfo Call, const FuncTy *F); 43906c3fb27SDimitry Andric 44006c3fb27SDimitry Andric /// Adds nodes for the given MIB stack ids. 44106c3fb27SDimitry Andric template <class NodeT, class IteratorT> 44206c3fb27SDimitry Andric void addStackNodesForMIB(ContextNode *AllocNode, 44306c3fb27SDimitry Andric CallStack<NodeT, IteratorT> &StackContext, 44406c3fb27SDimitry Andric CallStack<NodeT, IteratorT> &CallsiteContext, 445*0fca6ea1SDimitry Andric AllocationType AllocType, uint64_t TotalSize); 44606c3fb27SDimitry Andric 44706c3fb27SDimitry Andric /// Matches all callsite metadata (or summary) to the nodes created for 44806c3fb27SDimitry Andric /// allocation memprof MIB metadata, synthesizing new nodes to reflect any 44906c3fb27SDimitry Andric /// inlining performed on those callsite instructions. 45006c3fb27SDimitry Andric void updateStackNodes(); 45106c3fb27SDimitry Andric 45206c3fb27SDimitry Andric /// Update graph to conservatively handle any callsite stack nodes that target 45306c3fb27SDimitry Andric /// multiple different callee target functions. 45406c3fb27SDimitry Andric void handleCallsitesWithMultipleTargets(); 45506c3fb27SDimitry Andric 45606c3fb27SDimitry Andric /// Save lists of calls with MemProf metadata in each function, for faster 45706c3fb27SDimitry Andric /// iteration. 458297eecfbSDimitry Andric MapVector<FuncTy *, std::vector<CallInfo>> FuncToCallsWithMetadata; 45906c3fb27SDimitry Andric 46006c3fb27SDimitry Andric /// Map from callsite node to the enclosing caller function. 46106c3fb27SDimitry Andric std::map<const ContextNode *, const FuncTy *> NodeToCallingFunc; 46206c3fb27SDimitry Andric 46306c3fb27SDimitry Andric private: 46406c3fb27SDimitry Andric using EdgeIter = typename std::vector<std::shared_ptr<ContextEdge>>::iterator; 46506c3fb27SDimitry Andric 46606c3fb27SDimitry Andric using CallContextInfo = std::tuple<CallTy, std::vector<uint64_t>, 46706c3fb27SDimitry Andric const FuncTy *, DenseSet<uint32_t>>; 46806c3fb27SDimitry Andric 46906c3fb27SDimitry Andric /// Assigns the given Node to calls at or inlined into the location with 47006c3fb27SDimitry Andric /// the Node's stack id, after post order traversing and processing its 47106c3fb27SDimitry Andric /// caller nodes. Uses the call information recorded in the given 47206c3fb27SDimitry Andric /// StackIdToMatchingCalls map, and creates new nodes for inlined sequences 47306c3fb27SDimitry Andric /// as needed. Called by updateStackNodes which sets up the given 47406c3fb27SDimitry Andric /// StackIdToMatchingCalls map. 47506c3fb27SDimitry Andric void assignStackNodesPostOrder( 47606c3fb27SDimitry Andric ContextNode *Node, DenseSet<const ContextNode *> &Visited, 47706c3fb27SDimitry Andric DenseMap<uint64_t, std::vector<CallContextInfo>> &StackIdToMatchingCalls); 47806c3fb27SDimitry Andric 47906c3fb27SDimitry Andric /// Duplicates the given set of context ids, updating the provided 48006c3fb27SDimitry Andric /// map from each original id with the newly generated context ids, 48106c3fb27SDimitry Andric /// and returning the new duplicated id set. 48206c3fb27SDimitry Andric DenseSet<uint32_t> duplicateContextIds( 48306c3fb27SDimitry Andric const DenseSet<uint32_t> &StackSequenceContextIds, 48406c3fb27SDimitry Andric DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds); 48506c3fb27SDimitry Andric 48606c3fb27SDimitry Andric /// Propagates all duplicated context ids across the graph. 48706c3fb27SDimitry Andric void propagateDuplicateContextIds( 48806c3fb27SDimitry Andric const DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds); 48906c3fb27SDimitry Andric 49006c3fb27SDimitry Andric /// Connect the NewNode to OrigNode's callees if TowardsCallee is true, 49106c3fb27SDimitry Andric /// else to its callers. Also updates OrigNode's edges to remove any context 49206c3fb27SDimitry Andric /// ids moved to the newly created edge. 49306c3fb27SDimitry Andric void connectNewNode(ContextNode *NewNode, ContextNode *OrigNode, 494*0fca6ea1SDimitry Andric bool TowardsCallee, 495*0fca6ea1SDimitry Andric DenseSet<uint32_t> RemainingContextIds); 49606c3fb27SDimitry Andric 49706c3fb27SDimitry Andric /// Get the stack id corresponding to the given Id or Index (for IR this will 49806c3fb27SDimitry Andric /// return itself, for a summary index this will return the id recorded in the 49906c3fb27SDimitry Andric /// index for that stack id index value). 50006c3fb27SDimitry Andric uint64_t getStackId(uint64_t IdOrIndex) const { 50106c3fb27SDimitry Andric return static_cast<const DerivedCCG *>(this)->getStackId(IdOrIndex); 50206c3fb27SDimitry Andric } 50306c3fb27SDimitry Andric 504297eecfbSDimitry Andric /// Returns true if the given call targets the callee of the given edge, or if 505297eecfbSDimitry Andric /// we were able to identify the call chain through intermediate tail calls. 506297eecfbSDimitry Andric /// In the latter case new context nodes are added to the graph for the 507297eecfbSDimitry Andric /// identified tail calls, and their synthesized nodes are added to 508*0fca6ea1SDimitry Andric /// TailCallToContextNodeMap. The EdgeIter is updated in the latter case for 509*0fca6ea1SDimitry Andric /// the updated edges and to prepare it for an increment in the caller. 510297eecfbSDimitry Andric bool 511297eecfbSDimitry Andric calleesMatch(CallTy Call, EdgeIter &EI, 512297eecfbSDimitry Andric MapVector<CallInfo, ContextNode *> &TailCallToContextNodeMap); 513297eecfbSDimitry Andric 514297eecfbSDimitry Andric /// Returns true if the given call targets the given function, or if we were 515297eecfbSDimitry Andric /// able to identify the call chain through intermediate tail calls (in which 516297eecfbSDimitry Andric /// case FoundCalleeChain will be populated). 517297eecfbSDimitry Andric bool calleeMatchesFunc( 518297eecfbSDimitry Andric CallTy Call, const FuncTy *Func, const FuncTy *CallerFunc, 519297eecfbSDimitry Andric std::vector<std::pair<CallTy, FuncTy *>> &FoundCalleeChain) { 520297eecfbSDimitry Andric return static_cast<DerivedCCG *>(this)->calleeMatchesFunc( 521297eecfbSDimitry Andric Call, Func, CallerFunc, FoundCalleeChain); 52206c3fb27SDimitry Andric } 52306c3fb27SDimitry Andric 52406c3fb27SDimitry Andric /// Get a list of nodes corresponding to the stack ids in the given 52506c3fb27SDimitry Andric /// callsite's context. 52606c3fb27SDimitry Andric std::vector<uint64_t> getStackIdsWithContextNodesForCall(CallTy Call) { 52706c3fb27SDimitry Andric return static_cast<DerivedCCG *>(this)->getStackIdsWithContextNodesForCall( 52806c3fb27SDimitry Andric Call); 52906c3fb27SDimitry Andric } 53006c3fb27SDimitry Andric 53106c3fb27SDimitry Andric /// Get the last stack id in the context for callsite. 53206c3fb27SDimitry Andric uint64_t getLastStackId(CallTy Call) { 53306c3fb27SDimitry Andric return static_cast<DerivedCCG *>(this)->getLastStackId(Call); 53406c3fb27SDimitry Andric } 53506c3fb27SDimitry Andric 53606c3fb27SDimitry Andric /// Update the allocation call to record type of allocated memory. 53706c3fb27SDimitry Andric void updateAllocationCall(CallInfo &Call, AllocationType AllocType) { 53806c3fb27SDimitry Andric AllocType == AllocationType::Cold ? AllocTypeCold++ : AllocTypeNotCold++; 53906c3fb27SDimitry Andric static_cast<DerivedCCG *>(this)->updateAllocationCall(Call, AllocType); 54006c3fb27SDimitry Andric } 54106c3fb27SDimitry Andric 54206c3fb27SDimitry Andric /// Update non-allocation call to invoke (possibly cloned) function 54306c3fb27SDimitry Andric /// CalleeFunc. 54406c3fb27SDimitry Andric void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc) { 54506c3fb27SDimitry Andric static_cast<DerivedCCG *>(this)->updateCall(CallerCall, CalleeFunc); 54606c3fb27SDimitry Andric } 54706c3fb27SDimitry Andric 54806c3fb27SDimitry Andric /// Clone the given function for the given callsite, recording mapping of all 54906c3fb27SDimitry Andric /// of the functions tracked calls to their new versions in the CallMap. 55006c3fb27SDimitry Andric /// Assigns new clones to clone number CloneNo. 55106c3fb27SDimitry Andric FuncInfo cloneFunctionForCallsite( 55206c3fb27SDimitry Andric FuncInfo &Func, CallInfo &Call, std::map<CallInfo, CallInfo> &CallMap, 55306c3fb27SDimitry Andric std::vector<CallInfo> &CallsWithMetadataInFunc, unsigned CloneNo) { 55406c3fb27SDimitry Andric return static_cast<DerivedCCG *>(this)->cloneFunctionForCallsite( 55506c3fb27SDimitry Andric Func, Call, CallMap, CallsWithMetadataInFunc, CloneNo); 55606c3fb27SDimitry Andric } 55706c3fb27SDimitry Andric 55806c3fb27SDimitry Andric /// Gets a label to use in the dot graph for the given call clone in the given 55906c3fb27SDimitry Andric /// function. 56006c3fb27SDimitry Andric std::string getLabel(const FuncTy *Func, const CallTy Call, 56106c3fb27SDimitry Andric unsigned CloneNo) const { 56206c3fb27SDimitry Andric return static_cast<const DerivedCCG *>(this)->getLabel(Func, Call, CloneNo); 56306c3fb27SDimitry Andric } 56406c3fb27SDimitry Andric 56506c3fb27SDimitry Andric /// Helpers to find the node corresponding to the given call or stackid. 56606c3fb27SDimitry Andric ContextNode *getNodeForInst(const CallInfo &C); 56706c3fb27SDimitry Andric ContextNode *getNodeForAlloc(const CallInfo &C); 56806c3fb27SDimitry Andric ContextNode *getNodeForStackId(uint64_t StackId); 56906c3fb27SDimitry Andric 57006c3fb27SDimitry Andric /// Computes the alloc type corresponding to the given context ids, by 57106c3fb27SDimitry Andric /// unioning their recorded alloc types. 57206c3fb27SDimitry Andric uint8_t computeAllocType(DenseSet<uint32_t> &ContextIds); 57306c3fb27SDimitry Andric 574*0fca6ea1SDimitry Andric /// Returns the allocation type of the intersection of the contexts of two 57506c3fb27SDimitry Andric /// nodes (based on their provided context id sets), optimized for the case 57606c3fb27SDimitry Andric /// when Node1Ids is smaller than Node2Ids. 57706c3fb27SDimitry Andric uint8_t intersectAllocTypesImpl(const DenseSet<uint32_t> &Node1Ids, 57806c3fb27SDimitry Andric const DenseSet<uint32_t> &Node2Ids); 57906c3fb27SDimitry Andric 580*0fca6ea1SDimitry Andric /// Returns the allocation type of the intersection of the contexts of two 58106c3fb27SDimitry Andric /// nodes (based on their provided context id sets). 58206c3fb27SDimitry Andric uint8_t intersectAllocTypes(const DenseSet<uint32_t> &Node1Ids, 58306c3fb27SDimitry Andric const DenseSet<uint32_t> &Node2Ids); 58406c3fb27SDimitry Andric 58506c3fb27SDimitry Andric /// Create a clone of Edge's callee and move Edge to that new callee node, 58606c3fb27SDimitry Andric /// performing the necessary context id and allocation type updates. 58706c3fb27SDimitry Andric /// If callee's caller edge iterator is supplied, it is updated when removing 588*0fca6ea1SDimitry Andric /// the edge from that list. If ContextIdsToMove is non-empty, only that 589*0fca6ea1SDimitry Andric /// subset of Edge's ids are moved to an edge to the new callee. 59006c3fb27SDimitry Andric ContextNode * 59106c3fb27SDimitry Andric moveEdgeToNewCalleeClone(const std::shared_ptr<ContextEdge> &Edge, 592*0fca6ea1SDimitry Andric EdgeIter *CallerEdgeI = nullptr, 593*0fca6ea1SDimitry Andric DenseSet<uint32_t> ContextIdsToMove = {}); 59406c3fb27SDimitry Andric 59506c3fb27SDimitry Andric /// Change the callee of Edge to existing callee clone NewCallee, performing 59606c3fb27SDimitry Andric /// the necessary context id and allocation type updates. 59706c3fb27SDimitry Andric /// If callee's caller edge iterator is supplied, it is updated when removing 598*0fca6ea1SDimitry Andric /// the edge from that list. If ContextIdsToMove is non-empty, only that 599*0fca6ea1SDimitry Andric /// subset of Edge's ids are moved to an edge to the new callee. 60006c3fb27SDimitry Andric void moveEdgeToExistingCalleeClone(const std::shared_ptr<ContextEdge> &Edge, 60106c3fb27SDimitry Andric ContextNode *NewCallee, 60206c3fb27SDimitry Andric EdgeIter *CallerEdgeI = nullptr, 603*0fca6ea1SDimitry Andric bool NewClone = false, 604*0fca6ea1SDimitry Andric DenseSet<uint32_t> ContextIdsToMove = {}); 60506c3fb27SDimitry Andric 60606c3fb27SDimitry Andric /// Recursively perform cloning on the graph for the given Node and its 60706c3fb27SDimitry Andric /// callers, in order to uniquely identify the allocation behavior of an 608*0fca6ea1SDimitry Andric /// allocation given its context. The context ids of the allocation being 609*0fca6ea1SDimitry Andric /// processed are given in AllocContextIds. 610*0fca6ea1SDimitry Andric void identifyClones(ContextNode *Node, DenseSet<const ContextNode *> &Visited, 611*0fca6ea1SDimitry Andric const DenseSet<uint32_t> &AllocContextIds); 61206c3fb27SDimitry Andric 61306c3fb27SDimitry Andric /// Map from each context ID to the AllocationType assigned to that context. 614*0fca6ea1SDimitry Andric DenseMap<uint32_t, AllocationType> ContextIdToAllocationType; 615*0fca6ea1SDimitry Andric 616*0fca6ea1SDimitry Andric /// Map from each contextID to the profiled aggregate allocation size, 617*0fca6ea1SDimitry Andric /// optionally populated when requested (via MemProfReportHintedSizes). 618*0fca6ea1SDimitry Andric DenseMap<uint32_t, uint64_t> ContextIdToTotalSize; 61906c3fb27SDimitry Andric 62006c3fb27SDimitry Andric /// Identifies the context node created for a stack id when adding the MIB 62106c3fb27SDimitry Andric /// contexts to the graph. This is used to locate the context nodes when 62206c3fb27SDimitry Andric /// trying to assign the corresponding callsites with those stack ids to these 62306c3fb27SDimitry Andric /// nodes. 624*0fca6ea1SDimitry Andric DenseMap<uint64_t, ContextNode *> StackEntryIdToContextNodeMap; 62506c3fb27SDimitry Andric 62606c3fb27SDimitry Andric /// Maps to track the calls to their corresponding nodes in the graph. 62706c3fb27SDimitry Andric MapVector<CallInfo, ContextNode *> AllocationCallToContextNodeMap; 62806c3fb27SDimitry Andric MapVector<CallInfo, ContextNode *> NonAllocationCallToContextNodeMap; 62906c3fb27SDimitry Andric 63006c3fb27SDimitry Andric /// Owner of all ContextNode unique_ptrs. 63106c3fb27SDimitry Andric std::vector<std::unique_ptr<ContextNode>> NodeOwner; 63206c3fb27SDimitry Andric 63306c3fb27SDimitry Andric /// Perform sanity checks on graph when requested. 63406c3fb27SDimitry Andric void check() const; 63506c3fb27SDimitry Andric 63606c3fb27SDimitry Andric /// Keeps track of the last unique context id assigned. 63706c3fb27SDimitry Andric unsigned int LastContextId = 0; 63806c3fb27SDimitry Andric }; 63906c3fb27SDimitry Andric 64006c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 64106c3fb27SDimitry Andric using ContextNode = 64206c3fb27SDimitry Andric typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode; 64306c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 64406c3fb27SDimitry Andric using ContextEdge = 64506c3fb27SDimitry Andric typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge; 64606c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 64706c3fb27SDimitry Andric using FuncInfo = 64806c3fb27SDimitry Andric typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::FuncInfo; 64906c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 65006c3fb27SDimitry Andric using CallInfo = 65106c3fb27SDimitry Andric typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::CallInfo; 65206c3fb27SDimitry Andric 65306c3fb27SDimitry Andric /// CRTP derived class for graphs built from IR (regular LTO). 65406c3fb27SDimitry Andric class ModuleCallsiteContextGraph 65506c3fb27SDimitry Andric : public CallsiteContextGraph<ModuleCallsiteContextGraph, Function, 65606c3fb27SDimitry Andric Instruction *> { 65706c3fb27SDimitry Andric public: 65806c3fb27SDimitry Andric ModuleCallsiteContextGraph( 65906c3fb27SDimitry Andric Module &M, 6607a6dacacSDimitry Andric llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter); 66106c3fb27SDimitry Andric 66206c3fb27SDimitry Andric private: 66306c3fb27SDimitry Andric friend CallsiteContextGraph<ModuleCallsiteContextGraph, Function, 66406c3fb27SDimitry Andric Instruction *>; 66506c3fb27SDimitry Andric 66606c3fb27SDimitry Andric uint64_t getStackId(uint64_t IdOrIndex) const; 667297eecfbSDimitry Andric bool calleeMatchesFunc( 668297eecfbSDimitry Andric Instruction *Call, const Function *Func, const Function *CallerFunc, 669297eecfbSDimitry Andric std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain); 670297eecfbSDimitry Andric bool findProfiledCalleeThroughTailCalls( 671297eecfbSDimitry Andric const Function *ProfiledCallee, Value *CurCallee, unsigned Depth, 672297eecfbSDimitry Andric std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain, 673297eecfbSDimitry Andric bool &FoundMultipleCalleeChains); 67406c3fb27SDimitry Andric uint64_t getLastStackId(Instruction *Call); 67506c3fb27SDimitry Andric std::vector<uint64_t> getStackIdsWithContextNodesForCall(Instruction *Call); 67606c3fb27SDimitry Andric void updateAllocationCall(CallInfo &Call, AllocationType AllocType); 67706c3fb27SDimitry Andric void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc); 67806c3fb27SDimitry Andric CallsiteContextGraph<ModuleCallsiteContextGraph, Function, 67906c3fb27SDimitry Andric Instruction *>::FuncInfo 68006c3fb27SDimitry Andric cloneFunctionForCallsite(FuncInfo &Func, CallInfo &Call, 68106c3fb27SDimitry Andric std::map<CallInfo, CallInfo> &CallMap, 68206c3fb27SDimitry Andric std::vector<CallInfo> &CallsWithMetadataInFunc, 68306c3fb27SDimitry Andric unsigned CloneNo); 68406c3fb27SDimitry Andric std::string getLabel(const Function *Func, const Instruction *Call, 68506c3fb27SDimitry Andric unsigned CloneNo) const; 68606c3fb27SDimitry Andric 68706c3fb27SDimitry Andric const Module &Mod; 6887a6dacacSDimitry Andric llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter; 68906c3fb27SDimitry Andric }; 69006c3fb27SDimitry Andric 69106c3fb27SDimitry Andric /// Represents a call in the summary index graph, which can either be an 69206c3fb27SDimitry Andric /// allocation or an interior callsite node in an allocation's context. 69306c3fb27SDimitry Andric /// Holds a pointer to the corresponding data structure in the index. 69406c3fb27SDimitry Andric struct IndexCall : public PointerUnion<CallsiteInfo *, AllocInfo *> { 69506c3fb27SDimitry Andric IndexCall() : PointerUnion() {} 69606c3fb27SDimitry Andric IndexCall(std::nullptr_t) : IndexCall() {} 69706c3fb27SDimitry Andric IndexCall(CallsiteInfo *StackNode) : PointerUnion(StackNode) {} 69806c3fb27SDimitry Andric IndexCall(AllocInfo *AllocNode) : PointerUnion(AllocNode) {} 69906c3fb27SDimitry Andric IndexCall(PointerUnion PT) : PointerUnion(PT) {} 70006c3fb27SDimitry Andric 70106c3fb27SDimitry Andric IndexCall *operator->() { return this; } 70206c3fb27SDimitry Andric 70306c3fb27SDimitry Andric PointerUnion<CallsiteInfo *, AllocInfo *> getBase() const { return *this; } 70406c3fb27SDimitry Andric 70506c3fb27SDimitry Andric void print(raw_ostream &OS) const { 70606c3fb27SDimitry Andric if (auto *AI = llvm::dyn_cast_if_present<AllocInfo *>(getBase())) { 70706c3fb27SDimitry Andric OS << *AI; 70806c3fb27SDimitry Andric } else { 70906c3fb27SDimitry Andric auto *CI = llvm::dyn_cast_if_present<CallsiteInfo *>(getBase()); 71006c3fb27SDimitry Andric assert(CI); 71106c3fb27SDimitry Andric OS << *CI; 71206c3fb27SDimitry Andric } 71306c3fb27SDimitry Andric } 71406c3fb27SDimitry Andric }; 71506c3fb27SDimitry Andric 71606c3fb27SDimitry Andric /// CRTP derived class for graphs built from summary index (ThinLTO). 71706c3fb27SDimitry Andric class IndexCallsiteContextGraph 71806c3fb27SDimitry Andric : public CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary, 71906c3fb27SDimitry Andric IndexCall> { 72006c3fb27SDimitry Andric public: 72106c3fb27SDimitry Andric IndexCallsiteContextGraph( 72206c3fb27SDimitry Andric ModuleSummaryIndex &Index, 7237a6dacacSDimitry Andric llvm::function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)> 72406c3fb27SDimitry Andric isPrevailing); 72506c3fb27SDimitry Andric 726297eecfbSDimitry Andric ~IndexCallsiteContextGraph() { 727297eecfbSDimitry Andric // Now that we are done with the graph it is safe to add the new 728297eecfbSDimitry Andric // CallsiteInfo structs to the function summary vectors. The graph nodes 729297eecfbSDimitry Andric // point into locations within these vectors, so we don't want to add them 730297eecfbSDimitry Andric // any earlier. 731297eecfbSDimitry Andric for (auto &I : FunctionCalleesToSynthesizedCallsiteInfos) { 732297eecfbSDimitry Andric auto *FS = I.first; 733297eecfbSDimitry Andric for (auto &Callsite : I.second) 734297eecfbSDimitry Andric FS->addCallsite(*Callsite.second); 735297eecfbSDimitry Andric } 736297eecfbSDimitry Andric } 737297eecfbSDimitry Andric 73806c3fb27SDimitry Andric private: 73906c3fb27SDimitry Andric friend CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary, 74006c3fb27SDimitry Andric IndexCall>; 74106c3fb27SDimitry Andric 74206c3fb27SDimitry Andric uint64_t getStackId(uint64_t IdOrIndex) const; 743297eecfbSDimitry Andric bool calleeMatchesFunc( 744297eecfbSDimitry Andric IndexCall &Call, const FunctionSummary *Func, 745297eecfbSDimitry Andric const FunctionSummary *CallerFunc, 746297eecfbSDimitry Andric std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain); 747297eecfbSDimitry Andric bool findProfiledCalleeThroughTailCalls( 748297eecfbSDimitry Andric ValueInfo ProfiledCallee, ValueInfo CurCallee, unsigned Depth, 749297eecfbSDimitry Andric std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain, 750297eecfbSDimitry Andric bool &FoundMultipleCalleeChains); 75106c3fb27SDimitry Andric uint64_t getLastStackId(IndexCall &Call); 75206c3fb27SDimitry Andric std::vector<uint64_t> getStackIdsWithContextNodesForCall(IndexCall &Call); 75306c3fb27SDimitry Andric void updateAllocationCall(CallInfo &Call, AllocationType AllocType); 75406c3fb27SDimitry Andric void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc); 75506c3fb27SDimitry Andric CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary, 75606c3fb27SDimitry Andric IndexCall>::FuncInfo 75706c3fb27SDimitry Andric cloneFunctionForCallsite(FuncInfo &Func, CallInfo &Call, 75806c3fb27SDimitry Andric std::map<CallInfo, CallInfo> &CallMap, 75906c3fb27SDimitry Andric std::vector<CallInfo> &CallsWithMetadataInFunc, 76006c3fb27SDimitry Andric unsigned CloneNo); 76106c3fb27SDimitry Andric std::string getLabel(const FunctionSummary *Func, const IndexCall &Call, 76206c3fb27SDimitry Andric unsigned CloneNo) const; 76306c3fb27SDimitry Andric 76406c3fb27SDimitry Andric // Saves mapping from function summaries containing memprof records back to 76506c3fb27SDimitry Andric // its VI, for use in checking and debugging. 76606c3fb27SDimitry Andric std::map<const FunctionSummary *, ValueInfo> FSToVIMap; 76706c3fb27SDimitry Andric 76806c3fb27SDimitry Andric const ModuleSummaryIndex &Index; 7697a6dacacSDimitry Andric llvm::function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)> 770297eecfbSDimitry Andric isPrevailing; 771297eecfbSDimitry Andric 772297eecfbSDimitry Andric // Saves/owns the callsite info structures synthesized for missing tail call 773297eecfbSDimitry Andric // frames that we discover while building the graph. 774297eecfbSDimitry Andric // It maps from the summary of the function making the tail call, to a map 775297eecfbSDimitry Andric // of callee ValueInfo to corresponding synthesized callsite info. 776297eecfbSDimitry Andric std::unordered_map<FunctionSummary *, 777297eecfbSDimitry Andric std::map<ValueInfo, std::unique_ptr<CallsiteInfo>>> 778297eecfbSDimitry Andric FunctionCalleesToSynthesizedCallsiteInfos; 77906c3fb27SDimitry Andric }; 78006c3fb27SDimitry Andric } // namespace 78106c3fb27SDimitry Andric 78206c3fb27SDimitry Andric namespace llvm { 78306c3fb27SDimitry Andric template <> 78406c3fb27SDimitry Andric struct DenseMapInfo<typename CallsiteContextGraph< 78506c3fb27SDimitry Andric ModuleCallsiteContextGraph, Function, Instruction *>::CallInfo> 78606c3fb27SDimitry Andric : public DenseMapInfo<std::pair<Instruction *, unsigned>> {}; 78706c3fb27SDimitry Andric template <> 78806c3fb27SDimitry Andric struct DenseMapInfo<typename CallsiteContextGraph< 78906c3fb27SDimitry Andric IndexCallsiteContextGraph, FunctionSummary, IndexCall>::CallInfo> 79006c3fb27SDimitry Andric : public DenseMapInfo<std::pair<IndexCall, unsigned>> {}; 79106c3fb27SDimitry Andric template <> 79206c3fb27SDimitry Andric struct DenseMapInfo<IndexCall> 79306c3fb27SDimitry Andric : public DenseMapInfo<PointerUnion<CallsiteInfo *, AllocInfo *>> {}; 79406c3fb27SDimitry Andric } // end namespace llvm 79506c3fb27SDimitry Andric 79606c3fb27SDimitry Andric namespace { 79706c3fb27SDimitry Andric 79806c3fb27SDimitry Andric struct FieldSeparator { 79906c3fb27SDimitry Andric bool Skip = true; 80006c3fb27SDimitry Andric const char *Sep; 80106c3fb27SDimitry Andric 80206c3fb27SDimitry Andric FieldSeparator(const char *Sep = ", ") : Sep(Sep) {} 80306c3fb27SDimitry Andric }; 80406c3fb27SDimitry Andric 80506c3fb27SDimitry Andric raw_ostream &operator<<(raw_ostream &OS, FieldSeparator &FS) { 80606c3fb27SDimitry Andric if (FS.Skip) { 80706c3fb27SDimitry Andric FS.Skip = false; 80806c3fb27SDimitry Andric return OS; 80906c3fb27SDimitry Andric } 81006c3fb27SDimitry Andric return OS << FS.Sep; 81106c3fb27SDimitry Andric } 81206c3fb27SDimitry Andric 81306c3fb27SDimitry Andric // Map the uint8_t alloc types (which may contain NotCold|Cold) to the alloc 81406c3fb27SDimitry Andric // type we should actually use on the corresponding allocation. 81506c3fb27SDimitry Andric // If we can't clone a node that has NotCold+Cold alloc type, we will fall 81606c3fb27SDimitry Andric // back to using NotCold. So don't bother cloning to distinguish NotCold+Cold 81706c3fb27SDimitry Andric // from NotCold. 81806c3fb27SDimitry Andric AllocationType allocTypeToUse(uint8_t AllocTypes) { 81906c3fb27SDimitry Andric assert(AllocTypes != (uint8_t)AllocationType::None); 82006c3fb27SDimitry Andric if (AllocTypes == 82106c3fb27SDimitry Andric ((uint8_t)AllocationType::NotCold | (uint8_t)AllocationType::Cold)) 82206c3fb27SDimitry Andric return AllocationType::NotCold; 82306c3fb27SDimitry Andric else 82406c3fb27SDimitry Andric return (AllocationType)AllocTypes; 82506c3fb27SDimitry Andric } 82606c3fb27SDimitry Andric 82706c3fb27SDimitry Andric // Helper to check if the alloc types for all edges recorded in the 82806c3fb27SDimitry Andric // InAllocTypes vector match the alloc types for all edges in the Edges 82906c3fb27SDimitry Andric // vector. 83006c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 83106c3fb27SDimitry Andric bool allocTypesMatch( 83206c3fb27SDimitry Andric const std::vector<uint8_t> &InAllocTypes, 83306c3fb27SDimitry Andric const std::vector<std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>> 83406c3fb27SDimitry Andric &Edges) { 83506c3fb27SDimitry Andric return std::equal( 83606c3fb27SDimitry Andric InAllocTypes.begin(), InAllocTypes.end(), Edges.begin(), 83706c3fb27SDimitry Andric [](const uint8_t &l, 83806c3fb27SDimitry Andric const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &r) { 83906c3fb27SDimitry Andric // Can share if one of the edges is None type - don't 84006c3fb27SDimitry Andric // care about the type along that edge as it doesn't 84106c3fb27SDimitry Andric // exist for those context ids. 84206c3fb27SDimitry Andric if (l == (uint8_t)AllocationType::None || 84306c3fb27SDimitry Andric r->AllocTypes == (uint8_t)AllocationType::None) 84406c3fb27SDimitry Andric return true; 84506c3fb27SDimitry Andric return allocTypeToUse(l) == allocTypeToUse(r->AllocTypes); 84606c3fb27SDimitry Andric }); 84706c3fb27SDimitry Andric } 84806c3fb27SDimitry Andric 84906c3fb27SDimitry Andric } // end anonymous namespace 85006c3fb27SDimitry Andric 85106c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 85206c3fb27SDimitry Andric typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode * 85306c3fb27SDimitry Andric CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForInst( 85406c3fb27SDimitry Andric const CallInfo &C) { 85506c3fb27SDimitry Andric ContextNode *Node = getNodeForAlloc(C); 85606c3fb27SDimitry Andric if (Node) 85706c3fb27SDimitry Andric return Node; 85806c3fb27SDimitry Andric 85906c3fb27SDimitry Andric return NonAllocationCallToContextNodeMap.lookup(C); 86006c3fb27SDimitry Andric } 86106c3fb27SDimitry Andric 86206c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 86306c3fb27SDimitry Andric typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode * 86406c3fb27SDimitry Andric CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForAlloc( 86506c3fb27SDimitry Andric const CallInfo &C) { 86606c3fb27SDimitry Andric return AllocationCallToContextNodeMap.lookup(C); 86706c3fb27SDimitry Andric } 86806c3fb27SDimitry Andric 86906c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 87006c3fb27SDimitry Andric typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode * 87106c3fb27SDimitry Andric CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForStackId( 87206c3fb27SDimitry Andric uint64_t StackId) { 87306c3fb27SDimitry Andric auto StackEntryNode = StackEntryIdToContextNodeMap.find(StackId); 87406c3fb27SDimitry Andric if (StackEntryNode != StackEntryIdToContextNodeMap.end()) 87506c3fb27SDimitry Andric return StackEntryNode->second; 87606c3fb27SDimitry Andric return nullptr; 87706c3fb27SDimitry Andric } 87806c3fb27SDimitry Andric 87906c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 88006c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode:: 88106c3fb27SDimitry Andric addOrUpdateCallerEdge(ContextNode *Caller, AllocationType AllocType, 88206c3fb27SDimitry Andric unsigned int ContextId) { 88306c3fb27SDimitry Andric for (auto &Edge : CallerEdges) { 88406c3fb27SDimitry Andric if (Edge->Caller == Caller) { 88506c3fb27SDimitry Andric Edge->AllocTypes |= (uint8_t)AllocType; 88606c3fb27SDimitry Andric Edge->getContextIds().insert(ContextId); 88706c3fb27SDimitry Andric return; 88806c3fb27SDimitry Andric } 88906c3fb27SDimitry Andric } 89006c3fb27SDimitry Andric std::shared_ptr<ContextEdge> Edge = std::make_shared<ContextEdge>( 89106c3fb27SDimitry Andric this, Caller, (uint8_t)AllocType, DenseSet<uint32_t>({ContextId})); 89206c3fb27SDimitry Andric CallerEdges.push_back(Edge); 89306c3fb27SDimitry Andric Caller->CalleeEdges.push_back(Edge); 89406c3fb27SDimitry Andric } 89506c3fb27SDimitry Andric 89606c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 89706c3fb27SDimitry Andric void CallsiteContextGraph< 89806c3fb27SDimitry Andric DerivedCCG, FuncTy, CallTy>::removeNoneTypeCalleeEdges(ContextNode *Node) { 89906c3fb27SDimitry Andric for (auto EI = Node->CalleeEdges.begin(); EI != Node->CalleeEdges.end();) { 90006c3fb27SDimitry Andric auto Edge = *EI; 90106c3fb27SDimitry Andric if (Edge->AllocTypes == (uint8_t)AllocationType::None) { 90206c3fb27SDimitry Andric assert(Edge->ContextIds.empty()); 90306c3fb27SDimitry Andric Edge->Callee->eraseCallerEdge(Edge.get()); 90406c3fb27SDimitry Andric EI = Node->CalleeEdges.erase(EI); 90506c3fb27SDimitry Andric } else 90606c3fb27SDimitry Andric ++EI; 90706c3fb27SDimitry Andric } 90806c3fb27SDimitry Andric } 90906c3fb27SDimitry Andric 91006c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 91106c3fb27SDimitry Andric typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge * 91206c3fb27SDimitry Andric CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode:: 91306c3fb27SDimitry Andric findEdgeFromCallee(const ContextNode *Callee) { 91406c3fb27SDimitry Andric for (const auto &Edge : CalleeEdges) 91506c3fb27SDimitry Andric if (Edge->Callee == Callee) 91606c3fb27SDimitry Andric return Edge.get(); 91706c3fb27SDimitry Andric return nullptr; 91806c3fb27SDimitry Andric } 91906c3fb27SDimitry Andric 92006c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 92106c3fb27SDimitry Andric typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge * 92206c3fb27SDimitry Andric CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode:: 92306c3fb27SDimitry Andric findEdgeFromCaller(const ContextNode *Caller) { 92406c3fb27SDimitry Andric for (const auto &Edge : CallerEdges) 92506c3fb27SDimitry Andric if (Edge->Caller == Caller) 92606c3fb27SDimitry Andric return Edge.get(); 92706c3fb27SDimitry Andric return nullptr; 92806c3fb27SDimitry Andric } 92906c3fb27SDimitry Andric 93006c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 93106c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode:: 93206c3fb27SDimitry Andric eraseCalleeEdge(const ContextEdge *Edge) { 9335f757f3fSDimitry Andric auto EI = llvm::find_if( 9345f757f3fSDimitry Andric CalleeEdges, [Edge](const std::shared_ptr<ContextEdge> &CalleeEdge) { 93506c3fb27SDimitry Andric return CalleeEdge.get() == Edge; 93606c3fb27SDimitry Andric }); 93706c3fb27SDimitry Andric assert(EI != CalleeEdges.end()); 93806c3fb27SDimitry Andric CalleeEdges.erase(EI); 93906c3fb27SDimitry Andric } 94006c3fb27SDimitry Andric 94106c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 94206c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode:: 94306c3fb27SDimitry Andric eraseCallerEdge(const ContextEdge *Edge) { 9445f757f3fSDimitry Andric auto EI = llvm::find_if( 9455f757f3fSDimitry Andric CallerEdges, [Edge](const std::shared_ptr<ContextEdge> &CallerEdge) { 94606c3fb27SDimitry Andric return CallerEdge.get() == Edge; 94706c3fb27SDimitry Andric }); 94806c3fb27SDimitry Andric assert(EI != CallerEdges.end()); 94906c3fb27SDimitry Andric CallerEdges.erase(EI); 95006c3fb27SDimitry Andric } 95106c3fb27SDimitry Andric 95206c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 95306c3fb27SDimitry Andric uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::computeAllocType( 95406c3fb27SDimitry Andric DenseSet<uint32_t> &ContextIds) { 95506c3fb27SDimitry Andric uint8_t BothTypes = 95606c3fb27SDimitry Andric (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold; 95706c3fb27SDimitry Andric uint8_t AllocType = (uint8_t)AllocationType::None; 95806c3fb27SDimitry Andric for (auto Id : ContextIds) { 95906c3fb27SDimitry Andric AllocType |= (uint8_t)ContextIdToAllocationType[Id]; 96006c3fb27SDimitry Andric // Bail early if alloc type reached both, no further refinement. 96106c3fb27SDimitry Andric if (AllocType == BothTypes) 96206c3fb27SDimitry Andric return AllocType; 96306c3fb27SDimitry Andric } 96406c3fb27SDimitry Andric return AllocType; 96506c3fb27SDimitry Andric } 96606c3fb27SDimitry Andric 96706c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 96806c3fb27SDimitry Andric uint8_t 96906c3fb27SDimitry Andric CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypesImpl( 97006c3fb27SDimitry Andric const DenseSet<uint32_t> &Node1Ids, const DenseSet<uint32_t> &Node2Ids) { 97106c3fb27SDimitry Andric uint8_t BothTypes = 97206c3fb27SDimitry Andric (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold; 97306c3fb27SDimitry Andric uint8_t AllocType = (uint8_t)AllocationType::None; 97406c3fb27SDimitry Andric for (auto Id : Node1Ids) { 97506c3fb27SDimitry Andric if (!Node2Ids.count(Id)) 97606c3fb27SDimitry Andric continue; 97706c3fb27SDimitry Andric AllocType |= (uint8_t)ContextIdToAllocationType[Id]; 97806c3fb27SDimitry Andric // Bail early if alloc type reached both, no further refinement. 97906c3fb27SDimitry Andric if (AllocType == BothTypes) 98006c3fb27SDimitry Andric return AllocType; 98106c3fb27SDimitry Andric } 98206c3fb27SDimitry Andric return AllocType; 98306c3fb27SDimitry Andric } 98406c3fb27SDimitry Andric 98506c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 98606c3fb27SDimitry Andric uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypes( 98706c3fb27SDimitry Andric const DenseSet<uint32_t> &Node1Ids, const DenseSet<uint32_t> &Node2Ids) { 98806c3fb27SDimitry Andric if (Node1Ids.size() < Node2Ids.size()) 98906c3fb27SDimitry Andric return intersectAllocTypesImpl(Node1Ids, Node2Ids); 99006c3fb27SDimitry Andric else 99106c3fb27SDimitry Andric return intersectAllocTypesImpl(Node2Ids, Node1Ids); 99206c3fb27SDimitry Andric } 99306c3fb27SDimitry Andric 99406c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 99506c3fb27SDimitry Andric typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode * 99606c3fb27SDimitry Andric CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addAllocNode( 99706c3fb27SDimitry Andric CallInfo Call, const FuncTy *F) { 99806c3fb27SDimitry Andric assert(!getNodeForAlloc(Call)); 99906c3fb27SDimitry Andric NodeOwner.push_back( 100006c3fb27SDimitry Andric std::make_unique<ContextNode>(/*IsAllocation=*/true, Call)); 100106c3fb27SDimitry Andric ContextNode *AllocNode = NodeOwner.back().get(); 100206c3fb27SDimitry Andric AllocationCallToContextNodeMap[Call] = AllocNode; 100306c3fb27SDimitry Andric NodeToCallingFunc[AllocNode] = F; 100406c3fb27SDimitry Andric // Use LastContextId as a uniq id for MIB allocation nodes. 100506c3fb27SDimitry Andric AllocNode->OrigStackOrAllocId = LastContextId; 100606c3fb27SDimitry Andric // Alloc type should be updated as we add in the MIBs. We should assert 100706c3fb27SDimitry Andric // afterwards that it is not still None. 100806c3fb27SDimitry Andric AllocNode->AllocTypes = (uint8_t)AllocationType::None; 100906c3fb27SDimitry Andric 101006c3fb27SDimitry Andric return AllocNode; 101106c3fb27SDimitry Andric } 101206c3fb27SDimitry Andric 1013*0fca6ea1SDimitry Andric static std::string getAllocTypeString(uint8_t AllocTypes) { 1014*0fca6ea1SDimitry Andric if (!AllocTypes) 1015*0fca6ea1SDimitry Andric return "None"; 1016*0fca6ea1SDimitry Andric std::string Str; 1017*0fca6ea1SDimitry Andric if (AllocTypes & (uint8_t)AllocationType::NotCold) 1018*0fca6ea1SDimitry Andric Str += "NotCold"; 1019*0fca6ea1SDimitry Andric if (AllocTypes & (uint8_t)AllocationType::Cold) 1020*0fca6ea1SDimitry Andric Str += "Cold"; 1021*0fca6ea1SDimitry Andric return Str; 1022*0fca6ea1SDimitry Andric } 1023*0fca6ea1SDimitry Andric 102406c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 102506c3fb27SDimitry Andric template <class NodeT, class IteratorT> 102606c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addStackNodesForMIB( 102706c3fb27SDimitry Andric ContextNode *AllocNode, CallStack<NodeT, IteratorT> &StackContext, 1028*0fca6ea1SDimitry Andric CallStack<NodeT, IteratorT> &CallsiteContext, AllocationType AllocType, 1029*0fca6ea1SDimitry Andric uint64_t TotalSize) { 1030*0fca6ea1SDimitry Andric assert(!MemProfReportHintedSizes || TotalSize > 0); 103106c3fb27SDimitry Andric // Treating the hot alloc type as NotCold before the disambiguation for "hot" 103206c3fb27SDimitry Andric // is done. 103306c3fb27SDimitry Andric if (AllocType == AllocationType::Hot) 103406c3fb27SDimitry Andric AllocType = AllocationType::NotCold; 103506c3fb27SDimitry Andric 103606c3fb27SDimitry Andric ContextIdToAllocationType[++LastContextId] = AllocType; 103706c3fb27SDimitry Andric 1038*0fca6ea1SDimitry Andric if (MemProfReportHintedSizes) { 1039*0fca6ea1SDimitry Andric assert(TotalSize); 1040*0fca6ea1SDimitry Andric ContextIdToTotalSize[LastContextId] = TotalSize; 1041*0fca6ea1SDimitry Andric } 1042*0fca6ea1SDimitry Andric 104306c3fb27SDimitry Andric // Update alloc type and context ids for this MIB. 104406c3fb27SDimitry Andric AllocNode->AllocTypes |= (uint8_t)AllocType; 104506c3fb27SDimitry Andric 104606c3fb27SDimitry Andric // Now add or update nodes for each stack id in alloc's context. 104706c3fb27SDimitry Andric // Later when processing the stack ids on non-alloc callsites we will adjust 104806c3fb27SDimitry Andric // for any inlining in the context. 104906c3fb27SDimitry Andric ContextNode *PrevNode = AllocNode; 105006c3fb27SDimitry Andric // Look for recursion (direct recursion should have been collapsed by 105106c3fb27SDimitry Andric // module summary analysis, here we should just be detecting mutual 105206c3fb27SDimitry Andric // recursion). Mark these nodes so we don't try to clone. 105306c3fb27SDimitry Andric SmallSet<uint64_t, 8> StackIdSet; 105406c3fb27SDimitry Andric // Skip any on the allocation call (inlining). 105506c3fb27SDimitry Andric for (auto ContextIter = StackContext.beginAfterSharedPrefix(CallsiteContext); 105606c3fb27SDimitry Andric ContextIter != StackContext.end(); ++ContextIter) { 105706c3fb27SDimitry Andric auto StackId = getStackId(*ContextIter); 105806c3fb27SDimitry Andric ContextNode *StackNode = getNodeForStackId(StackId); 105906c3fb27SDimitry Andric if (!StackNode) { 106006c3fb27SDimitry Andric NodeOwner.push_back( 106106c3fb27SDimitry Andric std::make_unique<ContextNode>(/*IsAllocation=*/false)); 106206c3fb27SDimitry Andric StackNode = NodeOwner.back().get(); 106306c3fb27SDimitry Andric StackEntryIdToContextNodeMap[StackId] = StackNode; 106406c3fb27SDimitry Andric StackNode->OrigStackOrAllocId = StackId; 106506c3fb27SDimitry Andric } 106606c3fb27SDimitry Andric auto Ins = StackIdSet.insert(StackId); 106706c3fb27SDimitry Andric if (!Ins.second) 106806c3fb27SDimitry Andric StackNode->Recursive = true; 106906c3fb27SDimitry Andric StackNode->AllocTypes |= (uint8_t)AllocType; 107006c3fb27SDimitry Andric PrevNode->addOrUpdateCallerEdge(StackNode, AllocType, LastContextId); 107106c3fb27SDimitry Andric PrevNode = StackNode; 107206c3fb27SDimitry Andric } 107306c3fb27SDimitry Andric } 107406c3fb27SDimitry Andric 107506c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 107606c3fb27SDimitry Andric DenseSet<uint32_t> 107706c3fb27SDimitry Andric CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::duplicateContextIds( 107806c3fb27SDimitry Andric const DenseSet<uint32_t> &StackSequenceContextIds, 107906c3fb27SDimitry Andric DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds) { 108006c3fb27SDimitry Andric DenseSet<uint32_t> NewContextIds; 108106c3fb27SDimitry Andric for (auto OldId : StackSequenceContextIds) { 108206c3fb27SDimitry Andric NewContextIds.insert(++LastContextId); 108306c3fb27SDimitry Andric OldToNewContextIds[OldId].insert(LastContextId); 108406c3fb27SDimitry Andric assert(ContextIdToAllocationType.count(OldId)); 108506c3fb27SDimitry Andric // The new context has the same allocation type as original. 108606c3fb27SDimitry Andric ContextIdToAllocationType[LastContextId] = ContextIdToAllocationType[OldId]; 1087*0fca6ea1SDimitry Andric // For now set this to 0 so we don't duplicate sizes. Not clear how to divvy 1088*0fca6ea1SDimitry Andric // up the size. Assume that if we are able to duplicate context ids that we 1089*0fca6ea1SDimitry Andric // will be able to disambiguate all copies. 1090*0fca6ea1SDimitry Andric ContextIdToTotalSize[LastContextId] = 0; 109106c3fb27SDimitry Andric } 109206c3fb27SDimitry Andric return NewContextIds; 109306c3fb27SDimitry Andric } 109406c3fb27SDimitry Andric 109506c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 109606c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>:: 109706c3fb27SDimitry Andric propagateDuplicateContextIds( 109806c3fb27SDimitry Andric const DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds) { 109906c3fb27SDimitry Andric // Build a set of duplicated context ids corresponding to the input id set. 110006c3fb27SDimitry Andric auto GetNewIds = [&OldToNewContextIds](const DenseSet<uint32_t> &ContextIds) { 110106c3fb27SDimitry Andric DenseSet<uint32_t> NewIds; 110206c3fb27SDimitry Andric for (auto Id : ContextIds) 110306c3fb27SDimitry Andric if (auto NewId = OldToNewContextIds.find(Id); 110406c3fb27SDimitry Andric NewId != OldToNewContextIds.end()) 110506c3fb27SDimitry Andric NewIds.insert(NewId->second.begin(), NewId->second.end()); 110606c3fb27SDimitry Andric return NewIds; 110706c3fb27SDimitry Andric }; 110806c3fb27SDimitry Andric 110906c3fb27SDimitry Andric // Recursively update context ids sets along caller edges. 111006c3fb27SDimitry Andric auto UpdateCallers = [&](ContextNode *Node, 111106c3fb27SDimitry Andric DenseSet<const ContextEdge *> &Visited, 111206c3fb27SDimitry Andric auto &&UpdateCallers) -> void { 111306c3fb27SDimitry Andric for (const auto &Edge : Node->CallerEdges) { 111406c3fb27SDimitry Andric auto Inserted = Visited.insert(Edge.get()); 111506c3fb27SDimitry Andric if (!Inserted.second) 111606c3fb27SDimitry Andric continue; 111706c3fb27SDimitry Andric ContextNode *NextNode = Edge->Caller; 111806c3fb27SDimitry Andric DenseSet<uint32_t> NewIdsToAdd = GetNewIds(Edge->getContextIds()); 111906c3fb27SDimitry Andric // Only need to recursively iterate to NextNode via this caller edge if 112006c3fb27SDimitry Andric // it resulted in any added ids to NextNode. 112106c3fb27SDimitry Andric if (!NewIdsToAdd.empty()) { 112206c3fb27SDimitry Andric Edge->getContextIds().insert(NewIdsToAdd.begin(), NewIdsToAdd.end()); 112306c3fb27SDimitry Andric UpdateCallers(NextNode, Visited, UpdateCallers); 112406c3fb27SDimitry Andric } 112506c3fb27SDimitry Andric } 112606c3fb27SDimitry Andric }; 112706c3fb27SDimitry Andric 112806c3fb27SDimitry Andric DenseSet<const ContextEdge *> Visited; 112906c3fb27SDimitry Andric for (auto &Entry : AllocationCallToContextNodeMap) { 113006c3fb27SDimitry Andric auto *Node = Entry.second; 113106c3fb27SDimitry Andric UpdateCallers(Node, Visited, UpdateCallers); 113206c3fb27SDimitry Andric } 113306c3fb27SDimitry Andric } 113406c3fb27SDimitry Andric 113506c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 113606c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::connectNewNode( 1137*0fca6ea1SDimitry Andric ContextNode *NewNode, ContextNode *OrigNode, bool TowardsCallee, 1138*0fca6ea1SDimitry Andric // This must be passed by value to make a copy since it will be adjusted 1139*0fca6ea1SDimitry Andric // as ids are moved. 1140*0fca6ea1SDimitry Andric DenseSet<uint32_t> RemainingContextIds) { 114106c3fb27SDimitry Andric auto &OrigEdges = 114206c3fb27SDimitry Andric TowardsCallee ? OrigNode->CalleeEdges : OrigNode->CallerEdges; 114306c3fb27SDimitry Andric // Increment iterator in loop so that we can remove edges as needed. 114406c3fb27SDimitry Andric for (auto EI = OrigEdges.begin(); EI != OrigEdges.end();) { 114506c3fb27SDimitry Andric auto Edge = *EI; 114606c3fb27SDimitry Andric // Remove any matching context ids from Edge, return set that were found and 114706c3fb27SDimitry Andric // removed, these are the new edge's context ids. Also update the remaining 114806c3fb27SDimitry Andric // (not found ids). 114906c3fb27SDimitry Andric DenseSet<uint32_t> NewEdgeContextIds, NotFoundContextIds; 115006c3fb27SDimitry Andric set_subtract(Edge->getContextIds(), RemainingContextIds, NewEdgeContextIds, 115106c3fb27SDimitry Andric NotFoundContextIds); 115206c3fb27SDimitry Andric RemainingContextIds.swap(NotFoundContextIds); 115306c3fb27SDimitry Andric // If no matching context ids for this edge, skip it. 115406c3fb27SDimitry Andric if (NewEdgeContextIds.empty()) { 115506c3fb27SDimitry Andric ++EI; 115606c3fb27SDimitry Andric continue; 115706c3fb27SDimitry Andric } 115806c3fb27SDimitry Andric if (TowardsCallee) { 1159*0fca6ea1SDimitry Andric uint8_t NewAllocType = computeAllocType(NewEdgeContextIds); 116006c3fb27SDimitry Andric auto NewEdge = std::make_shared<ContextEdge>( 1161*0fca6ea1SDimitry Andric Edge->Callee, NewNode, NewAllocType, std::move(NewEdgeContextIds)); 116206c3fb27SDimitry Andric NewNode->CalleeEdges.push_back(NewEdge); 116306c3fb27SDimitry Andric NewEdge->Callee->CallerEdges.push_back(NewEdge); 116406c3fb27SDimitry Andric } else { 1165*0fca6ea1SDimitry Andric uint8_t NewAllocType = computeAllocType(NewEdgeContextIds); 116606c3fb27SDimitry Andric auto NewEdge = std::make_shared<ContextEdge>( 1167*0fca6ea1SDimitry Andric NewNode, Edge->Caller, NewAllocType, std::move(NewEdgeContextIds)); 116806c3fb27SDimitry Andric NewNode->CallerEdges.push_back(NewEdge); 116906c3fb27SDimitry Andric NewEdge->Caller->CalleeEdges.push_back(NewEdge); 117006c3fb27SDimitry Andric } 117106c3fb27SDimitry Andric // Remove old edge if context ids empty. 117206c3fb27SDimitry Andric if (Edge->getContextIds().empty()) { 117306c3fb27SDimitry Andric if (TowardsCallee) { 117406c3fb27SDimitry Andric Edge->Callee->eraseCallerEdge(Edge.get()); 117506c3fb27SDimitry Andric EI = OrigNode->CalleeEdges.erase(EI); 117606c3fb27SDimitry Andric } else { 117706c3fb27SDimitry Andric Edge->Caller->eraseCalleeEdge(Edge.get()); 117806c3fb27SDimitry Andric EI = OrigNode->CallerEdges.erase(EI); 117906c3fb27SDimitry Andric } 118006c3fb27SDimitry Andric continue; 118106c3fb27SDimitry Andric } 118206c3fb27SDimitry Andric ++EI; 118306c3fb27SDimitry Andric } 118406c3fb27SDimitry Andric } 118506c3fb27SDimitry Andric 118606c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 1187*0fca6ea1SDimitry Andric static void checkEdge( 1188*0fca6ea1SDimitry Andric const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &Edge) { 1189*0fca6ea1SDimitry Andric // Confirm that alloc type is not None and that we have at least one context 1190*0fca6ea1SDimitry Andric // id. 1191*0fca6ea1SDimitry Andric assert(Edge->AllocTypes != (uint8_t)AllocationType::None); 1192*0fca6ea1SDimitry Andric assert(!Edge->ContextIds.empty()); 1193*0fca6ea1SDimitry Andric } 1194*0fca6ea1SDimitry Andric 1195*0fca6ea1SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 1196*0fca6ea1SDimitry Andric static void checkNode(const ContextNode<DerivedCCG, FuncTy, CallTy> *Node, 1197*0fca6ea1SDimitry Andric bool CheckEdges = true) { 1198*0fca6ea1SDimitry Andric if (Node->isRemoved()) 1199*0fca6ea1SDimitry Andric return; 1200*0fca6ea1SDimitry Andric #ifndef NDEBUG 1201*0fca6ea1SDimitry Andric // Compute node's context ids once for use in asserts. 1202*0fca6ea1SDimitry Andric auto NodeContextIds = Node->getContextIds(); 1203*0fca6ea1SDimitry Andric #endif 1204*0fca6ea1SDimitry Andric // Node's context ids should be the union of both its callee and caller edge 1205*0fca6ea1SDimitry Andric // context ids. 1206*0fca6ea1SDimitry Andric if (Node->CallerEdges.size()) { 1207*0fca6ea1SDimitry Andric DenseSet<uint32_t> CallerEdgeContextIds( 1208*0fca6ea1SDimitry Andric Node->CallerEdges.front()->ContextIds); 1209*0fca6ea1SDimitry Andric for (const auto &Edge : llvm::drop_begin(Node->CallerEdges)) { 1210*0fca6ea1SDimitry Andric if (CheckEdges) 1211*0fca6ea1SDimitry Andric checkEdge<DerivedCCG, FuncTy, CallTy>(Edge); 1212*0fca6ea1SDimitry Andric set_union(CallerEdgeContextIds, Edge->ContextIds); 1213*0fca6ea1SDimitry Andric } 1214*0fca6ea1SDimitry Andric // Node can have more context ids than callers if some contexts terminate at 1215*0fca6ea1SDimitry Andric // node and some are longer. 1216*0fca6ea1SDimitry Andric assert(NodeContextIds == CallerEdgeContextIds || 1217*0fca6ea1SDimitry Andric set_is_subset(CallerEdgeContextIds, NodeContextIds)); 1218*0fca6ea1SDimitry Andric } 1219*0fca6ea1SDimitry Andric if (Node->CalleeEdges.size()) { 1220*0fca6ea1SDimitry Andric DenseSet<uint32_t> CalleeEdgeContextIds( 1221*0fca6ea1SDimitry Andric Node->CalleeEdges.front()->ContextIds); 1222*0fca6ea1SDimitry Andric for (const auto &Edge : llvm::drop_begin(Node->CalleeEdges)) { 1223*0fca6ea1SDimitry Andric if (CheckEdges) 1224*0fca6ea1SDimitry Andric checkEdge<DerivedCCG, FuncTy, CallTy>(Edge); 1225*0fca6ea1SDimitry Andric set_union(CalleeEdgeContextIds, Edge->getContextIds()); 1226*0fca6ea1SDimitry Andric } 1227*0fca6ea1SDimitry Andric assert(NodeContextIds == CalleeEdgeContextIds); 1228*0fca6ea1SDimitry Andric } 1229*0fca6ea1SDimitry Andric } 1230*0fca6ea1SDimitry Andric 1231*0fca6ea1SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 123206c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>:: 123306c3fb27SDimitry Andric assignStackNodesPostOrder(ContextNode *Node, 123406c3fb27SDimitry Andric DenseSet<const ContextNode *> &Visited, 123506c3fb27SDimitry Andric DenseMap<uint64_t, std::vector<CallContextInfo>> 123606c3fb27SDimitry Andric &StackIdToMatchingCalls) { 123706c3fb27SDimitry Andric auto Inserted = Visited.insert(Node); 123806c3fb27SDimitry Andric if (!Inserted.second) 123906c3fb27SDimitry Andric return; 124006c3fb27SDimitry Andric // Post order traversal. Iterate over a copy since we may add nodes and 124106c3fb27SDimitry Andric // therefore new callers during the recursive call, invalidating any 124206c3fb27SDimitry Andric // iterator over the original edge vector. We don't need to process these 124306c3fb27SDimitry Andric // new nodes as they were already processed on creation. 124406c3fb27SDimitry Andric auto CallerEdges = Node->CallerEdges; 124506c3fb27SDimitry Andric for (auto &Edge : CallerEdges) { 124606c3fb27SDimitry Andric // Skip any that have been removed during the recursion. 124706c3fb27SDimitry Andric if (!Edge) 124806c3fb27SDimitry Andric continue; 124906c3fb27SDimitry Andric assignStackNodesPostOrder(Edge->Caller, Visited, StackIdToMatchingCalls); 125006c3fb27SDimitry Andric } 125106c3fb27SDimitry Andric 125206c3fb27SDimitry Andric // If this node's stack id is in the map, update the graph to contain new 125306c3fb27SDimitry Andric // nodes representing any inlining at interior callsites. Note we move the 125406c3fb27SDimitry Andric // associated context ids over to the new nodes. 125506c3fb27SDimitry Andric 125606c3fb27SDimitry Andric // Ignore this node if it is for an allocation or we didn't record any 125706c3fb27SDimitry Andric // stack id lists ending at it. 125806c3fb27SDimitry Andric if (Node->IsAllocation || 125906c3fb27SDimitry Andric !StackIdToMatchingCalls.count(Node->OrigStackOrAllocId)) 126006c3fb27SDimitry Andric return; 126106c3fb27SDimitry Andric 126206c3fb27SDimitry Andric auto &Calls = StackIdToMatchingCalls[Node->OrigStackOrAllocId]; 126306c3fb27SDimitry Andric // Handle the simple case first. A single call with a single stack id. 126406c3fb27SDimitry Andric // In this case there is no need to create any new context nodes, simply 126506c3fb27SDimitry Andric // assign the context node for stack id to this Call. 126606c3fb27SDimitry Andric if (Calls.size() == 1) { 126706c3fb27SDimitry Andric auto &[Call, Ids, Func, SavedContextIds] = Calls[0]; 126806c3fb27SDimitry Andric if (Ids.size() == 1) { 126906c3fb27SDimitry Andric assert(SavedContextIds.empty()); 127006c3fb27SDimitry Andric // It should be this Node 127106c3fb27SDimitry Andric assert(Node == getNodeForStackId(Ids[0])); 127206c3fb27SDimitry Andric if (Node->Recursive) 127306c3fb27SDimitry Andric return; 127406c3fb27SDimitry Andric Node->setCall(Call); 127506c3fb27SDimitry Andric NonAllocationCallToContextNodeMap[Call] = Node; 127606c3fb27SDimitry Andric NodeToCallingFunc[Node] = Func; 127706c3fb27SDimitry Andric return; 127806c3fb27SDimitry Andric } 127906c3fb27SDimitry Andric } 128006c3fb27SDimitry Andric 128106c3fb27SDimitry Andric // Find the node for the last stack id, which should be the same 128206c3fb27SDimitry Andric // across all calls recorded for this id, and is this node's id. 128306c3fb27SDimitry Andric uint64_t LastId = Node->OrigStackOrAllocId; 128406c3fb27SDimitry Andric ContextNode *LastNode = getNodeForStackId(LastId); 128506c3fb27SDimitry Andric // We should only have kept stack ids that had nodes. 128606c3fb27SDimitry Andric assert(LastNode); 128706c3fb27SDimitry Andric 128806c3fb27SDimitry Andric for (unsigned I = 0; I < Calls.size(); I++) { 128906c3fb27SDimitry Andric auto &[Call, Ids, Func, SavedContextIds] = Calls[I]; 129006c3fb27SDimitry Andric // Skip any for which we didn't assign any ids, these don't get a node in 129106c3fb27SDimitry Andric // the graph. 129206c3fb27SDimitry Andric if (SavedContextIds.empty()) 129306c3fb27SDimitry Andric continue; 129406c3fb27SDimitry Andric 129506c3fb27SDimitry Andric assert(LastId == Ids.back()); 129606c3fb27SDimitry Andric 129706c3fb27SDimitry Andric ContextNode *FirstNode = getNodeForStackId(Ids[0]); 129806c3fb27SDimitry Andric assert(FirstNode); 129906c3fb27SDimitry Andric 130006c3fb27SDimitry Andric // Recompute the context ids for this stack id sequence (the 130106c3fb27SDimitry Andric // intersection of the context ids of the corresponding nodes). 130206c3fb27SDimitry Andric // Start with the ids we saved in the map for this call, which could be 130306c3fb27SDimitry Andric // duplicated context ids. We have to recompute as we might have overlap 130406c3fb27SDimitry Andric // overlap between the saved context ids for different last nodes, and 130506c3fb27SDimitry Andric // removed them already during the post order traversal. 1306*0fca6ea1SDimitry Andric set_intersect(SavedContextIds, FirstNode->getContextIds()); 130706c3fb27SDimitry Andric ContextNode *PrevNode = nullptr; 130806c3fb27SDimitry Andric for (auto Id : Ids) { 130906c3fb27SDimitry Andric ContextNode *CurNode = getNodeForStackId(Id); 131006c3fb27SDimitry Andric // We should only have kept stack ids that had nodes and weren't 131106c3fb27SDimitry Andric // recursive. 131206c3fb27SDimitry Andric assert(CurNode); 131306c3fb27SDimitry Andric assert(!CurNode->Recursive); 131406c3fb27SDimitry Andric if (!PrevNode) { 131506c3fb27SDimitry Andric PrevNode = CurNode; 131606c3fb27SDimitry Andric continue; 131706c3fb27SDimitry Andric } 131806c3fb27SDimitry Andric auto *Edge = CurNode->findEdgeFromCallee(PrevNode); 131906c3fb27SDimitry Andric if (!Edge) { 132006c3fb27SDimitry Andric SavedContextIds.clear(); 132106c3fb27SDimitry Andric break; 132206c3fb27SDimitry Andric } 132306c3fb27SDimitry Andric PrevNode = CurNode; 132406c3fb27SDimitry Andric set_intersect(SavedContextIds, Edge->getContextIds()); 132506c3fb27SDimitry Andric 132606c3fb27SDimitry Andric // If we now have no context ids for clone, skip this call. 132706c3fb27SDimitry Andric if (SavedContextIds.empty()) 132806c3fb27SDimitry Andric break; 132906c3fb27SDimitry Andric } 133006c3fb27SDimitry Andric if (SavedContextIds.empty()) 133106c3fb27SDimitry Andric continue; 133206c3fb27SDimitry Andric 133306c3fb27SDimitry Andric // Create new context node. 133406c3fb27SDimitry Andric NodeOwner.push_back( 133506c3fb27SDimitry Andric std::make_unique<ContextNode>(/*IsAllocation=*/false, Call)); 133606c3fb27SDimitry Andric ContextNode *NewNode = NodeOwner.back().get(); 133706c3fb27SDimitry Andric NodeToCallingFunc[NewNode] = Func; 133806c3fb27SDimitry Andric NonAllocationCallToContextNodeMap[Call] = NewNode; 1339*0fca6ea1SDimitry Andric NewNode->AllocTypes = computeAllocType(SavedContextIds); 134006c3fb27SDimitry Andric 134106c3fb27SDimitry Andric // Connect to callees of innermost stack frame in inlined call chain. 134206c3fb27SDimitry Andric // This updates context ids for FirstNode's callee's to reflect those 134306c3fb27SDimitry Andric // moved to NewNode. 1344*0fca6ea1SDimitry Andric connectNewNode(NewNode, FirstNode, /*TowardsCallee=*/true, SavedContextIds); 134506c3fb27SDimitry Andric 134606c3fb27SDimitry Andric // Connect to callers of outermost stack frame in inlined call chain. 134706c3fb27SDimitry Andric // This updates context ids for FirstNode's caller's to reflect those 134806c3fb27SDimitry Andric // moved to NewNode. 1349*0fca6ea1SDimitry Andric connectNewNode(NewNode, LastNode, /*TowardsCallee=*/false, SavedContextIds); 135006c3fb27SDimitry Andric 135106c3fb27SDimitry Andric // Now we need to remove context ids from edges/nodes between First and 135206c3fb27SDimitry Andric // Last Node. 135306c3fb27SDimitry Andric PrevNode = nullptr; 135406c3fb27SDimitry Andric for (auto Id : Ids) { 135506c3fb27SDimitry Andric ContextNode *CurNode = getNodeForStackId(Id); 135606c3fb27SDimitry Andric // We should only have kept stack ids that had nodes. 135706c3fb27SDimitry Andric assert(CurNode); 135806c3fb27SDimitry Andric 135906c3fb27SDimitry Andric // Remove the context ids moved to NewNode from CurNode, and the 136006c3fb27SDimitry Andric // edge from the prior node. 136106c3fb27SDimitry Andric if (PrevNode) { 136206c3fb27SDimitry Andric auto *PrevEdge = CurNode->findEdgeFromCallee(PrevNode); 136306c3fb27SDimitry Andric assert(PrevEdge); 1364*0fca6ea1SDimitry Andric set_subtract(PrevEdge->getContextIds(), SavedContextIds); 136506c3fb27SDimitry Andric if (PrevEdge->getContextIds().empty()) { 136606c3fb27SDimitry Andric PrevNode->eraseCallerEdge(PrevEdge); 136706c3fb27SDimitry Andric CurNode->eraseCalleeEdge(PrevEdge); 136806c3fb27SDimitry Andric } 136906c3fb27SDimitry Andric } 1370*0fca6ea1SDimitry Andric // Since we update the edges from leaf to tail, only look at the callee 1371*0fca6ea1SDimitry Andric // edges. This isn't an alloc node, so if there are no callee edges, the 1372*0fca6ea1SDimitry Andric // alloc type is None. 1373*0fca6ea1SDimitry Andric CurNode->AllocTypes = CurNode->CalleeEdges.empty() 1374*0fca6ea1SDimitry Andric ? (uint8_t)AllocationType::None 1375*0fca6ea1SDimitry Andric : CurNode->computeAllocType(); 137606c3fb27SDimitry Andric PrevNode = CurNode; 137706c3fb27SDimitry Andric } 1378*0fca6ea1SDimitry Andric if (VerifyNodes) { 1379*0fca6ea1SDimitry Andric checkNode<DerivedCCG, FuncTy, CallTy>(NewNode, /*CheckEdges=*/true); 1380*0fca6ea1SDimitry Andric for (auto Id : Ids) { 1381*0fca6ea1SDimitry Andric ContextNode *CurNode = getNodeForStackId(Id); 1382*0fca6ea1SDimitry Andric // We should only have kept stack ids that had nodes. 1383*0fca6ea1SDimitry Andric assert(CurNode); 1384*0fca6ea1SDimitry Andric checkNode<DerivedCCG, FuncTy, CallTy>(CurNode, /*CheckEdges=*/true); 1385*0fca6ea1SDimitry Andric } 1386*0fca6ea1SDimitry Andric } 138706c3fb27SDimitry Andric } 138806c3fb27SDimitry Andric } 138906c3fb27SDimitry Andric 139006c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 139106c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() { 139206c3fb27SDimitry Andric // Map of stack id to all calls with that as the last (outermost caller) 139306c3fb27SDimitry Andric // callsite id that has a context node (some might not due to pruning 139406c3fb27SDimitry Andric // performed during matching of the allocation profile contexts). 139506c3fb27SDimitry Andric // The CallContextInfo contains the Call and a list of its stack ids with 139606c3fb27SDimitry Andric // ContextNodes, the function containing Call, and the set of context ids 139706c3fb27SDimitry Andric // the analysis will eventually identify for use in any new node created 139806c3fb27SDimitry Andric // for that callsite. 139906c3fb27SDimitry Andric DenseMap<uint64_t, std::vector<CallContextInfo>> StackIdToMatchingCalls; 140006c3fb27SDimitry Andric for (auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) { 140106c3fb27SDimitry Andric for (auto &Call : CallsWithMetadata) { 140206c3fb27SDimitry Andric // Ignore allocations, already handled. 140306c3fb27SDimitry Andric if (AllocationCallToContextNodeMap.count(Call)) 140406c3fb27SDimitry Andric continue; 140506c3fb27SDimitry Andric auto StackIdsWithContextNodes = 140606c3fb27SDimitry Andric getStackIdsWithContextNodesForCall(Call.call()); 140706c3fb27SDimitry Andric // If there were no nodes created for MIBs on allocs (maybe this was in 140806c3fb27SDimitry Andric // the unambiguous part of the MIB stack that was pruned), ignore. 140906c3fb27SDimitry Andric if (StackIdsWithContextNodes.empty()) 141006c3fb27SDimitry Andric continue; 141106c3fb27SDimitry Andric // Otherwise, record this Call along with the list of ids for the last 141206c3fb27SDimitry Andric // (outermost caller) stack id with a node. 141306c3fb27SDimitry Andric StackIdToMatchingCalls[StackIdsWithContextNodes.back()].push_back( 141406c3fb27SDimitry Andric {Call.call(), StackIdsWithContextNodes, Func, {}}); 141506c3fb27SDimitry Andric } 141606c3fb27SDimitry Andric } 141706c3fb27SDimitry Andric 141806c3fb27SDimitry Andric // First make a pass through all stack ids that correspond to a call, 141906c3fb27SDimitry Andric // as identified in the above loop. Compute the context ids corresponding to 142006c3fb27SDimitry Andric // each of these calls when they correspond to multiple stack ids due to 142106c3fb27SDimitry Andric // due to inlining. Perform any duplication of context ids required when 142206c3fb27SDimitry Andric // there is more than one call with the same stack ids. Their (possibly newly 142306c3fb27SDimitry Andric // duplicated) context ids are saved in the StackIdToMatchingCalls map. 142406c3fb27SDimitry Andric DenseMap<uint32_t, DenseSet<uint32_t>> OldToNewContextIds; 142506c3fb27SDimitry Andric for (auto &It : StackIdToMatchingCalls) { 142606c3fb27SDimitry Andric auto &Calls = It.getSecond(); 142706c3fb27SDimitry Andric // Skip single calls with a single stack id. These don't need a new node. 142806c3fb27SDimitry Andric if (Calls.size() == 1) { 142906c3fb27SDimitry Andric auto &Ids = std::get<1>(Calls[0]); 143006c3fb27SDimitry Andric if (Ids.size() == 1) 143106c3fb27SDimitry Andric continue; 143206c3fb27SDimitry Andric } 143306c3fb27SDimitry Andric // In order to do the best and maximal matching of inlined calls to context 143406c3fb27SDimitry Andric // node sequences we will sort the vectors of stack ids in descending order 143506c3fb27SDimitry Andric // of length, and within each length, lexicographically by stack id. The 143606c3fb27SDimitry Andric // latter is so that we can specially handle calls that have identical stack 143706c3fb27SDimitry Andric // id sequences (either due to cloning or artificially because of the MIB 143806c3fb27SDimitry Andric // context pruning). 143906c3fb27SDimitry Andric std::stable_sort(Calls.begin(), Calls.end(), 144006c3fb27SDimitry Andric [](const CallContextInfo &A, const CallContextInfo &B) { 144106c3fb27SDimitry Andric auto &IdsA = std::get<1>(A); 144206c3fb27SDimitry Andric auto &IdsB = std::get<1>(B); 144306c3fb27SDimitry Andric return IdsA.size() > IdsB.size() || 144406c3fb27SDimitry Andric (IdsA.size() == IdsB.size() && IdsA < IdsB); 144506c3fb27SDimitry Andric }); 144606c3fb27SDimitry Andric 144706c3fb27SDimitry Andric // Find the node for the last stack id, which should be the same 144806c3fb27SDimitry Andric // across all calls recorded for this id, and is the id for this 144906c3fb27SDimitry Andric // entry in the StackIdToMatchingCalls map. 145006c3fb27SDimitry Andric uint64_t LastId = It.getFirst(); 145106c3fb27SDimitry Andric ContextNode *LastNode = getNodeForStackId(LastId); 145206c3fb27SDimitry Andric // We should only have kept stack ids that had nodes. 145306c3fb27SDimitry Andric assert(LastNode); 145406c3fb27SDimitry Andric 145506c3fb27SDimitry Andric if (LastNode->Recursive) 145606c3fb27SDimitry Andric continue; 145706c3fb27SDimitry Andric 145806c3fb27SDimitry Andric // Initialize the context ids with the last node's. We will subsequently 145906c3fb27SDimitry Andric // refine the context ids by computing the intersection along all edges. 1460*0fca6ea1SDimitry Andric DenseSet<uint32_t> LastNodeContextIds = LastNode->getContextIds(); 146106c3fb27SDimitry Andric assert(!LastNodeContextIds.empty()); 146206c3fb27SDimitry Andric 146306c3fb27SDimitry Andric for (unsigned I = 0; I < Calls.size(); I++) { 146406c3fb27SDimitry Andric auto &[Call, Ids, Func, SavedContextIds] = Calls[I]; 146506c3fb27SDimitry Andric assert(SavedContextIds.empty()); 146606c3fb27SDimitry Andric assert(LastId == Ids.back()); 146706c3fb27SDimitry Andric 146806c3fb27SDimitry Andric // First compute the context ids for this stack id sequence (the 146906c3fb27SDimitry Andric // intersection of the context ids of the corresponding nodes). 147006c3fb27SDimitry Andric // Start with the remaining saved ids for the last node. 147106c3fb27SDimitry Andric assert(!LastNodeContextIds.empty()); 147206c3fb27SDimitry Andric DenseSet<uint32_t> StackSequenceContextIds = LastNodeContextIds; 147306c3fb27SDimitry Andric 147406c3fb27SDimitry Andric ContextNode *PrevNode = LastNode; 147506c3fb27SDimitry Andric ContextNode *CurNode = LastNode; 147606c3fb27SDimitry Andric bool Skip = false; 147706c3fb27SDimitry Andric 147806c3fb27SDimitry Andric // Iterate backwards through the stack Ids, starting after the last Id 147906c3fb27SDimitry Andric // in the list, which was handled once outside for all Calls. 148006c3fb27SDimitry Andric for (auto IdIter = Ids.rbegin() + 1; IdIter != Ids.rend(); IdIter++) { 148106c3fb27SDimitry Andric auto Id = *IdIter; 148206c3fb27SDimitry Andric CurNode = getNodeForStackId(Id); 148306c3fb27SDimitry Andric // We should only have kept stack ids that had nodes. 148406c3fb27SDimitry Andric assert(CurNode); 148506c3fb27SDimitry Andric 148606c3fb27SDimitry Andric if (CurNode->Recursive) { 148706c3fb27SDimitry Andric Skip = true; 148806c3fb27SDimitry Andric break; 148906c3fb27SDimitry Andric } 149006c3fb27SDimitry Andric 149106c3fb27SDimitry Andric auto *Edge = CurNode->findEdgeFromCaller(PrevNode); 149206c3fb27SDimitry Andric // If there is no edge then the nodes belong to different MIB contexts, 149306c3fb27SDimitry Andric // and we should skip this inlined context sequence. For example, this 149406c3fb27SDimitry Andric // particular inlined context may include stack ids A->B, and we may 149506c3fb27SDimitry Andric // indeed have nodes for both A and B, but it is possible that they were 149606c3fb27SDimitry Andric // never profiled in sequence in a single MIB for any allocation (i.e. 149706c3fb27SDimitry Andric // we might have profiled an allocation that involves the callsite A, 149806c3fb27SDimitry Andric // but through a different one of its callee callsites, and we might 149906c3fb27SDimitry Andric // have profiled an allocation that involves callsite B, but reached 150006c3fb27SDimitry Andric // from a different caller callsite). 150106c3fb27SDimitry Andric if (!Edge) { 150206c3fb27SDimitry Andric Skip = true; 150306c3fb27SDimitry Andric break; 150406c3fb27SDimitry Andric } 150506c3fb27SDimitry Andric PrevNode = CurNode; 150606c3fb27SDimitry Andric 150706c3fb27SDimitry Andric // Update the context ids, which is the intersection of the ids along 150806c3fb27SDimitry Andric // all edges in the sequence. 150906c3fb27SDimitry Andric set_intersect(StackSequenceContextIds, Edge->getContextIds()); 151006c3fb27SDimitry Andric 151106c3fb27SDimitry Andric // If we now have no context ids for clone, skip this call. 151206c3fb27SDimitry Andric if (StackSequenceContextIds.empty()) { 151306c3fb27SDimitry Andric Skip = true; 151406c3fb27SDimitry Andric break; 151506c3fb27SDimitry Andric } 151606c3fb27SDimitry Andric } 151706c3fb27SDimitry Andric if (Skip) 151806c3fb27SDimitry Andric continue; 151906c3fb27SDimitry Andric 152006c3fb27SDimitry Andric // If some of this call's stack ids did not have corresponding nodes (due 152106c3fb27SDimitry Andric // to pruning), don't include any context ids for contexts that extend 152206c3fb27SDimitry Andric // beyond these nodes. Otherwise we would be matching part of unrelated / 152306c3fb27SDimitry Andric // not fully matching stack contexts. To do this, subtract any context ids 152406c3fb27SDimitry Andric // found in caller nodes of the last node found above. 152506c3fb27SDimitry Andric if (Ids.back() != getLastStackId(Call)) { 1526*0fca6ea1SDimitry Andric for (const auto &PE : LastNode->CallerEdges) { 152706c3fb27SDimitry Andric set_subtract(StackSequenceContextIds, PE->getContextIds()); 152806c3fb27SDimitry Andric if (StackSequenceContextIds.empty()) 152906c3fb27SDimitry Andric break; 153006c3fb27SDimitry Andric } 153106c3fb27SDimitry Andric // If we now have no context ids for clone, skip this call. 153206c3fb27SDimitry Andric if (StackSequenceContextIds.empty()) 153306c3fb27SDimitry Andric continue; 153406c3fb27SDimitry Andric } 153506c3fb27SDimitry Andric 153606c3fb27SDimitry Andric // Check if the next set of stack ids is the same (since the Calls vector 153706c3fb27SDimitry Andric // of tuples is sorted by the stack ids we can just look at the next one). 153806c3fb27SDimitry Andric bool DuplicateContextIds = false; 153906c3fb27SDimitry Andric if (I + 1 < Calls.size()) { 154006c3fb27SDimitry Andric auto NextIds = std::get<1>(Calls[I + 1]); 154106c3fb27SDimitry Andric DuplicateContextIds = Ids == NextIds; 154206c3fb27SDimitry Andric } 154306c3fb27SDimitry Andric 154406c3fb27SDimitry Andric // If we don't have duplicate context ids, then we can assign all the 154506c3fb27SDimitry Andric // context ids computed for the original node sequence to this call. 154606c3fb27SDimitry Andric // If there are duplicate calls with the same stack ids then we synthesize 154706c3fb27SDimitry Andric // new context ids that are duplicates of the originals. These are 154806c3fb27SDimitry Andric // assigned to SavedContextIds, which is a reference into the map entry 154906c3fb27SDimitry Andric // for this call, allowing us to access these ids later on. 155006c3fb27SDimitry Andric OldToNewContextIds.reserve(OldToNewContextIds.size() + 155106c3fb27SDimitry Andric StackSequenceContextIds.size()); 155206c3fb27SDimitry Andric SavedContextIds = 155306c3fb27SDimitry Andric DuplicateContextIds 155406c3fb27SDimitry Andric ? duplicateContextIds(StackSequenceContextIds, OldToNewContextIds) 155506c3fb27SDimitry Andric : StackSequenceContextIds; 155606c3fb27SDimitry Andric assert(!SavedContextIds.empty()); 155706c3fb27SDimitry Andric 155806c3fb27SDimitry Andric if (!DuplicateContextIds) { 155906c3fb27SDimitry Andric // Update saved last node's context ids to remove those that are 156006c3fb27SDimitry Andric // assigned to other calls, so that it is ready for the next call at 156106c3fb27SDimitry Andric // this stack id. 156206c3fb27SDimitry Andric set_subtract(LastNodeContextIds, StackSequenceContextIds); 156306c3fb27SDimitry Andric if (LastNodeContextIds.empty()) 156406c3fb27SDimitry Andric break; 156506c3fb27SDimitry Andric } 156606c3fb27SDimitry Andric } 156706c3fb27SDimitry Andric } 156806c3fb27SDimitry Andric 156906c3fb27SDimitry Andric // Propagate the duplicate context ids over the graph. 157006c3fb27SDimitry Andric propagateDuplicateContextIds(OldToNewContextIds); 157106c3fb27SDimitry Andric 157206c3fb27SDimitry Andric if (VerifyCCG) 157306c3fb27SDimitry Andric check(); 157406c3fb27SDimitry Andric 157506c3fb27SDimitry Andric // Now perform a post-order traversal over the graph, starting with the 157606c3fb27SDimitry Andric // allocation nodes, essentially processing nodes from callers to callees. 157706c3fb27SDimitry Andric // For any that contains an id in the map, update the graph to contain new 157806c3fb27SDimitry Andric // nodes representing any inlining at interior callsites. Note we move the 157906c3fb27SDimitry Andric // associated context ids over to the new nodes. 158006c3fb27SDimitry Andric DenseSet<const ContextNode *> Visited; 158106c3fb27SDimitry Andric for (auto &Entry : AllocationCallToContextNodeMap) 158206c3fb27SDimitry Andric assignStackNodesPostOrder(Entry.second, Visited, StackIdToMatchingCalls); 1583*0fca6ea1SDimitry Andric if (VerifyCCG) 1584*0fca6ea1SDimitry Andric check(); 158506c3fb27SDimitry Andric } 158606c3fb27SDimitry Andric 158706c3fb27SDimitry Andric uint64_t ModuleCallsiteContextGraph::getLastStackId(Instruction *Call) { 158806c3fb27SDimitry Andric CallStack<MDNode, MDNode::op_iterator> CallsiteContext( 158906c3fb27SDimitry Andric Call->getMetadata(LLVMContext::MD_callsite)); 159006c3fb27SDimitry Andric return CallsiteContext.back(); 159106c3fb27SDimitry Andric } 159206c3fb27SDimitry Andric 159306c3fb27SDimitry Andric uint64_t IndexCallsiteContextGraph::getLastStackId(IndexCall &Call) { 159406c3fb27SDimitry Andric assert(isa<CallsiteInfo *>(Call.getBase())); 159506c3fb27SDimitry Andric CallStack<CallsiteInfo, SmallVector<unsigned>::const_iterator> 159606c3fb27SDimitry Andric CallsiteContext(dyn_cast_if_present<CallsiteInfo *>(Call.getBase())); 159706c3fb27SDimitry Andric // Need to convert index into stack id. 159806c3fb27SDimitry Andric return Index.getStackIdAtIndex(CallsiteContext.back()); 159906c3fb27SDimitry Andric } 160006c3fb27SDimitry Andric 160106c3fb27SDimitry Andric static const std::string MemProfCloneSuffix = ".memprof."; 160206c3fb27SDimitry Andric 160306c3fb27SDimitry Andric static std::string getMemProfFuncName(Twine Base, unsigned CloneNo) { 160406c3fb27SDimitry Andric // We use CloneNo == 0 to refer to the original version, which doesn't get 160506c3fb27SDimitry Andric // renamed with a suffix. 160606c3fb27SDimitry Andric if (!CloneNo) 160706c3fb27SDimitry Andric return Base.str(); 160806c3fb27SDimitry Andric return (Base + MemProfCloneSuffix + Twine(CloneNo)).str(); 160906c3fb27SDimitry Andric } 161006c3fb27SDimitry Andric 161106c3fb27SDimitry Andric std::string ModuleCallsiteContextGraph::getLabel(const Function *Func, 161206c3fb27SDimitry Andric const Instruction *Call, 161306c3fb27SDimitry Andric unsigned CloneNo) const { 161406c3fb27SDimitry Andric return (Twine(Call->getFunction()->getName()) + " -> " + 161506c3fb27SDimitry Andric cast<CallBase>(Call)->getCalledFunction()->getName()) 161606c3fb27SDimitry Andric .str(); 161706c3fb27SDimitry Andric } 161806c3fb27SDimitry Andric 161906c3fb27SDimitry Andric std::string IndexCallsiteContextGraph::getLabel(const FunctionSummary *Func, 162006c3fb27SDimitry Andric const IndexCall &Call, 162106c3fb27SDimitry Andric unsigned CloneNo) const { 162206c3fb27SDimitry Andric auto VI = FSToVIMap.find(Func); 162306c3fb27SDimitry Andric assert(VI != FSToVIMap.end()); 162406c3fb27SDimitry Andric if (isa<AllocInfo *>(Call.getBase())) 162506c3fb27SDimitry Andric return (VI->second.name() + " -> alloc").str(); 162606c3fb27SDimitry Andric else { 162706c3fb27SDimitry Andric auto *Callsite = dyn_cast_if_present<CallsiteInfo *>(Call.getBase()); 162806c3fb27SDimitry Andric return (VI->second.name() + " -> " + 162906c3fb27SDimitry Andric getMemProfFuncName(Callsite->Callee.name(), 163006c3fb27SDimitry Andric Callsite->Clones[CloneNo])) 163106c3fb27SDimitry Andric .str(); 163206c3fb27SDimitry Andric } 163306c3fb27SDimitry Andric } 163406c3fb27SDimitry Andric 163506c3fb27SDimitry Andric std::vector<uint64_t> 163606c3fb27SDimitry Andric ModuleCallsiteContextGraph::getStackIdsWithContextNodesForCall( 163706c3fb27SDimitry Andric Instruction *Call) { 163806c3fb27SDimitry Andric CallStack<MDNode, MDNode::op_iterator> CallsiteContext( 163906c3fb27SDimitry Andric Call->getMetadata(LLVMContext::MD_callsite)); 164006c3fb27SDimitry Andric return getStackIdsWithContextNodes<MDNode, MDNode::op_iterator>( 164106c3fb27SDimitry Andric CallsiteContext); 164206c3fb27SDimitry Andric } 164306c3fb27SDimitry Andric 164406c3fb27SDimitry Andric std::vector<uint64_t> 164506c3fb27SDimitry Andric IndexCallsiteContextGraph::getStackIdsWithContextNodesForCall(IndexCall &Call) { 164606c3fb27SDimitry Andric assert(isa<CallsiteInfo *>(Call.getBase())); 164706c3fb27SDimitry Andric CallStack<CallsiteInfo, SmallVector<unsigned>::const_iterator> 164806c3fb27SDimitry Andric CallsiteContext(dyn_cast_if_present<CallsiteInfo *>(Call.getBase())); 164906c3fb27SDimitry Andric return getStackIdsWithContextNodes<CallsiteInfo, 165006c3fb27SDimitry Andric SmallVector<unsigned>::const_iterator>( 165106c3fb27SDimitry Andric CallsiteContext); 165206c3fb27SDimitry Andric } 165306c3fb27SDimitry Andric 165406c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 165506c3fb27SDimitry Andric template <class NodeT, class IteratorT> 165606c3fb27SDimitry Andric std::vector<uint64_t> 165706c3fb27SDimitry Andric CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getStackIdsWithContextNodes( 165806c3fb27SDimitry Andric CallStack<NodeT, IteratorT> &CallsiteContext) { 165906c3fb27SDimitry Andric std::vector<uint64_t> StackIds; 166006c3fb27SDimitry Andric for (auto IdOrIndex : CallsiteContext) { 166106c3fb27SDimitry Andric auto StackId = getStackId(IdOrIndex); 166206c3fb27SDimitry Andric ContextNode *Node = getNodeForStackId(StackId); 166306c3fb27SDimitry Andric if (!Node) 166406c3fb27SDimitry Andric break; 166506c3fb27SDimitry Andric StackIds.push_back(StackId); 166606c3fb27SDimitry Andric } 166706c3fb27SDimitry Andric return StackIds; 166806c3fb27SDimitry Andric } 166906c3fb27SDimitry Andric 167006c3fb27SDimitry Andric ModuleCallsiteContextGraph::ModuleCallsiteContextGraph( 16717a6dacacSDimitry Andric Module &M, 16727a6dacacSDimitry Andric llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter) 167306c3fb27SDimitry Andric : Mod(M), OREGetter(OREGetter) { 167406c3fb27SDimitry Andric for (auto &F : M) { 167506c3fb27SDimitry Andric std::vector<CallInfo> CallsWithMetadata; 167606c3fb27SDimitry Andric for (auto &BB : F) { 167706c3fb27SDimitry Andric for (auto &I : BB) { 167806c3fb27SDimitry Andric if (!isa<CallBase>(I)) 167906c3fb27SDimitry Andric continue; 168006c3fb27SDimitry Andric if (auto *MemProfMD = I.getMetadata(LLVMContext::MD_memprof)) { 168106c3fb27SDimitry Andric CallsWithMetadata.push_back(&I); 168206c3fb27SDimitry Andric auto *AllocNode = addAllocNode(&I, &F); 168306c3fb27SDimitry Andric auto *CallsiteMD = I.getMetadata(LLVMContext::MD_callsite); 168406c3fb27SDimitry Andric assert(CallsiteMD); 168506c3fb27SDimitry Andric CallStack<MDNode, MDNode::op_iterator> CallsiteContext(CallsiteMD); 168606c3fb27SDimitry Andric // Add all of the MIBs and their stack nodes. 168706c3fb27SDimitry Andric for (auto &MDOp : MemProfMD->operands()) { 168806c3fb27SDimitry Andric auto *MIBMD = cast<const MDNode>(MDOp); 168906c3fb27SDimitry Andric MDNode *StackNode = getMIBStackNode(MIBMD); 169006c3fb27SDimitry Andric assert(StackNode); 169106c3fb27SDimitry Andric CallStack<MDNode, MDNode::op_iterator> StackContext(StackNode); 169206c3fb27SDimitry Andric addStackNodesForMIB<MDNode, MDNode::op_iterator>( 169306c3fb27SDimitry Andric AllocNode, StackContext, CallsiteContext, 1694*0fca6ea1SDimitry Andric getMIBAllocType(MIBMD), getMIBTotalSize(MIBMD)); 169506c3fb27SDimitry Andric } 169606c3fb27SDimitry Andric assert(AllocNode->AllocTypes != (uint8_t)AllocationType::None); 169706c3fb27SDimitry Andric // Memprof and callsite metadata on memory allocations no longer 169806c3fb27SDimitry Andric // needed. 169906c3fb27SDimitry Andric I.setMetadata(LLVMContext::MD_memprof, nullptr); 170006c3fb27SDimitry Andric I.setMetadata(LLVMContext::MD_callsite, nullptr); 170106c3fb27SDimitry Andric } 170206c3fb27SDimitry Andric // For callsite metadata, add to list for this function for later use. 170306c3fb27SDimitry Andric else if (I.getMetadata(LLVMContext::MD_callsite)) 170406c3fb27SDimitry Andric CallsWithMetadata.push_back(&I); 170506c3fb27SDimitry Andric } 170606c3fb27SDimitry Andric } 170706c3fb27SDimitry Andric if (!CallsWithMetadata.empty()) 1708297eecfbSDimitry Andric FuncToCallsWithMetadata[&F] = CallsWithMetadata; 170906c3fb27SDimitry Andric } 171006c3fb27SDimitry Andric 171106c3fb27SDimitry Andric if (DumpCCG) { 171206c3fb27SDimitry Andric dbgs() << "CCG before updating call stack chains:\n"; 171306c3fb27SDimitry Andric dbgs() << *this; 171406c3fb27SDimitry Andric } 171506c3fb27SDimitry Andric 171606c3fb27SDimitry Andric if (ExportToDot) 171706c3fb27SDimitry Andric exportToDot("prestackupdate"); 171806c3fb27SDimitry Andric 171906c3fb27SDimitry Andric updateStackNodes(); 172006c3fb27SDimitry Andric 172106c3fb27SDimitry Andric handleCallsitesWithMultipleTargets(); 172206c3fb27SDimitry Andric 172306c3fb27SDimitry Andric // Strip off remaining callsite metadata, no longer needed. 172406c3fb27SDimitry Andric for (auto &FuncEntry : FuncToCallsWithMetadata) 172506c3fb27SDimitry Andric for (auto &Call : FuncEntry.second) 172606c3fb27SDimitry Andric Call.call()->setMetadata(LLVMContext::MD_callsite, nullptr); 172706c3fb27SDimitry Andric } 172806c3fb27SDimitry Andric 172906c3fb27SDimitry Andric IndexCallsiteContextGraph::IndexCallsiteContextGraph( 173006c3fb27SDimitry Andric ModuleSummaryIndex &Index, 17317a6dacacSDimitry Andric llvm::function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)> 173206c3fb27SDimitry Andric isPrevailing) 1733297eecfbSDimitry Andric : Index(Index), isPrevailing(isPrevailing) { 173406c3fb27SDimitry Andric for (auto &I : Index) { 173506c3fb27SDimitry Andric auto VI = Index.getValueInfo(I); 173606c3fb27SDimitry Andric for (auto &S : VI.getSummaryList()) { 173706c3fb27SDimitry Andric // We should only add the prevailing nodes. Otherwise we may try to clone 173806c3fb27SDimitry Andric // in a weak copy that won't be linked (and may be different than the 173906c3fb27SDimitry Andric // prevailing version). 174006c3fb27SDimitry Andric // We only keep the memprof summary on the prevailing copy now when 174106c3fb27SDimitry Andric // building the combined index, as a space optimization, however don't 174206c3fb27SDimitry Andric // rely on this optimization. The linker doesn't resolve local linkage 174306c3fb27SDimitry Andric // values so don't check whether those are prevailing. 174406c3fb27SDimitry Andric if (!GlobalValue::isLocalLinkage(S->linkage()) && 174506c3fb27SDimitry Andric !isPrevailing(VI.getGUID(), S.get())) 174606c3fb27SDimitry Andric continue; 174706c3fb27SDimitry Andric auto *FS = dyn_cast<FunctionSummary>(S.get()); 174806c3fb27SDimitry Andric if (!FS) 174906c3fb27SDimitry Andric continue; 175006c3fb27SDimitry Andric std::vector<CallInfo> CallsWithMetadata; 175106c3fb27SDimitry Andric if (!FS->allocs().empty()) { 175206c3fb27SDimitry Andric for (auto &AN : FS->mutableAllocs()) { 175306c3fb27SDimitry Andric // This can happen because of recursion elimination handling that 175406c3fb27SDimitry Andric // currently exists in ModuleSummaryAnalysis. Skip these for now. 175506c3fb27SDimitry Andric // We still added them to the summary because we need to be able to 175606c3fb27SDimitry Andric // correlate properly in applyImport in the backends. 175706c3fb27SDimitry Andric if (AN.MIBs.empty()) 175806c3fb27SDimitry Andric continue; 175906c3fb27SDimitry Andric CallsWithMetadata.push_back({&AN}); 176006c3fb27SDimitry Andric auto *AllocNode = addAllocNode({&AN}, FS); 176106c3fb27SDimitry Andric // Pass an empty CallStack to the CallsiteContext (second) 176206c3fb27SDimitry Andric // parameter, since for ThinLTO we already collapsed out the inlined 176306c3fb27SDimitry Andric // stack ids on the allocation call during ModuleSummaryAnalysis. 176406c3fb27SDimitry Andric CallStack<MIBInfo, SmallVector<unsigned>::const_iterator> 176506c3fb27SDimitry Andric EmptyContext; 1766*0fca6ea1SDimitry Andric unsigned I = 0; 1767*0fca6ea1SDimitry Andric assert(!MemProfReportHintedSizes || 1768*0fca6ea1SDimitry Andric AN.TotalSizes.size() == AN.MIBs.size()); 176906c3fb27SDimitry Andric // Now add all of the MIBs and their stack nodes. 177006c3fb27SDimitry Andric for (auto &MIB : AN.MIBs) { 177106c3fb27SDimitry Andric CallStack<MIBInfo, SmallVector<unsigned>::const_iterator> 177206c3fb27SDimitry Andric StackContext(&MIB); 1773*0fca6ea1SDimitry Andric uint64_t TotalSize = 0; 1774*0fca6ea1SDimitry Andric if (MemProfReportHintedSizes) 1775*0fca6ea1SDimitry Andric TotalSize = AN.TotalSizes[I]; 177606c3fb27SDimitry Andric addStackNodesForMIB<MIBInfo, SmallVector<unsigned>::const_iterator>( 1777*0fca6ea1SDimitry Andric AllocNode, StackContext, EmptyContext, MIB.AllocType, 1778*0fca6ea1SDimitry Andric TotalSize); 1779*0fca6ea1SDimitry Andric I++; 178006c3fb27SDimitry Andric } 178106c3fb27SDimitry Andric assert(AllocNode->AllocTypes != (uint8_t)AllocationType::None); 178206c3fb27SDimitry Andric // Initialize version 0 on the summary alloc node to the current alloc 178306c3fb27SDimitry Andric // type, unless it has both types in which case make it default, so 178406c3fb27SDimitry Andric // that in the case where we aren't able to clone the original version 178506c3fb27SDimitry Andric // always ends up with the default allocation behavior. 178606c3fb27SDimitry Andric AN.Versions[0] = (uint8_t)allocTypeToUse(AllocNode->AllocTypes); 178706c3fb27SDimitry Andric } 178806c3fb27SDimitry Andric } 178906c3fb27SDimitry Andric // For callsite metadata, add to list for this function for later use. 179006c3fb27SDimitry Andric if (!FS->callsites().empty()) 179106c3fb27SDimitry Andric for (auto &SN : FS->mutableCallsites()) 179206c3fb27SDimitry Andric CallsWithMetadata.push_back({&SN}); 179306c3fb27SDimitry Andric 179406c3fb27SDimitry Andric if (!CallsWithMetadata.empty()) 1795297eecfbSDimitry Andric FuncToCallsWithMetadata[FS] = CallsWithMetadata; 179606c3fb27SDimitry Andric 179706c3fb27SDimitry Andric if (!FS->allocs().empty() || !FS->callsites().empty()) 179806c3fb27SDimitry Andric FSToVIMap[FS] = VI; 179906c3fb27SDimitry Andric } 180006c3fb27SDimitry Andric } 180106c3fb27SDimitry Andric 180206c3fb27SDimitry Andric if (DumpCCG) { 180306c3fb27SDimitry Andric dbgs() << "CCG before updating call stack chains:\n"; 180406c3fb27SDimitry Andric dbgs() << *this; 180506c3fb27SDimitry Andric } 180606c3fb27SDimitry Andric 180706c3fb27SDimitry Andric if (ExportToDot) 180806c3fb27SDimitry Andric exportToDot("prestackupdate"); 180906c3fb27SDimitry Andric 181006c3fb27SDimitry Andric updateStackNodes(); 181106c3fb27SDimitry Andric 181206c3fb27SDimitry Andric handleCallsitesWithMultipleTargets(); 181306c3fb27SDimitry Andric } 181406c3fb27SDimitry Andric 181506c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 181606c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, 181706c3fb27SDimitry Andric CallTy>::handleCallsitesWithMultipleTargets() { 181806c3fb27SDimitry Andric // Look for and workaround callsites that call multiple functions. 181906c3fb27SDimitry Andric // This can happen for indirect calls, which needs better handling, and in 182006c3fb27SDimitry Andric // more rare cases (e.g. macro expansion). 182106c3fb27SDimitry Andric // TODO: To fix this for indirect calls we will want to perform speculative 182206c3fb27SDimitry Andric // devirtualization using either the normal PGO info with ICP, or using the 182306c3fb27SDimitry Andric // information in the profiled MemProf contexts. We can do this prior to 182406c3fb27SDimitry Andric // this transformation for regular LTO, and for ThinLTO we can simulate that 182506c3fb27SDimitry Andric // effect in the summary and perform the actual speculative devirtualization 182606c3fb27SDimitry Andric // while cloning in the ThinLTO backend. 1827297eecfbSDimitry Andric 1828297eecfbSDimitry Andric // Keep track of the new nodes synthesized for discovered tail calls missing 1829297eecfbSDimitry Andric // from the profiled contexts. 1830297eecfbSDimitry Andric MapVector<CallInfo, ContextNode *> TailCallToContextNodeMap; 1831297eecfbSDimitry Andric 1832*0fca6ea1SDimitry Andric for (auto &Entry : NonAllocationCallToContextNodeMap) { 1833*0fca6ea1SDimitry Andric auto *Node = Entry.second; 183406c3fb27SDimitry Andric assert(Node->Clones.empty()); 183506c3fb27SDimitry Andric // Check all node callees and see if in the same function. 183606c3fb27SDimitry Andric auto Call = Node->Call.call(); 1837*0fca6ea1SDimitry Andric for (auto EI = Node->CalleeEdges.begin(); EI != Node->CalleeEdges.end(); 1838*0fca6ea1SDimitry Andric ++EI) { 1839297eecfbSDimitry Andric auto Edge = *EI; 1840*0fca6ea1SDimitry Andric if (!Edge->Callee->hasCall()) 184106c3fb27SDimitry Andric continue; 184206c3fb27SDimitry Andric assert(NodeToCallingFunc.count(Edge->Callee)); 184306c3fb27SDimitry Andric // Check if the called function matches that of the callee node. 1844297eecfbSDimitry Andric if (calleesMatch(Call, EI, TailCallToContextNodeMap)) 184506c3fb27SDimitry Andric continue; 1846297eecfbSDimitry Andric RemovedEdgesWithMismatchedCallees++; 184706c3fb27SDimitry Andric // Work around by setting Node to have a null call, so it gets 184806c3fb27SDimitry Andric // skipped during cloning. Otherwise assignFunctions will assert 184906c3fb27SDimitry Andric // because its data structures are not designed to handle this case. 185006c3fb27SDimitry Andric Node->setCall(CallInfo()); 185106c3fb27SDimitry Andric break; 185206c3fb27SDimitry Andric } 185306c3fb27SDimitry Andric } 1854297eecfbSDimitry Andric 1855*0fca6ea1SDimitry Andric // Remove all mismatched nodes identified in the above loop from the node map 1856*0fca6ea1SDimitry Andric // (checking whether they have a null call which is set above). For a 1857*0fca6ea1SDimitry Andric // MapVector like NonAllocationCallToContextNodeMap it is much more efficient 1858*0fca6ea1SDimitry Andric // to do the removal via remove_if than by individually erasing entries above. 1859*0fca6ea1SDimitry Andric NonAllocationCallToContextNodeMap.remove_if( 1860*0fca6ea1SDimitry Andric [](const auto &it) { return !it.second->hasCall(); }); 1861*0fca6ea1SDimitry Andric 1862297eecfbSDimitry Andric // Add the new nodes after the above loop so that the iteration is not 1863297eecfbSDimitry Andric // invalidated. 1864297eecfbSDimitry Andric for (auto &[Call, Node] : TailCallToContextNodeMap) 1865297eecfbSDimitry Andric NonAllocationCallToContextNodeMap[Call] = Node; 186606c3fb27SDimitry Andric } 186706c3fb27SDimitry Andric 186806c3fb27SDimitry Andric uint64_t ModuleCallsiteContextGraph::getStackId(uint64_t IdOrIndex) const { 186906c3fb27SDimitry Andric // In the Module (IR) case this is already the Id. 187006c3fb27SDimitry Andric return IdOrIndex; 187106c3fb27SDimitry Andric } 187206c3fb27SDimitry Andric 187306c3fb27SDimitry Andric uint64_t IndexCallsiteContextGraph::getStackId(uint64_t IdOrIndex) const { 187406c3fb27SDimitry Andric // In the Index case this is an index into the stack id list in the summary 187506c3fb27SDimitry Andric // index, convert it to an Id. 187606c3fb27SDimitry Andric return Index.getStackIdAtIndex(IdOrIndex); 187706c3fb27SDimitry Andric } 187806c3fb27SDimitry Andric 1879297eecfbSDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 1880297eecfbSDimitry Andric bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::calleesMatch( 1881297eecfbSDimitry Andric CallTy Call, EdgeIter &EI, 1882297eecfbSDimitry Andric MapVector<CallInfo, ContextNode *> &TailCallToContextNodeMap) { 1883297eecfbSDimitry Andric auto Edge = *EI; 1884297eecfbSDimitry Andric const FuncTy *ProfiledCalleeFunc = NodeToCallingFunc[Edge->Callee]; 1885297eecfbSDimitry Andric const FuncTy *CallerFunc = NodeToCallingFunc[Edge->Caller]; 1886297eecfbSDimitry Andric // Will be populated in order of callee to caller if we find a chain of tail 1887297eecfbSDimitry Andric // calls between the profiled caller and callee. 1888297eecfbSDimitry Andric std::vector<std::pair<CallTy, FuncTy *>> FoundCalleeChain; 1889297eecfbSDimitry Andric if (!calleeMatchesFunc(Call, ProfiledCalleeFunc, CallerFunc, 1890*0fca6ea1SDimitry Andric FoundCalleeChain)) 1891297eecfbSDimitry Andric return false; 1892297eecfbSDimitry Andric 1893297eecfbSDimitry Andric // The usual case where the profiled callee matches that of the IR/summary. 1894*0fca6ea1SDimitry Andric if (FoundCalleeChain.empty()) 1895297eecfbSDimitry Andric return true; 1896297eecfbSDimitry Andric 1897297eecfbSDimitry Andric auto AddEdge = [Edge, &EI](ContextNode *Caller, ContextNode *Callee) { 1898297eecfbSDimitry Andric auto *CurEdge = Callee->findEdgeFromCaller(Caller); 1899297eecfbSDimitry Andric // If there is already an edge between these nodes, simply update it and 1900297eecfbSDimitry Andric // return. 1901297eecfbSDimitry Andric if (CurEdge) { 1902297eecfbSDimitry Andric CurEdge->ContextIds.insert(Edge->ContextIds.begin(), 1903297eecfbSDimitry Andric Edge->ContextIds.end()); 1904297eecfbSDimitry Andric CurEdge->AllocTypes |= Edge->AllocTypes; 1905297eecfbSDimitry Andric return; 1906297eecfbSDimitry Andric } 1907297eecfbSDimitry Andric // Otherwise, create a new edge and insert it into the caller and callee 1908297eecfbSDimitry Andric // lists. 1909297eecfbSDimitry Andric auto NewEdge = std::make_shared<ContextEdge>( 1910297eecfbSDimitry Andric Callee, Caller, Edge->AllocTypes, Edge->ContextIds); 1911297eecfbSDimitry Andric Callee->CallerEdges.push_back(NewEdge); 1912297eecfbSDimitry Andric if (Caller == Edge->Caller) { 1913297eecfbSDimitry Andric // If we are inserting the new edge into the current edge's caller, insert 1914297eecfbSDimitry Andric // the new edge before the current iterator position, and then increment 1915297eecfbSDimitry Andric // back to the current edge. 1916297eecfbSDimitry Andric EI = Caller->CalleeEdges.insert(EI, NewEdge); 1917297eecfbSDimitry Andric ++EI; 1918297eecfbSDimitry Andric assert(*EI == Edge && 1919297eecfbSDimitry Andric "Iterator position not restored after insert and increment"); 1920297eecfbSDimitry Andric } else 1921297eecfbSDimitry Andric Caller->CalleeEdges.push_back(NewEdge); 1922297eecfbSDimitry Andric }; 1923297eecfbSDimitry Andric 1924297eecfbSDimitry Andric // Create new nodes for each found callee and connect in between the profiled 1925297eecfbSDimitry Andric // caller and callee. 1926297eecfbSDimitry Andric auto *CurCalleeNode = Edge->Callee; 1927297eecfbSDimitry Andric for (auto &[NewCall, Func] : FoundCalleeChain) { 1928297eecfbSDimitry Andric ContextNode *NewNode = nullptr; 1929297eecfbSDimitry Andric // First check if we have already synthesized a node for this tail call. 1930297eecfbSDimitry Andric if (TailCallToContextNodeMap.count(NewCall)) { 1931297eecfbSDimitry Andric NewNode = TailCallToContextNodeMap[NewCall]; 1932297eecfbSDimitry Andric NewNode->AllocTypes |= Edge->AllocTypes; 1933297eecfbSDimitry Andric } else { 1934297eecfbSDimitry Andric FuncToCallsWithMetadata[Func].push_back({NewCall}); 1935297eecfbSDimitry Andric // Create Node and record node info. 1936297eecfbSDimitry Andric NodeOwner.push_back( 1937297eecfbSDimitry Andric std::make_unique<ContextNode>(/*IsAllocation=*/false, NewCall)); 1938297eecfbSDimitry Andric NewNode = NodeOwner.back().get(); 1939297eecfbSDimitry Andric NodeToCallingFunc[NewNode] = Func; 1940297eecfbSDimitry Andric TailCallToContextNodeMap[NewCall] = NewNode; 1941297eecfbSDimitry Andric NewNode->AllocTypes = Edge->AllocTypes; 1942297eecfbSDimitry Andric } 1943297eecfbSDimitry Andric 1944297eecfbSDimitry Andric // Hook up node to its callee node 1945297eecfbSDimitry Andric AddEdge(NewNode, CurCalleeNode); 1946297eecfbSDimitry Andric 1947297eecfbSDimitry Andric CurCalleeNode = NewNode; 1948297eecfbSDimitry Andric } 1949297eecfbSDimitry Andric 1950297eecfbSDimitry Andric // Hook up edge's original caller to new callee node. 1951297eecfbSDimitry Andric AddEdge(Edge->Caller, CurCalleeNode); 1952297eecfbSDimitry Andric 1953297eecfbSDimitry Andric // Remove old edge 1954297eecfbSDimitry Andric Edge->Callee->eraseCallerEdge(Edge.get()); 1955297eecfbSDimitry Andric EI = Edge->Caller->CalleeEdges.erase(EI); 1956297eecfbSDimitry Andric 1957*0fca6ea1SDimitry Andric // To simplify the increment of EI in the caller, subtract one from EI. 1958*0fca6ea1SDimitry Andric // In the final AddEdge call we would have either added a new callee edge, 1959*0fca6ea1SDimitry Andric // to Edge->Caller, or found an existing one. Either way we are guaranteed 1960*0fca6ea1SDimitry Andric // that there is at least one callee edge. 1961*0fca6ea1SDimitry Andric assert(!Edge->Caller->CalleeEdges.empty()); 1962*0fca6ea1SDimitry Andric --EI; 1963*0fca6ea1SDimitry Andric 1964297eecfbSDimitry Andric return true; 1965297eecfbSDimitry Andric } 1966297eecfbSDimitry Andric 1967297eecfbSDimitry Andric bool ModuleCallsiteContextGraph::findProfiledCalleeThroughTailCalls( 1968297eecfbSDimitry Andric const Function *ProfiledCallee, Value *CurCallee, unsigned Depth, 1969297eecfbSDimitry Andric std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain, 1970297eecfbSDimitry Andric bool &FoundMultipleCalleeChains) { 1971297eecfbSDimitry Andric // Stop recursive search if we have already explored the maximum specified 1972297eecfbSDimitry Andric // depth. 1973297eecfbSDimitry Andric if (Depth > TailCallSearchDepth) 1974297eecfbSDimitry Andric return false; 1975297eecfbSDimitry Andric 1976297eecfbSDimitry Andric auto SaveCallsiteInfo = [&](Instruction *Callsite, Function *F) { 1977297eecfbSDimitry Andric FoundCalleeChain.push_back({Callsite, F}); 1978297eecfbSDimitry Andric }; 1979297eecfbSDimitry Andric 1980297eecfbSDimitry Andric auto *CalleeFunc = dyn_cast<Function>(CurCallee); 1981297eecfbSDimitry Andric if (!CalleeFunc) { 1982297eecfbSDimitry Andric auto *Alias = dyn_cast<GlobalAlias>(CurCallee); 1983297eecfbSDimitry Andric assert(Alias); 1984297eecfbSDimitry Andric CalleeFunc = dyn_cast<Function>(Alias->getAliasee()); 1985297eecfbSDimitry Andric assert(CalleeFunc); 1986297eecfbSDimitry Andric } 1987297eecfbSDimitry Andric 1988297eecfbSDimitry Andric // Look for tail calls in this function, and check if they either call the 1989297eecfbSDimitry Andric // profiled callee directly, or indirectly (via a recursive search). 1990297eecfbSDimitry Andric // Only succeed if there is a single unique tail call chain found between the 1991297eecfbSDimitry Andric // profiled caller and callee, otherwise we could perform incorrect cloning. 1992297eecfbSDimitry Andric bool FoundSingleCalleeChain = false; 1993297eecfbSDimitry Andric for (auto &BB : *CalleeFunc) { 1994297eecfbSDimitry Andric for (auto &I : BB) { 1995297eecfbSDimitry Andric auto *CB = dyn_cast<CallBase>(&I); 1996297eecfbSDimitry Andric if (!CB || !CB->isTailCall()) 1997297eecfbSDimitry Andric continue; 1998297eecfbSDimitry Andric auto *CalledValue = CB->getCalledOperand(); 1999297eecfbSDimitry Andric auto *CalledFunction = CB->getCalledFunction(); 2000297eecfbSDimitry Andric if (CalledValue && !CalledFunction) { 2001297eecfbSDimitry Andric CalledValue = CalledValue->stripPointerCasts(); 2002297eecfbSDimitry Andric // Stripping pointer casts can reveal a called function. 2003297eecfbSDimitry Andric CalledFunction = dyn_cast<Function>(CalledValue); 2004297eecfbSDimitry Andric } 2005297eecfbSDimitry Andric // Check if this is an alias to a function. If so, get the 2006297eecfbSDimitry Andric // called aliasee for the checks below. 2007297eecfbSDimitry Andric if (auto *GA = dyn_cast<GlobalAlias>(CalledValue)) { 2008297eecfbSDimitry Andric assert(!CalledFunction && 2009297eecfbSDimitry Andric "Expected null called function in callsite for alias"); 2010297eecfbSDimitry Andric CalledFunction = dyn_cast<Function>(GA->getAliaseeObject()); 2011297eecfbSDimitry Andric } 2012297eecfbSDimitry Andric if (!CalledFunction) 2013297eecfbSDimitry Andric continue; 2014297eecfbSDimitry Andric if (CalledFunction == ProfiledCallee) { 2015297eecfbSDimitry Andric if (FoundSingleCalleeChain) { 2016297eecfbSDimitry Andric FoundMultipleCalleeChains = true; 2017297eecfbSDimitry Andric return false; 2018297eecfbSDimitry Andric } 2019297eecfbSDimitry Andric FoundSingleCalleeChain = true; 2020297eecfbSDimitry Andric FoundProfiledCalleeCount++; 2021297eecfbSDimitry Andric FoundProfiledCalleeDepth += Depth; 2022297eecfbSDimitry Andric if (Depth > FoundProfiledCalleeMaxDepth) 2023297eecfbSDimitry Andric FoundProfiledCalleeMaxDepth = Depth; 2024297eecfbSDimitry Andric SaveCallsiteInfo(&I, CalleeFunc); 2025297eecfbSDimitry Andric } else if (findProfiledCalleeThroughTailCalls( 2026297eecfbSDimitry Andric ProfiledCallee, CalledFunction, Depth + 1, 2027297eecfbSDimitry Andric FoundCalleeChain, FoundMultipleCalleeChains)) { 2028*0fca6ea1SDimitry Andric // findProfiledCalleeThroughTailCalls should not have returned 2029*0fca6ea1SDimitry Andric // true if FoundMultipleCalleeChains. 2030*0fca6ea1SDimitry Andric assert(!FoundMultipleCalleeChains); 2031297eecfbSDimitry Andric if (FoundSingleCalleeChain) { 2032297eecfbSDimitry Andric FoundMultipleCalleeChains = true; 2033297eecfbSDimitry Andric return false; 2034297eecfbSDimitry Andric } 2035297eecfbSDimitry Andric FoundSingleCalleeChain = true; 2036297eecfbSDimitry Andric SaveCallsiteInfo(&I, CalleeFunc); 2037*0fca6ea1SDimitry Andric } else if (FoundMultipleCalleeChains) 2038*0fca6ea1SDimitry Andric return false; 2039297eecfbSDimitry Andric } 2040297eecfbSDimitry Andric } 2041297eecfbSDimitry Andric 2042297eecfbSDimitry Andric return FoundSingleCalleeChain; 2043297eecfbSDimitry Andric } 2044297eecfbSDimitry Andric 2045297eecfbSDimitry Andric bool ModuleCallsiteContextGraph::calleeMatchesFunc( 2046297eecfbSDimitry Andric Instruction *Call, const Function *Func, const Function *CallerFunc, 2047297eecfbSDimitry Andric std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain) { 204806c3fb27SDimitry Andric auto *CB = dyn_cast<CallBase>(Call); 204906c3fb27SDimitry Andric if (!CB->getCalledOperand()) 205006c3fb27SDimitry Andric return false; 205106c3fb27SDimitry Andric auto *CalleeVal = CB->getCalledOperand()->stripPointerCasts(); 205206c3fb27SDimitry Andric auto *CalleeFunc = dyn_cast<Function>(CalleeVal); 205306c3fb27SDimitry Andric if (CalleeFunc == Func) 205406c3fb27SDimitry Andric return true; 205506c3fb27SDimitry Andric auto *Alias = dyn_cast<GlobalAlias>(CalleeVal); 2056297eecfbSDimitry Andric if (Alias && Alias->getAliasee() == Func) 2057297eecfbSDimitry Andric return true; 2058297eecfbSDimitry Andric 2059297eecfbSDimitry Andric // Recursively search for the profiled callee through tail calls starting with 2060297eecfbSDimitry Andric // the actual Callee. The discovered tail call chain is saved in 2061297eecfbSDimitry Andric // FoundCalleeChain, and we will fixup the graph to include these callsites 2062297eecfbSDimitry Andric // after returning. 2063297eecfbSDimitry Andric // FIXME: We will currently redo the same recursive walk if we find the same 2064297eecfbSDimitry Andric // mismatched callee from another callsite. We can improve this with more 2065297eecfbSDimitry Andric // bookkeeping of the created chain of new nodes for each mismatch. 2066297eecfbSDimitry Andric unsigned Depth = 1; 2067297eecfbSDimitry Andric bool FoundMultipleCalleeChains = false; 2068297eecfbSDimitry Andric if (!findProfiledCalleeThroughTailCalls(Func, CalleeVal, Depth, 2069297eecfbSDimitry Andric FoundCalleeChain, 2070297eecfbSDimitry Andric FoundMultipleCalleeChains)) { 2071297eecfbSDimitry Andric LLVM_DEBUG(dbgs() << "Not found through unique tail call chain: " 2072297eecfbSDimitry Andric << Func->getName() << " from " << CallerFunc->getName() 2073297eecfbSDimitry Andric << " that actually called " << CalleeVal->getName() 2074297eecfbSDimitry Andric << (FoundMultipleCalleeChains 2075297eecfbSDimitry Andric ? " (found multiple possible chains)" 2076297eecfbSDimitry Andric : "") 2077297eecfbSDimitry Andric << "\n"); 2078297eecfbSDimitry Andric if (FoundMultipleCalleeChains) 2079297eecfbSDimitry Andric FoundProfiledCalleeNonUniquelyCount++; 2080297eecfbSDimitry Andric return false; 208106c3fb27SDimitry Andric } 208206c3fb27SDimitry Andric 2083297eecfbSDimitry Andric return true; 2084297eecfbSDimitry Andric } 2085297eecfbSDimitry Andric 2086297eecfbSDimitry Andric bool IndexCallsiteContextGraph::findProfiledCalleeThroughTailCalls( 2087297eecfbSDimitry Andric ValueInfo ProfiledCallee, ValueInfo CurCallee, unsigned Depth, 2088297eecfbSDimitry Andric std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain, 2089297eecfbSDimitry Andric bool &FoundMultipleCalleeChains) { 2090297eecfbSDimitry Andric // Stop recursive search if we have already explored the maximum specified 2091297eecfbSDimitry Andric // depth. 2092297eecfbSDimitry Andric if (Depth > TailCallSearchDepth) 2093297eecfbSDimitry Andric return false; 2094297eecfbSDimitry Andric 2095297eecfbSDimitry Andric auto CreateAndSaveCallsiteInfo = [&](ValueInfo Callee, FunctionSummary *FS) { 2096297eecfbSDimitry Andric // Make a CallsiteInfo for each discovered callee, if one hasn't already 2097297eecfbSDimitry Andric // been synthesized. 2098297eecfbSDimitry Andric if (!FunctionCalleesToSynthesizedCallsiteInfos.count(FS) || 2099297eecfbSDimitry Andric !FunctionCalleesToSynthesizedCallsiteInfos[FS].count(Callee)) 2100297eecfbSDimitry Andric // StackIds is empty (we don't have debug info available in the index for 2101297eecfbSDimitry Andric // these callsites) 2102297eecfbSDimitry Andric FunctionCalleesToSynthesizedCallsiteInfos[FS][Callee] = 2103297eecfbSDimitry Andric std::make_unique<CallsiteInfo>(Callee, SmallVector<unsigned>()); 2104297eecfbSDimitry Andric CallsiteInfo *NewCallsiteInfo = 2105297eecfbSDimitry Andric FunctionCalleesToSynthesizedCallsiteInfos[FS][Callee].get(); 2106297eecfbSDimitry Andric FoundCalleeChain.push_back({NewCallsiteInfo, FS}); 2107297eecfbSDimitry Andric }; 2108297eecfbSDimitry Andric 2109297eecfbSDimitry Andric // Look for tail calls in this function, and check if they either call the 2110297eecfbSDimitry Andric // profiled callee directly, or indirectly (via a recursive search). 2111297eecfbSDimitry Andric // Only succeed if there is a single unique tail call chain found between the 2112297eecfbSDimitry Andric // profiled caller and callee, otherwise we could perform incorrect cloning. 2113297eecfbSDimitry Andric bool FoundSingleCalleeChain = false; 2114297eecfbSDimitry Andric for (auto &S : CurCallee.getSummaryList()) { 2115297eecfbSDimitry Andric if (!GlobalValue::isLocalLinkage(S->linkage()) && 2116297eecfbSDimitry Andric !isPrevailing(CurCallee.getGUID(), S.get())) 2117297eecfbSDimitry Andric continue; 2118297eecfbSDimitry Andric auto *FS = dyn_cast<FunctionSummary>(S->getBaseObject()); 2119297eecfbSDimitry Andric if (!FS) 2120297eecfbSDimitry Andric continue; 2121297eecfbSDimitry Andric auto FSVI = CurCallee; 2122297eecfbSDimitry Andric auto *AS = dyn_cast<AliasSummary>(S.get()); 2123297eecfbSDimitry Andric if (AS) 2124297eecfbSDimitry Andric FSVI = AS->getAliaseeVI(); 2125297eecfbSDimitry Andric for (auto &CallEdge : FS->calls()) { 2126297eecfbSDimitry Andric if (!CallEdge.second.hasTailCall()) 2127297eecfbSDimitry Andric continue; 2128297eecfbSDimitry Andric if (CallEdge.first == ProfiledCallee) { 2129297eecfbSDimitry Andric if (FoundSingleCalleeChain) { 2130297eecfbSDimitry Andric FoundMultipleCalleeChains = true; 2131297eecfbSDimitry Andric return false; 2132297eecfbSDimitry Andric } 2133297eecfbSDimitry Andric FoundSingleCalleeChain = true; 2134297eecfbSDimitry Andric FoundProfiledCalleeCount++; 2135297eecfbSDimitry Andric FoundProfiledCalleeDepth += Depth; 2136297eecfbSDimitry Andric if (Depth > FoundProfiledCalleeMaxDepth) 2137297eecfbSDimitry Andric FoundProfiledCalleeMaxDepth = Depth; 2138297eecfbSDimitry Andric CreateAndSaveCallsiteInfo(CallEdge.first, FS); 2139297eecfbSDimitry Andric // Add FS to FSToVIMap in case it isn't already there. 2140297eecfbSDimitry Andric assert(!FSToVIMap.count(FS) || FSToVIMap[FS] == FSVI); 2141297eecfbSDimitry Andric FSToVIMap[FS] = FSVI; 2142297eecfbSDimitry Andric } else if (findProfiledCalleeThroughTailCalls( 2143297eecfbSDimitry Andric ProfiledCallee, CallEdge.first, Depth + 1, 2144297eecfbSDimitry Andric FoundCalleeChain, FoundMultipleCalleeChains)) { 2145*0fca6ea1SDimitry Andric // findProfiledCalleeThroughTailCalls should not have returned 2146*0fca6ea1SDimitry Andric // true if FoundMultipleCalleeChains. 2147*0fca6ea1SDimitry Andric assert(!FoundMultipleCalleeChains); 2148297eecfbSDimitry Andric if (FoundSingleCalleeChain) { 2149297eecfbSDimitry Andric FoundMultipleCalleeChains = true; 2150297eecfbSDimitry Andric return false; 2151297eecfbSDimitry Andric } 2152297eecfbSDimitry Andric FoundSingleCalleeChain = true; 2153297eecfbSDimitry Andric CreateAndSaveCallsiteInfo(CallEdge.first, FS); 2154297eecfbSDimitry Andric // Add FS to FSToVIMap in case it isn't already there. 2155297eecfbSDimitry Andric assert(!FSToVIMap.count(FS) || FSToVIMap[FS] == FSVI); 2156297eecfbSDimitry Andric FSToVIMap[FS] = FSVI; 2157*0fca6ea1SDimitry Andric } else if (FoundMultipleCalleeChains) 2158*0fca6ea1SDimitry Andric return false; 2159297eecfbSDimitry Andric } 2160297eecfbSDimitry Andric } 2161297eecfbSDimitry Andric 2162297eecfbSDimitry Andric return FoundSingleCalleeChain; 2163297eecfbSDimitry Andric } 2164297eecfbSDimitry Andric 2165297eecfbSDimitry Andric bool IndexCallsiteContextGraph::calleeMatchesFunc( 2166297eecfbSDimitry Andric IndexCall &Call, const FunctionSummary *Func, 2167297eecfbSDimitry Andric const FunctionSummary *CallerFunc, 2168297eecfbSDimitry Andric std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain) { 216906c3fb27SDimitry Andric ValueInfo Callee = 217006c3fb27SDimitry Andric dyn_cast_if_present<CallsiteInfo *>(Call.getBase())->Callee; 217106c3fb27SDimitry Andric // If there is no summary list then this is a call to an externally defined 217206c3fb27SDimitry Andric // symbol. 217306c3fb27SDimitry Andric AliasSummary *Alias = 217406c3fb27SDimitry Andric Callee.getSummaryList().empty() 217506c3fb27SDimitry Andric ? nullptr 217606c3fb27SDimitry Andric : dyn_cast<AliasSummary>(Callee.getSummaryList()[0].get()); 217706c3fb27SDimitry Andric assert(FSToVIMap.count(Func)); 2178297eecfbSDimitry Andric auto FuncVI = FSToVIMap[Func]; 2179297eecfbSDimitry Andric if (Callee == FuncVI || 218006c3fb27SDimitry Andric // If callee is an alias, check the aliasee, since only function 218106c3fb27SDimitry Andric // summary base objects will contain the stack node summaries and thus 218206c3fb27SDimitry Andric // get a context node. 2183297eecfbSDimitry Andric (Alias && Alias->getAliaseeVI() == FuncVI)) 2184297eecfbSDimitry Andric return true; 2185297eecfbSDimitry Andric 2186297eecfbSDimitry Andric // Recursively search for the profiled callee through tail calls starting with 2187297eecfbSDimitry Andric // the actual Callee. The discovered tail call chain is saved in 2188297eecfbSDimitry Andric // FoundCalleeChain, and we will fixup the graph to include these callsites 2189297eecfbSDimitry Andric // after returning. 2190297eecfbSDimitry Andric // FIXME: We will currently redo the same recursive walk if we find the same 2191297eecfbSDimitry Andric // mismatched callee from another callsite. We can improve this with more 2192297eecfbSDimitry Andric // bookkeeping of the created chain of new nodes for each mismatch. 2193297eecfbSDimitry Andric unsigned Depth = 1; 2194297eecfbSDimitry Andric bool FoundMultipleCalleeChains = false; 2195297eecfbSDimitry Andric if (!findProfiledCalleeThroughTailCalls( 2196297eecfbSDimitry Andric FuncVI, Callee, Depth, FoundCalleeChain, FoundMultipleCalleeChains)) { 2197297eecfbSDimitry Andric LLVM_DEBUG(dbgs() << "Not found through unique tail call chain: " << FuncVI 2198297eecfbSDimitry Andric << " from " << FSToVIMap[CallerFunc] 2199297eecfbSDimitry Andric << " that actually called " << Callee 2200297eecfbSDimitry Andric << (FoundMultipleCalleeChains 2201297eecfbSDimitry Andric ? " (found multiple possible chains)" 2202297eecfbSDimitry Andric : "") 2203297eecfbSDimitry Andric << "\n"); 2204297eecfbSDimitry Andric if (FoundMultipleCalleeChains) 2205297eecfbSDimitry Andric FoundProfiledCalleeNonUniquelyCount++; 2206297eecfbSDimitry Andric return false; 2207297eecfbSDimitry Andric } 2208297eecfbSDimitry Andric 2209297eecfbSDimitry Andric return true; 221006c3fb27SDimitry Andric } 221106c3fb27SDimitry Andric 221206c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 221306c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::dump() 221406c3fb27SDimitry Andric const { 221506c3fb27SDimitry Andric print(dbgs()); 221606c3fb27SDimitry Andric dbgs() << "\n"; 221706c3fb27SDimitry Andric } 221806c3fb27SDimitry Andric 221906c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 222006c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::print( 222106c3fb27SDimitry Andric raw_ostream &OS) const { 222206c3fb27SDimitry Andric OS << "Node " << this << "\n"; 222306c3fb27SDimitry Andric OS << "\t"; 222406c3fb27SDimitry Andric printCall(OS); 222506c3fb27SDimitry Andric if (Recursive) 222606c3fb27SDimitry Andric OS << " (recursive)"; 222706c3fb27SDimitry Andric OS << "\n"; 222806c3fb27SDimitry Andric OS << "\tAllocTypes: " << getAllocTypeString(AllocTypes) << "\n"; 222906c3fb27SDimitry Andric OS << "\tContextIds:"; 2230*0fca6ea1SDimitry Andric // Make a copy of the computed context ids that we can sort for stability. 2231*0fca6ea1SDimitry Andric auto ContextIds = getContextIds(); 223206c3fb27SDimitry Andric std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end()); 223306c3fb27SDimitry Andric std::sort(SortedIds.begin(), SortedIds.end()); 223406c3fb27SDimitry Andric for (auto Id : SortedIds) 223506c3fb27SDimitry Andric OS << " " << Id; 223606c3fb27SDimitry Andric OS << "\n"; 223706c3fb27SDimitry Andric OS << "\tCalleeEdges:\n"; 223806c3fb27SDimitry Andric for (auto &Edge : CalleeEdges) 223906c3fb27SDimitry Andric OS << "\t\t" << *Edge << "\n"; 224006c3fb27SDimitry Andric OS << "\tCallerEdges:\n"; 224106c3fb27SDimitry Andric for (auto &Edge : CallerEdges) 224206c3fb27SDimitry Andric OS << "\t\t" << *Edge << "\n"; 224306c3fb27SDimitry Andric if (!Clones.empty()) { 224406c3fb27SDimitry Andric OS << "\tClones: "; 224506c3fb27SDimitry Andric FieldSeparator FS; 224606c3fb27SDimitry Andric for (auto *Clone : Clones) 224706c3fb27SDimitry Andric OS << FS << Clone; 224806c3fb27SDimitry Andric OS << "\n"; 224906c3fb27SDimitry Andric } else if (CloneOf) { 225006c3fb27SDimitry Andric OS << "\tClone of " << CloneOf << "\n"; 225106c3fb27SDimitry Andric } 225206c3fb27SDimitry Andric } 225306c3fb27SDimitry Andric 225406c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 225506c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::dump() 225606c3fb27SDimitry Andric const { 225706c3fb27SDimitry Andric print(dbgs()); 225806c3fb27SDimitry Andric dbgs() << "\n"; 225906c3fb27SDimitry Andric } 226006c3fb27SDimitry Andric 226106c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 226206c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::print( 226306c3fb27SDimitry Andric raw_ostream &OS) const { 226406c3fb27SDimitry Andric OS << "Edge from Callee " << Callee << " to Caller: " << Caller 226506c3fb27SDimitry Andric << " AllocTypes: " << getAllocTypeString(AllocTypes); 226606c3fb27SDimitry Andric OS << " ContextIds:"; 226706c3fb27SDimitry Andric std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end()); 226806c3fb27SDimitry Andric std::sort(SortedIds.begin(), SortedIds.end()); 226906c3fb27SDimitry Andric for (auto Id : SortedIds) 227006c3fb27SDimitry Andric OS << " " << Id; 227106c3fb27SDimitry Andric } 227206c3fb27SDimitry Andric 227306c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 227406c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::dump() const { 227506c3fb27SDimitry Andric print(dbgs()); 227606c3fb27SDimitry Andric } 227706c3fb27SDimitry Andric 227806c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 227906c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::print( 228006c3fb27SDimitry Andric raw_ostream &OS) const { 228106c3fb27SDimitry Andric OS << "Callsite Context Graph:\n"; 228206c3fb27SDimitry Andric using GraphType = const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *; 228306c3fb27SDimitry Andric for (const auto Node : nodes<GraphType>(this)) { 228406c3fb27SDimitry Andric if (Node->isRemoved()) 228506c3fb27SDimitry Andric continue; 228606c3fb27SDimitry Andric Node->print(OS); 228706c3fb27SDimitry Andric OS << "\n"; 228806c3fb27SDimitry Andric } 228906c3fb27SDimitry Andric } 229006c3fb27SDimitry Andric 229106c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 2292*0fca6ea1SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::printTotalSizes( 2293*0fca6ea1SDimitry Andric raw_ostream &OS) const { 2294*0fca6ea1SDimitry Andric using GraphType = const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *; 2295*0fca6ea1SDimitry Andric for (const auto Node : nodes<GraphType>(this)) { 229606c3fb27SDimitry Andric if (Node->isRemoved()) 2297*0fca6ea1SDimitry Andric continue; 2298*0fca6ea1SDimitry Andric if (!Node->IsAllocation) 2299*0fca6ea1SDimitry Andric continue; 2300*0fca6ea1SDimitry Andric DenseSet<uint32_t> ContextIds = Node->getContextIds(); 2301*0fca6ea1SDimitry Andric std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end()); 2302*0fca6ea1SDimitry Andric std::sort(SortedIds.begin(), SortedIds.end()); 2303*0fca6ea1SDimitry Andric for (auto Id : SortedIds) { 2304*0fca6ea1SDimitry Andric auto SizeI = ContextIdToTotalSize.find(Id); 2305*0fca6ea1SDimitry Andric assert(SizeI != ContextIdToTotalSize.end()); 2306*0fca6ea1SDimitry Andric auto TypeI = ContextIdToAllocationType.find(Id); 2307*0fca6ea1SDimitry Andric assert(TypeI != ContextIdToAllocationType.end()); 2308*0fca6ea1SDimitry Andric OS << getAllocTypeString((uint8_t)TypeI->second) << " context " << Id 2309*0fca6ea1SDimitry Andric << " with total size " << SizeI->second << " is " 2310*0fca6ea1SDimitry Andric << getAllocTypeString(Node->AllocTypes) << " after cloning\n"; 231106c3fb27SDimitry Andric } 231206c3fb27SDimitry Andric } 231306c3fb27SDimitry Andric } 231406c3fb27SDimitry Andric 231506c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 231606c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::check() const { 231706c3fb27SDimitry Andric using GraphType = const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *; 231806c3fb27SDimitry Andric for (const auto Node : nodes<GraphType>(this)) { 231906c3fb27SDimitry Andric checkNode<DerivedCCG, FuncTy, CallTy>(Node, /*CheckEdges=*/false); 232006c3fb27SDimitry Andric for (auto &Edge : Node->CallerEdges) 232106c3fb27SDimitry Andric checkEdge<DerivedCCG, FuncTy, CallTy>(Edge); 232206c3fb27SDimitry Andric } 232306c3fb27SDimitry Andric } 232406c3fb27SDimitry Andric 232506c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 232606c3fb27SDimitry Andric struct GraphTraits<const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *> { 232706c3fb27SDimitry Andric using GraphType = const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *; 232806c3fb27SDimitry Andric using NodeRef = const ContextNode<DerivedCCG, FuncTy, CallTy> *; 232906c3fb27SDimitry Andric 233006c3fb27SDimitry Andric using NodePtrTy = std::unique_ptr<ContextNode<DerivedCCG, FuncTy, CallTy>>; 233106c3fb27SDimitry Andric static NodeRef getNode(const NodePtrTy &P) { return P.get(); } 233206c3fb27SDimitry Andric 233306c3fb27SDimitry Andric using nodes_iterator = 233406c3fb27SDimitry Andric mapped_iterator<typename std::vector<NodePtrTy>::const_iterator, 233506c3fb27SDimitry Andric decltype(&getNode)>; 233606c3fb27SDimitry Andric 233706c3fb27SDimitry Andric static nodes_iterator nodes_begin(GraphType G) { 233806c3fb27SDimitry Andric return nodes_iterator(G->NodeOwner.begin(), &getNode); 233906c3fb27SDimitry Andric } 234006c3fb27SDimitry Andric 234106c3fb27SDimitry Andric static nodes_iterator nodes_end(GraphType G) { 234206c3fb27SDimitry Andric return nodes_iterator(G->NodeOwner.end(), &getNode); 234306c3fb27SDimitry Andric } 234406c3fb27SDimitry Andric 234506c3fb27SDimitry Andric static NodeRef getEntryNode(GraphType G) { 234606c3fb27SDimitry Andric return G->NodeOwner.begin()->get(); 234706c3fb27SDimitry Andric } 234806c3fb27SDimitry Andric 234906c3fb27SDimitry Andric using EdgePtrTy = std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>; 235006c3fb27SDimitry Andric static const ContextNode<DerivedCCG, FuncTy, CallTy> * 235106c3fb27SDimitry Andric GetCallee(const EdgePtrTy &P) { 235206c3fb27SDimitry Andric return P->Callee; 235306c3fb27SDimitry Andric } 235406c3fb27SDimitry Andric 235506c3fb27SDimitry Andric using ChildIteratorType = 235606c3fb27SDimitry Andric mapped_iterator<typename std::vector<std::shared_ptr<ContextEdge< 235706c3fb27SDimitry Andric DerivedCCG, FuncTy, CallTy>>>::const_iterator, 235806c3fb27SDimitry Andric decltype(&GetCallee)>; 235906c3fb27SDimitry Andric 236006c3fb27SDimitry Andric static ChildIteratorType child_begin(NodeRef N) { 236106c3fb27SDimitry Andric return ChildIteratorType(N->CalleeEdges.begin(), &GetCallee); 236206c3fb27SDimitry Andric } 236306c3fb27SDimitry Andric 236406c3fb27SDimitry Andric static ChildIteratorType child_end(NodeRef N) { 236506c3fb27SDimitry Andric return ChildIteratorType(N->CalleeEdges.end(), &GetCallee); 236606c3fb27SDimitry Andric } 236706c3fb27SDimitry Andric }; 236806c3fb27SDimitry Andric 236906c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 237006c3fb27SDimitry Andric struct DOTGraphTraits<const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *> 237106c3fb27SDimitry Andric : public DefaultDOTGraphTraits { 237206c3fb27SDimitry Andric DOTGraphTraits(bool IsSimple = false) : DefaultDOTGraphTraits(IsSimple) {} 237306c3fb27SDimitry Andric 237406c3fb27SDimitry Andric using GraphType = const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *; 237506c3fb27SDimitry Andric using GTraits = GraphTraits<GraphType>; 237606c3fb27SDimitry Andric using NodeRef = typename GTraits::NodeRef; 237706c3fb27SDimitry Andric using ChildIteratorType = typename GTraits::ChildIteratorType; 237806c3fb27SDimitry Andric 237906c3fb27SDimitry Andric static std::string getNodeLabel(NodeRef Node, GraphType G) { 238006c3fb27SDimitry Andric std::string LabelString = 238106c3fb27SDimitry Andric (Twine("OrigId: ") + (Node->IsAllocation ? "Alloc" : "") + 238206c3fb27SDimitry Andric Twine(Node->OrigStackOrAllocId)) 238306c3fb27SDimitry Andric .str(); 238406c3fb27SDimitry Andric LabelString += "\n"; 238506c3fb27SDimitry Andric if (Node->hasCall()) { 238606c3fb27SDimitry Andric auto Func = G->NodeToCallingFunc.find(Node); 238706c3fb27SDimitry Andric assert(Func != G->NodeToCallingFunc.end()); 238806c3fb27SDimitry Andric LabelString += 238906c3fb27SDimitry Andric G->getLabel(Func->second, Node->Call.call(), Node->Call.cloneNo()); 239006c3fb27SDimitry Andric } else { 239106c3fb27SDimitry Andric LabelString += "null call"; 239206c3fb27SDimitry Andric if (Node->Recursive) 239306c3fb27SDimitry Andric LabelString += " (recursive)"; 239406c3fb27SDimitry Andric else 239506c3fb27SDimitry Andric LabelString += " (external)"; 239606c3fb27SDimitry Andric } 239706c3fb27SDimitry Andric return LabelString; 239806c3fb27SDimitry Andric } 239906c3fb27SDimitry Andric 240006c3fb27SDimitry Andric static std::string getNodeAttributes(NodeRef Node, GraphType) { 240106c3fb27SDimitry Andric std::string AttributeString = (Twine("tooltip=\"") + getNodeId(Node) + " " + 2402*0fca6ea1SDimitry Andric getContextIds(Node->getContextIds()) + "\"") 240306c3fb27SDimitry Andric .str(); 240406c3fb27SDimitry Andric AttributeString += 240506c3fb27SDimitry Andric (Twine(",fillcolor=\"") + getColor(Node->AllocTypes) + "\"").str(); 240606c3fb27SDimitry Andric AttributeString += ",style=\"filled\""; 240706c3fb27SDimitry Andric if (Node->CloneOf) { 240806c3fb27SDimitry Andric AttributeString += ",color=\"blue\""; 240906c3fb27SDimitry Andric AttributeString += ",style=\"filled,bold,dashed\""; 241006c3fb27SDimitry Andric } else 241106c3fb27SDimitry Andric AttributeString += ",style=\"filled\""; 241206c3fb27SDimitry Andric return AttributeString; 241306c3fb27SDimitry Andric } 241406c3fb27SDimitry Andric 241506c3fb27SDimitry Andric static std::string getEdgeAttributes(NodeRef, ChildIteratorType ChildIter, 241606c3fb27SDimitry Andric GraphType) { 241706c3fb27SDimitry Andric auto &Edge = *(ChildIter.getCurrent()); 241806c3fb27SDimitry Andric return (Twine("tooltip=\"") + getContextIds(Edge->ContextIds) + "\"" + 241906c3fb27SDimitry Andric Twine(",fillcolor=\"") + getColor(Edge->AllocTypes) + "\"") 242006c3fb27SDimitry Andric .str(); 242106c3fb27SDimitry Andric } 242206c3fb27SDimitry Andric 242306c3fb27SDimitry Andric // Since the NodeOwners list includes nodes that are no longer connected to 242406c3fb27SDimitry Andric // the graph, skip them here. 242506c3fb27SDimitry Andric static bool isNodeHidden(NodeRef Node, GraphType) { 242606c3fb27SDimitry Andric return Node->isRemoved(); 242706c3fb27SDimitry Andric } 242806c3fb27SDimitry Andric 242906c3fb27SDimitry Andric private: 243006c3fb27SDimitry Andric static std::string getContextIds(const DenseSet<uint32_t> &ContextIds) { 243106c3fb27SDimitry Andric std::string IdString = "ContextIds:"; 243206c3fb27SDimitry Andric if (ContextIds.size() < 100) { 243306c3fb27SDimitry Andric std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end()); 243406c3fb27SDimitry Andric std::sort(SortedIds.begin(), SortedIds.end()); 243506c3fb27SDimitry Andric for (auto Id : SortedIds) 243606c3fb27SDimitry Andric IdString += (" " + Twine(Id)).str(); 243706c3fb27SDimitry Andric } else { 243806c3fb27SDimitry Andric IdString += (" (" + Twine(ContextIds.size()) + " ids)").str(); 243906c3fb27SDimitry Andric } 244006c3fb27SDimitry Andric return IdString; 244106c3fb27SDimitry Andric } 244206c3fb27SDimitry Andric 244306c3fb27SDimitry Andric static std::string getColor(uint8_t AllocTypes) { 244406c3fb27SDimitry Andric if (AllocTypes == (uint8_t)AllocationType::NotCold) 244506c3fb27SDimitry Andric // Color "brown1" actually looks like a lighter red. 244606c3fb27SDimitry Andric return "brown1"; 244706c3fb27SDimitry Andric if (AllocTypes == (uint8_t)AllocationType::Cold) 244806c3fb27SDimitry Andric return "cyan"; 244906c3fb27SDimitry Andric if (AllocTypes == 245006c3fb27SDimitry Andric ((uint8_t)AllocationType::NotCold | (uint8_t)AllocationType::Cold)) 245106c3fb27SDimitry Andric // Lighter purple. 245206c3fb27SDimitry Andric return "mediumorchid1"; 245306c3fb27SDimitry Andric return "gray"; 245406c3fb27SDimitry Andric } 245506c3fb27SDimitry Andric 245606c3fb27SDimitry Andric static std::string getNodeId(NodeRef Node) { 245706c3fb27SDimitry Andric std::stringstream SStream; 245806c3fb27SDimitry Andric SStream << std::hex << "N0x" << (unsigned long long)Node; 245906c3fb27SDimitry Andric std::string Result = SStream.str(); 246006c3fb27SDimitry Andric return Result; 246106c3fb27SDimitry Andric } 246206c3fb27SDimitry Andric }; 246306c3fb27SDimitry Andric 246406c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 246506c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::exportToDot( 246606c3fb27SDimitry Andric std::string Label) const { 246706c3fb27SDimitry Andric WriteGraph(this, "", false, Label, 246806c3fb27SDimitry Andric DotFilePathPrefix + "ccg." + Label + ".dot"); 246906c3fb27SDimitry Andric } 247006c3fb27SDimitry Andric 247106c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 247206c3fb27SDimitry Andric typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode * 247306c3fb27SDimitry Andric CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::moveEdgeToNewCalleeClone( 2474*0fca6ea1SDimitry Andric const std::shared_ptr<ContextEdge> &Edge, EdgeIter *CallerEdgeI, 2475*0fca6ea1SDimitry Andric DenseSet<uint32_t> ContextIdsToMove) { 247606c3fb27SDimitry Andric ContextNode *Node = Edge->Callee; 247706c3fb27SDimitry Andric NodeOwner.push_back( 247806c3fb27SDimitry Andric std::make_unique<ContextNode>(Node->IsAllocation, Node->Call)); 247906c3fb27SDimitry Andric ContextNode *Clone = NodeOwner.back().get(); 248006c3fb27SDimitry Andric Node->addClone(Clone); 248106c3fb27SDimitry Andric assert(NodeToCallingFunc.count(Node)); 248206c3fb27SDimitry Andric NodeToCallingFunc[Clone] = NodeToCallingFunc[Node]; 2483*0fca6ea1SDimitry Andric moveEdgeToExistingCalleeClone(Edge, Clone, CallerEdgeI, /*NewClone=*/true, 2484*0fca6ea1SDimitry Andric ContextIdsToMove); 248506c3fb27SDimitry Andric return Clone; 248606c3fb27SDimitry Andric } 248706c3fb27SDimitry Andric 248806c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 248906c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>:: 249006c3fb27SDimitry Andric moveEdgeToExistingCalleeClone(const std::shared_ptr<ContextEdge> &Edge, 249106c3fb27SDimitry Andric ContextNode *NewCallee, EdgeIter *CallerEdgeI, 2492*0fca6ea1SDimitry Andric bool NewClone, 2493*0fca6ea1SDimitry Andric DenseSet<uint32_t> ContextIdsToMove) { 249406c3fb27SDimitry Andric // NewCallee and Edge's current callee must be clones of the same original 249506c3fb27SDimitry Andric // node (Edge's current callee may be the original node too). 249606c3fb27SDimitry Andric assert(NewCallee->getOrigNode() == Edge->Callee->getOrigNode()); 2497*0fca6ea1SDimitry Andric 249806c3fb27SDimitry Andric ContextNode *OldCallee = Edge->Callee; 2499*0fca6ea1SDimitry Andric 2500*0fca6ea1SDimitry Andric // We might already have an edge to the new callee from earlier cloning for a 2501*0fca6ea1SDimitry Andric // different allocation. If one exists we will reuse it. 2502*0fca6ea1SDimitry Andric auto ExistingEdgeToNewCallee = NewCallee->findEdgeFromCaller(Edge->Caller); 2503*0fca6ea1SDimitry Andric 2504*0fca6ea1SDimitry Andric // Callers will pass an empty ContextIdsToMove set when they want to move the 2505*0fca6ea1SDimitry Andric // edge. Copy in Edge's ids for simplicity. 2506*0fca6ea1SDimitry Andric if (ContextIdsToMove.empty()) 2507*0fca6ea1SDimitry Andric ContextIdsToMove = Edge->getContextIds(); 2508*0fca6ea1SDimitry Andric 2509*0fca6ea1SDimitry Andric // If we are moving all of Edge's ids, then just move the whole Edge. 2510*0fca6ea1SDimitry Andric // Otherwise only move the specified subset, to a new edge if needed. 2511*0fca6ea1SDimitry Andric if (Edge->getContextIds().size() == ContextIdsToMove.size()) { 2512*0fca6ea1SDimitry Andric // Moving the whole Edge. 251306c3fb27SDimitry Andric if (CallerEdgeI) 251406c3fb27SDimitry Andric *CallerEdgeI = OldCallee->CallerEdges.erase(*CallerEdgeI); 251506c3fb27SDimitry Andric else 251606c3fb27SDimitry Andric OldCallee->eraseCallerEdge(Edge.get()); 2517*0fca6ea1SDimitry Andric if (ExistingEdgeToNewCallee) { 2518*0fca6ea1SDimitry Andric // Since we already have an edge to NewCallee, simply move the ids 2519*0fca6ea1SDimitry Andric // onto it, and remove the existing Edge. 2520*0fca6ea1SDimitry Andric ExistingEdgeToNewCallee->getContextIds().insert(ContextIdsToMove.begin(), 2521*0fca6ea1SDimitry Andric ContextIdsToMove.end()); 2522*0fca6ea1SDimitry Andric ExistingEdgeToNewCallee->AllocTypes |= Edge->AllocTypes; 2523*0fca6ea1SDimitry Andric assert(Edge->ContextIds == ContextIdsToMove); 2524*0fca6ea1SDimitry Andric Edge->ContextIds.clear(); 2525*0fca6ea1SDimitry Andric Edge->AllocTypes = (uint8_t)AllocationType::None; 2526*0fca6ea1SDimitry Andric Edge->Caller->eraseCalleeEdge(Edge.get()); 2527*0fca6ea1SDimitry Andric } else { 2528*0fca6ea1SDimitry Andric // Otherwise just reconnect Edge to NewCallee. 252906c3fb27SDimitry Andric Edge->Callee = NewCallee; 253006c3fb27SDimitry Andric NewCallee->CallerEdges.push_back(Edge); 2531*0fca6ea1SDimitry Andric // Don't need to update Edge's context ids since we are simply 2532*0fca6ea1SDimitry Andric // reconnecting it. 2533*0fca6ea1SDimitry Andric } 2534*0fca6ea1SDimitry Andric // In either case, need to update the alloc types on New Callee. 253506c3fb27SDimitry Andric NewCallee->AllocTypes |= Edge->AllocTypes; 2536*0fca6ea1SDimitry Andric } else { 2537*0fca6ea1SDimitry Andric // Only moving a subset of Edge's ids. 2538*0fca6ea1SDimitry Andric if (CallerEdgeI) 2539*0fca6ea1SDimitry Andric ++CallerEdgeI; 2540*0fca6ea1SDimitry Andric // Compute the alloc type of the subset of ids being moved. 2541*0fca6ea1SDimitry Andric auto CallerEdgeAllocType = computeAllocType(ContextIdsToMove); 2542*0fca6ea1SDimitry Andric if (ExistingEdgeToNewCallee) { 2543*0fca6ea1SDimitry Andric // Since we already have an edge to NewCallee, simply move the ids 2544*0fca6ea1SDimitry Andric // onto it. 2545*0fca6ea1SDimitry Andric ExistingEdgeToNewCallee->getContextIds().insert(ContextIdsToMove.begin(), 2546*0fca6ea1SDimitry Andric ContextIdsToMove.end()); 2547*0fca6ea1SDimitry Andric ExistingEdgeToNewCallee->AllocTypes |= CallerEdgeAllocType; 2548*0fca6ea1SDimitry Andric } else { 2549*0fca6ea1SDimitry Andric // Otherwise, create a new edge to NewCallee for the ids being moved. 2550*0fca6ea1SDimitry Andric auto NewEdge = std::make_shared<ContextEdge>( 2551*0fca6ea1SDimitry Andric NewCallee, Edge->Caller, CallerEdgeAllocType, ContextIdsToMove); 2552*0fca6ea1SDimitry Andric Edge->Caller->CalleeEdges.push_back(NewEdge); 2553*0fca6ea1SDimitry Andric NewCallee->CallerEdges.push_back(NewEdge); 2554*0fca6ea1SDimitry Andric } 2555*0fca6ea1SDimitry Andric // In either case, need to update the alloc types on NewCallee, and remove 2556*0fca6ea1SDimitry Andric // those ids and update the alloc type on the original Edge. 2557*0fca6ea1SDimitry Andric NewCallee->AllocTypes |= CallerEdgeAllocType; 2558*0fca6ea1SDimitry Andric set_subtract(Edge->ContextIds, ContextIdsToMove); 2559*0fca6ea1SDimitry Andric Edge->AllocTypes = computeAllocType(Edge->ContextIds); 2560*0fca6ea1SDimitry Andric } 256106c3fb27SDimitry Andric // Now walk the old callee node's callee edges and move Edge's context ids 256206c3fb27SDimitry Andric // over to the corresponding edge into the clone (which is created here if 256306c3fb27SDimitry Andric // this is a newly created clone). 256406c3fb27SDimitry Andric for (auto &OldCalleeEdge : OldCallee->CalleeEdges) { 256506c3fb27SDimitry Andric // The context ids moving to the new callee are the subset of this edge's 256606c3fb27SDimitry Andric // context ids and the context ids on the caller edge being moved. 256706c3fb27SDimitry Andric DenseSet<uint32_t> EdgeContextIdsToMove = 2568*0fca6ea1SDimitry Andric set_intersection(OldCalleeEdge->getContextIds(), ContextIdsToMove); 256906c3fb27SDimitry Andric set_subtract(OldCalleeEdge->getContextIds(), EdgeContextIdsToMove); 257006c3fb27SDimitry Andric OldCalleeEdge->AllocTypes = 257106c3fb27SDimitry Andric computeAllocType(OldCalleeEdge->getContextIds()); 257206c3fb27SDimitry Andric if (!NewClone) { 257306c3fb27SDimitry Andric // Update context ids / alloc type on corresponding edge to NewCallee. 257406c3fb27SDimitry Andric // There is a chance this may not exist if we are reusing an existing 257506c3fb27SDimitry Andric // clone, specifically during function assignment, where we would have 257606c3fb27SDimitry Andric // removed none type edges after creating the clone. If we can't find 257706c3fb27SDimitry Andric // a corresponding edge there, fall through to the cloning below. 257806c3fb27SDimitry Andric if (auto *NewCalleeEdge = 257906c3fb27SDimitry Andric NewCallee->findEdgeFromCallee(OldCalleeEdge->Callee)) { 258006c3fb27SDimitry Andric NewCalleeEdge->getContextIds().insert(EdgeContextIdsToMove.begin(), 258106c3fb27SDimitry Andric EdgeContextIdsToMove.end()); 258206c3fb27SDimitry Andric NewCalleeEdge->AllocTypes |= computeAllocType(EdgeContextIdsToMove); 258306c3fb27SDimitry Andric continue; 258406c3fb27SDimitry Andric } 258506c3fb27SDimitry Andric } 258606c3fb27SDimitry Andric auto NewEdge = std::make_shared<ContextEdge>( 258706c3fb27SDimitry Andric OldCalleeEdge->Callee, NewCallee, 258806c3fb27SDimitry Andric computeAllocType(EdgeContextIdsToMove), EdgeContextIdsToMove); 258906c3fb27SDimitry Andric NewCallee->CalleeEdges.push_back(NewEdge); 259006c3fb27SDimitry Andric NewEdge->Callee->CallerEdges.push_back(NewEdge); 259106c3fb27SDimitry Andric } 2592*0fca6ea1SDimitry Andric // Recompute the node alloc type now that its callee edges have been 2593*0fca6ea1SDimitry Andric // updated (since we will compute from those edges). 2594*0fca6ea1SDimitry Andric OldCallee->AllocTypes = OldCallee->computeAllocType(); 2595*0fca6ea1SDimitry Andric // OldCallee alloc type should be None iff its context id set is now empty. 2596*0fca6ea1SDimitry Andric assert((OldCallee->AllocTypes == (uint8_t)AllocationType::None) == 2597*0fca6ea1SDimitry Andric OldCallee->emptyContextIds()); 259806c3fb27SDimitry Andric if (VerifyCCG) { 259906c3fb27SDimitry Andric checkNode<DerivedCCG, FuncTy, CallTy>(OldCallee, /*CheckEdges=*/false); 260006c3fb27SDimitry Andric checkNode<DerivedCCG, FuncTy, CallTy>(NewCallee, /*CheckEdges=*/false); 260106c3fb27SDimitry Andric for (const auto &OldCalleeEdge : OldCallee->CalleeEdges) 260206c3fb27SDimitry Andric checkNode<DerivedCCG, FuncTy, CallTy>(OldCalleeEdge->Callee, 260306c3fb27SDimitry Andric /*CheckEdges=*/false); 260406c3fb27SDimitry Andric for (const auto &NewCalleeEdge : NewCallee->CalleeEdges) 260506c3fb27SDimitry Andric checkNode<DerivedCCG, FuncTy, CallTy>(NewCalleeEdge->Callee, 260606c3fb27SDimitry Andric /*CheckEdges=*/false); 260706c3fb27SDimitry Andric } 260806c3fb27SDimitry Andric } 260906c3fb27SDimitry Andric 261006c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 2611*0fca6ea1SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>:: 2612*0fca6ea1SDimitry Andric recursivelyRemoveNoneTypeCalleeEdges( 2613*0fca6ea1SDimitry Andric ContextNode *Node, DenseSet<const ContextNode *> &Visited) { 2614*0fca6ea1SDimitry Andric auto Inserted = Visited.insert(Node); 2615*0fca6ea1SDimitry Andric if (!Inserted.second) 2616*0fca6ea1SDimitry Andric return; 2617*0fca6ea1SDimitry Andric 2618*0fca6ea1SDimitry Andric removeNoneTypeCalleeEdges(Node); 2619*0fca6ea1SDimitry Andric 2620*0fca6ea1SDimitry Andric for (auto *Clone : Node->Clones) 2621*0fca6ea1SDimitry Andric recursivelyRemoveNoneTypeCalleeEdges(Clone, Visited); 2622*0fca6ea1SDimitry Andric 2623*0fca6ea1SDimitry Andric // The recursive call may remove some of this Node's caller edges. 2624*0fca6ea1SDimitry Andric // Iterate over a copy and skip any that were removed. 2625*0fca6ea1SDimitry Andric auto CallerEdges = Node->CallerEdges; 2626*0fca6ea1SDimitry Andric for (auto &Edge : CallerEdges) { 2627*0fca6ea1SDimitry Andric // Skip any that have been removed by an earlier recursive call. 2628*0fca6ea1SDimitry Andric if (Edge->Callee == nullptr && Edge->Caller == nullptr) { 2629*0fca6ea1SDimitry Andric assert(!is_contained(Node->CallerEdges, Edge)); 2630*0fca6ea1SDimitry Andric continue; 2631*0fca6ea1SDimitry Andric } 2632*0fca6ea1SDimitry Andric recursivelyRemoveNoneTypeCalleeEdges(Edge->Caller, Visited); 2633*0fca6ea1SDimitry Andric } 2634*0fca6ea1SDimitry Andric } 2635*0fca6ea1SDimitry Andric 2636*0fca6ea1SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 263706c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones() { 263806c3fb27SDimitry Andric DenseSet<const ContextNode *> Visited; 2639*0fca6ea1SDimitry Andric for (auto &Entry : AllocationCallToContextNodeMap) { 2640*0fca6ea1SDimitry Andric Visited.clear(); 2641*0fca6ea1SDimitry Andric identifyClones(Entry.second, Visited, Entry.second->getContextIds()); 2642*0fca6ea1SDimitry Andric } 2643*0fca6ea1SDimitry Andric Visited.clear(); 264406c3fb27SDimitry Andric for (auto &Entry : AllocationCallToContextNodeMap) 2645*0fca6ea1SDimitry Andric recursivelyRemoveNoneTypeCalleeEdges(Entry.second, Visited); 2646*0fca6ea1SDimitry Andric if (VerifyCCG) 2647*0fca6ea1SDimitry Andric check(); 264806c3fb27SDimitry Andric } 264906c3fb27SDimitry Andric 265006c3fb27SDimitry Andric // helper function to check an AllocType is cold or notcold or both. 265106c3fb27SDimitry Andric bool checkColdOrNotCold(uint8_t AllocType) { 265206c3fb27SDimitry Andric return (AllocType == (uint8_t)AllocationType::Cold) || 265306c3fb27SDimitry Andric (AllocType == (uint8_t)AllocationType::NotCold) || 265406c3fb27SDimitry Andric (AllocType == 265506c3fb27SDimitry Andric ((uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold)); 265606c3fb27SDimitry Andric } 265706c3fb27SDimitry Andric 265806c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 265906c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones( 2660*0fca6ea1SDimitry Andric ContextNode *Node, DenseSet<const ContextNode *> &Visited, 2661*0fca6ea1SDimitry Andric const DenseSet<uint32_t> &AllocContextIds) { 266206c3fb27SDimitry Andric if (VerifyNodes) 2663*0fca6ea1SDimitry Andric checkNode<DerivedCCG, FuncTy, CallTy>(Node, /*CheckEdges=*/false); 266406c3fb27SDimitry Andric assert(!Node->CloneOf); 266506c3fb27SDimitry Andric 266606c3fb27SDimitry Andric // If Node as a null call, then either it wasn't found in the module (regular 266706c3fb27SDimitry Andric // LTO) or summary index (ThinLTO), or there were other conditions blocking 266806c3fb27SDimitry Andric // cloning (e.g. recursion, calls multiple targets, etc). 266906c3fb27SDimitry Andric // Do this here so that we don't try to recursively clone callers below, which 267006c3fb27SDimitry Andric // isn't useful at least for this node. 267106c3fb27SDimitry Andric if (!Node->hasCall()) 267206c3fb27SDimitry Andric return; 267306c3fb27SDimitry Andric 267406c3fb27SDimitry Andric #ifndef NDEBUG 267506c3fb27SDimitry Andric auto Insert = 267606c3fb27SDimitry Andric #endif 267706c3fb27SDimitry Andric Visited.insert(Node); 267806c3fb27SDimitry Andric // We should not have visited this node yet. 267906c3fb27SDimitry Andric assert(Insert.second); 268006c3fb27SDimitry Andric // The recursive call to identifyClones may delete the current edge from the 268106c3fb27SDimitry Andric // CallerEdges vector. Make a copy and iterate on that, simpler than passing 268206c3fb27SDimitry Andric // in an iterator and having recursive call erase from it. Other edges may 268306c3fb27SDimitry Andric // also get removed during the recursion, which will have null Callee and 268406c3fb27SDimitry Andric // Caller pointers (and are deleted later), so we skip those below. 268506c3fb27SDimitry Andric { 268606c3fb27SDimitry Andric auto CallerEdges = Node->CallerEdges; 268706c3fb27SDimitry Andric for (auto &Edge : CallerEdges) { 268806c3fb27SDimitry Andric // Skip any that have been removed by an earlier recursive call. 268906c3fb27SDimitry Andric if (Edge->Callee == nullptr && Edge->Caller == nullptr) { 26905f757f3fSDimitry Andric assert(!llvm::count(Node->CallerEdges, Edge)); 269106c3fb27SDimitry Andric continue; 269206c3fb27SDimitry Andric } 269306c3fb27SDimitry Andric // Ignore any caller we previously visited via another edge. 269406c3fb27SDimitry Andric if (!Visited.count(Edge->Caller) && !Edge->Caller->CloneOf) { 2695*0fca6ea1SDimitry Andric identifyClones(Edge->Caller, Visited, AllocContextIds); 269606c3fb27SDimitry Andric } 269706c3fb27SDimitry Andric } 269806c3fb27SDimitry Andric } 269906c3fb27SDimitry Andric 270006c3fb27SDimitry Andric // Check if we reached an unambiguous call or have have only a single caller. 270106c3fb27SDimitry Andric if (hasSingleAllocType(Node->AllocTypes) || Node->CallerEdges.size() <= 1) 270206c3fb27SDimitry Andric return; 270306c3fb27SDimitry Andric 270406c3fb27SDimitry Andric // We need to clone. 270506c3fb27SDimitry Andric 270606c3fb27SDimitry Andric // Try to keep the original version as alloc type NotCold. This will make 270706c3fb27SDimitry Andric // cases with indirect calls or any other situation with an unknown call to 270806c3fb27SDimitry Andric // the original function get the default behavior. We do this by sorting the 270906c3fb27SDimitry Andric // CallerEdges of the Node we will clone by alloc type. 271006c3fb27SDimitry Andric // 271106c3fb27SDimitry Andric // Give NotCold edge the lowest sort priority so those edges are at the end of 271206c3fb27SDimitry Andric // the caller edges vector, and stay on the original version (since the below 271306c3fb27SDimitry Andric // code clones greedily until it finds all remaining edges have the same type 271406c3fb27SDimitry Andric // and leaves the remaining ones on the original Node). 271506c3fb27SDimitry Andric // 271606c3fb27SDimitry Andric // We shouldn't actually have any None type edges, so the sorting priority for 271706c3fb27SDimitry Andric // that is arbitrary, and we assert in that case below. 271806c3fb27SDimitry Andric const unsigned AllocTypeCloningPriority[] = {/*None*/ 3, /*NotCold*/ 4, 271906c3fb27SDimitry Andric /*Cold*/ 1, 272006c3fb27SDimitry Andric /*NotColdCold*/ 2}; 272106c3fb27SDimitry Andric std::stable_sort(Node->CallerEdges.begin(), Node->CallerEdges.end(), 272206c3fb27SDimitry Andric [&](const std::shared_ptr<ContextEdge> &A, 272306c3fb27SDimitry Andric const std::shared_ptr<ContextEdge> &B) { 2724*0fca6ea1SDimitry Andric // Nodes with non-empty context ids should be sorted before 2725*0fca6ea1SDimitry Andric // those with empty context ids. 2726*0fca6ea1SDimitry Andric if (A->ContextIds.empty()) 2727*0fca6ea1SDimitry Andric // Either B ContextIds are non-empty (in which case we 2728*0fca6ea1SDimitry Andric // should return false because B < A), or B ContextIds 2729*0fca6ea1SDimitry Andric // are empty, in which case they are equal, and we should 2730*0fca6ea1SDimitry Andric // maintain the original relative ordering. 2731*0fca6ea1SDimitry Andric return false; 2732*0fca6ea1SDimitry Andric if (B->ContextIds.empty()) 2733*0fca6ea1SDimitry Andric return true; 273406c3fb27SDimitry Andric 273506c3fb27SDimitry Andric if (A->AllocTypes == B->AllocTypes) 273606c3fb27SDimitry Andric // Use the first context id for each edge as a 273706c3fb27SDimitry Andric // tie-breaker. 273806c3fb27SDimitry Andric return *A->ContextIds.begin() < *B->ContextIds.begin(); 273906c3fb27SDimitry Andric return AllocTypeCloningPriority[A->AllocTypes] < 274006c3fb27SDimitry Andric AllocTypeCloningPriority[B->AllocTypes]; 274106c3fb27SDimitry Andric }); 274206c3fb27SDimitry Andric 274306c3fb27SDimitry Andric assert(Node->AllocTypes != (uint8_t)AllocationType::None); 274406c3fb27SDimitry Andric 274506c3fb27SDimitry Andric // Iterate until we find no more opportunities for disambiguating the alloc 274606c3fb27SDimitry Andric // types via cloning. In most cases this loop will terminate once the Node 274706c3fb27SDimitry Andric // has a single allocation type, in which case no more cloning is needed. 274806c3fb27SDimitry Andric // We need to be able to remove Edge from CallerEdges, so need to adjust 274906c3fb27SDimitry Andric // iterator inside the loop. 275006c3fb27SDimitry Andric for (auto EI = Node->CallerEdges.begin(); EI != Node->CallerEdges.end();) { 275106c3fb27SDimitry Andric auto CallerEdge = *EI; 275206c3fb27SDimitry Andric 275306c3fb27SDimitry Andric // See if cloning the prior caller edge left this node with a single alloc 275406c3fb27SDimitry Andric // type or a single caller. In that case no more cloning of Node is needed. 275506c3fb27SDimitry Andric if (hasSingleAllocType(Node->AllocTypes) || Node->CallerEdges.size() <= 1) 275606c3fb27SDimitry Andric break; 275706c3fb27SDimitry Andric 2758*0fca6ea1SDimitry Andric // Only need to process the ids along this edge pertaining to the given 2759*0fca6ea1SDimitry Andric // allocation. 2760*0fca6ea1SDimitry Andric auto CallerEdgeContextsForAlloc = 2761*0fca6ea1SDimitry Andric set_intersection(CallerEdge->getContextIds(), AllocContextIds); 2762*0fca6ea1SDimitry Andric if (CallerEdgeContextsForAlloc.empty()) { 2763*0fca6ea1SDimitry Andric ++EI; 2764*0fca6ea1SDimitry Andric continue; 2765*0fca6ea1SDimitry Andric } 2766*0fca6ea1SDimitry Andric auto CallerAllocTypeForAlloc = computeAllocType(CallerEdgeContextsForAlloc); 2767*0fca6ea1SDimitry Andric 276806c3fb27SDimitry Andric // Compute the node callee edge alloc types corresponding to the context ids 276906c3fb27SDimitry Andric // for this caller edge. 277006c3fb27SDimitry Andric std::vector<uint8_t> CalleeEdgeAllocTypesForCallerEdge; 277106c3fb27SDimitry Andric CalleeEdgeAllocTypesForCallerEdge.reserve(Node->CalleeEdges.size()); 277206c3fb27SDimitry Andric for (auto &CalleeEdge : Node->CalleeEdges) 277306c3fb27SDimitry Andric CalleeEdgeAllocTypesForCallerEdge.push_back(intersectAllocTypes( 2774*0fca6ea1SDimitry Andric CalleeEdge->getContextIds(), CallerEdgeContextsForAlloc)); 277506c3fb27SDimitry Andric 277606c3fb27SDimitry Andric // Don't clone if doing so will not disambiguate any alloc types amongst 277706c3fb27SDimitry Andric // caller edges (including the callee edges that would be cloned). 277806c3fb27SDimitry Andric // Otherwise we will simply move all edges to the clone. 277906c3fb27SDimitry Andric // 278006c3fb27SDimitry Andric // First check if by cloning we will disambiguate the caller allocation 278106c3fb27SDimitry Andric // type from node's allocation type. Query allocTypeToUse so that we don't 278206c3fb27SDimitry Andric // bother cloning to distinguish NotCold+Cold from NotCold. Note that 278306c3fb27SDimitry Andric // neither of these should be None type. 278406c3fb27SDimitry Andric // 278506c3fb27SDimitry Andric // Then check if by cloning node at least one of the callee edges will be 278606c3fb27SDimitry Andric // disambiguated by splitting out different context ids. 278706c3fb27SDimitry Andric assert(CallerEdge->AllocTypes != (uint8_t)AllocationType::None); 278806c3fb27SDimitry Andric assert(Node->AllocTypes != (uint8_t)AllocationType::None); 2789*0fca6ea1SDimitry Andric if (allocTypeToUse(CallerAllocTypeForAlloc) == 279006c3fb27SDimitry Andric allocTypeToUse(Node->AllocTypes) && 279106c3fb27SDimitry Andric allocTypesMatch<DerivedCCG, FuncTy, CallTy>( 279206c3fb27SDimitry Andric CalleeEdgeAllocTypesForCallerEdge, Node->CalleeEdges)) { 279306c3fb27SDimitry Andric ++EI; 279406c3fb27SDimitry Andric continue; 279506c3fb27SDimitry Andric } 279606c3fb27SDimitry Andric 279706c3fb27SDimitry Andric // First see if we can use an existing clone. Check each clone and its 279806c3fb27SDimitry Andric // callee edges for matching alloc types. 279906c3fb27SDimitry Andric ContextNode *Clone = nullptr; 280006c3fb27SDimitry Andric for (auto *CurClone : Node->Clones) { 280106c3fb27SDimitry Andric if (allocTypeToUse(CurClone->AllocTypes) != 2802*0fca6ea1SDimitry Andric allocTypeToUse(CallerAllocTypeForAlloc)) 280306c3fb27SDimitry Andric continue; 280406c3fb27SDimitry Andric 280506c3fb27SDimitry Andric if (!allocTypesMatch<DerivedCCG, FuncTy, CallTy>( 280606c3fb27SDimitry Andric CalleeEdgeAllocTypesForCallerEdge, CurClone->CalleeEdges)) 280706c3fb27SDimitry Andric continue; 280806c3fb27SDimitry Andric Clone = CurClone; 280906c3fb27SDimitry Andric break; 281006c3fb27SDimitry Andric } 281106c3fb27SDimitry Andric 281206c3fb27SDimitry Andric // The edge iterator is adjusted when we move the CallerEdge to the clone. 281306c3fb27SDimitry Andric if (Clone) 2814*0fca6ea1SDimitry Andric moveEdgeToExistingCalleeClone(CallerEdge, Clone, &EI, /*NewClone=*/false, 2815*0fca6ea1SDimitry Andric CallerEdgeContextsForAlloc); 281606c3fb27SDimitry Andric else 2817*0fca6ea1SDimitry Andric Clone = 2818*0fca6ea1SDimitry Andric moveEdgeToNewCalleeClone(CallerEdge, &EI, CallerEdgeContextsForAlloc); 281906c3fb27SDimitry Andric 282006c3fb27SDimitry Andric assert(EI == Node->CallerEdges.end() || 282106c3fb27SDimitry Andric Node->AllocTypes != (uint8_t)AllocationType::None); 282206c3fb27SDimitry Andric // Sanity check that no alloc types on clone or its edges are None. 282306c3fb27SDimitry Andric assert(Clone->AllocTypes != (uint8_t)AllocationType::None); 282406c3fb27SDimitry Andric } 282506c3fb27SDimitry Andric 282606c3fb27SDimitry Andric // We should still have some context ids on the original Node. 2827*0fca6ea1SDimitry Andric assert(!Node->emptyContextIds()); 282806c3fb27SDimitry Andric 282906c3fb27SDimitry Andric // Sanity check that no alloc types on node or edges are None. 283006c3fb27SDimitry Andric assert(Node->AllocTypes != (uint8_t)AllocationType::None); 283106c3fb27SDimitry Andric 283206c3fb27SDimitry Andric if (VerifyNodes) 2833*0fca6ea1SDimitry Andric checkNode<DerivedCCG, FuncTy, CallTy>(Node, /*CheckEdges=*/false); 283406c3fb27SDimitry Andric } 283506c3fb27SDimitry Andric 283606c3fb27SDimitry Andric void ModuleCallsiteContextGraph::updateAllocationCall( 283706c3fb27SDimitry Andric CallInfo &Call, AllocationType AllocType) { 283806c3fb27SDimitry Andric std::string AllocTypeString = getAllocTypeAttributeString(AllocType); 283906c3fb27SDimitry Andric auto A = llvm::Attribute::get(Call.call()->getFunction()->getContext(), 284006c3fb27SDimitry Andric "memprof", AllocTypeString); 284106c3fb27SDimitry Andric cast<CallBase>(Call.call())->addFnAttr(A); 284206c3fb27SDimitry Andric OREGetter(Call.call()->getFunction()) 284306c3fb27SDimitry Andric .emit(OptimizationRemark(DEBUG_TYPE, "MemprofAttribute", Call.call()) 284406c3fb27SDimitry Andric << ore::NV("AllocationCall", Call.call()) << " in clone " 284506c3fb27SDimitry Andric << ore::NV("Caller", Call.call()->getFunction()) 284606c3fb27SDimitry Andric << " marked with memprof allocation attribute " 284706c3fb27SDimitry Andric << ore::NV("Attribute", AllocTypeString)); 284806c3fb27SDimitry Andric } 284906c3fb27SDimitry Andric 285006c3fb27SDimitry Andric void IndexCallsiteContextGraph::updateAllocationCall(CallInfo &Call, 285106c3fb27SDimitry Andric AllocationType AllocType) { 285206c3fb27SDimitry Andric auto *AI = Call.call().dyn_cast<AllocInfo *>(); 285306c3fb27SDimitry Andric assert(AI); 285406c3fb27SDimitry Andric assert(AI->Versions.size() > Call.cloneNo()); 285506c3fb27SDimitry Andric AI->Versions[Call.cloneNo()] = (uint8_t)AllocType; 285606c3fb27SDimitry Andric } 285706c3fb27SDimitry Andric 285806c3fb27SDimitry Andric void ModuleCallsiteContextGraph::updateCall(CallInfo &CallerCall, 285906c3fb27SDimitry Andric FuncInfo CalleeFunc) { 286006c3fb27SDimitry Andric if (CalleeFunc.cloneNo() > 0) 286106c3fb27SDimitry Andric cast<CallBase>(CallerCall.call())->setCalledFunction(CalleeFunc.func()); 286206c3fb27SDimitry Andric OREGetter(CallerCall.call()->getFunction()) 286306c3fb27SDimitry Andric .emit(OptimizationRemark(DEBUG_TYPE, "MemprofCall", CallerCall.call()) 286406c3fb27SDimitry Andric << ore::NV("Call", CallerCall.call()) << " in clone " 286506c3fb27SDimitry Andric << ore::NV("Caller", CallerCall.call()->getFunction()) 286606c3fb27SDimitry Andric << " assigned to call function clone " 286706c3fb27SDimitry Andric << ore::NV("Callee", CalleeFunc.func())); 286806c3fb27SDimitry Andric } 286906c3fb27SDimitry Andric 287006c3fb27SDimitry Andric void IndexCallsiteContextGraph::updateCall(CallInfo &CallerCall, 287106c3fb27SDimitry Andric FuncInfo CalleeFunc) { 287206c3fb27SDimitry Andric auto *CI = CallerCall.call().dyn_cast<CallsiteInfo *>(); 287306c3fb27SDimitry Andric assert(CI && 287406c3fb27SDimitry Andric "Caller cannot be an allocation which should not have profiled calls"); 287506c3fb27SDimitry Andric assert(CI->Clones.size() > CallerCall.cloneNo()); 287606c3fb27SDimitry Andric CI->Clones[CallerCall.cloneNo()] = CalleeFunc.cloneNo(); 287706c3fb27SDimitry Andric } 287806c3fb27SDimitry Andric 287906c3fb27SDimitry Andric CallsiteContextGraph<ModuleCallsiteContextGraph, Function, 288006c3fb27SDimitry Andric Instruction *>::FuncInfo 288106c3fb27SDimitry Andric ModuleCallsiteContextGraph::cloneFunctionForCallsite( 288206c3fb27SDimitry Andric FuncInfo &Func, CallInfo &Call, std::map<CallInfo, CallInfo> &CallMap, 288306c3fb27SDimitry Andric std::vector<CallInfo> &CallsWithMetadataInFunc, unsigned CloneNo) { 288406c3fb27SDimitry Andric // Use existing LLVM facilities for cloning and obtaining Call in clone 288506c3fb27SDimitry Andric ValueToValueMapTy VMap; 288606c3fb27SDimitry Andric auto *NewFunc = CloneFunction(Func.func(), VMap); 288706c3fb27SDimitry Andric std::string Name = getMemProfFuncName(Func.func()->getName(), CloneNo); 288806c3fb27SDimitry Andric assert(!Func.func()->getParent()->getFunction(Name)); 288906c3fb27SDimitry Andric NewFunc->setName(Name); 289006c3fb27SDimitry Andric for (auto &Inst : CallsWithMetadataInFunc) { 289106c3fb27SDimitry Andric // This map always has the initial version in it. 289206c3fb27SDimitry Andric assert(Inst.cloneNo() == 0); 289306c3fb27SDimitry Andric CallMap[Inst] = {cast<Instruction>(VMap[Inst.call()]), CloneNo}; 289406c3fb27SDimitry Andric } 289506c3fb27SDimitry Andric OREGetter(Func.func()) 289606c3fb27SDimitry Andric .emit(OptimizationRemark(DEBUG_TYPE, "MemprofClone", Func.func()) 289706c3fb27SDimitry Andric << "created clone " << ore::NV("NewFunction", NewFunc)); 289806c3fb27SDimitry Andric return {NewFunc, CloneNo}; 289906c3fb27SDimitry Andric } 290006c3fb27SDimitry Andric 290106c3fb27SDimitry Andric CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary, 290206c3fb27SDimitry Andric IndexCall>::FuncInfo 290306c3fb27SDimitry Andric IndexCallsiteContextGraph::cloneFunctionForCallsite( 290406c3fb27SDimitry Andric FuncInfo &Func, CallInfo &Call, std::map<CallInfo, CallInfo> &CallMap, 290506c3fb27SDimitry Andric std::vector<CallInfo> &CallsWithMetadataInFunc, unsigned CloneNo) { 290606c3fb27SDimitry Andric // Check how many clones we have of Call (and therefore function). 290706c3fb27SDimitry Andric // The next clone number is the current size of versions array. 290806c3fb27SDimitry Andric // Confirm this matches the CloneNo provided by the caller, which is based on 290906c3fb27SDimitry Andric // the number of function clones we have. 291006c3fb27SDimitry Andric assert(CloneNo == 291106c3fb27SDimitry Andric (Call.call().is<AllocInfo *>() 291206c3fb27SDimitry Andric ? Call.call().dyn_cast<AllocInfo *>()->Versions.size() 291306c3fb27SDimitry Andric : Call.call().dyn_cast<CallsiteInfo *>()->Clones.size())); 291406c3fb27SDimitry Andric // Walk all the instructions in this function. Create a new version for 291506c3fb27SDimitry Andric // each (by adding an entry to the Versions/Clones summary array), and copy 291606c3fb27SDimitry Andric // over the version being called for the function clone being cloned here. 291706c3fb27SDimitry Andric // Additionally, add an entry to the CallMap for the new function clone, 291806c3fb27SDimitry Andric // mapping the original call (clone 0, what is in CallsWithMetadataInFunc) 291906c3fb27SDimitry Andric // to the new call clone. 292006c3fb27SDimitry Andric for (auto &Inst : CallsWithMetadataInFunc) { 292106c3fb27SDimitry Andric // This map always has the initial version in it. 292206c3fb27SDimitry Andric assert(Inst.cloneNo() == 0); 292306c3fb27SDimitry Andric if (auto *AI = Inst.call().dyn_cast<AllocInfo *>()) { 292406c3fb27SDimitry Andric assert(AI->Versions.size() == CloneNo); 292506c3fb27SDimitry Andric // We assign the allocation type later (in updateAllocationCall), just add 292606c3fb27SDimitry Andric // an entry for it here. 292706c3fb27SDimitry Andric AI->Versions.push_back(0); 292806c3fb27SDimitry Andric } else { 292906c3fb27SDimitry Andric auto *CI = Inst.call().dyn_cast<CallsiteInfo *>(); 293006c3fb27SDimitry Andric assert(CI && CI->Clones.size() == CloneNo); 293106c3fb27SDimitry Andric // We assign the clone number later (in updateCall), just add an entry for 293206c3fb27SDimitry Andric // it here. 293306c3fb27SDimitry Andric CI->Clones.push_back(0); 293406c3fb27SDimitry Andric } 293506c3fb27SDimitry Andric CallMap[Inst] = {Inst.call(), CloneNo}; 293606c3fb27SDimitry Andric } 293706c3fb27SDimitry Andric return {Func.func(), CloneNo}; 293806c3fb27SDimitry Andric } 293906c3fb27SDimitry Andric 294006c3fb27SDimitry Andric // This method assigns cloned callsites to functions, cloning the functions as 294106c3fb27SDimitry Andric // needed. The assignment is greedy and proceeds roughly as follows: 294206c3fb27SDimitry Andric // 294306c3fb27SDimitry Andric // For each function Func: 294406c3fb27SDimitry Andric // For each call with graph Node having clones: 294506c3fb27SDimitry Andric // Initialize ClonesWorklist to Node and its clones 294606c3fb27SDimitry Andric // Initialize NodeCloneCount to 0 294706c3fb27SDimitry Andric // While ClonesWorklist is not empty: 294806c3fb27SDimitry Andric // Clone = pop front ClonesWorklist 294906c3fb27SDimitry Andric // NodeCloneCount++ 295006c3fb27SDimitry Andric // If Func has been cloned less than NodeCloneCount times: 295106c3fb27SDimitry Andric // If NodeCloneCount is 1: 295206c3fb27SDimitry Andric // Assign Clone to original Func 295306c3fb27SDimitry Andric // Continue 295406c3fb27SDimitry Andric // Create a new function clone 295506c3fb27SDimitry Andric // If other callers not assigned to call a function clone yet: 295606c3fb27SDimitry Andric // Assign them to call new function clone 295706c3fb27SDimitry Andric // Continue 295806c3fb27SDimitry Andric // Assign any other caller calling the cloned version to new clone 295906c3fb27SDimitry Andric // 296006c3fb27SDimitry Andric // For each caller of Clone: 296106c3fb27SDimitry Andric // If caller is assigned to call a specific function clone: 296206c3fb27SDimitry Andric // If we cannot assign Clone to that function clone: 296306c3fb27SDimitry Andric // Create new callsite Clone NewClone 296406c3fb27SDimitry Andric // Add NewClone to ClonesWorklist 296506c3fb27SDimitry Andric // Continue 296606c3fb27SDimitry Andric // Assign Clone to existing caller's called function clone 296706c3fb27SDimitry Andric // Else: 296806c3fb27SDimitry Andric // If Clone not already assigned to a function clone: 296906c3fb27SDimitry Andric // Assign to first function clone without assignment 297006c3fb27SDimitry Andric // Assign caller to selected function clone 297106c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 297206c3fb27SDimitry Andric bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() { 297306c3fb27SDimitry Andric bool Changed = false; 297406c3fb27SDimitry Andric 297506c3fb27SDimitry Andric // Keep track of the assignment of nodes (callsites) to function clones they 297606c3fb27SDimitry Andric // call. 297706c3fb27SDimitry Andric DenseMap<ContextNode *, FuncInfo> CallsiteToCalleeFuncCloneMap; 297806c3fb27SDimitry Andric 297906c3fb27SDimitry Andric // Update caller node to call function version CalleeFunc, by recording the 298006c3fb27SDimitry Andric // assignment in CallsiteToCalleeFuncCloneMap. 298106c3fb27SDimitry Andric auto RecordCalleeFuncOfCallsite = [&](ContextNode *Caller, 298206c3fb27SDimitry Andric const FuncInfo &CalleeFunc) { 298306c3fb27SDimitry Andric assert(Caller->hasCall()); 298406c3fb27SDimitry Andric CallsiteToCalleeFuncCloneMap[Caller] = CalleeFunc; 298506c3fb27SDimitry Andric }; 298606c3fb27SDimitry Andric 298706c3fb27SDimitry Andric // Walk all functions for which we saw calls with memprof metadata, and handle 298806c3fb27SDimitry Andric // cloning for each of its calls. 298906c3fb27SDimitry Andric for (auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) { 299006c3fb27SDimitry Andric FuncInfo OrigFunc(Func); 299106c3fb27SDimitry Andric // Map from each clone of OrigFunc to a map of remappings of each call of 299206c3fb27SDimitry Andric // interest (from original uncloned call to the corresponding cloned call in 299306c3fb27SDimitry Andric // that function clone). 299406c3fb27SDimitry Andric std::map<FuncInfo, std::map<CallInfo, CallInfo>> FuncClonesToCallMap; 299506c3fb27SDimitry Andric for (auto &Call : CallsWithMetadata) { 299606c3fb27SDimitry Andric ContextNode *Node = getNodeForInst(Call); 299706c3fb27SDimitry Andric // Skip call if we do not have a node for it (all uses of its stack ids 299806c3fb27SDimitry Andric // were either on inlined chains or pruned from the MIBs), or if we did 299906c3fb27SDimitry Andric // not create any clones for it. 300006c3fb27SDimitry Andric if (!Node || Node->Clones.empty()) 300106c3fb27SDimitry Andric continue; 300206c3fb27SDimitry Andric assert(Node->hasCall() && 300306c3fb27SDimitry Andric "Not having a call should have prevented cloning"); 300406c3fb27SDimitry Andric 300506c3fb27SDimitry Andric // Track the assignment of function clones to clones of the current 300606c3fb27SDimitry Andric // callsite Node being handled. 300706c3fb27SDimitry Andric std::map<FuncInfo, ContextNode *> FuncCloneToCurNodeCloneMap; 300806c3fb27SDimitry Andric 300906c3fb27SDimitry Andric // Assign callsite version CallsiteClone to function version FuncClone, 301006c3fb27SDimitry Andric // and also assign (possibly cloned) Call to CallsiteClone. 301106c3fb27SDimitry Andric auto AssignCallsiteCloneToFuncClone = [&](const FuncInfo &FuncClone, 301206c3fb27SDimitry Andric CallInfo &Call, 301306c3fb27SDimitry Andric ContextNode *CallsiteClone, 301406c3fb27SDimitry Andric bool IsAlloc) { 301506c3fb27SDimitry Andric // Record the clone of callsite node assigned to this function clone. 301606c3fb27SDimitry Andric FuncCloneToCurNodeCloneMap[FuncClone] = CallsiteClone; 301706c3fb27SDimitry Andric 301806c3fb27SDimitry Andric assert(FuncClonesToCallMap.count(FuncClone)); 301906c3fb27SDimitry Andric std::map<CallInfo, CallInfo> &CallMap = FuncClonesToCallMap[FuncClone]; 302006c3fb27SDimitry Andric CallInfo CallClone(Call); 302106c3fb27SDimitry Andric if (CallMap.count(Call)) 302206c3fb27SDimitry Andric CallClone = CallMap[Call]; 302306c3fb27SDimitry Andric CallsiteClone->setCall(CallClone); 302406c3fb27SDimitry Andric }; 302506c3fb27SDimitry Andric 302606c3fb27SDimitry Andric // Keep track of the clones of callsite Node that need to be assigned to 302706c3fb27SDimitry Andric // function clones. This list may be expanded in the loop body below if we 302806c3fb27SDimitry Andric // find additional cloning is required. 302906c3fb27SDimitry Andric std::deque<ContextNode *> ClonesWorklist; 303006c3fb27SDimitry Andric // Ignore original Node if we moved all of its contexts to clones. 3031*0fca6ea1SDimitry Andric if (!Node->emptyContextIds()) 303206c3fb27SDimitry Andric ClonesWorklist.push_back(Node); 303306c3fb27SDimitry Andric ClonesWorklist.insert(ClonesWorklist.end(), Node->Clones.begin(), 303406c3fb27SDimitry Andric Node->Clones.end()); 303506c3fb27SDimitry Andric 303606c3fb27SDimitry Andric // Now walk through all of the clones of this callsite Node that we need, 303706c3fb27SDimitry Andric // and determine the assignment to a corresponding clone of the current 303806c3fb27SDimitry Andric // function (creating new function clones as needed). 303906c3fb27SDimitry Andric unsigned NodeCloneCount = 0; 304006c3fb27SDimitry Andric while (!ClonesWorklist.empty()) { 304106c3fb27SDimitry Andric ContextNode *Clone = ClonesWorklist.front(); 304206c3fb27SDimitry Andric ClonesWorklist.pop_front(); 304306c3fb27SDimitry Andric NodeCloneCount++; 304406c3fb27SDimitry Andric if (VerifyNodes) 304506c3fb27SDimitry Andric checkNode<DerivedCCG, FuncTy, CallTy>(Clone); 304606c3fb27SDimitry Andric 304706c3fb27SDimitry Andric // Need to create a new function clone if we have more callsite clones 304806c3fb27SDimitry Andric // than existing function clones, which would have been assigned to an 304906c3fb27SDimitry Andric // earlier clone in the list (we assign callsite clones to function 305006c3fb27SDimitry Andric // clones greedily). 305106c3fb27SDimitry Andric if (FuncClonesToCallMap.size() < NodeCloneCount) { 305206c3fb27SDimitry Andric // If this is the first callsite copy, assign to original function. 305306c3fb27SDimitry Andric if (NodeCloneCount == 1) { 305406c3fb27SDimitry Andric // Since FuncClonesToCallMap is empty in this case, no clones have 305506c3fb27SDimitry Andric // been created for this function yet, and no callers should have 305606c3fb27SDimitry Andric // been assigned a function clone for this callee node yet. 305706c3fb27SDimitry Andric assert(llvm::none_of( 305806c3fb27SDimitry Andric Clone->CallerEdges, [&](const std::shared_ptr<ContextEdge> &E) { 305906c3fb27SDimitry Andric return CallsiteToCalleeFuncCloneMap.count(E->Caller); 306006c3fb27SDimitry Andric })); 306106c3fb27SDimitry Andric // Initialize with empty call map, assign Clone to original function 306206c3fb27SDimitry Andric // and its callers, and skip to the next clone. 306306c3fb27SDimitry Andric FuncClonesToCallMap[OrigFunc] = {}; 306406c3fb27SDimitry Andric AssignCallsiteCloneToFuncClone( 306506c3fb27SDimitry Andric OrigFunc, Call, Clone, 306606c3fb27SDimitry Andric AllocationCallToContextNodeMap.count(Call)); 306706c3fb27SDimitry Andric for (auto &CE : Clone->CallerEdges) { 306806c3fb27SDimitry Andric // Ignore any caller that does not have a recorded callsite Call. 306906c3fb27SDimitry Andric if (!CE->Caller->hasCall()) 307006c3fb27SDimitry Andric continue; 307106c3fb27SDimitry Andric RecordCalleeFuncOfCallsite(CE->Caller, OrigFunc); 307206c3fb27SDimitry Andric } 307306c3fb27SDimitry Andric continue; 307406c3fb27SDimitry Andric } 307506c3fb27SDimitry Andric 307606c3fb27SDimitry Andric // First locate which copy of OrigFunc to clone again. If a caller 307706c3fb27SDimitry Andric // of this callsite clone was already assigned to call a particular 307806c3fb27SDimitry Andric // function clone, we need to redirect all of those callers to the 307906c3fb27SDimitry Andric // new function clone, and update their other callees within this 308006c3fb27SDimitry Andric // function. 308106c3fb27SDimitry Andric FuncInfo PreviousAssignedFuncClone; 308206c3fb27SDimitry Andric auto EI = llvm::find_if( 308306c3fb27SDimitry Andric Clone->CallerEdges, [&](const std::shared_ptr<ContextEdge> &E) { 308406c3fb27SDimitry Andric return CallsiteToCalleeFuncCloneMap.count(E->Caller); 308506c3fb27SDimitry Andric }); 308606c3fb27SDimitry Andric bool CallerAssignedToCloneOfFunc = false; 308706c3fb27SDimitry Andric if (EI != Clone->CallerEdges.end()) { 308806c3fb27SDimitry Andric const std::shared_ptr<ContextEdge> &Edge = *EI; 308906c3fb27SDimitry Andric PreviousAssignedFuncClone = 309006c3fb27SDimitry Andric CallsiteToCalleeFuncCloneMap[Edge->Caller]; 309106c3fb27SDimitry Andric CallerAssignedToCloneOfFunc = true; 309206c3fb27SDimitry Andric } 309306c3fb27SDimitry Andric 309406c3fb27SDimitry Andric // Clone function and save it along with the CallInfo map created 309506c3fb27SDimitry Andric // during cloning in the FuncClonesToCallMap. 309606c3fb27SDimitry Andric std::map<CallInfo, CallInfo> NewCallMap; 309706c3fb27SDimitry Andric unsigned CloneNo = FuncClonesToCallMap.size(); 309806c3fb27SDimitry Andric assert(CloneNo > 0 && "Clone 0 is the original function, which " 309906c3fb27SDimitry Andric "should already exist in the map"); 310006c3fb27SDimitry Andric FuncInfo NewFuncClone = cloneFunctionForCallsite( 310106c3fb27SDimitry Andric OrigFunc, Call, NewCallMap, CallsWithMetadata, CloneNo); 310206c3fb27SDimitry Andric FuncClonesToCallMap.emplace(NewFuncClone, std::move(NewCallMap)); 310306c3fb27SDimitry Andric FunctionClonesAnalysis++; 310406c3fb27SDimitry Andric Changed = true; 310506c3fb27SDimitry Andric 310606c3fb27SDimitry Andric // If no caller callsites were already assigned to a clone of this 310706c3fb27SDimitry Andric // function, we can simply assign this clone to the new func clone 310806c3fb27SDimitry Andric // and update all callers to it, then skip to the next clone. 310906c3fb27SDimitry Andric if (!CallerAssignedToCloneOfFunc) { 311006c3fb27SDimitry Andric AssignCallsiteCloneToFuncClone( 311106c3fb27SDimitry Andric NewFuncClone, Call, Clone, 311206c3fb27SDimitry Andric AllocationCallToContextNodeMap.count(Call)); 311306c3fb27SDimitry Andric for (auto &CE : Clone->CallerEdges) { 311406c3fb27SDimitry Andric // Ignore any caller that does not have a recorded callsite Call. 311506c3fb27SDimitry Andric if (!CE->Caller->hasCall()) 311606c3fb27SDimitry Andric continue; 311706c3fb27SDimitry Andric RecordCalleeFuncOfCallsite(CE->Caller, NewFuncClone); 311806c3fb27SDimitry Andric } 311906c3fb27SDimitry Andric continue; 312006c3fb27SDimitry Andric } 312106c3fb27SDimitry Andric 312206c3fb27SDimitry Andric // We may need to do additional node cloning in this case. 312306c3fb27SDimitry Andric // Reset the CallsiteToCalleeFuncCloneMap entry for any callers 312406c3fb27SDimitry Andric // that were previously assigned to call PreviousAssignedFuncClone, 312506c3fb27SDimitry Andric // to record that they now call NewFuncClone. 312606c3fb27SDimitry Andric for (auto CE : Clone->CallerEdges) { 3127297eecfbSDimitry Andric // Skip any that have been removed on an earlier iteration. 3128297eecfbSDimitry Andric if (!CE) 3129297eecfbSDimitry Andric continue; 313006c3fb27SDimitry Andric // Ignore any caller that does not have a recorded callsite Call. 313106c3fb27SDimitry Andric if (!CE->Caller->hasCall()) 313206c3fb27SDimitry Andric continue; 313306c3fb27SDimitry Andric 313406c3fb27SDimitry Andric if (!CallsiteToCalleeFuncCloneMap.count(CE->Caller) || 313506c3fb27SDimitry Andric // We subsequently fall through to later handling that 313606c3fb27SDimitry Andric // will perform any additional cloning required for 313706c3fb27SDimitry Andric // callers that were calling other function clones. 313806c3fb27SDimitry Andric CallsiteToCalleeFuncCloneMap[CE->Caller] != 313906c3fb27SDimitry Andric PreviousAssignedFuncClone) 314006c3fb27SDimitry Andric continue; 314106c3fb27SDimitry Andric 314206c3fb27SDimitry Andric RecordCalleeFuncOfCallsite(CE->Caller, NewFuncClone); 314306c3fb27SDimitry Andric 314406c3fb27SDimitry Andric // If we are cloning a function that was already assigned to some 314506c3fb27SDimitry Andric // callers, then essentially we are creating new callsite clones 314606c3fb27SDimitry Andric // of the other callsites in that function that are reached by those 314706c3fb27SDimitry Andric // callers. Clone the other callees of the current callsite's caller 314806c3fb27SDimitry Andric // that were already assigned to PreviousAssignedFuncClone 314906c3fb27SDimitry Andric // accordingly. This is important since we subsequently update the 315006c3fb27SDimitry Andric // calls from the nodes in the graph and their assignments to callee 315106c3fb27SDimitry Andric // functions recorded in CallsiteToCalleeFuncCloneMap. 315206c3fb27SDimitry Andric for (auto CalleeEdge : CE->Caller->CalleeEdges) { 315306c3fb27SDimitry Andric // Skip any that have been removed on an earlier iteration when 315406c3fb27SDimitry Andric // cleaning up newly None type callee edges. 315506c3fb27SDimitry Andric if (!CalleeEdge) 315606c3fb27SDimitry Andric continue; 315706c3fb27SDimitry Andric ContextNode *Callee = CalleeEdge->Callee; 315806c3fb27SDimitry Andric // Skip the current callsite, we are looking for other 315906c3fb27SDimitry Andric // callsites Caller calls, as well as any that does not have a 316006c3fb27SDimitry Andric // recorded callsite Call. 316106c3fb27SDimitry Andric if (Callee == Clone || !Callee->hasCall()) 316206c3fb27SDimitry Andric continue; 316306c3fb27SDimitry Andric ContextNode *NewClone = moveEdgeToNewCalleeClone(CalleeEdge); 316406c3fb27SDimitry Andric removeNoneTypeCalleeEdges(NewClone); 316506c3fb27SDimitry Andric // Moving the edge may have resulted in some none type 316606c3fb27SDimitry Andric // callee edges on the original Callee. 316706c3fb27SDimitry Andric removeNoneTypeCalleeEdges(Callee); 316806c3fb27SDimitry Andric assert(NewClone->AllocTypes != (uint8_t)AllocationType::None); 316906c3fb27SDimitry Andric // If the Callee node was already assigned to call a specific 317006c3fb27SDimitry Andric // function version, make sure its new clone is assigned to call 317106c3fb27SDimitry Andric // that same function clone. 317206c3fb27SDimitry Andric if (CallsiteToCalleeFuncCloneMap.count(Callee)) 317306c3fb27SDimitry Andric RecordCalleeFuncOfCallsite( 317406c3fb27SDimitry Andric NewClone, CallsiteToCalleeFuncCloneMap[Callee]); 317506c3fb27SDimitry Andric // Update NewClone with the new Call clone of this callsite's Call 317606c3fb27SDimitry Andric // created for the new function clone created earlier. 317706c3fb27SDimitry Andric // Recall that we have already ensured when building the graph 317806c3fb27SDimitry Andric // that each caller can only call callsites within the same 317906c3fb27SDimitry Andric // function, so we are guaranteed that Callee Call is in the 318006c3fb27SDimitry Andric // current OrigFunc. 318106c3fb27SDimitry Andric // CallMap is set up as indexed by original Call at clone 0. 318206c3fb27SDimitry Andric CallInfo OrigCall(Callee->getOrigNode()->Call); 318306c3fb27SDimitry Andric OrigCall.setCloneNo(0); 318406c3fb27SDimitry Andric std::map<CallInfo, CallInfo> &CallMap = 318506c3fb27SDimitry Andric FuncClonesToCallMap[NewFuncClone]; 318606c3fb27SDimitry Andric assert(CallMap.count(OrigCall)); 318706c3fb27SDimitry Andric CallInfo NewCall(CallMap[OrigCall]); 318806c3fb27SDimitry Andric assert(NewCall); 318906c3fb27SDimitry Andric NewClone->setCall(NewCall); 319006c3fb27SDimitry Andric } 319106c3fb27SDimitry Andric } 319206c3fb27SDimitry Andric // Fall through to handling below to perform the recording of the 319306c3fb27SDimitry Andric // function for this callsite clone. This enables handling of cases 319406c3fb27SDimitry Andric // where the callers were assigned to different clones of a function. 319506c3fb27SDimitry Andric } 319606c3fb27SDimitry Andric 319706c3fb27SDimitry Andric // See if we can use existing function clone. Walk through 319806c3fb27SDimitry Andric // all caller edges to see if any have already been assigned to 319906c3fb27SDimitry Andric // a clone of this callsite's function. If we can use it, do so. If not, 320006c3fb27SDimitry Andric // because that function clone is already assigned to a different clone 320106c3fb27SDimitry Andric // of this callsite, then we need to clone again. 320206c3fb27SDimitry Andric // Basically, this checking is needed to handle the case where different 320306c3fb27SDimitry Andric // caller functions/callsites may need versions of this function 320406c3fb27SDimitry Andric // containing different mixes of callsite clones across the different 320506c3fb27SDimitry Andric // callsites within the function. If that happens, we need to create 320606c3fb27SDimitry Andric // additional function clones to handle the various combinations. 320706c3fb27SDimitry Andric // 320806c3fb27SDimitry Andric // Keep track of any new clones of this callsite created by the 320906c3fb27SDimitry Andric // following loop, as well as any existing clone that we decided to 321006c3fb27SDimitry Andric // assign this clone to. 321106c3fb27SDimitry Andric std::map<FuncInfo, ContextNode *> FuncCloneToNewCallsiteCloneMap; 321206c3fb27SDimitry Andric FuncInfo FuncCloneAssignedToCurCallsiteClone; 321306c3fb27SDimitry Andric // We need to be able to remove Edge from CallerEdges, so need to adjust 321406c3fb27SDimitry Andric // iterator in the loop. 321506c3fb27SDimitry Andric for (auto EI = Clone->CallerEdges.begin(); 321606c3fb27SDimitry Andric EI != Clone->CallerEdges.end();) { 321706c3fb27SDimitry Andric auto Edge = *EI; 321806c3fb27SDimitry Andric // Ignore any caller that does not have a recorded callsite Call. 321906c3fb27SDimitry Andric if (!Edge->Caller->hasCall()) { 322006c3fb27SDimitry Andric EI++; 322106c3fb27SDimitry Andric continue; 322206c3fb27SDimitry Andric } 322306c3fb27SDimitry Andric // If this caller already assigned to call a version of OrigFunc, need 322406c3fb27SDimitry Andric // to ensure we can assign this callsite clone to that function clone. 322506c3fb27SDimitry Andric if (CallsiteToCalleeFuncCloneMap.count(Edge->Caller)) { 322606c3fb27SDimitry Andric FuncInfo FuncCloneCalledByCaller = 322706c3fb27SDimitry Andric CallsiteToCalleeFuncCloneMap[Edge->Caller]; 322806c3fb27SDimitry Andric // First we need to confirm that this function clone is available 322906c3fb27SDimitry Andric // for use by this callsite node clone. 323006c3fb27SDimitry Andric // 323106c3fb27SDimitry Andric // While FuncCloneToCurNodeCloneMap is built only for this Node and 323206c3fb27SDimitry Andric // its callsite clones, one of those callsite clones X could have 323306c3fb27SDimitry Andric // been assigned to the same function clone called by Edge's caller 323406c3fb27SDimitry Andric // - if Edge's caller calls another callsite within Node's original 323506c3fb27SDimitry Andric // function, and that callsite has another caller reaching clone X. 323606c3fb27SDimitry Andric // We need to clone Node again in this case. 323706c3fb27SDimitry Andric if ((FuncCloneToCurNodeCloneMap.count(FuncCloneCalledByCaller) && 323806c3fb27SDimitry Andric FuncCloneToCurNodeCloneMap[FuncCloneCalledByCaller] != 323906c3fb27SDimitry Andric Clone) || 324006c3fb27SDimitry Andric // Detect when we have multiple callers of this callsite that 324106c3fb27SDimitry Andric // have already been assigned to specific, and different, clones 324206c3fb27SDimitry Andric // of OrigFunc (due to other unrelated callsites in Func they 324306c3fb27SDimitry Andric // reach via call contexts). Is this Clone of callsite Node 324406c3fb27SDimitry Andric // assigned to a different clone of OrigFunc? If so, clone Node 324506c3fb27SDimitry Andric // again. 324606c3fb27SDimitry Andric (FuncCloneAssignedToCurCallsiteClone && 324706c3fb27SDimitry Andric FuncCloneAssignedToCurCallsiteClone != 324806c3fb27SDimitry Andric FuncCloneCalledByCaller)) { 324906c3fb27SDimitry Andric // We need to use a different newly created callsite clone, in 325006c3fb27SDimitry Andric // order to assign it to another new function clone on a 325106c3fb27SDimitry Andric // subsequent iteration over the Clones array (adjusted below). 325206c3fb27SDimitry Andric // Note we specifically do not reset the 325306c3fb27SDimitry Andric // CallsiteToCalleeFuncCloneMap entry for this caller, so that 325406c3fb27SDimitry Andric // when this new clone is processed later we know which version of 325506c3fb27SDimitry Andric // the function to copy (so that other callsite clones we have 325606c3fb27SDimitry Andric // assigned to that function clone are properly cloned over). See 325706c3fb27SDimitry Andric // comments in the function cloning handling earlier. 325806c3fb27SDimitry Andric 325906c3fb27SDimitry Andric // Check if we already have cloned this callsite again while 326006c3fb27SDimitry Andric // walking through caller edges, for a caller calling the same 326106c3fb27SDimitry Andric // function clone. If so, we can move this edge to that new clone 326206c3fb27SDimitry Andric // rather than creating yet another new clone. 326306c3fb27SDimitry Andric if (FuncCloneToNewCallsiteCloneMap.count( 326406c3fb27SDimitry Andric FuncCloneCalledByCaller)) { 326506c3fb27SDimitry Andric ContextNode *NewClone = 326606c3fb27SDimitry Andric FuncCloneToNewCallsiteCloneMap[FuncCloneCalledByCaller]; 326706c3fb27SDimitry Andric moveEdgeToExistingCalleeClone(Edge, NewClone, &EI); 326806c3fb27SDimitry Andric // Cleanup any none type edges cloned over. 326906c3fb27SDimitry Andric removeNoneTypeCalleeEdges(NewClone); 327006c3fb27SDimitry Andric } else { 327106c3fb27SDimitry Andric // Create a new callsite clone. 327206c3fb27SDimitry Andric ContextNode *NewClone = moveEdgeToNewCalleeClone(Edge, &EI); 327306c3fb27SDimitry Andric removeNoneTypeCalleeEdges(NewClone); 327406c3fb27SDimitry Andric FuncCloneToNewCallsiteCloneMap[FuncCloneCalledByCaller] = 327506c3fb27SDimitry Andric NewClone; 327606c3fb27SDimitry Andric // Add to list of clones and process later. 327706c3fb27SDimitry Andric ClonesWorklist.push_back(NewClone); 327806c3fb27SDimitry Andric assert(EI == Clone->CallerEdges.end() || 327906c3fb27SDimitry Andric Clone->AllocTypes != (uint8_t)AllocationType::None); 328006c3fb27SDimitry Andric assert(NewClone->AllocTypes != (uint8_t)AllocationType::None); 328106c3fb27SDimitry Andric } 328206c3fb27SDimitry Andric // Moving the caller edge may have resulted in some none type 328306c3fb27SDimitry Andric // callee edges. 328406c3fb27SDimitry Andric removeNoneTypeCalleeEdges(Clone); 328506c3fb27SDimitry Andric // We will handle the newly created callsite clone in a subsequent 328606c3fb27SDimitry Andric // iteration over this Node's Clones. Continue here since we 328706c3fb27SDimitry Andric // already adjusted iterator EI while moving the edge. 328806c3fb27SDimitry Andric continue; 328906c3fb27SDimitry Andric } 329006c3fb27SDimitry Andric 329106c3fb27SDimitry Andric // Otherwise, we can use the function clone already assigned to this 329206c3fb27SDimitry Andric // caller. 329306c3fb27SDimitry Andric if (!FuncCloneAssignedToCurCallsiteClone) { 329406c3fb27SDimitry Andric FuncCloneAssignedToCurCallsiteClone = FuncCloneCalledByCaller; 329506c3fb27SDimitry Andric // Assign Clone to FuncCloneCalledByCaller 329606c3fb27SDimitry Andric AssignCallsiteCloneToFuncClone( 329706c3fb27SDimitry Andric FuncCloneCalledByCaller, Call, Clone, 329806c3fb27SDimitry Andric AllocationCallToContextNodeMap.count(Call)); 329906c3fb27SDimitry Andric } else 330006c3fb27SDimitry Andric // Don't need to do anything - callsite is already calling this 330106c3fb27SDimitry Andric // function clone. 330206c3fb27SDimitry Andric assert(FuncCloneAssignedToCurCallsiteClone == 330306c3fb27SDimitry Andric FuncCloneCalledByCaller); 330406c3fb27SDimitry Andric 330506c3fb27SDimitry Andric } else { 330606c3fb27SDimitry Andric // We have not already assigned this caller to a version of 330706c3fb27SDimitry Andric // OrigFunc. Do the assignment now. 330806c3fb27SDimitry Andric 330906c3fb27SDimitry Andric // First check if we have already assigned this callsite clone to a 331006c3fb27SDimitry Andric // clone of OrigFunc for another caller during this iteration over 331106c3fb27SDimitry Andric // its caller edges. 331206c3fb27SDimitry Andric if (!FuncCloneAssignedToCurCallsiteClone) { 331306c3fb27SDimitry Andric // Find first function in FuncClonesToCallMap without an assigned 331406c3fb27SDimitry Andric // clone of this callsite Node. We should always have one 331506c3fb27SDimitry Andric // available at this point due to the earlier cloning when the 331606c3fb27SDimitry Andric // FuncClonesToCallMap size was smaller than the clone number. 331706c3fb27SDimitry Andric for (auto &CF : FuncClonesToCallMap) { 331806c3fb27SDimitry Andric if (!FuncCloneToCurNodeCloneMap.count(CF.first)) { 331906c3fb27SDimitry Andric FuncCloneAssignedToCurCallsiteClone = CF.first; 332006c3fb27SDimitry Andric break; 332106c3fb27SDimitry Andric } 332206c3fb27SDimitry Andric } 332306c3fb27SDimitry Andric assert(FuncCloneAssignedToCurCallsiteClone); 332406c3fb27SDimitry Andric // Assign Clone to FuncCloneAssignedToCurCallsiteClone 332506c3fb27SDimitry Andric AssignCallsiteCloneToFuncClone( 332606c3fb27SDimitry Andric FuncCloneAssignedToCurCallsiteClone, Call, Clone, 332706c3fb27SDimitry Andric AllocationCallToContextNodeMap.count(Call)); 332806c3fb27SDimitry Andric } else 332906c3fb27SDimitry Andric assert(FuncCloneToCurNodeCloneMap 333006c3fb27SDimitry Andric [FuncCloneAssignedToCurCallsiteClone] == Clone); 333106c3fb27SDimitry Andric // Update callers to record function version called. 333206c3fb27SDimitry Andric RecordCalleeFuncOfCallsite(Edge->Caller, 333306c3fb27SDimitry Andric FuncCloneAssignedToCurCallsiteClone); 333406c3fb27SDimitry Andric } 333506c3fb27SDimitry Andric 333606c3fb27SDimitry Andric EI++; 333706c3fb27SDimitry Andric } 333806c3fb27SDimitry Andric } 333906c3fb27SDimitry Andric if (VerifyCCG) { 334006c3fb27SDimitry Andric checkNode<DerivedCCG, FuncTy, CallTy>(Node); 334106c3fb27SDimitry Andric for (const auto &PE : Node->CalleeEdges) 334206c3fb27SDimitry Andric checkNode<DerivedCCG, FuncTy, CallTy>(PE->Callee); 334306c3fb27SDimitry Andric for (const auto &CE : Node->CallerEdges) 334406c3fb27SDimitry Andric checkNode<DerivedCCG, FuncTy, CallTy>(CE->Caller); 334506c3fb27SDimitry Andric for (auto *Clone : Node->Clones) { 334606c3fb27SDimitry Andric checkNode<DerivedCCG, FuncTy, CallTy>(Clone); 334706c3fb27SDimitry Andric for (const auto &PE : Clone->CalleeEdges) 334806c3fb27SDimitry Andric checkNode<DerivedCCG, FuncTy, CallTy>(PE->Callee); 334906c3fb27SDimitry Andric for (const auto &CE : Clone->CallerEdges) 335006c3fb27SDimitry Andric checkNode<DerivedCCG, FuncTy, CallTy>(CE->Caller); 335106c3fb27SDimitry Andric } 335206c3fb27SDimitry Andric } 335306c3fb27SDimitry Andric } 335406c3fb27SDimitry Andric } 335506c3fb27SDimitry Andric 335606c3fb27SDimitry Andric auto UpdateCalls = [&](ContextNode *Node, 335706c3fb27SDimitry Andric DenseSet<const ContextNode *> &Visited, 335806c3fb27SDimitry Andric auto &&UpdateCalls) { 335906c3fb27SDimitry Andric auto Inserted = Visited.insert(Node); 336006c3fb27SDimitry Andric if (!Inserted.second) 336106c3fb27SDimitry Andric return; 336206c3fb27SDimitry Andric 336306c3fb27SDimitry Andric for (auto *Clone : Node->Clones) 336406c3fb27SDimitry Andric UpdateCalls(Clone, Visited, UpdateCalls); 336506c3fb27SDimitry Andric 336606c3fb27SDimitry Andric for (auto &Edge : Node->CallerEdges) 336706c3fb27SDimitry Andric UpdateCalls(Edge->Caller, Visited, UpdateCalls); 336806c3fb27SDimitry Andric 336906c3fb27SDimitry Andric // Skip if either no call to update, or if we ended up with no context ids 337006c3fb27SDimitry Andric // (we moved all edges onto other clones). 3371*0fca6ea1SDimitry Andric if (!Node->hasCall() || Node->emptyContextIds()) 337206c3fb27SDimitry Andric return; 337306c3fb27SDimitry Andric 337406c3fb27SDimitry Andric if (Node->IsAllocation) { 337506c3fb27SDimitry Andric updateAllocationCall(Node->Call, allocTypeToUse(Node->AllocTypes)); 337606c3fb27SDimitry Andric return; 337706c3fb27SDimitry Andric } 337806c3fb27SDimitry Andric 337906c3fb27SDimitry Andric if (!CallsiteToCalleeFuncCloneMap.count(Node)) 338006c3fb27SDimitry Andric return; 338106c3fb27SDimitry Andric 338206c3fb27SDimitry Andric auto CalleeFunc = CallsiteToCalleeFuncCloneMap[Node]; 338306c3fb27SDimitry Andric updateCall(Node->Call, CalleeFunc); 338406c3fb27SDimitry Andric }; 338506c3fb27SDimitry Andric 338606c3fb27SDimitry Andric // Performs DFS traversal starting from allocation nodes to update calls to 338706c3fb27SDimitry Andric // reflect cloning decisions recorded earlier. For regular LTO this will 338806c3fb27SDimitry Andric // update the actual calls in the IR to call the appropriate function clone 338906c3fb27SDimitry Andric // (and add attributes to allocation calls), whereas for ThinLTO the decisions 339006c3fb27SDimitry Andric // are recorded in the summary entries. 339106c3fb27SDimitry Andric DenseSet<const ContextNode *> Visited; 339206c3fb27SDimitry Andric for (auto &Entry : AllocationCallToContextNodeMap) 339306c3fb27SDimitry Andric UpdateCalls(Entry.second, Visited, UpdateCalls); 339406c3fb27SDimitry Andric 339506c3fb27SDimitry Andric return Changed; 339606c3fb27SDimitry Andric } 339706c3fb27SDimitry Andric 339806c3fb27SDimitry Andric static SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> createFunctionClones( 339906c3fb27SDimitry Andric Function &F, unsigned NumClones, Module &M, OptimizationRemarkEmitter &ORE, 340006c3fb27SDimitry Andric std::map<const Function *, SmallPtrSet<const GlobalAlias *, 1>> 340106c3fb27SDimitry Andric &FuncToAliasMap) { 340206c3fb27SDimitry Andric // The first "clone" is the original copy, we should only call this if we 340306c3fb27SDimitry Andric // needed to create new clones. 340406c3fb27SDimitry Andric assert(NumClones > 1); 340506c3fb27SDimitry Andric SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> VMaps; 340606c3fb27SDimitry Andric VMaps.reserve(NumClones - 1); 340706c3fb27SDimitry Andric FunctionsClonedThinBackend++; 340806c3fb27SDimitry Andric for (unsigned I = 1; I < NumClones; I++) { 340906c3fb27SDimitry Andric VMaps.emplace_back(std::make_unique<ValueToValueMapTy>()); 341006c3fb27SDimitry Andric auto *NewF = CloneFunction(&F, *VMaps.back()); 341106c3fb27SDimitry Andric FunctionClonesThinBackend++; 341206c3fb27SDimitry Andric // Strip memprof and callsite metadata from clone as they are no longer 341306c3fb27SDimitry Andric // needed. 341406c3fb27SDimitry Andric for (auto &BB : *NewF) { 341506c3fb27SDimitry Andric for (auto &Inst : BB) { 341606c3fb27SDimitry Andric Inst.setMetadata(LLVMContext::MD_memprof, nullptr); 341706c3fb27SDimitry Andric Inst.setMetadata(LLVMContext::MD_callsite, nullptr); 341806c3fb27SDimitry Andric } 341906c3fb27SDimitry Andric } 342006c3fb27SDimitry Andric std::string Name = getMemProfFuncName(F.getName(), I); 342106c3fb27SDimitry Andric auto *PrevF = M.getFunction(Name); 342206c3fb27SDimitry Andric if (PrevF) { 342306c3fb27SDimitry Andric // We might have created this when adjusting callsite in another 342406c3fb27SDimitry Andric // function. It should be a declaration. 342506c3fb27SDimitry Andric assert(PrevF->isDeclaration()); 342606c3fb27SDimitry Andric NewF->takeName(PrevF); 342706c3fb27SDimitry Andric PrevF->replaceAllUsesWith(NewF); 342806c3fb27SDimitry Andric PrevF->eraseFromParent(); 342906c3fb27SDimitry Andric } else 343006c3fb27SDimitry Andric NewF->setName(Name); 343106c3fb27SDimitry Andric ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofClone", &F) 343206c3fb27SDimitry Andric << "created clone " << ore::NV("NewFunction", NewF)); 343306c3fb27SDimitry Andric 343406c3fb27SDimitry Andric // Now handle aliases to this function, and clone those as well. 343506c3fb27SDimitry Andric if (!FuncToAliasMap.count(&F)) 343606c3fb27SDimitry Andric continue; 343706c3fb27SDimitry Andric for (auto *A : FuncToAliasMap[&F]) { 343806c3fb27SDimitry Andric std::string Name = getMemProfFuncName(A->getName(), I); 343906c3fb27SDimitry Andric auto *PrevA = M.getNamedAlias(Name); 344006c3fb27SDimitry Andric auto *NewA = GlobalAlias::create(A->getValueType(), 344106c3fb27SDimitry Andric A->getType()->getPointerAddressSpace(), 344206c3fb27SDimitry Andric A->getLinkage(), Name, NewF); 344306c3fb27SDimitry Andric NewA->copyAttributesFrom(A); 344406c3fb27SDimitry Andric if (PrevA) { 344506c3fb27SDimitry Andric // We might have created this when adjusting callsite in another 344606c3fb27SDimitry Andric // function. It should be a declaration. 344706c3fb27SDimitry Andric assert(PrevA->isDeclaration()); 344806c3fb27SDimitry Andric NewA->takeName(PrevA); 344906c3fb27SDimitry Andric PrevA->replaceAllUsesWith(NewA); 345006c3fb27SDimitry Andric PrevA->eraseFromParent(); 345106c3fb27SDimitry Andric } 345206c3fb27SDimitry Andric } 345306c3fb27SDimitry Andric } 345406c3fb27SDimitry Andric return VMaps; 345506c3fb27SDimitry Andric } 345606c3fb27SDimitry Andric 345706c3fb27SDimitry Andric // Locate the summary for F. This is complicated by the fact that it might 345806c3fb27SDimitry Andric // have been internalized or promoted. 345906c3fb27SDimitry Andric static ValueInfo findValueInfoForFunc(const Function &F, const Module &M, 346006c3fb27SDimitry Andric const ModuleSummaryIndex *ImportSummary) { 346106c3fb27SDimitry Andric // FIXME: Ideally we would retain the original GUID in some fashion on the 346206c3fb27SDimitry Andric // function (e.g. as metadata), but for now do our best to locate the 346306c3fb27SDimitry Andric // summary without that information. 346406c3fb27SDimitry Andric ValueInfo TheFnVI = ImportSummary->getValueInfo(F.getGUID()); 346506c3fb27SDimitry Andric if (!TheFnVI) 346606c3fb27SDimitry Andric // See if theFn was internalized, by checking index directly with 346706c3fb27SDimitry Andric // original name (this avoids the name adjustment done by getGUID() for 346806c3fb27SDimitry Andric // internal symbols). 346906c3fb27SDimitry Andric TheFnVI = ImportSummary->getValueInfo(GlobalValue::getGUID(F.getName())); 347006c3fb27SDimitry Andric if (TheFnVI) 347106c3fb27SDimitry Andric return TheFnVI; 347206c3fb27SDimitry Andric // Now query with the original name before any promotion was performed. 347306c3fb27SDimitry Andric StringRef OrigName = 347406c3fb27SDimitry Andric ModuleSummaryIndex::getOriginalNameBeforePromote(F.getName()); 347506c3fb27SDimitry Andric std::string OrigId = GlobalValue::getGlobalIdentifier( 347606c3fb27SDimitry Andric OrigName, GlobalValue::InternalLinkage, M.getSourceFileName()); 347706c3fb27SDimitry Andric TheFnVI = ImportSummary->getValueInfo(GlobalValue::getGUID(OrigId)); 347806c3fb27SDimitry Andric if (TheFnVI) 347906c3fb27SDimitry Andric return TheFnVI; 348006c3fb27SDimitry Andric // Could be a promoted local imported from another module. We need to pass 348106c3fb27SDimitry Andric // down more info here to find the original module id. For now, try with 348206c3fb27SDimitry Andric // the OrigName which might have been stored in the OidGuidMap in the 348306c3fb27SDimitry Andric // index. This would not work if there were same-named locals in multiple 348406c3fb27SDimitry Andric // modules, however. 348506c3fb27SDimitry Andric auto OrigGUID = 348606c3fb27SDimitry Andric ImportSummary->getGUIDFromOriginalID(GlobalValue::getGUID(OrigName)); 348706c3fb27SDimitry Andric if (OrigGUID) 348806c3fb27SDimitry Andric TheFnVI = ImportSummary->getValueInfo(OrigGUID); 348906c3fb27SDimitry Andric return TheFnVI; 349006c3fb27SDimitry Andric } 349106c3fb27SDimitry Andric 349206c3fb27SDimitry Andric bool MemProfContextDisambiguation::applyImport(Module &M) { 349306c3fb27SDimitry Andric assert(ImportSummary); 349406c3fb27SDimitry Andric bool Changed = false; 349506c3fb27SDimitry Andric 349606c3fb27SDimitry Andric auto IsMemProfClone = [](const Function &F) { 349706c3fb27SDimitry Andric return F.getName().contains(MemProfCloneSuffix); 349806c3fb27SDimitry Andric }; 349906c3fb27SDimitry Andric 350006c3fb27SDimitry Andric // We also need to clone any aliases that reference cloned functions, because 350106c3fb27SDimitry Andric // the modified callsites may invoke via the alias. Keep track of the aliases 350206c3fb27SDimitry Andric // for each function. 350306c3fb27SDimitry Andric std::map<const Function *, SmallPtrSet<const GlobalAlias *, 1>> 350406c3fb27SDimitry Andric FuncToAliasMap; 350506c3fb27SDimitry Andric for (auto &A : M.aliases()) { 350606c3fb27SDimitry Andric auto *Aliasee = A.getAliaseeObject(); 350706c3fb27SDimitry Andric if (auto *F = dyn_cast<Function>(Aliasee)) 350806c3fb27SDimitry Andric FuncToAliasMap[F].insert(&A); 350906c3fb27SDimitry Andric } 351006c3fb27SDimitry Andric 351106c3fb27SDimitry Andric for (auto &F : M) { 351206c3fb27SDimitry Andric if (F.isDeclaration() || IsMemProfClone(F)) 351306c3fb27SDimitry Andric continue; 351406c3fb27SDimitry Andric 351506c3fb27SDimitry Andric OptimizationRemarkEmitter ORE(&F); 351606c3fb27SDimitry Andric 351706c3fb27SDimitry Andric SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> VMaps; 351806c3fb27SDimitry Andric bool ClonesCreated = false; 351906c3fb27SDimitry Andric unsigned NumClonesCreated = 0; 352006c3fb27SDimitry Andric auto CloneFuncIfNeeded = [&](unsigned NumClones) { 352106c3fb27SDimitry Andric // We should at least have version 0 which is the original copy. 352206c3fb27SDimitry Andric assert(NumClones > 0); 352306c3fb27SDimitry Andric // If only one copy needed use original. 352406c3fb27SDimitry Andric if (NumClones == 1) 352506c3fb27SDimitry Andric return; 352606c3fb27SDimitry Andric // If we already performed cloning of this function, confirm that the 352706c3fb27SDimitry Andric // requested number of clones matches (the thin link should ensure the 352806c3fb27SDimitry Andric // number of clones for each constituent callsite is consistent within 352906c3fb27SDimitry Andric // each function), before returning. 353006c3fb27SDimitry Andric if (ClonesCreated) { 353106c3fb27SDimitry Andric assert(NumClonesCreated == NumClones); 353206c3fb27SDimitry Andric return; 353306c3fb27SDimitry Andric } 353406c3fb27SDimitry Andric VMaps = createFunctionClones(F, NumClones, M, ORE, FuncToAliasMap); 353506c3fb27SDimitry Andric // The first "clone" is the original copy, which doesn't have a VMap. 353606c3fb27SDimitry Andric assert(VMaps.size() == NumClones - 1); 353706c3fb27SDimitry Andric Changed = true; 353806c3fb27SDimitry Andric ClonesCreated = true; 353906c3fb27SDimitry Andric NumClonesCreated = NumClones; 354006c3fb27SDimitry Andric }; 354106c3fb27SDimitry Andric 3542297eecfbSDimitry Andric auto CloneCallsite = [&](const CallsiteInfo &StackNode, CallBase *CB, 3543297eecfbSDimitry Andric Function *CalledFunction) { 3544297eecfbSDimitry Andric // Perform cloning if not yet done. 3545297eecfbSDimitry Andric CloneFuncIfNeeded(/*NumClones=*/StackNode.Clones.size()); 3546297eecfbSDimitry Andric 3547297eecfbSDimitry Andric // Should have skipped indirect calls via mayHaveMemprofSummary. 3548297eecfbSDimitry Andric assert(CalledFunction); 3549297eecfbSDimitry Andric assert(!IsMemProfClone(*CalledFunction)); 3550297eecfbSDimitry Andric 3551297eecfbSDimitry Andric // Update the calls per the summary info. 3552297eecfbSDimitry Andric // Save orig name since it gets updated in the first iteration 3553297eecfbSDimitry Andric // below. 3554297eecfbSDimitry Andric auto CalleeOrigName = CalledFunction->getName(); 3555297eecfbSDimitry Andric for (unsigned J = 0; J < StackNode.Clones.size(); J++) { 3556297eecfbSDimitry Andric // Do nothing if this version calls the original version of its 3557297eecfbSDimitry Andric // callee. 3558297eecfbSDimitry Andric if (!StackNode.Clones[J]) 3559297eecfbSDimitry Andric continue; 3560297eecfbSDimitry Andric auto NewF = M.getOrInsertFunction( 3561297eecfbSDimitry Andric getMemProfFuncName(CalleeOrigName, StackNode.Clones[J]), 3562297eecfbSDimitry Andric CalledFunction->getFunctionType()); 3563297eecfbSDimitry Andric CallBase *CBClone; 3564297eecfbSDimitry Andric // Copy 0 is the original function. 3565297eecfbSDimitry Andric if (!J) 3566297eecfbSDimitry Andric CBClone = CB; 3567297eecfbSDimitry Andric else 3568297eecfbSDimitry Andric CBClone = cast<CallBase>((*VMaps[J - 1])[CB]); 3569297eecfbSDimitry Andric CBClone->setCalledFunction(NewF); 3570297eecfbSDimitry Andric ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofCall", CBClone) 3571297eecfbSDimitry Andric << ore::NV("Call", CBClone) << " in clone " 3572297eecfbSDimitry Andric << ore::NV("Caller", CBClone->getFunction()) 3573297eecfbSDimitry Andric << " assigned to call function clone " 3574297eecfbSDimitry Andric << ore::NV("Callee", NewF.getCallee())); 3575297eecfbSDimitry Andric } 3576297eecfbSDimitry Andric }; 3577297eecfbSDimitry Andric 357806c3fb27SDimitry Andric // Locate the summary for F. 357906c3fb27SDimitry Andric ValueInfo TheFnVI = findValueInfoForFunc(F, M, ImportSummary); 358006c3fb27SDimitry Andric // If not found, this could be an imported local (see comment in 358106c3fb27SDimitry Andric // findValueInfoForFunc). Skip for now as it will be cloned in its original 358206c3fb27SDimitry Andric // module (where it would have been promoted to global scope so should 358306c3fb27SDimitry Andric // satisfy any reference in this module). 358406c3fb27SDimitry Andric if (!TheFnVI) 358506c3fb27SDimitry Andric continue; 358606c3fb27SDimitry Andric 358706c3fb27SDimitry Andric auto *GVSummary = 358806c3fb27SDimitry Andric ImportSummary->findSummaryInModule(TheFnVI, M.getModuleIdentifier()); 3589*0fca6ea1SDimitry Andric if (!GVSummary) { 3590*0fca6ea1SDimitry Andric // Must have been imported, use the summary which matches the definition。 3591*0fca6ea1SDimitry Andric // (might be multiple if this was a linkonce_odr). 3592*0fca6ea1SDimitry Andric auto SrcModuleMD = F.getMetadata("thinlto_src_module"); 3593*0fca6ea1SDimitry Andric assert(SrcModuleMD && 3594*0fca6ea1SDimitry Andric "enable-import-metadata is needed to emit thinlto_src_module"); 3595*0fca6ea1SDimitry Andric StringRef SrcModule = 3596*0fca6ea1SDimitry Andric dyn_cast<MDString>(SrcModuleMD->getOperand(0))->getString(); 3597*0fca6ea1SDimitry Andric for (auto &GVS : TheFnVI.getSummaryList()) { 3598*0fca6ea1SDimitry Andric if (GVS->modulePath() == SrcModule) { 3599*0fca6ea1SDimitry Andric GVSummary = GVS.get(); 3600*0fca6ea1SDimitry Andric break; 3601*0fca6ea1SDimitry Andric } 3602*0fca6ea1SDimitry Andric } 3603*0fca6ea1SDimitry Andric assert(GVSummary && GVSummary->modulePath() == SrcModule); 3604*0fca6ea1SDimitry Andric } 360506c3fb27SDimitry Andric 360606c3fb27SDimitry Andric // If this was an imported alias skip it as we won't have the function 360706c3fb27SDimitry Andric // summary, and it should be cloned in the original module. 360806c3fb27SDimitry Andric if (isa<AliasSummary>(GVSummary)) 360906c3fb27SDimitry Andric continue; 361006c3fb27SDimitry Andric 361106c3fb27SDimitry Andric auto *FS = cast<FunctionSummary>(GVSummary->getBaseObject()); 361206c3fb27SDimitry Andric 361306c3fb27SDimitry Andric if (FS->allocs().empty() && FS->callsites().empty()) 361406c3fb27SDimitry Andric continue; 361506c3fb27SDimitry Andric 361606c3fb27SDimitry Andric auto SI = FS->callsites().begin(); 361706c3fb27SDimitry Andric auto AI = FS->allocs().begin(); 361806c3fb27SDimitry Andric 3619297eecfbSDimitry Andric // To handle callsite infos synthesized for tail calls which have missing 3620297eecfbSDimitry Andric // frames in the profiled context, map callee VI to the synthesized callsite 3621297eecfbSDimitry Andric // info. 3622297eecfbSDimitry Andric DenseMap<ValueInfo, CallsiteInfo> MapTailCallCalleeVIToCallsite; 3623297eecfbSDimitry Andric // Iterate the callsites for this function in reverse, since we place all 3624297eecfbSDimitry Andric // those synthesized for tail calls at the end. 3625297eecfbSDimitry Andric for (auto CallsiteIt = FS->callsites().rbegin(); 3626297eecfbSDimitry Andric CallsiteIt != FS->callsites().rend(); CallsiteIt++) { 3627297eecfbSDimitry Andric auto &Callsite = *CallsiteIt; 3628297eecfbSDimitry Andric // Stop as soon as we see a non-synthesized callsite info (see comment 3629297eecfbSDimitry Andric // above loop). All the entries added for discovered tail calls have empty 3630297eecfbSDimitry Andric // stack ids. 3631297eecfbSDimitry Andric if (!Callsite.StackIdIndices.empty()) 3632297eecfbSDimitry Andric break; 3633297eecfbSDimitry Andric MapTailCallCalleeVIToCallsite.insert({Callsite.Callee, Callsite}); 3634297eecfbSDimitry Andric } 3635297eecfbSDimitry Andric 363606c3fb27SDimitry Andric // Assume for now that the instructions are in the exact same order 363706c3fb27SDimitry Andric // as when the summary was created, but confirm this is correct by 363806c3fb27SDimitry Andric // matching the stack ids. 363906c3fb27SDimitry Andric for (auto &BB : F) { 364006c3fb27SDimitry Andric for (auto &I : BB) { 364106c3fb27SDimitry Andric auto *CB = dyn_cast<CallBase>(&I); 364206c3fb27SDimitry Andric // Same handling as when creating module summary. 364306c3fb27SDimitry Andric if (!mayHaveMemprofSummary(CB)) 364406c3fb27SDimitry Andric continue; 364506c3fb27SDimitry Andric 36465f757f3fSDimitry Andric auto *CalledValue = CB->getCalledOperand(); 36475f757f3fSDimitry Andric auto *CalledFunction = CB->getCalledFunction(); 36485f757f3fSDimitry Andric if (CalledValue && !CalledFunction) { 36495f757f3fSDimitry Andric CalledValue = CalledValue->stripPointerCasts(); 36505f757f3fSDimitry Andric // Stripping pointer casts can reveal a called function. 36515f757f3fSDimitry Andric CalledFunction = dyn_cast<Function>(CalledValue); 36525f757f3fSDimitry Andric } 36535f757f3fSDimitry Andric // Check if this is an alias to a function. If so, get the 36545f757f3fSDimitry Andric // called aliasee for the checks below. 36555f757f3fSDimitry Andric if (auto *GA = dyn_cast<GlobalAlias>(CalledValue)) { 36565f757f3fSDimitry Andric assert(!CalledFunction && 36575f757f3fSDimitry Andric "Expected null called function in callsite for alias"); 36585f757f3fSDimitry Andric CalledFunction = dyn_cast<Function>(GA->getAliaseeObject()); 36595f757f3fSDimitry Andric } 36605f757f3fSDimitry Andric 366106c3fb27SDimitry Andric CallStack<MDNode, MDNode::op_iterator> CallsiteContext( 366206c3fb27SDimitry Andric I.getMetadata(LLVMContext::MD_callsite)); 366306c3fb27SDimitry Andric auto *MemProfMD = I.getMetadata(LLVMContext::MD_memprof); 366406c3fb27SDimitry Andric 366506c3fb27SDimitry Andric // Include allocs that were already assigned a memprof function 366606c3fb27SDimitry Andric // attribute in the statistics. 366706c3fb27SDimitry Andric if (CB->getAttributes().hasFnAttr("memprof")) { 366806c3fb27SDimitry Andric assert(!MemProfMD); 366906c3fb27SDimitry Andric CB->getAttributes().getFnAttr("memprof").getValueAsString() == "cold" 367006c3fb27SDimitry Andric ? AllocTypeColdThinBackend++ 367106c3fb27SDimitry Andric : AllocTypeNotColdThinBackend++; 367206c3fb27SDimitry Andric OrigAllocsThinBackend++; 367306c3fb27SDimitry Andric AllocVersionsThinBackend++; 367406c3fb27SDimitry Andric if (!MaxAllocVersionsThinBackend) 367506c3fb27SDimitry Andric MaxAllocVersionsThinBackend = 1; 367606c3fb27SDimitry Andric // Remove any remaining callsite metadata and we can skip the rest of 367706c3fb27SDimitry Andric // the handling for this instruction, since no cloning needed. 367806c3fb27SDimitry Andric I.setMetadata(LLVMContext::MD_callsite, nullptr); 367906c3fb27SDimitry Andric continue; 368006c3fb27SDimitry Andric } 368106c3fb27SDimitry Andric 368206c3fb27SDimitry Andric if (MemProfMD) { 368306c3fb27SDimitry Andric // Consult the next alloc node. 368406c3fb27SDimitry Andric assert(AI != FS->allocs().end()); 368506c3fb27SDimitry Andric auto &AllocNode = *(AI++); 368606c3fb27SDimitry Andric 368706c3fb27SDimitry Andric // Sanity check that the MIB stack ids match between the summary and 368806c3fb27SDimitry Andric // instruction metadata. 368906c3fb27SDimitry Andric auto MIBIter = AllocNode.MIBs.begin(); 369006c3fb27SDimitry Andric for (auto &MDOp : MemProfMD->operands()) { 369106c3fb27SDimitry Andric assert(MIBIter != AllocNode.MIBs.end()); 369206c3fb27SDimitry Andric LLVM_ATTRIBUTE_UNUSED auto StackIdIndexIter = 369306c3fb27SDimitry Andric MIBIter->StackIdIndices.begin(); 369406c3fb27SDimitry Andric auto *MIBMD = cast<const MDNode>(MDOp); 369506c3fb27SDimitry Andric MDNode *StackMDNode = getMIBStackNode(MIBMD); 369606c3fb27SDimitry Andric assert(StackMDNode); 369706c3fb27SDimitry Andric CallStack<MDNode, MDNode::op_iterator> StackContext(StackMDNode); 3698*0fca6ea1SDimitry Andric auto ContextIterBegin = 369906c3fb27SDimitry Andric StackContext.beginAfterSharedPrefix(CallsiteContext); 3700*0fca6ea1SDimitry Andric // Skip the checking on the first iteration. 3701*0fca6ea1SDimitry Andric uint64_t LastStackContextId = 3702*0fca6ea1SDimitry Andric (ContextIterBegin != StackContext.end() && 3703*0fca6ea1SDimitry Andric *ContextIterBegin == 0) 3704*0fca6ea1SDimitry Andric ? 1 3705*0fca6ea1SDimitry Andric : 0; 3706*0fca6ea1SDimitry Andric for (auto ContextIter = ContextIterBegin; 370706c3fb27SDimitry Andric ContextIter != StackContext.end(); ++ContextIter) { 370806c3fb27SDimitry Andric // If this is a direct recursion, simply skip the duplicate 370906c3fb27SDimitry Andric // entries, to be consistent with how the summary ids were 371006c3fb27SDimitry Andric // generated during ModuleSummaryAnalysis. 3711*0fca6ea1SDimitry Andric if (LastStackContextId == *ContextIter) 371206c3fb27SDimitry Andric continue; 3713*0fca6ea1SDimitry Andric LastStackContextId = *ContextIter; 371406c3fb27SDimitry Andric assert(StackIdIndexIter != MIBIter->StackIdIndices.end()); 371506c3fb27SDimitry Andric assert(ImportSummary->getStackIdAtIndex(*StackIdIndexIter) == 371606c3fb27SDimitry Andric *ContextIter); 371706c3fb27SDimitry Andric StackIdIndexIter++; 371806c3fb27SDimitry Andric } 371906c3fb27SDimitry Andric MIBIter++; 372006c3fb27SDimitry Andric } 372106c3fb27SDimitry Andric 372206c3fb27SDimitry Andric // Perform cloning if not yet done. 372306c3fb27SDimitry Andric CloneFuncIfNeeded(/*NumClones=*/AllocNode.Versions.size()); 372406c3fb27SDimitry Andric 372506c3fb27SDimitry Andric OrigAllocsThinBackend++; 372606c3fb27SDimitry Andric AllocVersionsThinBackend += AllocNode.Versions.size(); 372706c3fb27SDimitry Andric if (MaxAllocVersionsThinBackend < AllocNode.Versions.size()) 372806c3fb27SDimitry Andric MaxAllocVersionsThinBackend = AllocNode.Versions.size(); 372906c3fb27SDimitry Andric 373006c3fb27SDimitry Andric // If there is only one version that means we didn't end up 373106c3fb27SDimitry Andric // considering this function for cloning, and in that case the alloc 373206c3fb27SDimitry Andric // will still be none type or should have gotten the default NotCold. 373306c3fb27SDimitry Andric // Skip that after calling clone helper since that does some sanity 373406c3fb27SDimitry Andric // checks that confirm we haven't decided yet that we need cloning. 373506c3fb27SDimitry Andric if (AllocNode.Versions.size() == 1) { 373606c3fb27SDimitry Andric assert((AllocationType)AllocNode.Versions[0] == 373706c3fb27SDimitry Andric AllocationType::NotCold || 373806c3fb27SDimitry Andric (AllocationType)AllocNode.Versions[0] == 373906c3fb27SDimitry Andric AllocationType::None); 374006c3fb27SDimitry Andric UnclonableAllocsThinBackend++; 374106c3fb27SDimitry Andric continue; 374206c3fb27SDimitry Andric } 374306c3fb27SDimitry Andric 374406c3fb27SDimitry Andric // All versions should have a singular allocation type. 374506c3fb27SDimitry Andric assert(llvm::none_of(AllocNode.Versions, [](uint8_t Type) { 374606c3fb27SDimitry Andric return Type == ((uint8_t)AllocationType::NotCold | 374706c3fb27SDimitry Andric (uint8_t)AllocationType::Cold); 374806c3fb27SDimitry Andric })); 374906c3fb27SDimitry Andric 375006c3fb27SDimitry Andric // Update the allocation types per the summary info. 375106c3fb27SDimitry Andric for (unsigned J = 0; J < AllocNode.Versions.size(); J++) { 375206c3fb27SDimitry Andric // Ignore any that didn't get an assigned allocation type. 375306c3fb27SDimitry Andric if (AllocNode.Versions[J] == (uint8_t)AllocationType::None) 375406c3fb27SDimitry Andric continue; 375506c3fb27SDimitry Andric AllocationType AllocTy = (AllocationType)AllocNode.Versions[J]; 375606c3fb27SDimitry Andric AllocTy == AllocationType::Cold ? AllocTypeColdThinBackend++ 375706c3fb27SDimitry Andric : AllocTypeNotColdThinBackend++; 375806c3fb27SDimitry Andric std::string AllocTypeString = getAllocTypeAttributeString(AllocTy); 375906c3fb27SDimitry Andric auto A = llvm::Attribute::get(F.getContext(), "memprof", 376006c3fb27SDimitry Andric AllocTypeString); 376106c3fb27SDimitry Andric CallBase *CBClone; 376206c3fb27SDimitry Andric // Copy 0 is the original function. 376306c3fb27SDimitry Andric if (!J) 376406c3fb27SDimitry Andric CBClone = CB; 376506c3fb27SDimitry Andric else 376606c3fb27SDimitry Andric // Since VMaps are only created for new clones, we index with 376706c3fb27SDimitry Andric // clone J-1 (J==0 is the original clone and does not have a VMaps 376806c3fb27SDimitry Andric // entry). 376906c3fb27SDimitry Andric CBClone = cast<CallBase>((*VMaps[J - 1])[CB]); 377006c3fb27SDimitry Andric CBClone->addFnAttr(A); 377106c3fb27SDimitry Andric ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofAttribute", CBClone) 377206c3fb27SDimitry Andric << ore::NV("AllocationCall", CBClone) << " in clone " 377306c3fb27SDimitry Andric << ore::NV("Caller", CBClone->getFunction()) 377406c3fb27SDimitry Andric << " marked with memprof allocation attribute " 377506c3fb27SDimitry Andric << ore::NV("Attribute", AllocTypeString)); 377606c3fb27SDimitry Andric } 377706c3fb27SDimitry Andric } else if (!CallsiteContext.empty()) { 377806c3fb27SDimitry Andric // Consult the next callsite node. 377906c3fb27SDimitry Andric assert(SI != FS->callsites().end()); 378006c3fb27SDimitry Andric auto &StackNode = *(SI++); 378106c3fb27SDimitry Andric 378206c3fb27SDimitry Andric #ifndef NDEBUG 378306c3fb27SDimitry Andric // Sanity check that the stack ids match between the summary and 378406c3fb27SDimitry Andric // instruction metadata. 378506c3fb27SDimitry Andric auto StackIdIndexIter = StackNode.StackIdIndices.begin(); 378606c3fb27SDimitry Andric for (auto StackId : CallsiteContext) { 378706c3fb27SDimitry Andric assert(StackIdIndexIter != StackNode.StackIdIndices.end()); 378806c3fb27SDimitry Andric assert(ImportSummary->getStackIdAtIndex(*StackIdIndexIter) == 378906c3fb27SDimitry Andric StackId); 379006c3fb27SDimitry Andric StackIdIndexIter++; 379106c3fb27SDimitry Andric } 379206c3fb27SDimitry Andric #endif 379306c3fb27SDimitry Andric 3794297eecfbSDimitry Andric CloneCallsite(StackNode, CB, CalledFunction); 3795297eecfbSDimitry Andric } else if (CB->isTailCall()) { 3796297eecfbSDimitry Andric // Locate the synthesized callsite info for the callee VI, if any was 3797297eecfbSDimitry Andric // created, and use that for cloning. 3798297eecfbSDimitry Andric ValueInfo CalleeVI = 3799297eecfbSDimitry Andric findValueInfoForFunc(*CalledFunction, M, ImportSummary); 3800297eecfbSDimitry Andric if (CalleeVI && MapTailCallCalleeVIToCallsite.count(CalleeVI)) { 3801297eecfbSDimitry Andric auto Callsite = MapTailCallCalleeVIToCallsite.find(CalleeVI); 3802297eecfbSDimitry Andric assert(Callsite != MapTailCallCalleeVIToCallsite.end()); 3803297eecfbSDimitry Andric CloneCallsite(Callsite->second, CB, CalledFunction); 380406c3fb27SDimitry Andric } 380506c3fb27SDimitry Andric } 380606c3fb27SDimitry Andric // Memprof and callsite metadata on memory allocations no longer needed. 380706c3fb27SDimitry Andric I.setMetadata(LLVMContext::MD_memprof, nullptr); 380806c3fb27SDimitry Andric I.setMetadata(LLVMContext::MD_callsite, nullptr); 380906c3fb27SDimitry Andric } 381006c3fb27SDimitry Andric } 381106c3fb27SDimitry Andric } 381206c3fb27SDimitry Andric 381306c3fb27SDimitry Andric return Changed; 381406c3fb27SDimitry Andric } 381506c3fb27SDimitry Andric 381606c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 381706c3fb27SDimitry Andric bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::process() { 381806c3fb27SDimitry Andric if (DumpCCG) { 381906c3fb27SDimitry Andric dbgs() << "CCG before cloning:\n"; 382006c3fb27SDimitry Andric dbgs() << *this; 382106c3fb27SDimitry Andric } 382206c3fb27SDimitry Andric if (ExportToDot) 382306c3fb27SDimitry Andric exportToDot("postbuild"); 382406c3fb27SDimitry Andric 382506c3fb27SDimitry Andric if (VerifyCCG) { 382606c3fb27SDimitry Andric check(); 382706c3fb27SDimitry Andric } 382806c3fb27SDimitry Andric 382906c3fb27SDimitry Andric identifyClones(); 383006c3fb27SDimitry Andric 383106c3fb27SDimitry Andric if (VerifyCCG) { 383206c3fb27SDimitry Andric check(); 383306c3fb27SDimitry Andric } 383406c3fb27SDimitry Andric 383506c3fb27SDimitry Andric if (DumpCCG) { 383606c3fb27SDimitry Andric dbgs() << "CCG after cloning:\n"; 383706c3fb27SDimitry Andric dbgs() << *this; 383806c3fb27SDimitry Andric } 383906c3fb27SDimitry Andric if (ExportToDot) 384006c3fb27SDimitry Andric exportToDot("cloned"); 384106c3fb27SDimitry Andric 384206c3fb27SDimitry Andric bool Changed = assignFunctions(); 384306c3fb27SDimitry Andric 384406c3fb27SDimitry Andric if (DumpCCG) { 384506c3fb27SDimitry Andric dbgs() << "CCG after assigning function clones:\n"; 384606c3fb27SDimitry Andric dbgs() << *this; 384706c3fb27SDimitry Andric } 384806c3fb27SDimitry Andric if (ExportToDot) 384906c3fb27SDimitry Andric exportToDot("clonefuncassign"); 385006c3fb27SDimitry Andric 3851*0fca6ea1SDimitry Andric if (MemProfReportHintedSizes) 3852*0fca6ea1SDimitry Andric printTotalSizes(errs()); 3853*0fca6ea1SDimitry Andric 385406c3fb27SDimitry Andric return Changed; 385506c3fb27SDimitry Andric } 385606c3fb27SDimitry Andric 385706c3fb27SDimitry Andric bool MemProfContextDisambiguation::processModule( 385806c3fb27SDimitry Andric Module &M, 38597a6dacacSDimitry Andric llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter) { 386006c3fb27SDimitry Andric 386106c3fb27SDimitry Andric // If we have an import summary, then the cloning decisions were made during 386206c3fb27SDimitry Andric // the thin link on the index. Apply them and return. 386306c3fb27SDimitry Andric if (ImportSummary) 386406c3fb27SDimitry Andric return applyImport(M); 386506c3fb27SDimitry Andric 386606c3fb27SDimitry Andric // TODO: If/when other types of memprof cloning are enabled beyond just for 386706c3fb27SDimitry Andric // hot and cold, we will need to change this to individually control the 386806c3fb27SDimitry Andric // AllocationType passed to addStackNodesForMIB during CCG construction. 386906c3fb27SDimitry Andric // Note that we specifically check this after applying imports above, so that 387006c3fb27SDimitry Andric // the option isn't needed to be passed to distributed ThinLTO backend 387106c3fb27SDimitry Andric // clang processes, which won't necessarily have visibility into the linker 387206c3fb27SDimitry Andric // dependences. Instead the information is communicated from the LTO link to 387306c3fb27SDimitry Andric // the backends via the combined summary index. 387406c3fb27SDimitry Andric if (!SupportsHotColdNew) 387506c3fb27SDimitry Andric return false; 387606c3fb27SDimitry Andric 387706c3fb27SDimitry Andric ModuleCallsiteContextGraph CCG(M, OREGetter); 387806c3fb27SDimitry Andric return CCG.process(); 387906c3fb27SDimitry Andric } 388006c3fb27SDimitry Andric 388106c3fb27SDimitry Andric MemProfContextDisambiguation::MemProfContextDisambiguation( 388206c3fb27SDimitry Andric const ModuleSummaryIndex *Summary) 388306c3fb27SDimitry Andric : ImportSummary(Summary) { 388406c3fb27SDimitry Andric if (ImportSummary) { 388506c3fb27SDimitry Andric // The MemProfImportSummary should only be used for testing ThinLTO 388606c3fb27SDimitry Andric // distributed backend handling via opt, in which case we don't have a 388706c3fb27SDimitry Andric // summary from the pass pipeline. 388806c3fb27SDimitry Andric assert(MemProfImportSummary.empty()); 388906c3fb27SDimitry Andric return; 389006c3fb27SDimitry Andric } 389106c3fb27SDimitry Andric if (MemProfImportSummary.empty()) 389206c3fb27SDimitry Andric return; 389306c3fb27SDimitry Andric 389406c3fb27SDimitry Andric auto ReadSummaryFile = 389506c3fb27SDimitry Andric errorOrToExpected(MemoryBuffer::getFile(MemProfImportSummary)); 389606c3fb27SDimitry Andric if (!ReadSummaryFile) { 389706c3fb27SDimitry Andric logAllUnhandledErrors(ReadSummaryFile.takeError(), errs(), 389806c3fb27SDimitry Andric "Error loading file '" + MemProfImportSummary + 389906c3fb27SDimitry Andric "': "); 390006c3fb27SDimitry Andric return; 390106c3fb27SDimitry Andric } 390206c3fb27SDimitry Andric auto ImportSummaryForTestingOrErr = getModuleSummaryIndex(**ReadSummaryFile); 390306c3fb27SDimitry Andric if (!ImportSummaryForTestingOrErr) { 390406c3fb27SDimitry Andric logAllUnhandledErrors(ImportSummaryForTestingOrErr.takeError(), errs(), 390506c3fb27SDimitry Andric "Error parsing file '" + MemProfImportSummary + 390606c3fb27SDimitry Andric "': "); 390706c3fb27SDimitry Andric return; 390806c3fb27SDimitry Andric } 390906c3fb27SDimitry Andric ImportSummaryForTesting = std::move(*ImportSummaryForTestingOrErr); 391006c3fb27SDimitry Andric ImportSummary = ImportSummaryForTesting.get(); 391106c3fb27SDimitry Andric } 391206c3fb27SDimitry Andric 391306c3fb27SDimitry Andric PreservedAnalyses MemProfContextDisambiguation::run(Module &M, 391406c3fb27SDimitry Andric ModuleAnalysisManager &AM) { 391506c3fb27SDimitry Andric auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 391606c3fb27SDimitry Andric auto OREGetter = [&](Function *F) -> OptimizationRemarkEmitter & { 391706c3fb27SDimitry Andric return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*F); 391806c3fb27SDimitry Andric }; 391906c3fb27SDimitry Andric if (!processModule(M, OREGetter)) 392006c3fb27SDimitry Andric return PreservedAnalyses::all(); 392106c3fb27SDimitry Andric return PreservedAnalyses::none(); 392206c3fb27SDimitry Andric } 392306c3fb27SDimitry Andric 392406c3fb27SDimitry Andric void MemProfContextDisambiguation::run( 392506c3fb27SDimitry Andric ModuleSummaryIndex &Index, 39267a6dacacSDimitry Andric llvm::function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)> 392706c3fb27SDimitry Andric isPrevailing) { 392806c3fb27SDimitry Andric // TODO: If/when other types of memprof cloning are enabled beyond just for 392906c3fb27SDimitry Andric // hot and cold, we will need to change this to individually control the 393006c3fb27SDimitry Andric // AllocationType passed to addStackNodesForMIB during CCG construction. 393106c3fb27SDimitry Andric // The index was set from the option, so these should be in sync. 393206c3fb27SDimitry Andric assert(Index.withSupportsHotColdNew() == SupportsHotColdNew); 393306c3fb27SDimitry Andric if (!SupportsHotColdNew) 393406c3fb27SDimitry Andric return; 393506c3fb27SDimitry Andric 393606c3fb27SDimitry Andric IndexCallsiteContextGraph CCG(Index, isPrevailing); 393706c3fb27SDimitry Andric CCG.process(); 393806c3fb27SDimitry Andric } 3939