1*06c3fb27SDimitry Andric //==-- MemProfContextDisambiguation.cpp - Disambiguate contexts -------------=// 2*06c3fb27SDimitry Andric // 3*06c3fb27SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*06c3fb27SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5*06c3fb27SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*06c3fb27SDimitry Andric // 7*06c3fb27SDimitry Andric //===----------------------------------------------------------------------===// 8*06c3fb27SDimitry Andric // 9*06c3fb27SDimitry Andric // This file implements support for context disambiguation of allocation 10*06c3fb27SDimitry Andric // calls for profile guided heap optimization. Specifically, it uses Memprof 11*06c3fb27SDimitry Andric // profiles which indicate context specific allocation behavior (currently 12*06c3fb27SDimitry Andric // distinguishing cold vs hot memory allocations). Cloning is performed to 13*06c3fb27SDimitry Andric // expose the cold allocation call contexts, and the allocation calls are 14*06c3fb27SDimitry Andric // subsequently annotated with an attribute for later transformation. 15*06c3fb27SDimitry Andric // 16*06c3fb27SDimitry Andric // The transformations can be performed either directly on IR (regular LTO), or 17*06c3fb27SDimitry Andric // on a ThinLTO index (and later applied to the IR during the ThinLTO backend). 18*06c3fb27SDimitry Andric // Both types of LTO operate on the same base graph representation, which 19*06c3fb27SDimitry Andric // uses CRTP to support either IR or Index formats. 
20*06c3fb27SDimitry Andric // 21*06c3fb27SDimitry Andric //===----------------------------------------------------------------------===// 22*06c3fb27SDimitry Andric 23*06c3fb27SDimitry Andric #include "llvm/Transforms/IPO/MemProfContextDisambiguation.h" 24*06c3fb27SDimitry Andric #include "llvm/ADT/DenseMap.h" 25*06c3fb27SDimitry Andric #include "llvm/ADT/DenseSet.h" 26*06c3fb27SDimitry Andric #include "llvm/ADT/MapVector.h" 27*06c3fb27SDimitry Andric #include "llvm/ADT/SetOperations.h" 28*06c3fb27SDimitry Andric #include "llvm/ADT/SmallPtrSet.h" 29*06c3fb27SDimitry Andric #include "llvm/ADT/SmallSet.h" 30*06c3fb27SDimitry Andric #include "llvm/ADT/SmallVector.h" 31*06c3fb27SDimitry Andric #include "llvm/ADT/Statistic.h" 32*06c3fb27SDimitry Andric #include "llvm/Analysis/MemoryProfileInfo.h" 33*06c3fb27SDimitry Andric #include "llvm/Analysis/ModuleSummaryAnalysis.h" 34*06c3fb27SDimitry Andric #include "llvm/Analysis/OptimizationRemarkEmitter.h" 35*06c3fb27SDimitry Andric #include "llvm/Bitcode/BitcodeReader.h" 36*06c3fb27SDimitry Andric #include "llvm/IR/Constants.h" 37*06c3fb27SDimitry Andric #include "llvm/IR/Instructions.h" 38*06c3fb27SDimitry Andric #include "llvm/IR/Module.h" 39*06c3fb27SDimitry Andric #include "llvm/IR/ModuleSummaryIndex.h" 40*06c3fb27SDimitry Andric #include "llvm/Pass.h" 41*06c3fb27SDimitry Andric #include "llvm/Support/CommandLine.h" 42*06c3fb27SDimitry Andric #include "llvm/Support/FileSystem.h" 43*06c3fb27SDimitry Andric #include "llvm/Support/GraphWriter.h" 44*06c3fb27SDimitry Andric #include "llvm/Support/raw_ostream.h" 45*06c3fb27SDimitry Andric #include "llvm/Transforms/IPO.h" 46*06c3fb27SDimitry Andric #include "llvm/Transforms/Utils/Cloning.h" 47*06c3fb27SDimitry Andric #include <sstream> 48*06c3fb27SDimitry Andric #include <vector> 49*06c3fb27SDimitry Andric using namespace llvm; 50*06c3fb27SDimitry Andric using namespace llvm::memprof; 51*06c3fb27SDimitry Andric 52*06c3fb27SDimitry Andric #define DEBUG_TYPE 
"memprof-context-disambiguation" 53*06c3fb27SDimitry Andric 54*06c3fb27SDimitry Andric STATISTIC(FunctionClonesAnalysis, 55*06c3fb27SDimitry Andric "Number of function clones created during whole program analysis"); 56*06c3fb27SDimitry Andric STATISTIC(FunctionClonesThinBackend, 57*06c3fb27SDimitry Andric "Number of function clones created during ThinLTO backend"); 58*06c3fb27SDimitry Andric STATISTIC(FunctionsClonedThinBackend, 59*06c3fb27SDimitry Andric "Number of functions that had clones created during ThinLTO backend"); 60*06c3fb27SDimitry Andric STATISTIC(AllocTypeNotCold, "Number of not cold static allocations (possibly " 61*06c3fb27SDimitry Andric "cloned) during whole program analysis"); 62*06c3fb27SDimitry Andric STATISTIC(AllocTypeCold, "Number of cold static allocations (possibly cloned) " 63*06c3fb27SDimitry Andric "during whole program analysis"); 64*06c3fb27SDimitry Andric STATISTIC(AllocTypeNotColdThinBackend, 65*06c3fb27SDimitry Andric "Number of not cold static allocations (possibly cloned) during " 66*06c3fb27SDimitry Andric "ThinLTO backend"); 67*06c3fb27SDimitry Andric STATISTIC(AllocTypeColdThinBackend, "Number of cold static allocations " 68*06c3fb27SDimitry Andric "(possibly cloned) during ThinLTO backend"); 69*06c3fb27SDimitry Andric STATISTIC(OrigAllocsThinBackend, 70*06c3fb27SDimitry Andric "Number of original (not cloned) allocations with memprof profiles " 71*06c3fb27SDimitry Andric "during ThinLTO backend"); 72*06c3fb27SDimitry Andric STATISTIC( 73*06c3fb27SDimitry Andric AllocVersionsThinBackend, 74*06c3fb27SDimitry Andric "Number of allocation versions (including clones) during ThinLTO backend"); 75*06c3fb27SDimitry Andric STATISTIC(MaxAllocVersionsThinBackend, 76*06c3fb27SDimitry Andric "Maximum number of allocation versions created for an original " 77*06c3fb27SDimitry Andric "allocation during ThinLTO backend"); 78*06c3fb27SDimitry Andric STATISTIC(UnclonableAllocsThinBackend, 79*06c3fb27SDimitry Andric "Number of unclonable 
ambigous allocations during ThinLTO backend"); 80*06c3fb27SDimitry Andric 81*06c3fb27SDimitry Andric static cl::opt<std::string> DotFilePathPrefix( 82*06c3fb27SDimitry Andric "memprof-dot-file-path-prefix", cl::init(""), cl::Hidden, 83*06c3fb27SDimitry Andric cl::value_desc("filename"), 84*06c3fb27SDimitry Andric cl::desc("Specify the path prefix of the MemProf dot files.")); 85*06c3fb27SDimitry Andric 86*06c3fb27SDimitry Andric static cl::opt<bool> ExportToDot("memprof-export-to-dot", cl::init(false), 87*06c3fb27SDimitry Andric cl::Hidden, 88*06c3fb27SDimitry Andric cl::desc("Export graph to dot files.")); 89*06c3fb27SDimitry Andric 90*06c3fb27SDimitry Andric static cl::opt<bool> 91*06c3fb27SDimitry Andric DumpCCG("memprof-dump-ccg", cl::init(false), cl::Hidden, 92*06c3fb27SDimitry Andric cl::desc("Dump CallingContextGraph to stdout after each stage.")); 93*06c3fb27SDimitry Andric 94*06c3fb27SDimitry Andric static cl::opt<bool> 95*06c3fb27SDimitry Andric VerifyCCG("memprof-verify-ccg", cl::init(false), cl::Hidden, 96*06c3fb27SDimitry Andric cl::desc("Perform verification checks on CallingContextGraph.")); 97*06c3fb27SDimitry Andric 98*06c3fb27SDimitry Andric static cl::opt<bool> 99*06c3fb27SDimitry Andric VerifyNodes("memprof-verify-nodes", cl::init(false), cl::Hidden, 100*06c3fb27SDimitry Andric cl::desc("Perform frequent verification checks on nodes.")); 101*06c3fb27SDimitry Andric 102*06c3fb27SDimitry Andric static cl::opt<std::string> MemProfImportSummary( 103*06c3fb27SDimitry Andric "memprof-import-summary", 104*06c3fb27SDimitry Andric cl::desc("Import summary to use for testing the ThinLTO backend via opt"), 105*06c3fb27SDimitry Andric cl::Hidden); 106*06c3fb27SDimitry Andric 107*06c3fb27SDimitry Andric // Indicate we are linking with an allocator that supports hot/cold operator 108*06c3fb27SDimitry Andric // new interfaces. 
// NOTE(review): deliberately not 'static'/file-local — presumably referenced
// from other translation units (declared extern elsewhere); confirm before
// changing linkage.
cl::opt<bool> SupportsHotColdNew(
    "supports-hot-cold-new", cl::init(false), cl::Hidden,
    cl::desc("Linking with hot/cold operator new interfaces"));

namespace {
/// CRTP base for graphs built from either IR or ThinLTO summary index.
///
/// The graph represents the call contexts in all memprof metadata on allocation
/// calls, with nodes for the allocations themselves, as well as for the calls
/// in each context. The graph is initially built from the allocation memprof
/// metadata (or summary) MIBs. It is then updated to match calls with callsite
/// metadata onto the nodes, updating it to reflect any inlining performed on
/// those calls.
///
/// Each MIB (representing an allocation's call context with allocation
/// behavior) is assigned a unique context id during the graph build. The edges
/// and nodes in the graph are decorated with the context ids they carry. This
/// is used to correctly update the graph when cloning is performed so that we
/// can uniquify the context for a single (possibly cloned) allocation.
128*06c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 129*06c3fb27SDimitry Andric class CallsiteContextGraph { 130*06c3fb27SDimitry Andric public: 131*06c3fb27SDimitry Andric CallsiteContextGraph() = default; 132*06c3fb27SDimitry Andric CallsiteContextGraph(const CallsiteContextGraph &) = default; 133*06c3fb27SDimitry Andric CallsiteContextGraph(CallsiteContextGraph &&) = default; 134*06c3fb27SDimitry Andric 135*06c3fb27SDimitry Andric /// Main entry point to perform analysis and transformations on graph. 136*06c3fb27SDimitry Andric bool process(); 137*06c3fb27SDimitry Andric 138*06c3fb27SDimitry Andric /// Perform cloning on the graph necessary to uniquely identify the allocation 139*06c3fb27SDimitry Andric /// behavior of an allocation based on its context. 140*06c3fb27SDimitry Andric void identifyClones(); 141*06c3fb27SDimitry Andric 142*06c3fb27SDimitry Andric /// Assign callsite clones to functions, cloning functions as needed to 143*06c3fb27SDimitry Andric /// accommodate the combinations of their callsite clones reached by callers. 144*06c3fb27SDimitry Andric /// For regular LTO this clones functions and callsites in the IR, but for 145*06c3fb27SDimitry Andric /// ThinLTO the cloning decisions are noted in the summaries and later applied 146*06c3fb27SDimitry Andric /// in applyImport. 
147*06c3fb27SDimitry Andric bool assignFunctions(); 148*06c3fb27SDimitry Andric 149*06c3fb27SDimitry Andric void dump() const; 150*06c3fb27SDimitry Andric void print(raw_ostream &OS) const; 151*06c3fb27SDimitry Andric 152*06c3fb27SDimitry Andric friend raw_ostream &operator<<(raw_ostream &OS, 153*06c3fb27SDimitry Andric const CallsiteContextGraph &CCG) { 154*06c3fb27SDimitry Andric CCG.print(OS); 155*06c3fb27SDimitry Andric return OS; 156*06c3fb27SDimitry Andric } 157*06c3fb27SDimitry Andric 158*06c3fb27SDimitry Andric friend struct GraphTraits< 159*06c3fb27SDimitry Andric const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>; 160*06c3fb27SDimitry Andric friend struct DOTGraphTraits< 161*06c3fb27SDimitry Andric const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>; 162*06c3fb27SDimitry Andric 163*06c3fb27SDimitry Andric void exportToDot(std::string Label) const; 164*06c3fb27SDimitry Andric 165*06c3fb27SDimitry Andric /// Represents a function clone via FuncTy pointer and clone number pair. 166*06c3fb27SDimitry Andric struct FuncInfo final 167*06c3fb27SDimitry Andric : public std::pair<FuncTy *, unsigned /*Clone number*/> { 168*06c3fb27SDimitry Andric using Base = std::pair<FuncTy *, unsigned>; 169*06c3fb27SDimitry Andric FuncInfo(const Base &B) : Base(B) {} 170*06c3fb27SDimitry Andric FuncInfo(FuncTy *F = nullptr, unsigned CloneNo = 0) : Base(F, CloneNo) {} 171*06c3fb27SDimitry Andric explicit operator bool() const { return this->first != nullptr; } 172*06c3fb27SDimitry Andric FuncTy *func() const { return this->first; } 173*06c3fb27SDimitry Andric unsigned cloneNo() const { return this->second; } 174*06c3fb27SDimitry Andric }; 175*06c3fb27SDimitry Andric 176*06c3fb27SDimitry Andric /// Represents a callsite clone via CallTy and clone number pair. 
177*06c3fb27SDimitry Andric struct CallInfo final : public std::pair<CallTy, unsigned /*Clone number*/> { 178*06c3fb27SDimitry Andric using Base = std::pair<CallTy, unsigned>; 179*06c3fb27SDimitry Andric CallInfo(const Base &B) : Base(B) {} 180*06c3fb27SDimitry Andric CallInfo(CallTy Call = nullptr, unsigned CloneNo = 0) 181*06c3fb27SDimitry Andric : Base(Call, CloneNo) {} 182*06c3fb27SDimitry Andric explicit operator bool() const { return (bool)this->first; } 183*06c3fb27SDimitry Andric CallTy call() const { return this->first; } 184*06c3fb27SDimitry Andric unsigned cloneNo() const { return this->second; } 185*06c3fb27SDimitry Andric void setCloneNo(unsigned N) { this->second = N; } 186*06c3fb27SDimitry Andric void print(raw_ostream &OS) const { 187*06c3fb27SDimitry Andric if (!operator bool()) { 188*06c3fb27SDimitry Andric assert(!cloneNo()); 189*06c3fb27SDimitry Andric OS << "null Call"; 190*06c3fb27SDimitry Andric return; 191*06c3fb27SDimitry Andric } 192*06c3fb27SDimitry Andric call()->print(OS); 193*06c3fb27SDimitry Andric OS << "\t(clone " << cloneNo() << ")"; 194*06c3fb27SDimitry Andric } 195*06c3fb27SDimitry Andric void dump() const { 196*06c3fb27SDimitry Andric print(dbgs()); 197*06c3fb27SDimitry Andric dbgs() << "\n"; 198*06c3fb27SDimitry Andric } 199*06c3fb27SDimitry Andric friend raw_ostream &operator<<(raw_ostream &OS, const CallInfo &Call) { 200*06c3fb27SDimitry Andric Call.print(OS); 201*06c3fb27SDimitry Andric return OS; 202*06c3fb27SDimitry Andric } 203*06c3fb27SDimitry Andric }; 204*06c3fb27SDimitry Andric 205*06c3fb27SDimitry Andric struct ContextEdge; 206*06c3fb27SDimitry Andric 207*06c3fb27SDimitry Andric /// Node in the Callsite Context Graph 208*06c3fb27SDimitry Andric struct ContextNode { 209*06c3fb27SDimitry Andric // Keep this for now since in the IR case where we have an Instruction* it 210*06c3fb27SDimitry Andric // is not as immediately discoverable. 
Used for printing richer information 211*06c3fb27SDimitry Andric // when dumping graph. 212*06c3fb27SDimitry Andric bool IsAllocation; 213*06c3fb27SDimitry Andric 214*06c3fb27SDimitry Andric // Keeps track of when the Call was reset to null because there was 215*06c3fb27SDimitry Andric // recursion. 216*06c3fb27SDimitry Andric bool Recursive = false; 217*06c3fb27SDimitry Andric 218*06c3fb27SDimitry Andric // The corresponding allocation or interior call. 219*06c3fb27SDimitry Andric CallInfo Call; 220*06c3fb27SDimitry Andric 221*06c3fb27SDimitry Andric // For alloc nodes this is a unique id assigned when constructed, and for 222*06c3fb27SDimitry Andric // callsite stack nodes it is the original stack id when the node is 223*06c3fb27SDimitry Andric // constructed from the memprof MIB metadata on the alloc nodes. Note that 224*06c3fb27SDimitry Andric // this is only used when matching callsite metadata onto the stack nodes 225*06c3fb27SDimitry Andric // created when processing the allocation memprof MIBs, and for labeling 226*06c3fb27SDimitry Andric // nodes in the dot graph. Therefore we don't bother to assign a value for 227*06c3fb27SDimitry Andric // clones. 228*06c3fb27SDimitry Andric uint64_t OrigStackOrAllocId = 0; 229*06c3fb27SDimitry Andric 230*06c3fb27SDimitry Andric // This will be formed by ORing together the AllocationType enum values 231*06c3fb27SDimitry Andric // for contexts including this node. 232*06c3fb27SDimitry Andric uint8_t AllocTypes = 0; 233*06c3fb27SDimitry Andric 234*06c3fb27SDimitry Andric // Edges to all callees in the profiled call stacks. 235*06c3fb27SDimitry Andric // TODO: Should this be a map (from Callee node) for more efficient lookup? 236*06c3fb27SDimitry Andric std::vector<std::shared_ptr<ContextEdge>> CalleeEdges; 237*06c3fb27SDimitry Andric 238*06c3fb27SDimitry Andric // Edges to all callers in the profiled call stacks. 239*06c3fb27SDimitry Andric // TODO: Should this be a map (from Caller node) for more efficient lookup? 
240*06c3fb27SDimitry Andric std::vector<std::shared_ptr<ContextEdge>> CallerEdges; 241*06c3fb27SDimitry Andric 242*06c3fb27SDimitry Andric // The set of IDs for contexts including this node. 243*06c3fb27SDimitry Andric DenseSet<uint32_t> ContextIds; 244*06c3fb27SDimitry Andric 245*06c3fb27SDimitry Andric // List of clones of this ContextNode, initially empty. 246*06c3fb27SDimitry Andric std::vector<ContextNode *> Clones; 247*06c3fb27SDimitry Andric 248*06c3fb27SDimitry Andric // If a clone, points to the original uncloned node. 249*06c3fb27SDimitry Andric ContextNode *CloneOf = nullptr; 250*06c3fb27SDimitry Andric 251*06c3fb27SDimitry Andric ContextNode(bool IsAllocation) : IsAllocation(IsAllocation), Call() {} 252*06c3fb27SDimitry Andric 253*06c3fb27SDimitry Andric ContextNode(bool IsAllocation, CallInfo C) 254*06c3fb27SDimitry Andric : IsAllocation(IsAllocation), Call(C) {} 255*06c3fb27SDimitry Andric 256*06c3fb27SDimitry Andric void addClone(ContextNode *Clone) { 257*06c3fb27SDimitry Andric if (CloneOf) { 258*06c3fb27SDimitry Andric CloneOf->Clones.push_back(Clone); 259*06c3fb27SDimitry Andric Clone->CloneOf = CloneOf; 260*06c3fb27SDimitry Andric } else { 261*06c3fb27SDimitry Andric Clones.push_back(Clone); 262*06c3fb27SDimitry Andric assert(!Clone->CloneOf); 263*06c3fb27SDimitry Andric Clone->CloneOf = this; 264*06c3fb27SDimitry Andric } 265*06c3fb27SDimitry Andric } 266*06c3fb27SDimitry Andric 267*06c3fb27SDimitry Andric ContextNode *getOrigNode() { 268*06c3fb27SDimitry Andric if (!CloneOf) 269*06c3fb27SDimitry Andric return this; 270*06c3fb27SDimitry Andric return CloneOf; 271*06c3fb27SDimitry Andric } 272*06c3fb27SDimitry Andric 273*06c3fb27SDimitry Andric void addOrUpdateCallerEdge(ContextNode *Caller, AllocationType AllocType, 274*06c3fb27SDimitry Andric unsigned int ContextId); 275*06c3fb27SDimitry Andric 276*06c3fb27SDimitry Andric ContextEdge *findEdgeFromCallee(const ContextNode *Callee); 277*06c3fb27SDimitry Andric ContextEdge 
*findEdgeFromCaller(const ContextNode *Caller); 278*06c3fb27SDimitry Andric void eraseCalleeEdge(const ContextEdge *Edge); 279*06c3fb27SDimitry Andric void eraseCallerEdge(const ContextEdge *Edge); 280*06c3fb27SDimitry Andric 281*06c3fb27SDimitry Andric void setCall(CallInfo C) { Call = C; } 282*06c3fb27SDimitry Andric 283*06c3fb27SDimitry Andric bool hasCall() const { return (bool)Call.call(); } 284*06c3fb27SDimitry Andric 285*06c3fb27SDimitry Andric void printCall(raw_ostream &OS) const { Call.print(OS); } 286*06c3fb27SDimitry Andric 287*06c3fb27SDimitry Andric // True if this node was effectively removed from the graph, in which case 288*06c3fb27SDimitry Andric // its context id set, caller edges, and callee edges should all be empty. 289*06c3fb27SDimitry Andric bool isRemoved() const { 290*06c3fb27SDimitry Andric assert(ContextIds.empty() == 291*06c3fb27SDimitry Andric (CalleeEdges.empty() && CallerEdges.empty())); 292*06c3fb27SDimitry Andric return ContextIds.empty(); 293*06c3fb27SDimitry Andric } 294*06c3fb27SDimitry Andric 295*06c3fb27SDimitry Andric void dump() const; 296*06c3fb27SDimitry Andric void print(raw_ostream &OS) const; 297*06c3fb27SDimitry Andric 298*06c3fb27SDimitry Andric friend raw_ostream &operator<<(raw_ostream &OS, const ContextNode &Node) { 299*06c3fb27SDimitry Andric Node.print(OS); 300*06c3fb27SDimitry Andric return OS; 301*06c3fb27SDimitry Andric } 302*06c3fb27SDimitry Andric }; 303*06c3fb27SDimitry Andric 304*06c3fb27SDimitry Andric /// Edge in the Callsite Context Graph from a ContextNode N to a caller or 305*06c3fb27SDimitry Andric /// callee. 306*06c3fb27SDimitry Andric struct ContextEdge { 307*06c3fb27SDimitry Andric ContextNode *Callee; 308*06c3fb27SDimitry Andric ContextNode *Caller; 309*06c3fb27SDimitry Andric 310*06c3fb27SDimitry Andric // This will be formed by ORing together the AllocationType enum values 311*06c3fb27SDimitry Andric // for contexts including this edge. 
312*06c3fb27SDimitry Andric uint8_t AllocTypes = 0; 313*06c3fb27SDimitry Andric 314*06c3fb27SDimitry Andric // The set of IDs for contexts including this edge. 315*06c3fb27SDimitry Andric DenseSet<uint32_t> ContextIds; 316*06c3fb27SDimitry Andric 317*06c3fb27SDimitry Andric ContextEdge(ContextNode *Callee, ContextNode *Caller, uint8_t AllocType, 318*06c3fb27SDimitry Andric DenseSet<uint32_t> ContextIds) 319*06c3fb27SDimitry Andric : Callee(Callee), Caller(Caller), AllocTypes(AllocType), 320*06c3fb27SDimitry Andric ContextIds(ContextIds) {} 321*06c3fb27SDimitry Andric 322*06c3fb27SDimitry Andric DenseSet<uint32_t> &getContextIds() { return ContextIds; } 323*06c3fb27SDimitry Andric 324*06c3fb27SDimitry Andric void dump() const; 325*06c3fb27SDimitry Andric void print(raw_ostream &OS) const; 326*06c3fb27SDimitry Andric 327*06c3fb27SDimitry Andric friend raw_ostream &operator<<(raw_ostream &OS, const ContextEdge &Edge) { 328*06c3fb27SDimitry Andric Edge.print(OS); 329*06c3fb27SDimitry Andric return OS; 330*06c3fb27SDimitry Andric } 331*06c3fb27SDimitry Andric }; 332*06c3fb27SDimitry Andric 333*06c3fb27SDimitry Andric /// Helper to remove callee edges that have allocation type None (due to not 334*06c3fb27SDimitry Andric /// carrying any context ids) after transformations. 335*06c3fb27SDimitry Andric void removeNoneTypeCalleeEdges(ContextNode *Node); 336*06c3fb27SDimitry Andric 337*06c3fb27SDimitry Andric protected: 338*06c3fb27SDimitry Andric /// Get a list of nodes corresponding to the stack ids in the given callsite 339*06c3fb27SDimitry Andric /// context. 340*06c3fb27SDimitry Andric template <class NodeT, class IteratorT> 341*06c3fb27SDimitry Andric std::vector<uint64_t> 342*06c3fb27SDimitry Andric getStackIdsWithContextNodes(CallStack<NodeT, IteratorT> &CallsiteContext); 343*06c3fb27SDimitry Andric 344*06c3fb27SDimitry Andric /// Adds nodes for the given allocation and any stack ids on its memprof MIB 345*06c3fb27SDimitry Andric /// metadata (or summary). 
346*06c3fb27SDimitry Andric ContextNode *addAllocNode(CallInfo Call, const FuncTy *F); 347*06c3fb27SDimitry Andric 348*06c3fb27SDimitry Andric /// Adds nodes for the given MIB stack ids. 349*06c3fb27SDimitry Andric template <class NodeT, class IteratorT> 350*06c3fb27SDimitry Andric void addStackNodesForMIB(ContextNode *AllocNode, 351*06c3fb27SDimitry Andric CallStack<NodeT, IteratorT> &StackContext, 352*06c3fb27SDimitry Andric CallStack<NodeT, IteratorT> &CallsiteContext, 353*06c3fb27SDimitry Andric AllocationType AllocType); 354*06c3fb27SDimitry Andric 355*06c3fb27SDimitry Andric /// Matches all callsite metadata (or summary) to the nodes created for 356*06c3fb27SDimitry Andric /// allocation memprof MIB metadata, synthesizing new nodes to reflect any 357*06c3fb27SDimitry Andric /// inlining performed on those callsite instructions. 358*06c3fb27SDimitry Andric void updateStackNodes(); 359*06c3fb27SDimitry Andric 360*06c3fb27SDimitry Andric /// Update graph to conservatively handle any callsite stack nodes that target 361*06c3fb27SDimitry Andric /// multiple different callee target functions. 362*06c3fb27SDimitry Andric void handleCallsitesWithMultipleTargets(); 363*06c3fb27SDimitry Andric 364*06c3fb27SDimitry Andric /// Save lists of calls with MemProf metadata in each function, for faster 365*06c3fb27SDimitry Andric /// iteration. 366*06c3fb27SDimitry Andric std::vector<std::pair<FuncTy *, std::vector<CallInfo>>> 367*06c3fb27SDimitry Andric FuncToCallsWithMetadata; 368*06c3fb27SDimitry Andric 369*06c3fb27SDimitry Andric /// Map from callsite node to the enclosing caller function. 
370*06c3fb27SDimitry Andric std::map<const ContextNode *, const FuncTy *> NodeToCallingFunc; 371*06c3fb27SDimitry Andric 372*06c3fb27SDimitry Andric private: 373*06c3fb27SDimitry Andric using EdgeIter = typename std::vector<std::shared_ptr<ContextEdge>>::iterator; 374*06c3fb27SDimitry Andric 375*06c3fb27SDimitry Andric using CallContextInfo = std::tuple<CallTy, std::vector<uint64_t>, 376*06c3fb27SDimitry Andric const FuncTy *, DenseSet<uint32_t>>; 377*06c3fb27SDimitry Andric 378*06c3fb27SDimitry Andric /// Assigns the given Node to calls at or inlined into the location with 379*06c3fb27SDimitry Andric /// the Node's stack id, after post order traversing and processing its 380*06c3fb27SDimitry Andric /// caller nodes. Uses the call information recorded in the given 381*06c3fb27SDimitry Andric /// StackIdToMatchingCalls map, and creates new nodes for inlined sequences 382*06c3fb27SDimitry Andric /// as needed. Called by updateStackNodes which sets up the given 383*06c3fb27SDimitry Andric /// StackIdToMatchingCalls map. 384*06c3fb27SDimitry Andric void assignStackNodesPostOrder( 385*06c3fb27SDimitry Andric ContextNode *Node, DenseSet<const ContextNode *> &Visited, 386*06c3fb27SDimitry Andric DenseMap<uint64_t, std::vector<CallContextInfo>> &StackIdToMatchingCalls); 387*06c3fb27SDimitry Andric 388*06c3fb27SDimitry Andric /// Duplicates the given set of context ids, updating the provided 389*06c3fb27SDimitry Andric /// map from each original id with the newly generated context ids, 390*06c3fb27SDimitry Andric /// and returning the new duplicated id set. 391*06c3fb27SDimitry Andric DenseSet<uint32_t> duplicateContextIds( 392*06c3fb27SDimitry Andric const DenseSet<uint32_t> &StackSequenceContextIds, 393*06c3fb27SDimitry Andric DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds); 394*06c3fb27SDimitry Andric 395*06c3fb27SDimitry Andric /// Propagates all duplicated context ids across the graph. 
396*06c3fb27SDimitry Andric void propagateDuplicateContextIds( 397*06c3fb27SDimitry Andric const DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds); 398*06c3fb27SDimitry Andric 399*06c3fb27SDimitry Andric /// Connect the NewNode to OrigNode's callees if TowardsCallee is true, 400*06c3fb27SDimitry Andric /// else to its callers. Also updates OrigNode's edges to remove any context 401*06c3fb27SDimitry Andric /// ids moved to the newly created edge. 402*06c3fb27SDimitry Andric void connectNewNode(ContextNode *NewNode, ContextNode *OrigNode, 403*06c3fb27SDimitry Andric bool TowardsCallee); 404*06c3fb27SDimitry Andric 405*06c3fb27SDimitry Andric /// Get the stack id corresponding to the given Id or Index (for IR this will 406*06c3fb27SDimitry Andric /// return itself, for a summary index this will return the id recorded in the 407*06c3fb27SDimitry Andric /// index for that stack id index value). 408*06c3fb27SDimitry Andric uint64_t getStackId(uint64_t IdOrIndex) const { 409*06c3fb27SDimitry Andric return static_cast<const DerivedCCG *>(this)->getStackId(IdOrIndex); 410*06c3fb27SDimitry Andric } 411*06c3fb27SDimitry Andric 412*06c3fb27SDimitry Andric /// Returns true if the given call targets the given function. 413*06c3fb27SDimitry Andric bool calleeMatchesFunc(CallTy Call, const FuncTy *Func) { 414*06c3fb27SDimitry Andric return static_cast<DerivedCCG *>(this)->calleeMatchesFunc(Call, Func); 415*06c3fb27SDimitry Andric } 416*06c3fb27SDimitry Andric 417*06c3fb27SDimitry Andric /// Get a list of nodes corresponding to the stack ids in the given 418*06c3fb27SDimitry Andric /// callsite's context. 
419*06c3fb27SDimitry Andric std::vector<uint64_t> getStackIdsWithContextNodesForCall(CallTy Call) { 420*06c3fb27SDimitry Andric return static_cast<DerivedCCG *>(this)->getStackIdsWithContextNodesForCall( 421*06c3fb27SDimitry Andric Call); 422*06c3fb27SDimitry Andric } 423*06c3fb27SDimitry Andric 424*06c3fb27SDimitry Andric /// Get the last stack id in the context for callsite. 425*06c3fb27SDimitry Andric uint64_t getLastStackId(CallTy Call) { 426*06c3fb27SDimitry Andric return static_cast<DerivedCCG *>(this)->getLastStackId(Call); 427*06c3fb27SDimitry Andric } 428*06c3fb27SDimitry Andric 429*06c3fb27SDimitry Andric /// Update the allocation call to record type of allocated memory. 430*06c3fb27SDimitry Andric void updateAllocationCall(CallInfo &Call, AllocationType AllocType) { 431*06c3fb27SDimitry Andric AllocType == AllocationType::Cold ? AllocTypeCold++ : AllocTypeNotCold++; 432*06c3fb27SDimitry Andric static_cast<DerivedCCG *>(this)->updateAllocationCall(Call, AllocType); 433*06c3fb27SDimitry Andric } 434*06c3fb27SDimitry Andric 435*06c3fb27SDimitry Andric /// Update non-allocation call to invoke (possibly cloned) function 436*06c3fb27SDimitry Andric /// CalleeFunc. 437*06c3fb27SDimitry Andric void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc) { 438*06c3fb27SDimitry Andric static_cast<DerivedCCG *>(this)->updateCall(CallerCall, CalleeFunc); 439*06c3fb27SDimitry Andric } 440*06c3fb27SDimitry Andric 441*06c3fb27SDimitry Andric /// Clone the given function for the given callsite, recording mapping of all 442*06c3fb27SDimitry Andric /// of the functions tracked calls to their new versions in the CallMap. 443*06c3fb27SDimitry Andric /// Assigns new clones to clone number CloneNo. 
  /// CRTP dispatch: defers cloning of Func for the given callsite Call to the
  /// IR- or index-based derived class implementation.
  FuncInfo cloneFunctionForCallsite(
      FuncInfo &Func, CallInfo &Call, std::map<CallInfo, CallInfo> &CallMap,
      std::vector<CallInfo> &CallsWithMetadataInFunc, unsigned CloneNo) {
    return static_cast<DerivedCCG *>(this)->cloneFunctionForCallsite(
        Func, Call, CallMap, CallsWithMetadataInFunc, CloneNo);
  }

  /// Gets a label to use in the dot graph for the given call clone in the
  /// given function (CRTP dispatch to the derived class).
  std::string getLabel(const FuncTy *Func, const CallTy Call,
                       unsigned CloneNo) const {
    return static_cast<const DerivedCCG *>(this)->getLabel(Func, Call, CloneNo);
  }

  /// Helpers to find the node corresponding to the given call or stackid.
  ContextNode *getNodeForInst(const CallInfo &C);
  ContextNode *getNodeForAlloc(const CallInfo &C);
  ContextNode *getNodeForStackId(uint64_t StackId);

  /// Removes the node information recorded for the given call.
  void unsetNodeForInst(const CallInfo &C);

  /// Computes the alloc type corresponding to the given context ids, by
  /// unioning their recorded alloc types.
  uint8_t computeAllocType(DenseSet<uint32_t> &ContextIds);

  /// Returns the allocation type of the intersection of the contexts of two
  /// nodes (based on their provided context id sets), optimized for the case
  /// when Node1Ids is smaller than Node2Ids.
  uint8_t intersectAllocTypesImpl(const DenseSet<uint32_t> &Node1Ids,
                                  const DenseSet<uint32_t> &Node2Ids);

  /// Returns the allocation type of the intersection of the contexts of two
  /// nodes (based on their provided context id sets).
  uint8_t intersectAllocTypes(const DenseSet<uint32_t> &Node1Ids,
                              const DenseSet<uint32_t> &Node2Ids);

  /// Create a clone of Edge's callee and move Edge to that new callee node,
  /// performing the necessary context id and allocation type updates.
  /// If callee's caller edge iterator is supplied, it is updated when removing
  /// the edge from that list.
  ContextNode *
  moveEdgeToNewCalleeClone(const std::shared_ptr<ContextEdge> &Edge,
                           EdgeIter *CallerEdgeI = nullptr);

  /// Change the callee of Edge to existing callee clone NewCallee, performing
  /// the necessary context id and allocation type updates.
  /// If callee's caller edge iterator is supplied, it is updated when removing
  /// the edge from that list.
  void moveEdgeToExistingCalleeClone(const std::shared_ptr<ContextEdge> &Edge,
                                     ContextNode *NewCallee,
                                     EdgeIter *CallerEdgeI = nullptr,
                                     bool NewClone = false);

  /// Recursively perform cloning on the graph for the given Node and its
  /// callers, in order to uniquely identify the allocation behavior of an
  /// allocation given its context.
  void identifyClones(ContextNode *Node,
                      DenseSet<const ContextNode *> &Visited);

  /// Map from each context ID to the AllocationType assigned to that context.
  std::map<uint32_t, AllocationType> ContextIdToAllocationType;

  /// Identifies the context node created for a stack id when adding the MIB
  /// contexts to the graph. This is used to locate the context nodes when
  /// trying to assign the corresponding callsites with those stack ids to
  /// these nodes.
  std::map<uint64_t, ContextNode *> StackEntryIdToContextNodeMap;

  /// Maps to track the calls to their corresponding nodes in the graph; a
  /// call is recorded in at most one of the two maps.
  MapVector<CallInfo, ContextNode *> AllocationCallToContextNodeMap;
  MapVector<CallInfo, ContextNode *> NonAllocationCallToContextNodeMap;

  /// Owner of all ContextNode unique_ptrs.
  std::vector<std::unique_ptr<ContextNode>> NodeOwner;

  /// Perform sanity checks on graph when requested.
  void check() const;

  /// Keeps track of the last unique context id assigned.
  unsigned int LastContextId = 0;
};

/// Convenience aliases to refer to the nested graph types without spelling
/// out the full CallsiteContextGraph instantiation each time.
template <typename DerivedCCG, typename FuncTy, typename CallTy>
using ContextNode =
    typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode;
template <typename DerivedCCG, typename FuncTy, typename CallTy>
using ContextEdge =
    typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge;
template <typename DerivedCCG, typename FuncTy, typename CallTy>
using FuncInfo =
    typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::FuncInfo;
template <typename DerivedCCG, typename FuncTy, typename CallTy>
using CallInfo =
    typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::CallInfo;

/// CRTP derived class for graphs built from IR (regular LTO).
class ModuleCallsiteContextGraph
    : public CallsiteContextGraph<ModuleCallsiteContextGraph, Function,
                                  Instruction *> {
public:
  ModuleCallsiteContextGraph(
      Module &M,
      function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter);

private:
  // Allow the CRTP base to call the private implementation hooks below.
  friend CallsiteContextGraph<ModuleCallsiteContextGraph, Function,
                              Instruction *>;

  // Implementations of the interface expected by the CRTP base class,
  // operating on IR Instructions and Functions.
  uint64_t getStackId(uint64_t IdOrIndex) const;
  bool calleeMatchesFunc(Instruction *Call, const Function *Func);
  uint64_t getLastStackId(Instruction *Call);
  std::vector<uint64_t> getStackIdsWithContextNodesForCall(Instruction *Call);
  void updateAllocationCall(CallInfo &Call, AllocationType AllocType);
  void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc);
  CallsiteContextGraph<ModuleCallsiteContextGraph, Function,
                       Instruction *>::FuncInfo
  cloneFunctionForCallsite(FuncInfo &Func, CallInfo &Call,
                           std::map<CallInfo, CallInfo> &CallMap,
                           std::vector<CallInfo> &CallsWithMetadataInFunc,
                           unsigned CloneNo);
  std::string getLabel(const Function *Func, const Instruction *Call,
                       unsigned CloneNo) const;

  const Module &Mod;
  // Callback used to obtain an OptimizationRemarkEmitter for a Function.
  function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter;
};
Andric /// Represents a call in the summary index graph, which can either be an 573*06c3fb27SDimitry Andric /// allocation or an interior callsite node in an allocation's context. 574*06c3fb27SDimitry Andric /// Holds a pointer to the corresponding data structure in the index. 575*06c3fb27SDimitry Andric struct IndexCall : public PointerUnion<CallsiteInfo *, AllocInfo *> { 576*06c3fb27SDimitry Andric IndexCall() : PointerUnion() {} 577*06c3fb27SDimitry Andric IndexCall(std::nullptr_t) : IndexCall() {} 578*06c3fb27SDimitry Andric IndexCall(CallsiteInfo *StackNode) : PointerUnion(StackNode) {} 579*06c3fb27SDimitry Andric IndexCall(AllocInfo *AllocNode) : PointerUnion(AllocNode) {} 580*06c3fb27SDimitry Andric IndexCall(PointerUnion PT) : PointerUnion(PT) {} 581*06c3fb27SDimitry Andric 582*06c3fb27SDimitry Andric IndexCall *operator->() { return this; } 583*06c3fb27SDimitry Andric 584*06c3fb27SDimitry Andric PointerUnion<CallsiteInfo *, AllocInfo *> getBase() const { return *this; } 585*06c3fb27SDimitry Andric 586*06c3fb27SDimitry Andric void print(raw_ostream &OS) const { 587*06c3fb27SDimitry Andric if (auto *AI = llvm::dyn_cast_if_present<AllocInfo *>(getBase())) { 588*06c3fb27SDimitry Andric OS << *AI; 589*06c3fb27SDimitry Andric } else { 590*06c3fb27SDimitry Andric auto *CI = llvm::dyn_cast_if_present<CallsiteInfo *>(getBase()); 591*06c3fb27SDimitry Andric assert(CI); 592*06c3fb27SDimitry Andric OS << *CI; 593*06c3fb27SDimitry Andric } 594*06c3fb27SDimitry Andric } 595*06c3fb27SDimitry Andric }; 596*06c3fb27SDimitry Andric 597*06c3fb27SDimitry Andric /// CRTP derived class for graphs built from summary index (ThinLTO). 
598*06c3fb27SDimitry Andric class IndexCallsiteContextGraph 599*06c3fb27SDimitry Andric : public CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary, 600*06c3fb27SDimitry Andric IndexCall> { 601*06c3fb27SDimitry Andric public: 602*06c3fb27SDimitry Andric IndexCallsiteContextGraph( 603*06c3fb27SDimitry Andric ModuleSummaryIndex &Index, 604*06c3fb27SDimitry Andric function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)> 605*06c3fb27SDimitry Andric isPrevailing); 606*06c3fb27SDimitry Andric 607*06c3fb27SDimitry Andric private: 608*06c3fb27SDimitry Andric friend CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary, 609*06c3fb27SDimitry Andric IndexCall>; 610*06c3fb27SDimitry Andric 611*06c3fb27SDimitry Andric uint64_t getStackId(uint64_t IdOrIndex) const; 612*06c3fb27SDimitry Andric bool calleeMatchesFunc(IndexCall &Call, const FunctionSummary *Func); 613*06c3fb27SDimitry Andric uint64_t getLastStackId(IndexCall &Call); 614*06c3fb27SDimitry Andric std::vector<uint64_t> getStackIdsWithContextNodesForCall(IndexCall &Call); 615*06c3fb27SDimitry Andric void updateAllocationCall(CallInfo &Call, AllocationType AllocType); 616*06c3fb27SDimitry Andric void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc); 617*06c3fb27SDimitry Andric CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary, 618*06c3fb27SDimitry Andric IndexCall>::FuncInfo 619*06c3fb27SDimitry Andric cloneFunctionForCallsite(FuncInfo &Func, CallInfo &Call, 620*06c3fb27SDimitry Andric std::map<CallInfo, CallInfo> &CallMap, 621*06c3fb27SDimitry Andric std::vector<CallInfo> &CallsWithMetadataInFunc, 622*06c3fb27SDimitry Andric unsigned CloneNo); 623*06c3fb27SDimitry Andric std::string getLabel(const FunctionSummary *Func, const IndexCall &Call, 624*06c3fb27SDimitry Andric unsigned CloneNo) const; 625*06c3fb27SDimitry Andric 626*06c3fb27SDimitry Andric // Saves mapping from function summaries containing memprof records back to 627*06c3fb27SDimitry Andric // its VI, for use 
in checking and debugging. 628*06c3fb27SDimitry Andric std::map<const FunctionSummary *, ValueInfo> FSToVIMap; 629*06c3fb27SDimitry Andric 630*06c3fb27SDimitry Andric const ModuleSummaryIndex &Index; 631*06c3fb27SDimitry Andric }; 632*06c3fb27SDimitry Andric } // namespace 633*06c3fb27SDimitry Andric 634*06c3fb27SDimitry Andric namespace llvm { 635*06c3fb27SDimitry Andric template <> 636*06c3fb27SDimitry Andric struct DenseMapInfo<typename CallsiteContextGraph< 637*06c3fb27SDimitry Andric ModuleCallsiteContextGraph, Function, Instruction *>::CallInfo> 638*06c3fb27SDimitry Andric : public DenseMapInfo<std::pair<Instruction *, unsigned>> {}; 639*06c3fb27SDimitry Andric template <> 640*06c3fb27SDimitry Andric struct DenseMapInfo<typename CallsiteContextGraph< 641*06c3fb27SDimitry Andric IndexCallsiteContextGraph, FunctionSummary, IndexCall>::CallInfo> 642*06c3fb27SDimitry Andric : public DenseMapInfo<std::pair<IndexCall, unsigned>> {}; 643*06c3fb27SDimitry Andric template <> 644*06c3fb27SDimitry Andric struct DenseMapInfo<IndexCall> 645*06c3fb27SDimitry Andric : public DenseMapInfo<PointerUnion<CallsiteInfo *, AllocInfo *>> {}; 646*06c3fb27SDimitry Andric } // end namespace llvm 647*06c3fb27SDimitry Andric 648*06c3fb27SDimitry Andric namespace { 649*06c3fb27SDimitry Andric 650*06c3fb27SDimitry Andric struct FieldSeparator { 651*06c3fb27SDimitry Andric bool Skip = true; 652*06c3fb27SDimitry Andric const char *Sep; 653*06c3fb27SDimitry Andric 654*06c3fb27SDimitry Andric FieldSeparator(const char *Sep = ", ") : Sep(Sep) {} 655*06c3fb27SDimitry Andric }; 656*06c3fb27SDimitry Andric 657*06c3fb27SDimitry Andric raw_ostream &operator<<(raw_ostream &OS, FieldSeparator &FS) { 658*06c3fb27SDimitry Andric if (FS.Skip) { 659*06c3fb27SDimitry Andric FS.Skip = false; 660*06c3fb27SDimitry Andric return OS; 661*06c3fb27SDimitry Andric } 662*06c3fb27SDimitry Andric return OS << FS.Sep; 663*06c3fb27SDimitry Andric } 664*06c3fb27SDimitry Andric 665*06c3fb27SDimitry Andric // Map the 
uint8_t alloc types (which may contain NotCold|Cold) to the alloc 666*06c3fb27SDimitry Andric // type we should actually use on the corresponding allocation. 667*06c3fb27SDimitry Andric // If we can't clone a node that has NotCold+Cold alloc type, we will fall 668*06c3fb27SDimitry Andric // back to using NotCold. So don't bother cloning to distinguish NotCold+Cold 669*06c3fb27SDimitry Andric // from NotCold. 670*06c3fb27SDimitry Andric AllocationType allocTypeToUse(uint8_t AllocTypes) { 671*06c3fb27SDimitry Andric assert(AllocTypes != (uint8_t)AllocationType::None); 672*06c3fb27SDimitry Andric if (AllocTypes == 673*06c3fb27SDimitry Andric ((uint8_t)AllocationType::NotCold | (uint8_t)AllocationType::Cold)) 674*06c3fb27SDimitry Andric return AllocationType::NotCold; 675*06c3fb27SDimitry Andric else 676*06c3fb27SDimitry Andric return (AllocationType)AllocTypes; 677*06c3fb27SDimitry Andric } 678*06c3fb27SDimitry Andric 679*06c3fb27SDimitry Andric // Helper to check if the alloc types for all edges recorded in the 680*06c3fb27SDimitry Andric // InAllocTypes vector match the alloc types for all edges in the Edges 681*06c3fb27SDimitry Andric // vector. 
682*06c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 683*06c3fb27SDimitry Andric bool allocTypesMatch( 684*06c3fb27SDimitry Andric const std::vector<uint8_t> &InAllocTypes, 685*06c3fb27SDimitry Andric const std::vector<std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>> 686*06c3fb27SDimitry Andric &Edges) { 687*06c3fb27SDimitry Andric return std::equal( 688*06c3fb27SDimitry Andric InAllocTypes.begin(), InAllocTypes.end(), Edges.begin(), 689*06c3fb27SDimitry Andric [](const uint8_t &l, 690*06c3fb27SDimitry Andric const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &r) { 691*06c3fb27SDimitry Andric // Can share if one of the edges is None type - don't 692*06c3fb27SDimitry Andric // care about the type along that edge as it doesn't 693*06c3fb27SDimitry Andric // exist for those context ids. 694*06c3fb27SDimitry Andric if (l == (uint8_t)AllocationType::None || 695*06c3fb27SDimitry Andric r->AllocTypes == (uint8_t)AllocationType::None) 696*06c3fb27SDimitry Andric return true; 697*06c3fb27SDimitry Andric return allocTypeToUse(l) == allocTypeToUse(r->AllocTypes); 698*06c3fb27SDimitry Andric }); 699*06c3fb27SDimitry Andric } 700*06c3fb27SDimitry Andric 701*06c3fb27SDimitry Andric } // end anonymous namespace 702*06c3fb27SDimitry Andric 703*06c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 704*06c3fb27SDimitry Andric typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode * 705*06c3fb27SDimitry Andric CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForInst( 706*06c3fb27SDimitry Andric const CallInfo &C) { 707*06c3fb27SDimitry Andric ContextNode *Node = getNodeForAlloc(C); 708*06c3fb27SDimitry Andric if (Node) 709*06c3fb27SDimitry Andric return Node; 710*06c3fb27SDimitry Andric 711*06c3fb27SDimitry Andric return NonAllocationCallToContextNodeMap.lookup(C); 712*06c3fb27SDimitry Andric } 713*06c3fb27SDimitry Andric 714*06c3fb27SDimitry Andric template 
<typename DerivedCCG, typename FuncTy, typename CallTy> 715*06c3fb27SDimitry Andric typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode * 716*06c3fb27SDimitry Andric CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForAlloc( 717*06c3fb27SDimitry Andric const CallInfo &C) { 718*06c3fb27SDimitry Andric return AllocationCallToContextNodeMap.lookup(C); 719*06c3fb27SDimitry Andric } 720*06c3fb27SDimitry Andric 721*06c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 722*06c3fb27SDimitry Andric typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode * 723*06c3fb27SDimitry Andric CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForStackId( 724*06c3fb27SDimitry Andric uint64_t StackId) { 725*06c3fb27SDimitry Andric auto StackEntryNode = StackEntryIdToContextNodeMap.find(StackId); 726*06c3fb27SDimitry Andric if (StackEntryNode != StackEntryIdToContextNodeMap.end()) 727*06c3fb27SDimitry Andric return StackEntryNode->second; 728*06c3fb27SDimitry Andric return nullptr; 729*06c3fb27SDimitry Andric } 730*06c3fb27SDimitry Andric 731*06c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 732*06c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::unsetNodeForInst( 733*06c3fb27SDimitry Andric const CallInfo &C) { 734*06c3fb27SDimitry Andric AllocationCallToContextNodeMap.erase(C) || 735*06c3fb27SDimitry Andric NonAllocationCallToContextNodeMap.erase(C); 736*06c3fb27SDimitry Andric assert(!AllocationCallToContextNodeMap.count(C) && 737*06c3fb27SDimitry Andric !NonAllocationCallToContextNodeMap.count(C)); 738*06c3fb27SDimitry Andric } 739*06c3fb27SDimitry Andric 740*06c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 741*06c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode:: 742*06c3fb27SDimitry Andric addOrUpdateCallerEdge(ContextNode *Caller, AllocationType AllocType, 
743*06c3fb27SDimitry Andric unsigned int ContextId) { 744*06c3fb27SDimitry Andric for (auto &Edge : CallerEdges) { 745*06c3fb27SDimitry Andric if (Edge->Caller == Caller) { 746*06c3fb27SDimitry Andric Edge->AllocTypes |= (uint8_t)AllocType; 747*06c3fb27SDimitry Andric Edge->getContextIds().insert(ContextId); 748*06c3fb27SDimitry Andric return; 749*06c3fb27SDimitry Andric } 750*06c3fb27SDimitry Andric } 751*06c3fb27SDimitry Andric std::shared_ptr<ContextEdge> Edge = std::make_shared<ContextEdge>( 752*06c3fb27SDimitry Andric this, Caller, (uint8_t)AllocType, DenseSet<uint32_t>({ContextId})); 753*06c3fb27SDimitry Andric CallerEdges.push_back(Edge); 754*06c3fb27SDimitry Andric Caller->CalleeEdges.push_back(Edge); 755*06c3fb27SDimitry Andric } 756*06c3fb27SDimitry Andric 757*06c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 758*06c3fb27SDimitry Andric void CallsiteContextGraph< 759*06c3fb27SDimitry Andric DerivedCCG, FuncTy, CallTy>::removeNoneTypeCalleeEdges(ContextNode *Node) { 760*06c3fb27SDimitry Andric for (auto EI = Node->CalleeEdges.begin(); EI != Node->CalleeEdges.end();) { 761*06c3fb27SDimitry Andric auto Edge = *EI; 762*06c3fb27SDimitry Andric if (Edge->AllocTypes == (uint8_t)AllocationType::None) { 763*06c3fb27SDimitry Andric assert(Edge->ContextIds.empty()); 764*06c3fb27SDimitry Andric Edge->Callee->eraseCallerEdge(Edge.get()); 765*06c3fb27SDimitry Andric EI = Node->CalleeEdges.erase(EI); 766*06c3fb27SDimitry Andric } else 767*06c3fb27SDimitry Andric ++EI; 768*06c3fb27SDimitry Andric } 769*06c3fb27SDimitry Andric } 770*06c3fb27SDimitry Andric 771*06c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 772*06c3fb27SDimitry Andric typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge * 773*06c3fb27SDimitry Andric CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode:: 774*06c3fb27SDimitry Andric findEdgeFromCallee(const ContextNode *Callee) { 775*06c3fb27SDimitry 
Andric for (const auto &Edge : CalleeEdges) 776*06c3fb27SDimitry Andric if (Edge->Callee == Callee) 777*06c3fb27SDimitry Andric return Edge.get(); 778*06c3fb27SDimitry Andric return nullptr; 779*06c3fb27SDimitry Andric } 780*06c3fb27SDimitry Andric 781*06c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 782*06c3fb27SDimitry Andric typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge * 783*06c3fb27SDimitry Andric CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode:: 784*06c3fb27SDimitry Andric findEdgeFromCaller(const ContextNode *Caller) { 785*06c3fb27SDimitry Andric for (const auto &Edge : CallerEdges) 786*06c3fb27SDimitry Andric if (Edge->Caller == Caller) 787*06c3fb27SDimitry Andric return Edge.get(); 788*06c3fb27SDimitry Andric return nullptr; 789*06c3fb27SDimitry Andric } 790*06c3fb27SDimitry Andric 791*06c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 792*06c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode:: 793*06c3fb27SDimitry Andric eraseCalleeEdge(const ContextEdge *Edge) { 794*06c3fb27SDimitry Andric auto EI = 795*06c3fb27SDimitry Andric std::find_if(CalleeEdges.begin(), CalleeEdges.end(), 796*06c3fb27SDimitry Andric [Edge](const std::shared_ptr<ContextEdge> &CalleeEdge) { 797*06c3fb27SDimitry Andric return CalleeEdge.get() == Edge; 798*06c3fb27SDimitry Andric }); 799*06c3fb27SDimitry Andric assert(EI != CalleeEdges.end()); 800*06c3fb27SDimitry Andric CalleeEdges.erase(EI); 801*06c3fb27SDimitry Andric } 802*06c3fb27SDimitry Andric 803*06c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 804*06c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode:: 805*06c3fb27SDimitry Andric eraseCallerEdge(const ContextEdge *Edge) { 806*06c3fb27SDimitry Andric auto EI = 807*06c3fb27SDimitry Andric std::find_if(CallerEdges.begin(), CallerEdges.end(), 
808*06c3fb27SDimitry Andric [Edge](const std::shared_ptr<ContextEdge> &CallerEdge) { 809*06c3fb27SDimitry Andric return CallerEdge.get() == Edge; 810*06c3fb27SDimitry Andric }); 811*06c3fb27SDimitry Andric assert(EI != CallerEdges.end()); 812*06c3fb27SDimitry Andric CallerEdges.erase(EI); 813*06c3fb27SDimitry Andric } 814*06c3fb27SDimitry Andric 815*06c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 816*06c3fb27SDimitry Andric uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::computeAllocType( 817*06c3fb27SDimitry Andric DenseSet<uint32_t> &ContextIds) { 818*06c3fb27SDimitry Andric uint8_t BothTypes = 819*06c3fb27SDimitry Andric (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold; 820*06c3fb27SDimitry Andric uint8_t AllocType = (uint8_t)AllocationType::None; 821*06c3fb27SDimitry Andric for (auto Id : ContextIds) { 822*06c3fb27SDimitry Andric AllocType |= (uint8_t)ContextIdToAllocationType[Id]; 823*06c3fb27SDimitry Andric // Bail early if alloc type reached both, no further refinement. 
824*06c3fb27SDimitry Andric if (AllocType == BothTypes) 825*06c3fb27SDimitry Andric return AllocType; 826*06c3fb27SDimitry Andric } 827*06c3fb27SDimitry Andric return AllocType; 828*06c3fb27SDimitry Andric } 829*06c3fb27SDimitry Andric 830*06c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 831*06c3fb27SDimitry Andric uint8_t 832*06c3fb27SDimitry Andric CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypesImpl( 833*06c3fb27SDimitry Andric const DenseSet<uint32_t> &Node1Ids, const DenseSet<uint32_t> &Node2Ids) { 834*06c3fb27SDimitry Andric uint8_t BothTypes = 835*06c3fb27SDimitry Andric (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold; 836*06c3fb27SDimitry Andric uint8_t AllocType = (uint8_t)AllocationType::None; 837*06c3fb27SDimitry Andric for (auto Id : Node1Ids) { 838*06c3fb27SDimitry Andric if (!Node2Ids.count(Id)) 839*06c3fb27SDimitry Andric continue; 840*06c3fb27SDimitry Andric AllocType |= (uint8_t)ContextIdToAllocationType[Id]; 841*06c3fb27SDimitry Andric // Bail early if alloc type reached both, no further refinement. 
842*06c3fb27SDimitry Andric if (AllocType == BothTypes) 843*06c3fb27SDimitry Andric return AllocType; 844*06c3fb27SDimitry Andric } 845*06c3fb27SDimitry Andric return AllocType; 846*06c3fb27SDimitry Andric } 847*06c3fb27SDimitry Andric 848*06c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 849*06c3fb27SDimitry Andric uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypes( 850*06c3fb27SDimitry Andric const DenseSet<uint32_t> &Node1Ids, const DenseSet<uint32_t> &Node2Ids) { 851*06c3fb27SDimitry Andric if (Node1Ids.size() < Node2Ids.size()) 852*06c3fb27SDimitry Andric return intersectAllocTypesImpl(Node1Ids, Node2Ids); 853*06c3fb27SDimitry Andric else 854*06c3fb27SDimitry Andric return intersectAllocTypesImpl(Node2Ids, Node1Ids); 855*06c3fb27SDimitry Andric } 856*06c3fb27SDimitry Andric 857*06c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 858*06c3fb27SDimitry Andric typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode * 859*06c3fb27SDimitry Andric CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addAllocNode( 860*06c3fb27SDimitry Andric CallInfo Call, const FuncTy *F) { 861*06c3fb27SDimitry Andric assert(!getNodeForAlloc(Call)); 862*06c3fb27SDimitry Andric NodeOwner.push_back( 863*06c3fb27SDimitry Andric std::make_unique<ContextNode>(/*IsAllocation=*/true, Call)); 864*06c3fb27SDimitry Andric ContextNode *AllocNode = NodeOwner.back().get(); 865*06c3fb27SDimitry Andric AllocationCallToContextNodeMap[Call] = AllocNode; 866*06c3fb27SDimitry Andric NodeToCallingFunc[AllocNode] = F; 867*06c3fb27SDimitry Andric // Use LastContextId as a uniq id for MIB allocation nodes. 868*06c3fb27SDimitry Andric AllocNode->OrigStackOrAllocId = LastContextId; 869*06c3fb27SDimitry Andric // Alloc type should be updated as we add in the MIBs. We should assert 870*06c3fb27SDimitry Andric // afterwards that it is not still None. 
871*06c3fb27SDimitry Andric AllocNode->AllocTypes = (uint8_t)AllocationType::None; 872*06c3fb27SDimitry Andric 873*06c3fb27SDimitry Andric return AllocNode; 874*06c3fb27SDimitry Andric } 875*06c3fb27SDimitry Andric 876*06c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 877*06c3fb27SDimitry Andric template <class NodeT, class IteratorT> 878*06c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addStackNodesForMIB( 879*06c3fb27SDimitry Andric ContextNode *AllocNode, CallStack<NodeT, IteratorT> &StackContext, 880*06c3fb27SDimitry Andric CallStack<NodeT, IteratorT> &CallsiteContext, AllocationType AllocType) { 881*06c3fb27SDimitry Andric // Treating the hot alloc type as NotCold before the disambiguation for "hot" 882*06c3fb27SDimitry Andric // is done. 883*06c3fb27SDimitry Andric if (AllocType == AllocationType::Hot) 884*06c3fb27SDimitry Andric AllocType = AllocationType::NotCold; 885*06c3fb27SDimitry Andric 886*06c3fb27SDimitry Andric ContextIdToAllocationType[++LastContextId] = AllocType; 887*06c3fb27SDimitry Andric 888*06c3fb27SDimitry Andric // Update alloc type and context ids for this MIB. 889*06c3fb27SDimitry Andric AllocNode->AllocTypes |= (uint8_t)AllocType; 890*06c3fb27SDimitry Andric AllocNode->ContextIds.insert(LastContextId); 891*06c3fb27SDimitry Andric 892*06c3fb27SDimitry Andric // Now add or update nodes for each stack id in alloc's context. 893*06c3fb27SDimitry Andric // Later when processing the stack ids on non-alloc callsites we will adjust 894*06c3fb27SDimitry Andric // for any inlining in the context. 895*06c3fb27SDimitry Andric ContextNode *PrevNode = AllocNode; 896*06c3fb27SDimitry Andric // Look for recursion (direct recursion should have been collapsed by 897*06c3fb27SDimitry Andric // module summary analysis, here we should just be detecting mutual 898*06c3fb27SDimitry Andric // recursion). Mark these nodes so we don't try to clone. 
899*06c3fb27SDimitry Andric SmallSet<uint64_t, 8> StackIdSet; 900*06c3fb27SDimitry Andric // Skip any on the allocation call (inlining). 901*06c3fb27SDimitry Andric for (auto ContextIter = StackContext.beginAfterSharedPrefix(CallsiteContext); 902*06c3fb27SDimitry Andric ContextIter != StackContext.end(); ++ContextIter) { 903*06c3fb27SDimitry Andric auto StackId = getStackId(*ContextIter); 904*06c3fb27SDimitry Andric ContextNode *StackNode = getNodeForStackId(StackId); 905*06c3fb27SDimitry Andric if (!StackNode) { 906*06c3fb27SDimitry Andric NodeOwner.push_back( 907*06c3fb27SDimitry Andric std::make_unique<ContextNode>(/*IsAllocation=*/false)); 908*06c3fb27SDimitry Andric StackNode = NodeOwner.back().get(); 909*06c3fb27SDimitry Andric StackEntryIdToContextNodeMap[StackId] = StackNode; 910*06c3fb27SDimitry Andric StackNode->OrigStackOrAllocId = StackId; 911*06c3fb27SDimitry Andric } 912*06c3fb27SDimitry Andric auto Ins = StackIdSet.insert(StackId); 913*06c3fb27SDimitry Andric if (!Ins.second) 914*06c3fb27SDimitry Andric StackNode->Recursive = true; 915*06c3fb27SDimitry Andric StackNode->ContextIds.insert(LastContextId); 916*06c3fb27SDimitry Andric StackNode->AllocTypes |= (uint8_t)AllocType; 917*06c3fb27SDimitry Andric PrevNode->addOrUpdateCallerEdge(StackNode, AllocType, LastContextId); 918*06c3fb27SDimitry Andric PrevNode = StackNode; 919*06c3fb27SDimitry Andric } 920*06c3fb27SDimitry Andric } 921*06c3fb27SDimitry Andric 922*06c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 923*06c3fb27SDimitry Andric DenseSet<uint32_t> 924*06c3fb27SDimitry Andric CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::duplicateContextIds( 925*06c3fb27SDimitry Andric const DenseSet<uint32_t> &StackSequenceContextIds, 926*06c3fb27SDimitry Andric DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds) { 927*06c3fb27SDimitry Andric DenseSet<uint32_t> NewContextIds; 928*06c3fb27SDimitry Andric for (auto OldId : StackSequenceContextIds) { 
929*06c3fb27SDimitry Andric NewContextIds.insert(++LastContextId); 930*06c3fb27SDimitry Andric OldToNewContextIds[OldId].insert(LastContextId); 931*06c3fb27SDimitry Andric assert(ContextIdToAllocationType.count(OldId)); 932*06c3fb27SDimitry Andric // The new context has the same allocation type as original. 933*06c3fb27SDimitry Andric ContextIdToAllocationType[LastContextId] = ContextIdToAllocationType[OldId]; 934*06c3fb27SDimitry Andric } 935*06c3fb27SDimitry Andric return NewContextIds; 936*06c3fb27SDimitry Andric } 937*06c3fb27SDimitry Andric 938*06c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 939*06c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>:: 940*06c3fb27SDimitry Andric propagateDuplicateContextIds( 941*06c3fb27SDimitry Andric const DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds) { 942*06c3fb27SDimitry Andric // Build a set of duplicated context ids corresponding to the input id set. 943*06c3fb27SDimitry Andric auto GetNewIds = [&OldToNewContextIds](const DenseSet<uint32_t> &ContextIds) { 944*06c3fb27SDimitry Andric DenseSet<uint32_t> NewIds; 945*06c3fb27SDimitry Andric for (auto Id : ContextIds) 946*06c3fb27SDimitry Andric if (auto NewId = OldToNewContextIds.find(Id); 947*06c3fb27SDimitry Andric NewId != OldToNewContextIds.end()) 948*06c3fb27SDimitry Andric NewIds.insert(NewId->second.begin(), NewId->second.end()); 949*06c3fb27SDimitry Andric return NewIds; 950*06c3fb27SDimitry Andric }; 951*06c3fb27SDimitry Andric 952*06c3fb27SDimitry Andric // Recursively update context ids sets along caller edges. 
953*06c3fb27SDimitry Andric auto UpdateCallers = [&](ContextNode *Node, 954*06c3fb27SDimitry Andric DenseSet<const ContextEdge *> &Visited, 955*06c3fb27SDimitry Andric auto &&UpdateCallers) -> void { 956*06c3fb27SDimitry Andric for (const auto &Edge : Node->CallerEdges) { 957*06c3fb27SDimitry Andric auto Inserted = Visited.insert(Edge.get()); 958*06c3fb27SDimitry Andric if (!Inserted.second) 959*06c3fb27SDimitry Andric continue; 960*06c3fb27SDimitry Andric ContextNode *NextNode = Edge->Caller; 961*06c3fb27SDimitry Andric DenseSet<uint32_t> NewIdsToAdd = GetNewIds(Edge->getContextIds()); 962*06c3fb27SDimitry Andric // Only need to recursively iterate to NextNode via this caller edge if 963*06c3fb27SDimitry Andric // it resulted in any added ids to NextNode. 964*06c3fb27SDimitry Andric if (!NewIdsToAdd.empty()) { 965*06c3fb27SDimitry Andric Edge->getContextIds().insert(NewIdsToAdd.begin(), NewIdsToAdd.end()); 966*06c3fb27SDimitry Andric NextNode->ContextIds.insert(NewIdsToAdd.begin(), NewIdsToAdd.end()); 967*06c3fb27SDimitry Andric UpdateCallers(NextNode, Visited, UpdateCallers); 968*06c3fb27SDimitry Andric } 969*06c3fb27SDimitry Andric } 970*06c3fb27SDimitry Andric }; 971*06c3fb27SDimitry Andric 972*06c3fb27SDimitry Andric DenseSet<const ContextEdge *> Visited; 973*06c3fb27SDimitry Andric for (auto &Entry : AllocationCallToContextNodeMap) { 974*06c3fb27SDimitry Andric auto *Node = Entry.second; 975*06c3fb27SDimitry Andric // Update ids on the allocation nodes before calling the recursive 976*06c3fb27SDimitry Andric // update along caller edges, since this simplifies the logic during 977*06c3fb27SDimitry Andric // that traversal. 
978*06c3fb27SDimitry Andric DenseSet<uint32_t> NewIdsToAdd = GetNewIds(Node->ContextIds); 979*06c3fb27SDimitry Andric Node->ContextIds.insert(NewIdsToAdd.begin(), NewIdsToAdd.end()); 980*06c3fb27SDimitry Andric UpdateCallers(Node, Visited, UpdateCallers); 981*06c3fb27SDimitry Andric } 982*06c3fb27SDimitry Andric } 983*06c3fb27SDimitry Andric 984*06c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 985*06c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::connectNewNode( 986*06c3fb27SDimitry Andric ContextNode *NewNode, ContextNode *OrigNode, bool TowardsCallee) { 987*06c3fb27SDimitry Andric // Make a copy of the context ids, since this will be adjusted below as they 988*06c3fb27SDimitry Andric // are moved. 989*06c3fb27SDimitry Andric DenseSet<uint32_t> RemainingContextIds = NewNode->ContextIds; 990*06c3fb27SDimitry Andric auto &OrigEdges = 991*06c3fb27SDimitry Andric TowardsCallee ? OrigNode->CalleeEdges : OrigNode->CallerEdges; 992*06c3fb27SDimitry Andric // Increment iterator in loop so that we can remove edges as needed. 993*06c3fb27SDimitry Andric for (auto EI = OrigEdges.begin(); EI != OrigEdges.end();) { 994*06c3fb27SDimitry Andric auto Edge = *EI; 995*06c3fb27SDimitry Andric // Remove any matching context ids from Edge, return set that were found and 996*06c3fb27SDimitry Andric // removed, these are the new edge's context ids. Also update the remaining 997*06c3fb27SDimitry Andric // (not found ids). 998*06c3fb27SDimitry Andric DenseSet<uint32_t> NewEdgeContextIds, NotFoundContextIds; 999*06c3fb27SDimitry Andric set_subtract(Edge->getContextIds(), RemainingContextIds, NewEdgeContextIds, 1000*06c3fb27SDimitry Andric NotFoundContextIds); 1001*06c3fb27SDimitry Andric RemainingContextIds.swap(NotFoundContextIds); 1002*06c3fb27SDimitry Andric // If no matching context ids for this edge, skip it. 
1003*06c3fb27SDimitry Andric if (NewEdgeContextIds.empty()) { 1004*06c3fb27SDimitry Andric ++EI; 1005*06c3fb27SDimitry Andric continue; 1006*06c3fb27SDimitry Andric } 1007*06c3fb27SDimitry Andric if (TowardsCallee) { 1008*06c3fb27SDimitry Andric auto NewEdge = std::make_shared<ContextEdge>( 1009*06c3fb27SDimitry Andric Edge->Callee, NewNode, computeAllocType(NewEdgeContextIds), 1010*06c3fb27SDimitry Andric NewEdgeContextIds); 1011*06c3fb27SDimitry Andric NewNode->CalleeEdges.push_back(NewEdge); 1012*06c3fb27SDimitry Andric NewEdge->Callee->CallerEdges.push_back(NewEdge); 1013*06c3fb27SDimitry Andric } else { 1014*06c3fb27SDimitry Andric auto NewEdge = std::make_shared<ContextEdge>( 1015*06c3fb27SDimitry Andric NewNode, Edge->Caller, computeAllocType(NewEdgeContextIds), 1016*06c3fb27SDimitry Andric NewEdgeContextIds); 1017*06c3fb27SDimitry Andric NewNode->CallerEdges.push_back(NewEdge); 1018*06c3fb27SDimitry Andric NewEdge->Caller->CalleeEdges.push_back(NewEdge); 1019*06c3fb27SDimitry Andric } 1020*06c3fb27SDimitry Andric // Remove old edge if context ids empty. 
1021*06c3fb27SDimitry Andric if (Edge->getContextIds().empty()) { 1022*06c3fb27SDimitry Andric if (TowardsCallee) { 1023*06c3fb27SDimitry Andric Edge->Callee->eraseCallerEdge(Edge.get()); 1024*06c3fb27SDimitry Andric EI = OrigNode->CalleeEdges.erase(EI); 1025*06c3fb27SDimitry Andric } else { 1026*06c3fb27SDimitry Andric Edge->Caller->eraseCalleeEdge(Edge.get()); 1027*06c3fb27SDimitry Andric EI = OrigNode->CallerEdges.erase(EI); 1028*06c3fb27SDimitry Andric } 1029*06c3fb27SDimitry Andric continue; 1030*06c3fb27SDimitry Andric } 1031*06c3fb27SDimitry Andric ++EI; 1032*06c3fb27SDimitry Andric } 1033*06c3fb27SDimitry Andric } 1034*06c3fb27SDimitry Andric 1035*06c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 1036*06c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>:: 1037*06c3fb27SDimitry Andric assignStackNodesPostOrder(ContextNode *Node, 1038*06c3fb27SDimitry Andric DenseSet<const ContextNode *> &Visited, 1039*06c3fb27SDimitry Andric DenseMap<uint64_t, std::vector<CallContextInfo>> 1040*06c3fb27SDimitry Andric &StackIdToMatchingCalls) { 1041*06c3fb27SDimitry Andric auto Inserted = Visited.insert(Node); 1042*06c3fb27SDimitry Andric if (!Inserted.second) 1043*06c3fb27SDimitry Andric return; 1044*06c3fb27SDimitry Andric // Post order traversal. Iterate over a copy since we may add nodes and 1045*06c3fb27SDimitry Andric // therefore new callers during the recursive call, invalidating any 1046*06c3fb27SDimitry Andric // iterator over the original edge vector. We don't need to process these 1047*06c3fb27SDimitry Andric // new nodes as they were already processed on creation. 1048*06c3fb27SDimitry Andric auto CallerEdges = Node->CallerEdges; 1049*06c3fb27SDimitry Andric for (auto &Edge : CallerEdges) { 1050*06c3fb27SDimitry Andric // Skip any that have been removed during the recursion. 
1051*06c3fb27SDimitry Andric if (!Edge) 1052*06c3fb27SDimitry Andric continue; 1053*06c3fb27SDimitry Andric assignStackNodesPostOrder(Edge->Caller, Visited, StackIdToMatchingCalls); 1054*06c3fb27SDimitry Andric } 1055*06c3fb27SDimitry Andric 1056*06c3fb27SDimitry Andric // If this node's stack id is in the map, update the graph to contain new 1057*06c3fb27SDimitry Andric // nodes representing any inlining at interior callsites. Note we move the 1058*06c3fb27SDimitry Andric // associated context ids over to the new nodes. 1059*06c3fb27SDimitry Andric 1060*06c3fb27SDimitry Andric // Ignore this node if it is for an allocation or we didn't record any 1061*06c3fb27SDimitry Andric // stack id lists ending at it. 1062*06c3fb27SDimitry Andric if (Node->IsAllocation || 1063*06c3fb27SDimitry Andric !StackIdToMatchingCalls.count(Node->OrigStackOrAllocId)) 1064*06c3fb27SDimitry Andric return; 1065*06c3fb27SDimitry Andric 1066*06c3fb27SDimitry Andric auto &Calls = StackIdToMatchingCalls[Node->OrigStackOrAllocId]; 1067*06c3fb27SDimitry Andric // Handle the simple case first. A single call with a single stack id. 1068*06c3fb27SDimitry Andric // In this case there is no need to create any new context nodes, simply 1069*06c3fb27SDimitry Andric // assign the context node for stack id to this Call. 
1070*06c3fb27SDimitry Andric if (Calls.size() == 1) { 1071*06c3fb27SDimitry Andric auto &[Call, Ids, Func, SavedContextIds] = Calls[0]; 1072*06c3fb27SDimitry Andric if (Ids.size() == 1) { 1073*06c3fb27SDimitry Andric assert(SavedContextIds.empty()); 1074*06c3fb27SDimitry Andric // It should be this Node 1075*06c3fb27SDimitry Andric assert(Node == getNodeForStackId(Ids[0])); 1076*06c3fb27SDimitry Andric if (Node->Recursive) 1077*06c3fb27SDimitry Andric return; 1078*06c3fb27SDimitry Andric Node->setCall(Call); 1079*06c3fb27SDimitry Andric NonAllocationCallToContextNodeMap[Call] = Node; 1080*06c3fb27SDimitry Andric NodeToCallingFunc[Node] = Func; 1081*06c3fb27SDimitry Andric return; 1082*06c3fb27SDimitry Andric } 1083*06c3fb27SDimitry Andric } 1084*06c3fb27SDimitry Andric 1085*06c3fb27SDimitry Andric // Find the node for the last stack id, which should be the same 1086*06c3fb27SDimitry Andric // across all calls recorded for this id, and is this node's id. 1087*06c3fb27SDimitry Andric uint64_t LastId = Node->OrigStackOrAllocId; 1088*06c3fb27SDimitry Andric ContextNode *LastNode = getNodeForStackId(LastId); 1089*06c3fb27SDimitry Andric // We should only have kept stack ids that had nodes. 1090*06c3fb27SDimitry Andric assert(LastNode); 1091*06c3fb27SDimitry Andric 1092*06c3fb27SDimitry Andric for (unsigned I = 0; I < Calls.size(); I++) { 1093*06c3fb27SDimitry Andric auto &[Call, Ids, Func, SavedContextIds] = Calls[I]; 1094*06c3fb27SDimitry Andric // Skip any for which we didn't assign any ids, these don't get a node in 1095*06c3fb27SDimitry Andric // the graph. 
1096*06c3fb27SDimitry Andric if (SavedContextIds.empty()) 1097*06c3fb27SDimitry Andric continue; 1098*06c3fb27SDimitry Andric 1099*06c3fb27SDimitry Andric assert(LastId == Ids.back()); 1100*06c3fb27SDimitry Andric 1101*06c3fb27SDimitry Andric ContextNode *FirstNode = getNodeForStackId(Ids[0]); 1102*06c3fb27SDimitry Andric assert(FirstNode); 1103*06c3fb27SDimitry Andric 1104*06c3fb27SDimitry Andric // Recompute the context ids for this stack id sequence (the 1105*06c3fb27SDimitry Andric // intersection of the context ids of the corresponding nodes). 1106*06c3fb27SDimitry Andric // Start with the ids we saved in the map for this call, which could be 1107*06c3fb27SDimitry Andric // duplicated context ids. We have to recompute as we might have overlap 1108*06c3fb27SDimitry Andric // overlap between the saved context ids for different last nodes, and 1109*06c3fb27SDimitry Andric // removed them already during the post order traversal. 1110*06c3fb27SDimitry Andric set_intersect(SavedContextIds, FirstNode->ContextIds); 1111*06c3fb27SDimitry Andric ContextNode *PrevNode = nullptr; 1112*06c3fb27SDimitry Andric for (auto Id : Ids) { 1113*06c3fb27SDimitry Andric ContextNode *CurNode = getNodeForStackId(Id); 1114*06c3fb27SDimitry Andric // We should only have kept stack ids that had nodes and weren't 1115*06c3fb27SDimitry Andric // recursive. 
1116*06c3fb27SDimitry Andric assert(CurNode); 1117*06c3fb27SDimitry Andric assert(!CurNode->Recursive); 1118*06c3fb27SDimitry Andric if (!PrevNode) { 1119*06c3fb27SDimitry Andric PrevNode = CurNode; 1120*06c3fb27SDimitry Andric continue; 1121*06c3fb27SDimitry Andric } 1122*06c3fb27SDimitry Andric auto *Edge = CurNode->findEdgeFromCallee(PrevNode); 1123*06c3fb27SDimitry Andric if (!Edge) { 1124*06c3fb27SDimitry Andric SavedContextIds.clear(); 1125*06c3fb27SDimitry Andric break; 1126*06c3fb27SDimitry Andric } 1127*06c3fb27SDimitry Andric PrevNode = CurNode; 1128*06c3fb27SDimitry Andric set_intersect(SavedContextIds, Edge->getContextIds()); 1129*06c3fb27SDimitry Andric 1130*06c3fb27SDimitry Andric // If we now have no context ids for clone, skip this call. 1131*06c3fb27SDimitry Andric if (SavedContextIds.empty()) 1132*06c3fb27SDimitry Andric break; 1133*06c3fb27SDimitry Andric } 1134*06c3fb27SDimitry Andric if (SavedContextIds.empty()) 1135*06c3fb27SDimitry Andric continue; 1136*06c3fb27SDimitry Andric 1137*06c3fb27SDimitry Andric // Create new context node. 1138*06c3fb27SDimitry Andric NodeOwner.push_back( 1139*06c3fb27SDimitry Andric std::make_unique<ContextNode>(/*IsAllocation=*/false, Call)); 1140*06c3fb27SDimitry Andric ContextNode *NewNode = NodeOwner.back().get(); 1141*06c3fb27SDimitry Andric NodeToCallingFunc[NewNode] = Func; 1142*06c3fb27SDimitry Andric NonAllocationCallToContextNodeMap[Call] = NewNode; 1143*06c3fb27SDimitry Andric NewNode->ContextIds = SavedContextIds; 1144*06c3fb27SDimitry Andric NewNode->AllocTypes = computeAllocType(NewNode->ContextIds); 1145*06c3fb27SDimitry Andric 1146*06c3fb27SDimitry Andric // Connect to callees of innermost stack frame in inlined call chain. 1147*06c3fb27SDimitry Andric // This updates context ids for FirstNode's callee's to reflect those 1148*06c3fb27SDimitry Andric // moved to NewNode. 
1149*06c3fb27SDimitry Andric connectNewNode(NewNode, FirstNode, /*TowardsCallee=*/true); 1150*06c3fb27SDimitry Andric 1151*06c3fb27SDimitry Andric // Connect to callers of outermost stack frame in inlined call chain. 1152*06c3fb27SDimitry Andric // This updates context ids for FirstNode's caller's to reflect those 1153*06c3fb27SDimitry Andric // moved to NewNode. 1154*06c3fb27SDimitry Andric connectNewNode(NewNode, LastNode, /*TowardsCallee=*/false); 1155*06c3fb27SDimitry Andric 1156*06c3fb27SDimitry Andric // Now we need to remove context ids from edges/nodes between First and 1157*06c3fb27SDimitry Andric // Last Node. 1158*06c3fb27SDimitry Andric PrevNode = nullptr; 1159*06c3fb27SDimitry Andric for (auto Id : Ids) { 1160*06c3fb27SDimitry Andric ContextNode *CurNode = getNodeForStackId(Id); 1161*06c3fb27SDimitry Andric // We should only have kept stack ids that had nodes. 1162*06c3fb27SDimitry Andric assert(CurNode); 1163*06c3fb27SDimitry Andric 1164*06c3fb27SDimitry Andric // Remove the context ids moved to NewNode from CurNode, and the 1165*06c3fb27SDimitry Andric // edge from the prior node. 
1166*06c3fb27SDimitry Andric set_subtract(CurNode->ContextIds, NewNode->ContextIds); 1167*06c3fb27SDimitry Andric if (PrevNode) { 1168*06c3fb27SDimitry Andric auto *PrevEdge = CurNode->findEdgeFromCallee(PrevNode); 1169*06c3fb27SDimitry Andric assert(PrevEdge); 1170*06c3fb27SDimitry Andric set_subtract(PrevEdge->getContextIds(), NewNode->ContextIds); 1171*06c3fb27SDimitry Andric if (PrevEdge->getContextIds().empty()) { 1172*06c3fb27SDimitry Andric PrevNode->eraseCallerEdge(PrevEdge); 1173*06c3fb27SDimitry Andric CurNode->eraseCalleeEdge(PrevEdge); 1174*06c3fb27SDimitry Andric } 1175*06c3fb27SDimitry Andric } 1176*06c3fb27SDimitry Andric PrevNode = CurNode; 1177*06c3fb27SDimitry Andric } 1178*06c3fb27SDimitry Andric } 1179*06c3fb27SDimitry Andric } 1180*06c3fb27SDimitry Andric 1181*06c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 1182*06c3fb27SDimitry Andric void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() { 1183*06c3fb27SDimitry Andric // Map of stack id to all calls with that as the last (outermost caller) 1184*06c3fb27SDimitry Andric // callsite id that has a context node (some might not due to pruning 1185*06c3fb27SDimitry Andric // performed during matching of the allocation profile contexts). 1186*06c3fb27SDimitry Andric // The CallContextInfo contains the Call and a list of its stack ids with 1187*06c3fb27SDimitry Andric // ContextNodes, the function containing Call, and the set of context ids 1188*06c3fb27SDimitry Andric // the analysis will eventually identify for use in any new node created 1189*06c3fb27SDimitry Andric // for that callsite. 1190*06c3fb27SDimitry Andric DenseMap<uint64_t, std::vector<CallContextInfo>> StackIdToMatchingCalls; 1191*06c3fb27SDimitry Andric for (auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) { 1192*06c3fb27SDimitry Andric for (auto &Call : CallsWithMetadata) { 1193*06c3fb27SDimitry Andric // Ignore allocations, already handled. 
1194*06c3fb27SDimitry Andric if (AllocationCallToContextNodeMap.count(Call)) 1195*06c3fb27SDimitry Andric continue; 1196*06c3fb27SDimitry Andric auto StackIdsWithContextNodes = 1197*06c3fb27SDimitry Andric getStackIdsWithContextNodesForCall(Call.call()); 1198*06c3fb27SDimitry Andric // If there were no nodes created for MIBs on allocs (maybe this was in 1199*06c3fb27SDimitry Andric // the unambiguous part of the MIB stack that was pruned), ignore. 1200*06c3fb27SDimitry Andric if (StackIdsWithContextNodes.empty()) 1201*06c3fb27SDimitry Andric continue; 1202*06c3fb27SDimitry Andric // Otherwise, record this Call along with the list of ids for the last 1203*06c3fb27SDimitry Andric // (outermost caller) stack id with a node. 1204*06c3fb27SDimitry Andric StackIdToMatchingCalls[StackIdsWithContextNodes.back()].push_back( 1205*06c3fb27SDimitry Andric {Call.call(), StackIdsWithContextNodes, Func, {}}); 1206*06c3fb27SDimitry Andric } 1207*06c3fb27SDimitry Andric } 1208*06c3fb27SDimitry Andric 1209*06c3fb27SDimitry Andric // First make a pass through all stack ids that correspond to a call, 1210*06c3fb27SDimitry Andric // as identified in the above loop. Compute the context ids corresponding to 1211*06c3fb27SDimitry Andric // each of these calls when they correspond to multiple stack ids due to 1212*06c3fb27SDimitry Andric // due to inlining. Perform any duplication of context ids required when 1213*06c3fb27SDimitry Andric // there is more than one call with the same stack ids. Their (possibly newly 1214*06c3fb27SDimitry Andric // duplicated) context ids are saved in the StackIdToMatchingCalls map. 1215*06c3fb27SDimitry Andric DenseMap<uint32_t, DenseSet<uint32_t>> OldToNewContextIds; 1216*06c3fb27SDimitry Andric for (auto &It : StackIdToMatchingCalls) { 1217*06c3fb27SDimitry Andric auto &Calls = It.getSecond(); 1218*06c3fb27SDimitry Andric // Skip single calls with a single stack id. These don't need a new node. 
1219*06c3fb27SDimitry Andric if (Calls.size() == 1) { 1220*06c3fb27SDimitry Andric auto &Ids = std::get<1>(Calls[0]); 1221*06c3fb27SDimitry Andric if (Ids.size() == 1) 1222*06c3fb27SDimitry Andric continue; 1223*06c3fb27SDimitry Andric } 1224*06c3fb27SDimitry Andric // In order to do the best and maximal matching of inlined calls to context 1225*06c3fb27SDimitry Andric // node sequences we will sort the vectors of stack ids in descending order 1226*06c3fb27SDimitry Andric // of length, and within each length, lexicographically by stack id. The 1227*06c3fb27SDimitry Andric // latter is so that we can specially handle calls that have identical stack 1228*06c3fb27SDimitry Andric // id sequences (either due to cloning or artificially because of the MIB 1229*06c3fb27SDimitry Andric // context pruning). 1230*06c3fb27SDimitry Andric std::stable_sort(Calls.begin(), Calls.end(), 1231*06c3fb27SDimitry Andric [](const CallContextInfo &A, const CallContextInfo &B) { 1232*06c3fb27SDimitry Andric auto &IdsA = std::get<1>(A); 1233*06c3fb27SDimitry Andric auto &IdsB = std::get<1>(B); 1234*06c3fb27SDimitry Andric return IdsA.size() > IdsB.size() || 1235*06c3fb27SDimitry Andric (IdsA.size() == IdsB.size() && IdsA < IdsB); 1236*06c3fb27SDimitry Andric }); 1237*06c3fb27SDimitry Andric 1238*06c3fb27SDimitry Andric // Find the node for the last stack id, which should be the same 1239*06c3fb27SDimitry Andric // across all calls recorded for this id, and is the id for this 1240*06c3fb27SDimitry Andric // entry in the StackIdToMatchingCalls map. 1241*06c3fb27SDimitry Andric uint64_t LastId = It.getFirst(); 1242*06c3fb27SDimitry Andric ContextNode *LastNode = getNodeForStackId(LastId); 1243*06c3fb27SDimitry Andric // We should only have kept stack ids that had nodes. 
1244*06c3fb27SDimitry Andric assert(LastNode); 1245*06c3fb27SDimitry Andric 1246*06c3fb27SDimitry Andric if (LastNode->Recursive) 1247*06c3fb27SDimitry Andric continue; 1248*06c3fb27SDimitry Andric 1249*06c3fb27SDimitry Andric // Initialize the context ids with the last node's. We will subsequently 1250*06c3fb27SDimitry Andric // refine the context ids by computing the intersection along all edges. 1251*06c3fb27SDimitry Andric DenseSet<uint32_t> LastNodeContextIds = LastNode->ContextIds; 1252*06c3fb27SDimitry Andric assert(!LastNodeContextIds.empty()); 1253*06c3fb27SDimitry Andric 1254*06c3fb27SDimitry Andric for (unsigned I = 0; I < Calls.size(); I++) { 1255*06c3fb27SDimitry Andric auto &[Call, Ids, Func, SavedContextIds] = Calls[I]; 1256*06c3fb27SDimitry Andric assert(SavedContextIds.empty()); 1257*06c3fb27SDimitry Andric assert(LastId == Ids.back()); 1258*06c3fb27SDimitry Andric 1259*06c3fb27SDimitry Andric // First compute the context ids for this stack id sequence (the 1260*06c3fb27SDimitry Andric // intersection of the context ids of the corresponding nodes). 1261*06c3fb27SDimitry Andric // Start with the remaining saved ids for the last node. 1262*06c3fb27SDimitry Andric assert(!LastNodeContextIds.empty()); 1263*06c3fb27SDimitry Andric DenseSet<uint32_t> StackSequenceContextIds = LastNodeContextIds; 1264*06c3fb27SDimitry Andric 1265*06c3fb27SDimitry Andric ContextNode *PrevNode = LastNode; 1266*06c3fb27SDimitry Andric ContextNode *CurNode = LastNode; 1267*06c3fb27SDimitry Andric bool Skip = false; 1268*06c3fb27SDimitry Andric 1269*06c3fb27SDimitry Andric // Iterate backwards through the stack Ids, starting after the last Id 1270*06c3fb27SDimitry Andric // in the list, which was handled once outside for all Calls. 
1271*06c3fb27SDimitry Andric for (auto IdIter = Ids.rbegin() + 1; IdIter != Ids.rend(); IdIter++) { 1272*06c3fb27SDimitry Andric auto Id = *IdIter; 1273*06c3fb27SDimitry Andric CurNode = getNodeForStackId(Id); 1274*06c3fb27SDimitry Andric // We should only have kept stack ids that had nodes. 1275*06c3fb27SDimitry Andric assert(CurNode); 1276*06c3fb27SDimitry Andric 1277*06c3fb27SDimitry Andric if (CurNode->Recursive) { 1278*06c3fb27SDimitry Andric Skip = true; 1279*06c3fb27SDimitry Andric break; 1280*06c3fb27SDimitry Andric } 1281*06c3fb27SDimitry Andric 1282*06c3fb27SDimitry Andric auto *Edge = CurNode->findEdgeFromCaller(PrevNode); 1283*06c3fb27SDimitry Andric // If there is no edge then the nodes belong to different MIB contexts, 1284*06c3fb27SDimitry Andric // and we should skip this inlined context sequence. For example, this 1285*06c3fb27SDimitry Andric // particular inlined context may include stack ids A->B, and we may 1286*06c3fb27SDimitry Andric // indeed have nodes for both A and B, but it is possible that they were 1287*06c3fb27SDimitry Andric // never profiled in sequence in a single MIB for any allocation (i.e. 1288*06c3fb27SDimitry Andric // we might have profiled an allocation that involves the callsite A, 1289*06c3fb27SDimitry Andric // but through a different one of its callee callsites, and we might 1290*06c3fb27SDimitry Andric // have profiled an allocation that involves callsite B, but reached 1291*06c3fb27SDimitry Andric // from a different caller callsite). 1292*06c3fb27SDimitry Andric if (!Edge) { 1293*06c3fb27SDimitry Andric Skip = true; 1294*06c3fb27SDimitry Andric break; 1295*06c3fb27SDimitry Andric } 1296*06c3fb27SDimitry Andric PrevNode = CurNode; 1297*06c3fb27SDimitry Andric 1298*06c3fb27SDimitry Andric // Update the context ids, which is the intersection of the ids along 1299*06c3fb27SDimitry Andric // all edges in the sequence. 
1300*06c3fb27SDimitry Andric set_intersect(StackSequenceContextIds, Edge->getContextIds()); 1301*06c3fb27SDimitry Andric 1302*06c3fb27SDimitry Andric // If we now have no context ids for clone, skip this call. 1303*06c3fb27SDimitry Andric if (StackSequenceContextIds.empty()) { 1304*06c3fb27SDimitry Andric Skip = true; 1305*06c3fb27SDimitry Andric break; 1306*06c3fb27SDimitry Andric } 1307*06c3fb27SDimitry Andric } 1308*06c3fb27SDimitry Andric if (Skip) 1309*06c3fb27SDimitry Andric continue; 1310*06c3fb27SDimitry Andric 1311*06c3fb27SDimitry Andric // If some of this call's stack ids did not have corresponding nodes (due 1312*06c3fb27SDimitry Andric // to pruning), don't include any context ids for contexts that extend 1313*06c3fb27SDimitry Andric // beyond these nodes. Otherwise we would be matching part of unrelated / 1314*06c3fb27SDimitry Andric // not fully matching stack contexts. To do this, subtract any context ids 1315*06c3fb27SDimitry Andric // found in caller nodes of the last node found above. 1316*06c3fb27SDimitry Andric if (Ids.back() != getLastStackId(Call)) { 1317*06c3fb27SDimitry Andric for (const auto &PE : CurNode->CallerEdges) { 1318*06c3fb27SDimitry Andric set_subtract(StackSequenceContextIds, PE->getContextIds()); 1319*06c3fb27SDimitry Andric if (StackSequenceContextIds.empty()) 1320*06c3fb27SDimitry Andric break; 1321*06c3fb27SDimitry Andric } 1322*06c3fb27SDimitry Andric // If we now have no context ids for clone, skip this call. 1323*06c3fb27SDimitry Andric if (StackSequenceContextIds.empty()) 1324*06c3fb27SDimitry Andric continue; 1325*06c3fb27SDimitry Andric } 1326*06c3fb27SDimitry Andric 1327*06c3fb27SDimitry Andric // Check if the next set of stack ids is the same (since the Calls vector 1328*06c3fb27SDimitry Andric // of tuples is sorted by the stack ids we can just look at the next one). 
1329*06c3fb27SDimitry Andric bool DuplicateContextIds = false; 1330*06c3fb27SDimitry Andric if (I + 1 < Calls.size()) { 1331*06c3fb27SDimitry Andric auto NextIds = std::get<1>(Calls[I + 1]); 1332*06c3fb27SDimitry Andric DuplicateContextIds = Ids == NextIds; 1333*06c3fb27SDimitry Andric } 1334*06c3fb27SDimitry Andric 1335*06c3fb27SDimitry Andric // If we don't have duplicate context ids, then we can assign all the 1336*06c3fb27SDimitry Andric // context ids computed for the original node sequence to this call. 1337*06c3fb27SDimitry Andric // If there are duplicate calls with the same stack ids then we synthesize 1338*06c3fb27SDimitry Andric // new context ids that are duplicates of the originals. These are 1339*06c3fb27SDimitry Andric // assigned to SavedContextIds, which is a reference into the map entry 1340*06c3fb27SDimitry Andric // for this call, allowing us to access these ids later on. 1341*06c3fb27SDimitry Andric OldToNewContextIds.reserve(OldToNewContextIds.size() + 1342*06c3fb27SDimitry Andric StackSequenceContextIds.size()); 1343*06c3fb27SDimitry Andric SavedContextIds = 1344*06c3fb27SDimitry Andric DuplicateContextIds 1345*06c3fb27SDimitry Andric ? duplicateContextIds(StackSequenceContextIds, OldToNewContextIds) 1346*06c3fb27SDimitry Andric : StackSequenceContextIds; 1347*06c3fb27SDimitry Andric assert(!SavedContextIds.empty()); 1348*06c3fb27SDimitry Andric 1349*06c3fb27SDimitry Andric if (!DuplicateContextIds) { 1350*06c3fb27SDimitry Andric // Update saved last node's context ids to remove those that are 1351*06c3fb27SDimitry Andric // assigned to other calls, so that it is ready for the next call at 1352*06c3fb27SDimitry Andric // this stack id. 
1353*06c3fb27SDimitry Andric set_subtract(LastNodeContextIds, StackSequenceContextIds); 1354*06c3fb27SDimitry Andric if (LastNodeContextIds.empty()) 1355*06c3fb27SDimitry Andric break; 1356*06c3fb27SDimitry Andric } 1357*06c3fb27SDimitry Andric } 1358*06c3fb27SDimitry Andric } 1359*06c3fb27SDimitry Andric 1360*06c3fb27SDimitry Andric // Propagate the duplicate context ids over the graph. 1361*06c3fb27SDimitry Andric propagateDuplicateContextIds(OldToNewContextIds); 1362*06c3fb27SDimitry Andric 1363*06c3fb27SDimitry Andric if (VerifyCCG) 1364*06c3fb27SDimitry Andric check(); 1365*06c3fb27SDimitry Andric 1366*06c3fb27SDimitry Andric // Now perform a post-order traversal over the graph, starting with the 1367*06c3fb27SDimitry Andric // allocation nodes, essentially processing nodes from callers to callees. 1368*06c3fb27SDimitry Andric // For any that contains an id in the map, update the graph to contain new 1369*06c3fb27SDimitry Andric // nodes representing any inlining at interior callsites. Note we move the 1370*06c3fb27SDimitry Andric // associated context ids over to the new nodes. 
1371*06c3fb27SDimitry Andric DenseSet<const ContextNode *> Visited; 1372*06c3fb27SDimitry Andric for (auto &Entry : AllocationCallToContextNodeMap) 1373*06c3fb27SDimitry Andric assignStackNodesPostOrder(Entry.second, Visited, StackIdToMatchingCalls); 1374*06c3fb27SDimitry Andric } 1375*06c3fb27SDimitry Andric 1376*06c3fb27SDimitry Andric uint64_t ModuleCallsiteContextGraph::getLastStackId(Instruction *Call) { 1377*06c3fb27SDimitry Andric CallStack<MDNode, MDNode::op_iterator> CallsiteContext( 1378*06c3fb27SDimitry Andric Call->getMetadata(LLVMContext::MD_callsite)); 1379*06c3fb27SDimitry Andric return CallsiteContext.back(); 1380*06c3fb27SDimitry Andric } 1381*06c3fb27SDimitry Andric 1382*06c3fb27SDimitry Andric uint64_t IndexCallsiteContextGraph::getLastStackId(IndexCall &Call) { 1383*06c3fb27SDimitry Andric assert(isa<CallsiteInfo *>(Call.getBase())); 1384*06c3fb27SDimitry Andric CallStack<CallsiteInfo, SmallVector<unsigned>::const_iterator> 1385*06c3fb27SDimitry Andric CallsiteContext(dyn_cast_if_present<CallsiteInfo *>(Call.getBase())); 1386*06c3fb27SDimitry Andric // Need to convert index into stack id. 1387*06c3fb27SDimitry Andric return Index.getStackIdAtIndex(CallsiteContext.back()); 1388*06c3fb27SDimitry Andric } 1389*06c3fb27SDimitry Andric 1390*06c3fb27SDimitry Andric static const std::string MemProfCloneSuffix = ".memprof."; 1391*06c3fb27SDimitry Andric 1392*06c3fb27SDimitry Andric static std::string getMemProfFuncName(Twine Base, unsigned CloneNo) { 1393*06c3fb27SDimitry Andric // We use CloneNo == 0 to refer to the original version, which doesn't get 1394*06c3fb27SDimitry Andric // renamed with a suffix. 
1395*06c3fb27SDimitry Andric if (!CloneNo) 1396*06c3fb27SDimitry Andric return Base.str(); 1397*06c3fb27SDimitry Andric return (Base + MemProfCloneSuffix + Twine(CloneNo)).str(); 1398*06c3fb27SDimitry Andric } 1399*06c3fb27SDimitry Andric 1400*06c3fb27SDimitry Andric std::string ModuleCallsiteContextGraph::getLabel(const Function *Func, 1401*06c3fb27SDimitry Andric const Instruction *Call, 1402*06c3fb27SDimitry Andric unsigned CloneNo) const { 1403*06c3fb27SDimitry Andric return (Twine(Call->getFunction()->getName()) + " -> " + 1404*06c3fb27SDimitry Andric cast<CallBase>(Call)->getCalledFunction()->getName()) 1405*06c3fb27SDimitry Andric .str(); 1406*06c3fb27SDimitry Andric } 1407*06c3fb27SDimitry Andric 1408*06c3fb27SDimitry Andric std::string IndexCallsiteContextGraph::getLabel(const FunctionSummary *Func, 1409*06c3fb27SDimitry Andric const IndexCall &Call, 1410*06c3fb27SDimitry Andric unsigned CloneNo) const { 1411*06c3fb27SDimitry Andric auto VI = FSToVIMap.find(Func); 1412*06c3fb27SDimitry Andric assert(VI != FSToVIMap.end()); 1413*06c3fb27SDimitry Andric if (isa<AllocInfo *>(Call.getBase())) 1414*06c3fb27SDimitry Andric return (VI->second.name() + " -> alloc").str(); 1415*06c3fb27SDimitry Andric else { 1416*06c3fb27SDimitry Andric auto *Callsite = dyn_cast_if_present<CallsiteInfo *>(Call.getBase()); 1417*06c3fb27SDimitry Andric return (VI->second.name() + " -> " + 1418*06c3fb27SDimitry Andric getMemProfFuncName(Callsite->Callee.name(), 1419*06c3fb27SDimitry Andric Callsite->Clones[CloneNo])) 1420*06c3fb27SDimitry Andric .str(); 1421*06c3fb27SDimitry Andric } 1422*06c3fb27SDimitry Andric } 1423*06c3fb27SDimitry Andric 1424*06c3fb27SDimitry Andric std::vector<uint64_t> 1425*06c3fb27SDimitry Andric ModuleCallsiteContextGraph::getStackIdsWithContextNodesForCall( 1426*06c3fb27SDimitry Andric Instruction *Call) { 1427*06c3fb27SDimitry Andric CallStack<MDNode, MDNode::op_iterator> CallsiteContext( 1428*06c3fb27SDimitry Andric 
Call->getMetadata(LLVMContext::MD_callsite)); 1429*06c3fb27SDimitry Andric return getStackIdsWithContextNodes<MDNode, MDNode::op_iterator>( 1430*06c3fb27SDimitry Andric CallsiteContext); 1431*06c3fb27SDimitry Andric } 1432*06c3fb27SDimitry Andric 1433*06c3fb27SDimitry Andric std::vector<uint64_t> 1434*06c3fb27SDimitry Andric IndexCallsiteContextGraph::getStackIdsWithContextNodesForCall(IndexCall &Call) { 1435*06c3fb27SDimitry Andric assert(isa<CallsiteInfo *>(Call.getBase())); 1436*06c3fb27SDimitry Andric CallStack<CallsiteInfo, SmallVector<unsigned>::const_iterator> 1437*06c3fb27SDimitry Andric CallsiteContext(dyn_cast_if_present<CallsiteInfo *>(Call.getBase())); 1438*06c3fb27SDimitry Andric return getStackIdsWithContextNodes<CallsiteInfo, 1439*06c3fb27SDimitry Andric SmallVector<unsigned>::const_iterator>( 1440*06c3fb27SDimitry Andric CallsiteContext); 1441*06c3fb27SDimitry Andric } 1442*06c3fb27SDimitry Andric 1443*06c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 1444*06c3fb27SDimitry Andric template <class NodeT, class IteratorT> 1445*06c3fb27SDimitry Andric std::vector<uint64_t> 1446*06c3fb27SDimitry Andric CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getStackIdsWithContextNodes( 1447*06c3fb27SDimitry Andric CallStack<NodeT, IteratorT> &CallsiteContext) { 1448*06c3fb27SDimitry Andric std::vector<uint64_t> StackIds; 1449*06c3fb27SDimitry Andric for (auto IdOrIndex : CallsiteContext) { 1450*06c3fb27SDimitry Andric auto StackId = getStackId(IdOrIndex); 1451*06c3fb27SDimitry Andric ContextNode *Node = getNodeForStackId(StackId); 1452*06c3fb27SDimitry Andric if (!Node) 1453*06c3fb27SDimitry Andric break; 1454*06c3fb27SDimitry Andric StackIds.push_back(StackId); 1455*06c3fb27SDimitry Andric } 1456*06c3fb27SDimitry Andric return StackIds; 1457*06c3fb27SDimitry Andric } 1458*06c3fb27SDimitry Andric 1459*06c3fb27SDimitry Andric ModuleCallsiteContextGraph::ModuleCallsiteContextGraph( 1460*06c3fb27SDimitry Andric Module &M, 
function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter) 1461*06c3fb27SDimitry Andric : Mod(M), OREGetter(OREGetter) { 1462*06c3fb27SDimitry Andric for (auto &F : M) { 1463*06c3fb27SDimitry Andric std::vector<CallInfo> CallsWithMetadata; 1464*06c3fb27SDimitry Andric for (auto &BB : F) { 1465*06c3fb27SDimitry Andric for (auto &I : BB) { 1466*06c3fb27SDimitry Andric if (!isa<CallBase>(I)) 1467*06c3fb27SDimitry Andric continue; 1468*06c3fb27SDimitry Andric if (auto *MemProfMD = I.getMetadata(LLVMContext::MD_memprof)) { 1469*06c3fb27SDimitry Andric CallsWithMetadata.push_back(&I); 1470*06c3fb27SDimitry Andric auto *AllocNode = addAllocNode(&I, &F); 1471*06c3fb27SDimitry Andric auto *CallsiteMD = I.getMetadata(LLVMContext::MD_callsite); 1472*06c3fb27SDimitry Andric assert(CallsiteMD); 1473*06c3fb27SDimitry Andric CallStack<MDNode, MDNode::op_iterator> CallsiteContext(CallsiteMD); 1474*06c3fb27SDimitry Andric // Add all of the MIBs and their stack nodes. 1475*06c3fb27SDimitry Andric for (auto &MDOp : MemProfMD->operands()) { 1476*06c3fb27SDimitry Andric auto *MIBMD = cast<const MDNode>(MDOp); 1477*06c3fb27SDimitry Andric MDNode *StackNode = getMIBStackNode(MIBMD); 1478*06c3fb27SDimitry Andric assert(StackNode); 1479*06c3fb27SDimitry Andric CallStack<MDNode, MDNode::op_iterator> StackContext(StackNode); 1480*06c3fb27SDimitry Andric addStackNodesForMIB<MDNode, MDNode::op_iterator>( 1481*06c3fb27SDimitry Andric AllocNode, StackContext, CallsiteContext, 1482*06c3fb27SDimitry Andric getMIBAllocType(MIBMD)); 1483*06c3fb27SDimitry Andric } 1484*06c3fb27SDimitry Andric assert(AllocNode->AllocTypes != (uint8_t)AllocationType::None); 1485*06c3fb27SDimitry Andric // Memprof and callsite metadata on memory allocations no longer 1486*06c3fb27SDimitry Andric // needed. 
1487*06c3fb27SDimitry Andric I.setMetadata(LLVMContext::MD_memprof, nullptr); 1488*06c3fb27SDimitry Andric I.setMetadata(LLVMContext::MD_callsite, nullptr); 1489*06c3fb27SDimitry Andric } 1490*06c3fb27SDimitry Andric // For callsite metadata, add to list for this function for later use. 1491*06c3fb27SDimitry Andric else if (I.getMetadata(LLVMContext::MD_callsite)) 1492*06c3fb27SDimitry Andric CallsWithMetadata.push_back(&I); 1493*06c3fb27SDimitry Andric } 1494*06c3fb27SDimitry Andric } 1495*06c3fb27SDimitry Andric if (!CallsWithMetadata.empty()) 1496*06c3fb27SDimitry Andric FuncToCallsWithMetadata.push_back({&F, CallsWithMetadata}); 1497*06c3fb27SDimitry Andric } 1498*06c3fb27SDimitry Andric 1499*06c3fb27SDimitry Andric if (DumpCCG) { 1500*06c3fb27SDimitry Andric dbgs() << "CCG before updating call stack chains:\n"; 1501*06c3fb27SDimitry Andric dbgs() << *this; 1502*06c3fb27SDimitry Andric } 1503*06c3fb27SDimitry Andric 1504*06c3fb27SDimitry Andric if (ExportToDot) 1505*06c3fb27SDimitry Andric exportToDot("prestackupdate"); 1506*06c3fb27SDimitry Andric 1507*06c3fb27SDimitry Andric updateStackNodes(); 1508*06c3fb27SDimitry Andric 1509*06c3fb27SDimitry Andric handleCallsitesWithMultipleTargets(); 1510*06c3fb27SDimitry Andric 1511*06c3fb27SDimitry Andric // Strip off remaining callsite metadata, no longer needed. 
  for (auto &FuncEntry : FuncToCallsWithMetadata)
    for (auto &Call : FuncEntry.second)
      Call.call()->setMetadata(LLVMContext::MD_callsite, nullptr);
}

/// Build the callsite context graph from a ThinLTO combined summary index,
/// using the AllocInfo / CallsiteInfo records recorded on each prevailing
/// FunctionSummary (the index-side analogue of the IR constructor above).
IndexCallsiteContextGraph::IndexCallsiteContextGraph(
    ModuleSummaryIndex &Index,
    function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
        isPrevailing)
    : Index(Index) {
  for (auto &I : Index) {
    auto VI = Index.getValueInfo(I);
    for (auto &S : VI.getSummaryList()) {
      // We should only add the prevailing nodes. Otherwise we may try to clone
      // in a weak copy that won't be linked (and may be different than the
      // prevailing version).
      // We only keep the memprof summary on the prevailing copy now when
      // building the combined index, as a space optimization, however don't
      // rely on this optimization. The linker doesn't resolve local linkage
      // values so don't check whether those are prevailing.
      if (!GlobalValue::isLocalLinkage(S->linkage()) &&
          !isPrevailing(VI.getGUID(), S.get()))
        continue;
      // Only function summaries carry alloc/callsite records.
      auto *FS = dyn_cast<FunctionSummary>(S.get());
      if (!FS)
        continue;
      std::vector<CallInfo> CallsWithMetadata;
      if (!FS->allocs().empty()) {
        for (auto &AN : FS->mutableAllocs()) {
          // This can happen because of recursion elimination handling that
          // currently exists in ModuleSummaryAnalysis. Skip these for now.
          // We still added them to the summary because we need to be able to
          // correlate properly in applyImport in the backends.
          if (AN.MIBs.empty())
            continue;
          CallsWithMetadata.push_back({&AN});
          auto *AllocNode = addAllocNode({&AN}, FS);
          // Pass an empty CallStack to the CallsiteContext (second)
          // parameter, since for ThinLTO we already collapsed out the inlined
          // stack ids on the allocation call during ModuleSummaryAnalysis.
          CallStack<MIBInfo, SmallVector<unsigned>::const_iterator>
              EmptyContext;
          // Now add all of the MIBs and their stack nodes.
          for (auto &MIB : AN.MIBs) {
            CallStack<MIBInfo, SmallVector<unsigned>::const_iterator>
                StackContext(&MIB);
            addStackNodesForMIB<MIBInfo, SmallVector<unsigned>::const_iterator>(
                AllocNode, StackContext, EmptyContext, MIB.AllocType);
          }
          // Every alloc node must have picked up at least one alloc type.
          assert(AllocNode->AllocTypes != (uint8_t)AllocationType::None);
          // Initialize version 0 on the summary alloc node to the current alloc
          // type, unless it has both types in which case make it default, so
          // that in the case where we aren't able to clone the original version
          // always ends up with the default allocation behavior.
          AN.Versions[0] = (uint8_t)allocTypeToUse(AllocNode->AllocTypes);
        }
      }
      // For callsite metadata, add to list for this function for later use.
      if (!FS->callsites().empty())
        for (auto &SN : FS->mutableCallsites())
          CallsWithMetadata.push_back({&SN});

      if (!CallsWithMetadata.empty())
        FuncToCallsWithMetadata.push_back({FS, CallsWithMetadata});

      // Remember the summary's ValueInfo so callee matching can compare
      // against it later (see calleeMatchesFunc).
      if (!FS->allocs().empty() || !FS->callsites().empty())
        FSToVIMap[FS] = VI;
    }
  }

  if (DumpCCG) {
    dbgs() << "CCG before updating call stack chains:\n";
    dbgs() << *this;
  }

  if (ExportToDot)
    exportToDot("prestackupdate");

  updateStackNodes();

  handleCallsitesWithMultipleTargets();
}

/// Detach callsite nodes whose recorded call does not actually target the
/// function of (all of) their callee nodes, so they are skipped by cloning.
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy,
                          CallTy>::handleCallsitesWithMultipleTargets() {
  // Look for and workaround callsites that call multiple functions.
  // This can happen for indirect calls, which needs better handling, and in
  // more rare cases (e.g. macro expansion).
  // TODO: To fix this for indirect calls we will want to perform speculative
  // devirtualization using either the normal PGO info with ICP, or using the
  // information in the profiled MemProf contexts. We can do this prior to
  // this transformation for regular LTO, and for ThinLTO we can simulate that
  // effect in the summary and perform the actual speculative devirtualization
  // while cloning in the ThinLTO backend.
  for (auto Entry = NonAllocationCallToContextNodeMap.begin();
       Entry != NonAllocationCallToContextNodeMap.end();) {
    auto *Node = Entry->second;
    // Runs before any cloning, so no clones should exist yet.
    assert(Node->Clones.empty());
    // Check all node callees and see if in the same function.
    bool Removed = false;
    auto Call = Node->Call.call();
    for (auto &Edge : Node->CalleeEdges) {
      if (!Edge->Callee->hasCall())
        continue;
      assert(NodeToCallingFunc.count(Edge->Callee));
      // Check if the called function matches that of the callee node.
      if (calleeMatchesFunc(Call, NodeToCallingFunc[Edge->Callee]))
        continue;
      // Work around by setting Node to have a null call, so it gets
      // skipped during cloning. Otherwise assignFunctions will assert
      // because its data structures are not designed to handle this case.
1624*06c3fb27SDimitry Andric Entry = NonAllocationCallToContextNodeMap.erase(Entry); 1625*06c3fb27SDimitry Andric Node->setCall(CallInfo()); 1626*06c3fb27SDimitry Andric Removed = true; 1627*06c3fb27SDimitry Andric break; 1628*06c3fb27SDimitry Andric } 1629*06c3fb27SDimitry Andric if (!Removed) 1630*06c3fb27SDimitry Andric Entry++; 1631*06c3fb27SDimitry Andric } 1632*06c3fb27SDimitry Andric } 1633*06c3fb27SDimitry Andric 1634*06c3fb27SDimitry Andric uint64_t ModuleCallsiteContextGraph::getStackId(uint64_t IdOrIndex) const { 1635*06c3fb27SDimitry Andric // In the Module (IR) case this is already the Id. 1636*06c3fb27SDimitry Andric return IdOrIndex; 1637*06c3fb27SDimitry Andric } 1638*06c3fb27SDimitry Andric 1639*06c3fb27SDimitry Andric uint64_t IndexCallsiteContextGraph::getStackId(uint64_t IdOrIndex) const { 1640*06c3fb27SDimitry Andric // In the Index case this is an index into the stack id list in the summary 1641*06c3fb27SDimitry Andric // index, convert it to an Id. 1642*06c3fb27SDimitry Andric return Index.getStackIdAtIndex(IdOrIndex); 1643*06c3fb27SDimitry Andric } 1644*06c3fb27SDimitry Andric 1645*06c3fb27SDimitry Andric bool ModuleCallsiteContextGraph::calleeMatchesFunc(Instruction *Call, 1646*06c3fb27SDimitry Andric const Function *Func) { 1647*06c3fb27SDimitry Andric auto *CB = dyn_cast<CallBase>(Call); 1648*06c3fb27SDimitry Andric if (!CB->getCalledOperand()) 1649*06c3fb27SDimitry Andric return false; 1650*06c3fb27SDimitry Andric auto *CalleeVal = CB->getCalledOperand()->stripPointerCasts(); 1651*06c3fb27SDimitry Andric auto *CalleeFunc = dyn_cast<Function>(CalleeVal); 1652*06c3fb27SDimitry Andric if (CalleeFunc == Func) 1653*06c3fb27SDimitry Andric return true; 1654*06c3fb27SDimitry Andric auto *Alias = dyn_cast<GlobalAlias>(CalleeVal); 1655*06c3fb27SDimitry Andric return Alias && Alias->getAliasee() == Func; 1656*06c3fb27SDimitry Andric } 1657*06c3fb27SDimitry Andric 1658*06c3fb27SDimitry Andric bool 
IndexCallsiteContextGraph::calleeMatchesFunc(IndexCall &Call,
                                             const FunctionSummary *Func) {
  // Summary-side callee matching: compare the callsite's recorded Callee
  // ValueInfo against the ValueInfo recorded for Func in FSToVIMap.
  ValueInfo Callee =
      dyn_cast_if_present<CallsiteInfo *>(Call.getBase())->Callee;
  // If there is no summary list then this is a call to an externally defined
  // symbol.
  AliasSummary *Alias =
      Callee.getSummaryList().empty()
          ? nullptr
          : dyn_cast<AliasSummary>(Callee.getSummaryList()[0].get());
  assert(FSToVIMap.count(Func));
  return Callee == FSToVIMap[Func] ||
         // If callee is an alias, check the aliasee, since only function
         // summary base objects will contain the stack node summaries and thus
         // get a context node.
         (Alias && Alias->getAliaseeVI() == FSToVIMap[Func]);
}

/// Render an AllocTypes bitmask for debug output ("None", "NotCold", "Cold",
/// or "NotColdCold" when both bits are set).
static std::string getAllocTypeString(uint8_t AllocTypes) {
  if (!AllocTypes)
    return "None";
  std::string Str;
  if (AllocTypes & (uint8_t)AllocationType::NotCold)
    Str += "NotCold";
  if (AllocTypes & (uint8_t)AllocationType::Cold)
    Str += "Cold";
  return Str;
}

template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::dump()
    const {
  print(dbgs());
  dbgs() << "\n";
}

/// Print this node: identity, call, alloc types, sorted context ids, both
/// edge lists, and any clone relationships.
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::print(
    raw_ostream &OS) const {
  OS << "Node " << this << "\n";
  OS << "\t";
  printCall(OS);
  if (Recursive)
    OS << " (recursive)";
  OS << "\n";
  OS << "\tAllocTypes: " << getAllocTypeString(AllocTypes) << "\n";
  OS << "\tContextIds:";
  // Sort for deterministic output (ContextIds is an unordered DenseSet).
  std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
  std::sort(SortedIds.begin(), SortedIds.end());
  for (auto Id :
SortedIds)
    OS << " " << Id;
  OS << "\n";
  OS << "\tCalleeEdges:\n";
  for (auto &Edge : CalleeEdges)
    OS << "\t\t" << *Edge << "\n";
  OS << "\tCallerEdges:\n";
  for (auto &Edge : CallerEdges)
    OS << "\t\t" << *Edge << "\n";
  if (!Clones.empty()) {
    OS << "\tClones: ";
    FieldSeparator FS;
    for (auto *Clone : Clones)
      OS << FS << Clone;
    OS << "\n";
  } else if (CloneOf) {
    OS << "\tClone of " << CloneOf << "\n";
  }
}

template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::dump()
    const {
  print(dbgs());
  dbgs() << "\n";
}

/// Print one edge: its endpoints, alloc types, and sorted context ids.
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::print(
    raw_ostream &OS) const {
  OS << "Edge from Callee " << Callee << " to Caller: " << Caller
     << " AllocTypes: " << getAllocTypeString(AllocTypes);
  OS << " ContextIds:";
  // Sort for deterministic output.
  std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
  std::sort(SortedIds.begin(), SortedIds.end());
  for (auto Id : SortedIds)
    OS << " " << Id;
}

template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::dump() const {
  print(dbgs());
}

/// Print the whole graph, skipping nodes that were removed (disconnected).
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::print(
    raw_ostream &OS) const {
  OS << "Callsite Context Graph:\n";
  using GraphType = const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
  for (const auto Node : nodes<GraphType>(this)) {
    if (Node->isRemoved())
      continue;
    Node->print(OS);
    OS << "\n";
  }
}

/// Sanity-check a single edge (debug builds only — callers use asserts).
template <typename DerivedCCG, typename FuncTy, typename CallTy>
static void checkEdge(
    const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &Edge) {
  // Confirm that alloc type is not None and that we have at least one context
  // id.
  assert(Edge->AllocTypes != (uint8_t)AllocationType::None);
  assert(!Edge->ContextIds.empty());
}

/// Verify a node's invariants: its context id set must relate correctly to
/// the union of its caller-edge and callee-edge context ids. Optionally also
/// checks each edge via checkEdge.
template <typename DerivedCCG, typename FuncTy, typename CallTy>
static void checkNode(const ContextNode<DerivedCCG, FuncTy, CallTy> *Node,
                      bool CheckEdges = true) {
  if (Node->isRemoved())
    return;
  // Node's context ids should be the union of both its callee and caller edge
  // context ids.
  if (Node->CallerEdges.size()) {
    auto EI = Node->CallerEdges.begin();
    auto &FirstEdge = *EI;
    EI++;
    // Seed the union with the first edge's ids, then fold in the rest.
    // NOTE(review): the first edge is never passed to checkEdge here (the
    // loop starts after it) — presumably intentional, but worth confirming.
    DenseSet<uint32_t> CallerEdgeContextIds(FirstEdge->ContextIds);
    for (; EI != Node->CallerEdges.end(); EI++) {
      const auto &Edge = *EI;
      if (CheckEdges)
        checkEdge<DerivedCCG, FuncTy, CallTy>(Edge);
      set_union(CallerEdgeContextIds, Edge->ContextIds);
    }
    // Node can have more context ids than callers if some contexts terminate at
    // node and some are longer.
    assert(Node->ContextIds == CallerEdgeContextIds ||
           set_is_subset(CallerEdgeContextIds, Node->ContextIds));
  }
  if (Node->CalleeEdges.size()) {
    auto EI = Node->CalleeEdges.begin();
    auto &FirstEdge = *EI;
    EI++;
    DenseSet<uint32_t> CalleeEdgeContextIds(FirstEdge->ContextIds);
    for (; EI != Node->CalleeEdges.end(); EI++) {
      const auto &Edge = *EI;
      if (CheckEdges)
        checkEdge<DerivedCCG, FuncTy, CallTy>(Edge);
      set_union(CalleeEdgeContextIds, Edge->ContextIds);
    }
    // Callee edges must exactly account for the node's context ids.
    assert(Node->ContextIds == CalleeEdgeContextIds);
  }
}

/// Verify the whole graph (every node, plus each node's caller edges).
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::check() const {
  using GraphType = const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
  for (const auto Node : nodes<GraphType>(this)) {
    // Edges are checked separately below, so skip them inside checkNode.
    checkNode<DerivedCCG, FuncTy, CallTy>(Node, /*CheckEdges=*/false);
    for (auto &Edge : Node->CallerEdges)
      checkEdge<DerivedCCG, FuncTy, CallTy>(Edge);
  }
}

// GraphTraits specialization so generic LLVM graph utilities (nodes(),
// WriteGraph, etc.) can traverse the context graph.
template <typename DerivedCCG, typename FuncTy, typename CallTy>
struct GraphTraits<const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *> {
  using GraphType = const
 CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
  using NodeRef = const ContextNode<DerivedCCG, FuncTy, CallTy> *;

  // Nodes are owned by the graph's NodeOwner list of unique_ptrs; expose
  // them as raw pointers via a mapped iterator.
  using NodePtrTy = std::unique_ptr<ContextNode<DerivedCCG, FuncTy, CallTy>>;
  static NodeRef getNode(const NodePtrTy &P) { return P.get(); }

  using nodes_iterator =
      mapped_iterator<typename std::vector<NodePtrTy>::const_iterator,
                      decltype(&getNode)>;

  static nodes_iterator nodes_begin(GraphType G) {
    return nodes_iterator(G->NodeOwner.begin(), &getNode);
  }

  static nodes_iterator nodes_end(GraphType G) {
    return nodes_iterator(G->NodeOwner.end(), &getNode);
  }

  static NodeRef getEntryNode(GraphType G) {
    return G->NodeOwner.begin()->get();
  }

  // Child traversal follows callee edges (callee = child).
  using EdgePtrTy = std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>;
  static const ContextNode<DerivedCCG, FuncTy, CallTy> *
  GetCallee(const EdgePtrTy &P) {
    return P->Callee;
  }

  using ChildIteratorType =
      mapped_iterator<typename std::vector<std::shared_ptr<ContextEdge<
                          DerivedCCG, FuncTy, CallTy>>>::const_iterator,
                      decltype(&GetCallee)>;

  static ChildIteratorType child_begin(NodeRef N) {
    return ChildIteratorType(N->CalleeEdges.begin(), &GetCallee);
  }

  static ChildIteratorType child_end(NodeRef N) {
    return ChildIteratorType(N->CalleeEdges.end(), &GetCallee);
  }
};

// DOTGraphTraits specialization controlling how the context graph is
// rendered by exportToDot / WriteGraph.
template <typename DerivedCCG, typename FuncTy, typename CallTy>
struct DOTGraphTraits<const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>
    : public DefaultDOTGraphTraits {
  DOTGraphTraits(bool IsSimple = false) : DefaultDOTGraphTraits(IsSimple) {}

  using GraphType = const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
  using GTraits = GraphTraits<GraphType>;
  using NodeRef = typename GTraits::NodeRef;
  using ChildIteratorType = typename GTraits::ChildIteratorType;

  // Node label: original stack/alloc id, then the call (or why it is null).
  static std::string getNodeLabel(NodeRef Node, GraphType G) {
    std::string LabelString =
        (Twine("OrigId: ") + (Node->IsAllocation ? "Alloc" : "") +
         Twine(Node->OrigStackOrAllocId))
            .str();
    LabelString += "\n";
    if (Node->hasCall()) {
      auto Func = G->NodeToCallingFunc.find(Node);
      assert(Func != G->NodeToCallingFunc.end());
      LabelString +=
          G->getLabel(Func->second, Node->Call.call(), Node->Call.cloneNo());
    } else {
      LabelString += "null call";
      if (Node->Recursive)
        LabelString += " (recursive)";
      else
        LabelString += " (external)";
    }
    return LabelString;
  }

  // Node attributes: tooltip with context ids, fill color by alloc type,
  // and dashed blue outline for clones.
  static std::string getNodeAttributes(NodeRef Node, GraphType) {
    std::string AttributeString = (Twine("tooltip=\"") + getNodeId(Node) + " " +
                                   getContextIds(Node->ContextIds) + "\"")
                                      .str();
    AttributeString +=
        (Twine(",fillcolor=\"") + getColor(Node->AllocTypes) + "\"").str();
    // NOTE(review): a style attribute is appended here AND again in both
    // branches below, yielding a duplicate "style" in the output; dot keeps
    // the last occurrence, so rendering is unaffected — confirm intended.
    AttributeString += ",style=\"filled\"";
    if (Node->CloneOf) {
      AttributeString += ",color=\"blue\"";
      AttributeString += ",style=\"filled,bold,dashed\"";
    } else
      AttributeString += ",style=\"filled\"";
    return AttributeString;
  }

  // Edge attributes: tooltip with context ids, color by alloc type.
  static std::string getEdgeAttributes(NodeRef, ChildIteratorType ChildIter,
                                       GraphType) {
    auto &Edge = *(ChildIter.getCurrent());
    return (Twine("tooltip=\"") + getContextIds(Edge->ContextIds) + "\"" +
            Twine(",fillcolor=\"") + getColor(Edge->AllocTypes) + "\"")
        .str();
  }

  // Since the NodeOwners list includes nodes that are no longer connected to
  // the graph, skip them here.
  static bool isNodeHidden(NodeRef Node, GraphType) {
    return Node->isRemoved();
  }

private:
  // Sorted context-id list for tooltips; summarized as a count when large.
  static std::string getContextIds(const DenseSet<uint32_t> &ContextIds) {
    std::string IdString = "ContextIds:";
    if (ContextIds.size() < 100) {
      std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
      std::sort(SortedIds.begin(), SortedIds.end());
      for (auto Id : SortedIds)
        IdString += (" " + Twine(Id)).str();
    } else {
      IdString += (" (" + Twine(ContextIds.size()) + " ids)").str();
    }
    return IdString;
  }

  // Map an alloc-type bitmask to a fill color.
  static std::string getColor(uint8_t AllocTypes) {
    if (AllocTypes == (uint8_t)AllocationType::NotCold)
      // Color "brown1" actually looks like a lighter red.
      return "brown1";
    if (AllocTypes == (uint8_t)AllocationType::Cold)
      return "cyan";
    if (AllocTypes ==
        ((uint8_t)AllocationType::NotCold | (uint8_t)AllocationType::Cold))
      // Lighter purple.
      return "mediumorchid1";
    // Unknown / None bitmask.
    return "gray";
  }

  // Stable textual id for a node, derived from its address.
  static std::string getNodeId(NodeRef Node) {
    std::stringstream SStream;
    SStream << std::hex << "N0x" << (unsigned long long)Node;
    std::string Result = SStream.str();
    return Result;
  }
};

/// Write the graph to <DotFilePathPrefix>ccg.<Label>.dot.
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::exportToDot(
    std::string Label) const {
  WriteGraph(this, "", false, Label,
             DotFilePathPrefix + "ccg." + Label + ".dot");
}

/// Create a brand-new clone of Edge's callee node and move Edge onto it.
/// Returns the clone. \p CallerEdgeI, if non-null, is the caller's iterator
/// into the old callee's CallerEdges and is updated by the move.
template <typename DerivedCCG, typename FuncTy, typename CallTy>
typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::moveEdgeToNewCalleeClone(
    const std::shared_ptr<ContextEdge> &Edge, EdgeIter *CallerEdgeI) {
  ContextNode *Node = Edge->Callee;
  // The graph owns all nodes; register the clone there and link it to the
  // original via addClone.
  NodeOwner.push_back(
      std::make_unique<ContextNode>(Node->IsAllocation, Node->Call));
  ContextNode *Clone = NodeOwner.back().get();
  Node->addClone(Clone);
  assert(NodeToCallingFunc.count(Node));
  // The clone belongs to the same function as the original.
  NodeToCallingFunc[Clone] = NodeToCallingFunc[Node];
  moveEdgeToExistingCalleeClone(Edge, Clone, CallerEdgeI, /*NewClone=*/true);
  return Clone;
}

/// Move \p Edge from its current callee onto \p NewCallee (a clone of the
/// same original node), transferring the edge's context ids and alloc types
/// and fixing up the callee edges of both old and new callee.
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
    moveEdgeToExistingCalleeClone(const std::shared_ptr<ContextEdge> &Edge,
                                  ContextNode *NewCallee, EdgeIter *CallerEdgeI,
                                  bool NewClone) {
  // NewCallee and Edge's current callee must be clones of the same original
  // node (Edge's current callee may be the original node too).
  assert(NewCallee->getOrigNode() == Edge->Callee->getOrigNode());
  auto &EdgeContextIds = Edge->getContextIds();
  ContextNode *OldCallee = Edge->Callee;
  // Detach Edge from the old callee's caller list; use the iterator form
  // when the caller is mid-iteration so its iterator stays valid.
  if (CallerEdgeI)
    *CallerEdgeI = OldCallee->CallerEdges.erase(*CallerEdgeI);
  else
    OldCallee->eraseCallerEdge(Edge.get());
  Edge->Callee = NewCallee;
  NewCallee->CallerEdges.push_back(Edge);
  // Don't need to update Edge's context ids since we are simply reconnecting
  // it.
  // Transfer the edge's context ids and alloc types from old to new callee.
  set_subtract(OldCallee->ContextIds, EdgeContextIds);
  NewCallee->ContextIds.insert(EdgeContextIds.begin(), EdgeContextIds.end());
  NewCallee->AllocTypes |= Edge->AllocTypes;
  OldCallee->AllocTypes = computeAllocType(OldCallee->ContextIds);
  // OldCallee alloc type should be None iff its context id set is now empty.
  assert((OldCallee->AllocTypes == (uint8_t)AllocationType::None) ==
         OldCallee->ContextIds.empty());
  // Now walk the old callee node's callee edges and move Edge's context ids
  // over to the corresponding edge into the clone (which is created here if
  // this is a newly created clone).
  for (auto &OldCalleeEdge : OldCallee->CalleeEdges) {
    // The context ids moving to the new callee are the subset of this edge's
    // context ids and the context ids on the caller edge being moved.
    DenseSet<uint32_t> EdgeContextIdsToMove =
        set_intersection(OldCalleeEdge->getContextIds(), EdgeContextIds);
    set_subtract(OldCalleeEdge->getContextIds(), EdgeContextIdsToMove);
    OldCalleeEdge->AllocTypes =
        computeAllocType(OldCalleeEdge->getContextIds());
    if (!NewClone) {
      // Update context ids / alloc type on corresponding edge to NewCallee.
      // There is a chance this may not exist if we are reusing an existing
      // clone, specifically during function assignment, where we would have
      // removed none type edges after creating the clone. If we can't find
      // a corresponding edge there, fall through to the cloning below.
      if (auto *NewCalleeEdge =
              NewCallee->findEdgeFromCallee(OldCalleeEdge->Callee)) {
        NewCalleeEdge->getContextIds().insert(EdgeContextIdsToMove.begin(),
                                              EdgeContextIdsToMove.end());
        NewCalleeEdge->AllocTypes |= computeAllocType(EdgeContextIdsToMove);
        continue;
      }
    }
    // No existing edge on the clone: create a fresh one carrying the moved
    // ids, and register it on both endpoints.
    auto NewEdge = std::make_shared<ContextEdge>(
        OldCalleeEdge->Callee, NewCallee,
        computeAllocType(EdgeContextIdsToMove), EdgeContextIdsToMove);
    NewCallee->CalleeEdges.push_back(NewEdge);
    NewEdge->Callee->CallerEdges.push_back(NewEdge);
  }
  if (VerifyCCG) {
    // Edges may legitimately have empty context ids at this point, so only
    // node invariants are checked (CheckEdges=false).
    checkNode<DerivedCCG, FuncTy, CallTy>(OldCallee, /*CheckEdges=*/false);
    checkNode<DerivedCCG, FuncTy, CallTy>(NewCallee, /*CheckEdges=*/false);
    for (const auto &OldCalleeEdge : OldCallee->CalleeEdges)
      checkNode<DerivedCCG, FuncTy, CallTy>(OldCalleeEdge->Callee,
                                            /*CheckEdges=*/false);
    for (const auto &NewCalleeEdge : NewCallee->CalleeEdges)
      checkNode<DerivedCCG, FuncTy, CallTy>(NewCalleeEdge->Callee,
                                            /*CheckEdges=*/false);
  }
}

/// Entry point for clone identification: walk every allocation node and
/// recurse (overload taking a node) to find where cloning is needed.
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones() {
  DenseSet<const ContextNode *> Visited;
  for (auto &Entry : AllocationCallToContextNodeMap)
    identifyClones(Entry.second, Visited);
}

// helper function to check an AllocType is cold or notcold or both.
bool checkColdOrNotCold(uint8_t AllocType) {
  return (AllocType == (uint8_t)AllocationType::Cold) ||
         (AllocType == (uint8_t)AllocationType::NotCold) ||
         (AllocType ==
          ((uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold));
}

// Recursively identify and create clones of Node (bottom-up from allocation
// nodes through their callers) so that each caller edge can eventually be
// assigned a single allocation type. Visited guards against reprocessing a
// node reachable through multiple edges.
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones(
    ContextNode *Node, DenseSet<const ContextNode *> &Visited) {
  if (VerifyNodes)
    checkNode<DerivedCCG, FuncTy, CallTy>(Node);
  // Cloning is only initiated on original nodes, never on clones.
  assert(!Node->CloneOf);

  // If Node has a null call, then either it wasn't found in the module (regular
  // LTO) or summary index (ThinLTO), or there were other conditions blocking
  // cloning (e.g. recursion, calls multiple targets, etc).
  // Do this here so that we don't try to recursively clone callers below, which
  // isn't useful at least for this node.
  if (!Node->hasCall())
    return;

#ifndef NDEBUG
  // Capture the insert result only in debug builds, for the assert below.
  auto Insert =
#endif
      Visited.insert(Node);
  // We should not have visited this node yet.
  assert(Insert.second);
  // The recursive call to identifyClones may delete the current edge from the
  // CallerEdges vector. Make a copy and iterate on that, simpler than passing
  // in an iterator and having recursive call erase from it. Other edges may
  // also get removed during the recursion, which will have null Callee and
  // Caller pointers (and are deleted later), so we skip those below.
  {
    auto CallerEdges = Node->CallerEdges;
    for (auto &Edge : CallerEdges) {
      // Skip any that have been removed by an earlier recursive call.
      if (Edge->Callee == nullptr && Edge->Caller == nullptr) {
        // A removed edge must no longer be present in the live vector.
        assert(!std::count(Node->CallerEdges.begin(), Node->CallerEdges.end(),
                           Edge));
        continue;
      }
      // Ignore any caller we previously visited via another edge.
      if (!Visited.count(Edge->Caller) && !Edge->Caller->CloneOf) {
        identifyClones(Edge->Caller, Visited);
      }
    }
  }

  // Check if we reached an unambiguous call or have only a single caller.
  if (hasSingleAllocType(Node->AllocTypes) || Node->CallerEdges.size() <= 1)
    return;

  // We need to clone.

  // Try to keep the original version as alloc type NotCold. This will make
  // cases with indirect calls or any other situation with an unknown call to
  // the original function get the default behavior. We do this by sorting the
  // CallerEdges of the Node we will clone by alloc type.
  //
  // Give NotCold edge the lowest sort priority so those edges are at the end of
  // the caller edges vector, and stay on the original version (since the below
  // code clones greedily until it finds all remaining edges have the same type
  // and leaves the remaining ones on the original Node).
  //
  // We shouldn't actually have any None type edges, so the sorting priority for
  // that is arbitrary, and we assert in that case below.
  const unsigned AllocTypeCloningPriority[] = {/*None*/ 3, /*NotCold*/ 4,
                                               /*Cold*/ 1,
                                               /*NotColdCold*/ 2};
  std::stable_sort(Node->CallerEdges.begin(), Node->CallerEdges.end(),
                   [&](const std::shared_ptr<ContextEdge> &A,
                       const std::shared_ptr<ContextEdge> &B) {
                     assert(checkColdOrNotCold(A->AllocTypes) &&
                            checkColdOrNotCold(B->AllocTypes));

                     if (A->AllocTypes == B->AllocTypes)
                       // Use the first context id for each edge as a
                       // tie-breaker.
                       return *A->ContextIds.begin() < *B->ContextIds.begin();
                     return AllocTypeCloningPriority[A->AllocTypes] <
                            AllocTypeCloningPriority[B->AllocTypes];
                   });

  assert(Node->AllocTypes != (uint8_t)AllocationType::None);

  // Iterate until we find no more opportunities for disambiguating the alloc
  // types via cloning. In most cases this loop will terminate once the Node
  // has a single allocation type, in which case no more cloning is needed.
  // We need to be able to remove Edge from CallerEdges, so need to adjust
  // iterator inside the loop.
  for (auto EI = Node->CallerEdges.begin(); EI != Node->CallerEdges.end();) {
    auto CallerEdge = *EI;

    // See if cloning the prior caller edge left this node with a single alloc
    // type or a single caller. In that case no more cloning of Node is needed.
    if (hasSingleAllocType(Node->AllocTypes) || Node->CallerEdges.size() <= 1)
      break;

    // Compute the node callee edge alloc types corresponding to the context ids
    // for this caller edge.
    std::vector<uint8_t> CalleeEdgeAllocTypesForCallerEdge;
    CalleeEdgeAllocTypesForCallerEdge.reserve(Node->CalleeEdges.size());
    for (auto &CalleeEdge : Node->CalleeEdges)
      CalleeEdgeAllocTypesForCallerEdge.push_back(intersectAllocTypes(
          CalleeEdge->getContextIds(), CallerEdge->getContextIds()));

    // Don't clone if doing so will not disambiguate any alloc types amongst
    // caller edges (including the callee edges that would be cloned).
    // Otherwise we will simply move all edges to the clone.
    //
    // First check if by cloning we will disambiguate the caller allocation
    // type from node's allocation type. Query allocTypeToUse so that we don't
    // bother cloning to distinguish NotCold+Cold from NotCold. Note that
    // neither of these should be None type.
    //
    // Then check if by cloning node at least one of the callee edges will be
    // disambiguated by splitting out different context ids.
    assert(CallerEdge->AllocTypes != (uint8_t)AllocationType::None);
    assert(Node->AllocTypes != (uint8_t)AllocationType::None);
    if (allocTypeToUse(CallerEdge->AllocTypes) ==
            allocTypeToUse(Node->AllocTypes) &&
        allocTypesMatch<DerivedCCG, FuncTy, CallTy>(
            CalleeEdgeAllocTypesForCallerEdge, Node->CalleeEdges)) {
      ++EI;
      continue;
    }

    // First see if we can use an existing clone. Check each clone and its
    // callee edges for matching alloc types.
    ContextNode *Clone = nullptr;
    for (auto *CurClone : Node->Clones) {
      if (allocTypeToUse(CurClone->AllocTypes) !=
          allocTypeToUse(CallerEdge->AllocTypes))
        continue;

      if (!allocTypesMatch<DerivedCCG, FuncTy, CallTy>(
              CalleeEdgeAllocTypesForCallerEdge, CurClone->CalleeEdges))
        continue;
      Clone = CurClone;
      break;
    }

    // The edge iterator is adjusted when we move the CallerEdge to the clone.
    if (Clone)
      moveEdgeToExistingCalleeClone(CallerEdge, Clone, &EI);
    else
      Clone = moveEdgeToNewCalleeClone(CallerEdge, &EI);

    assert(EI == Node->CallerEdges.end() ||
           Node->AllocTypes != (uint8_t)AllocationType::None);
    // Sanity check that no alloc types on clone or its edges are None.
    assert(Clone->AllocTypes != (uint8_t)AllocationType::None);
    assert(llvm::none_of(
        Clone->CallerEdges, [&](const std::shared_ptr<ContextEdge> &E) {
          return E->AllocTypes == (uint8_t)AllocationType::None;
        }));
  }

  // Cloning may have resulted in some cloned callee edges with type None,
  // because they aren't carrying any contexts. Remove those edges.
  for (auto *Clone : Node->Clones) {
    removeNoneTypeCalleeEdges(Clone);
    if (VerifyNodes)
      checkNode<DerivedCCG, FuncTy, CallTy>(Clone);
  }
  // We should still have some context ids on the original Node.
  assert(!Node->ContextIds.empty());

  // Remove any callee edges that ended up with alloc type None after creating
  // clones and updating callee edges.
  removeNoneTypeCalleeEdges(Node);

  // Sanity check that no alloc types on node or edges are None.
  assert(Node->AllocTypes != (uint8_t)AllocationType::None);
  assert(llvm::none_of(Node->CalleeEdges,
                       [&](const std::shared_ptr<ContextEdge> &E) {
                         return E->AllocTypes == (uint8_t)AllocationType::None;
                       }));
  assert(llvm::none_of(Node->CallerEdges,
                       [&](const std::shared_ptr<ContextEdge> &E) {
                         return E->AllocTypes == (uint8_t)AllocationType::None;
                       }));

  if (VerifyNodes)
    checkNode<DerivedCCG, FuncTy, CallTy>(Node);
}

// Regular LTO: record the decided allocation type directly on the IR call by
// attaching a "memprof" string function attribute, and emit an optimization
// remark describing the annotation.
void ModuleCallsiteContextGraph::updateAllocationCall(
    CallInfo &Call, AllocationType AllocType) {
  std::string AllocTypeString = getAllocTypeAttributeString(AllocType);
  auto A = llvm::Attribute::get(Call.call()->getFunction()->getContext(),
                                "memprof", AllocTypeString);
  cast<CallBase>(Call.call())->addFnAttr(A);
  OREGetter(Call.call()->getFunction())
      .emit(OptimizationRemark(DEBUG_TYPE, "MemprofAttribute", Call.call())
            << ore::NV("AllocationCall", Call.call()) << " in clone "
            << ore::NV("Caller", Call.call()->getFunction())
            << " marked with memprof allocation attribute "
            << ore::NV("Attribute", AllocTypeString));
}

// ThinLTO: record the decided allocation type for this clone in the summary
// index AllocInfo Versions array (applied to IR later in the backend).
void IndexCallsiteContextGraph::updateAllocationCall(CallInfo &Call,
                                                     AllocationType AllocType)
{
  // The Call must wrap a summary AllocInfo (not a CallsiteInfo).
  auto *AI = Call.call().dyn_cast<AllocInfo *>();
  assert(AI);
  // An entry for this clone number must already have been reserved (in
  // cloneFunctionForCallsite).
  assert(AI->Versions.size() > Call.cloneNo());
  AI->Versions[Call.cloneNo()] = (uint8_t)AllocType;
}

// Regular LTO: redirect CallerCall to the assigned function clone on the IR
// (clone 0 is the original function, so no redirection needed), and emit an
// optimization remark describing the assignment.
void ModuleCallsiteContextGraph::updateCall(CallInfo &CallerCall,
                                            FuncInfo CalleeFunc) {
  if (CalleeFunc.cloneNo() > 0)
    cast<CallBase>(CallerCall.call())->setCalledFunction(CalleeFunc.func());
  OREGetter(CallerCall.call()->getFunction())
      .emit(OptimizationRemark(DEBUG_TYPE, "MemprofCall", CallerCall.call())
            << ore::NV("Call", CallerCall.call()) << " in clone "
            << ore::NV("Caller", CallerCall.call()->getFunction())
            << " assigned to call function clone "
            << ore::NV("Callee", CalleeFunc.func()));
}

// ThinLTO: record in the summary index which callee function clone this
// callsite clone should call (applied to IR later in the backend).
void IndexCallsiteContextGraph::updateCall(CallInfo &CallerCall,
                                           FuncInfo CalleeFunc) {
  auto *CI = CallerCall.call().dyn_cast<CallsiteInfo *>();
  assert(CI &&
         "Caller cannot be an allocation which should not have profiled calls");
  assert(CI->Clones.size() > CallerCall.cloneNo());
  CI->Clones[CallerCall.cloneNo()] = CalleeFunc.cloneNo();
}

// Regular LTO: materialize function clone number CloneNo of Func as real IR,
// returning the new FuncInfo. CallMap is populated to map each original
// call-with-metadata in Func to its counterpart in the clone.
CallsiteContextGraph<ModuleCallsiteContextGraph, Function,
                     Instruction *>::FuncInfo
ModuleCallsiteContextGraph::cloneFunctionForCallsite(
    FuncInfo &Func, CallInfo &Call, std::map<CallInfo, CallInfo> &CallMap,
    std::vector<CallInfo> &CallsWithMetadataInFunc, unsigned CloneNo) {
  // Use existing LLVM facilities for cloning and obtaining Call in clone.
  ValueToValueMapTy VMap;
  auto *NewFunc = CloneFunction(Func.func(), VMap);
  // The generated clone name must not collide with an existing function.
  std::string Name = getMemProfFuncName(Func.func()->getName(), CloneNo);
  assert(!Func.func()->getParent()->getFunction(Name));
  NewFunc->setName(Name);
  for (auto &Inst : CallsWithMetadataInFunc) {
    // This map always has the initial version in it.
    assert(Inst.cloneNo() == 0);
    // VMap translates the original instruction to its copy in the clone.
    CallMap[Inst] = {cast<Instruction>(VMap[Inst.call()]), CloneNo};
  }
  OREGetter(Func.func())
      .emit(OptimizationRemark(DEBUG_TYPE, "MemprofClone", Func.func())
            << "created clone " << ore::NV("NewFunction", NewFunc));
  return {NewFunc, CloneNo};
}

// ThinLTO: "clone" a function in the summary index by appending a new entry
// to every callsite's Versions/Clones array (no IR exists at this stage).
// CallMap maps each original call to the same summary record tagged with the
// new clone number.
CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
                     IndexCall>::FuncInfo
IndexCallsiteContextGraph::cloneFunctionForCallsite(
    FuncInfo &Func, CallInfo &Call, std::map<CallInfo, CallInfo> &CallMap,
    std::vector<CallInfo> &CallsWithMetadataInFunc, unsigned CloneNo) {
  // Check how many clones we have of Call (and therefore function).
  // The next clone number is the current size of versions array.
  // Confirm this matches the CloneNo provided by the caller, which is based on
  // the number of function clones we have.
  assert(CloneNo ==
         (Call.call().is<AllocInfo *>()
              ? Call.call().dyn_cast<AllocInfo *>()->Versions.size()
              : Call.call().dyn_cast<CallsiteInfo *>()->Clones.size()));
  // Walk all the instructions in this function. Create a new version for
  // each (by adding an entry to the Versions/Clones summary array), and copy
  // over the version being called for the function clone being cloned here.
  // Additionally, add an entry to the CallMap for the new function clone,
  // mapping the original call (clone 0, what is in CallsWithMetadataInFunc)
  // to the new call clone.
  for (auto &Inst : CallsWithMetadataInFunc) {
    // This map always has the initial version in it.
    assert(Inst.cloneNo() == 0);
    if (auto *AI = Inst.call().dyn_cast<AllocInfo *>()) {
      assert(AI->Versions.size() == CloneNo);
      // We assign the allocation type later (in updateAllocationCall), just add
      // an entry for it here.
      AI->Versions.push_back(0);
    } else {
      auto *CI = Inst.call().dyn_cast<CallsiteInfo *>();
      assert(CI && CI->Clones.size() == CloneNo);
      // We assign the clone number later (in updateCall), just add an entry for
      // it here.
      CI->Clones.push_back(0);
    }
    CallMap[Inst] = {Inst.call(), CloneNo};
  }
  return {Func.func(), CloneNo};
}

// This method assigns cloned callsites to functions, cloning the functions as
// needed. The assignment is greedy and proceeds roughly as follows:
//
// For each function Func:
//   For each call with graph Node having clones:
//     Initialize ClonesWorklist to Node and its clones
//     Initialize NodeCloneCount to 0
//     While ClonesWorklist is not empty:
//        Clone = pop front ClonesWorklist
//        NodeCloneCount++
//        If Func has been cloned less than NodeCloneCount times:
//           If NodeCloneCount is 1:
//             Assign Clone to original Func
//             Continue
//           Create a new function clone
//           If other callers not assigned to call a function clone yet:
//              Assign them to call new function clone
//              Continue
//           Assign any other caller calling the cloned version to new clone
2369*06c3fb27SDimitry Andric // 2370*06c3fb27SDimitry Andric // For each caller of Clone: 2371*06c3fb27SDimitry Andric // If caller is assigned to call a specific function clone: 2372*06c3fb27SDimitry Andric // If we cannot assign Clone to that function clone: 2373*06c3fb27SDimitry Andric // Create new callsite Clone NewClone 2374*06c3fb27SDimitry Andric // Add NewClone to ClonesWorklist 2375*06c3fb27SDimitry Andric // Continue 2376*06c3fb27SDimitry Andric // Assign Clone to existing caller's called function clone 2377*06c3fb27SDimitry Andric // Else: 2378*06c3fb27SDimitry Andric // If Clone not already assigned to a function clone: 2379*06c3fb27SDimitry Andric // Assign to first function clone without assignment 2380*06c3fb27SDimitry Andric // Assign caller to selected function clone 2381*06c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 2382*06c3fb27SDimitry Andric bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() { 2383*06c3fb27SDimitry Andric bool Changed = false; 2384*06c3fb27SDimitry Andric 2385*06c3fb27SDimitry Andric // Keep track of the assignment of nodes (callsites) to function clones they 2386*06c3fb27SDimitry Andric // call. 2387*06c3fb27SDimitry Andric DenseMap<ContextNode *, FuncInfo> CallsiteToCalleeFuncCloneMap; 2388*06c3fb27SDimitry Andric 2389*06c3fb27SDimitry Andric // Update caller node to call function version CalleeFunc, by recording the 2390*06c3fb27SDimitry Andric // assignment in CallsiteToCalleeFuncCloneMap. 
2391*06c3fb27SDimitry Andric auto RecordCalleeFuncOfCallsite = [&](ContextNode *Caller, 2392*06c3fb27SDimitry Andric const FuncInfo &CalleeFunc) { 2393*06c3fb27SDimitry Andric assert(Caller->hasCall()); 2394*06c3fb27SDimitry Andric CallsiteToCalleeFuncCloneMap[Caller] = CalleeFunc; 2395*06c3fb27SDimitry Andric }; 2396*06c3fb27SDimitry Andric 2397*06c3fb27SDimitry Andric // Walk all functions for which we saw calls with memprof metadata, and handle 2398*06c3fb27SDimitry Andric // cloning for each of its calls. 2399*06c3fb27SDimitry Andric for (auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) { 2400*06c3fb27SDimitry Andric FuncInfo OrigFunc(Func); 2401*06c3fb27SDimitry Andric // Map from each clone of OrigFunc to a map of remappings of each call of 2402*06c3fb27SDimitry Andric // interest (from original uncloned call to the corresponding cloned call in 2403*06c3fb27SDimitry Andric // that function clone). 2404*06c3fb27SDimitry Andric std::map<FuncInfo, std::map<CallInfo, CallInfo>> FuncClonesToCallMap; 2405*06c3fb27SDimitry Andric for (auto &Call : CallsWithMetadata) { 2406*06c3fb27SDimitry Andric ContextNode *Node = getNodeForInst(Call); 2407*06c3fb27SDimitry Andric // Skip call if we do not have a node for it (all uses of its stack ids 2408*06c3fb27SDimitry Andric // were either on inlined chains or pruned from the MIBs), or if we did 2409*06c3fb27SDimitry Andric // not create any clones for it. 2410*06c3fb27SDimitry Andric if (!Node || Node->Clones.empty()) 2411*06c3fb27SDimitry Andric continue; 2412*06c3fb27SDimitry Andric assert(Node->hasCall() && 2413*06c3fb27SDimitry Andric "Not having a call should have prevented cloning"); 2414*06c3fb27SDimitry Andric 2415*06c3fb27SDimitry Andric // Track the assignment of function clones to clones of the current 2416*06c3fb27SDimitry Andric // callsite Node being handled. 
2417*06c3fb27SDimitry Andric std::map<FuncInfo, ContextNode *> FuncCloneToCurNodeCloneMap; 2418*06c3fb27SDimitry Andric 2419*06c3fb27SDimitry Andric // Assign callsite version CallsiteClone to function version FuncClone, 2420*06c3fb27SDimitry Andric // and also assign (possibly cloned) Call to CallsiteClone. 2421*06c3fb27SDimitry Andric auto AssignCallsiteCloneToFuncClone = [&](const FuncInfo &FuncClone, 2422*06c3fb27SDimitry Andric CallInfo &Call, 2423*06c3fb27SDimitry Andric ContextNode *CallsiteClone, 2424*06c3fb27SDimitry Andric bool IsAlloc) { 2425*06c3fb27SDimitry Andric // Record the clone of callsite node assigned to this function clone. 2426*06c3fb27SDimitry Andric FuncCloneToCurNodeCloneMap[FuncClone] = CallsiteClone; 2427*06c3fb27SDimitry Andric 2428*06c3fb27SDimitry Andric assert(FuncClonesToCallMap.count(FuncClone)); 2429*06c3fb27SDimitry Andric std::map<CallInfo, CallInfo> &CallMap = FuncClonesToCallMap[FuncClone]; 2430*06c3fb27SDimitry Andric CallInfo CallClone(Call); 2431*06c3fb27SDimitry Andric if (CallMap.count(Call)) 2432*06c3fb27SDimitry Andric CallClone = CallMap[Call]; 2433*06c3fb27SDimitry Andric CallsiteClone->setCall(CallClone); 2434*06c3fb27SDimitry Andric }; 2435*06c3fb27SDimitry Andric 2436*06c3fb27SDimitry Andric // Keep track of the clones of callsite Node that need to be assigned to 2437*06c3fb27SDimitry Andric // function clones. This list may be expanded in the loop body below if we 2438*06c3fb27SDimitry Andric // find additional cloning is required. 2439*06c3fb27SDimitry Andric std::deque<ContextNode *> ClonesWorklist; 2440*06c3fb27SDimitry Andric // Ignore original Node if we moved all of its contexts to clones. 
2441*06c3fb27SDimitry Andric if (!Node->ContextIds.empty()) 2442*06c3fb27SDimitry Andric ClonesWorklist.push_back(Node); 2443*06c3fb27SDimitry Andric ClonesWorklist.insert(ClonesWorklist.end(), Node->Clones.begin(), 2444*06c3fb27SDimitry Andric Node->Clones.end()); 2445*06c3fb27SDimitry Andric 2446*06c3fb27SDimitry Andric // Now walk through all of the clones of this callsite Node that we need, 2447*06c3fb27SDimitry Andric // and determine the assignment to a corresponding clone of the current 2448*06c3fb27SDimitry Andric // function (creating new function clones as needed). 2449*06c3fb27SDimitry Andric unsigned NodeCloneCount = 0; 2450*06c3fb27SDimitry Andric while (!ClonesWorklist.empty()) { 2451*06c3fb27SDimitry Andric ContextNode *Clone = ClonesWorklist.front(); 2452*06c3fb27SDimitry Andric ClonesWorklist.pop_front(); 2453*06c3fb27SDimitry Andric NodeCloneCount++; 2454*06c3fb27SDimitry Andric if (VerifyNodes) 2455*06c3fb27SDimitry Andric checkNode<DerivedCCG, FuncTy, CallTy>(Clone); 2456*06c3fb27SDimitry Andric 2457*06c3fb27SDimitry Andric // Need to create a new function clone if we have more callsite clones 2458*06c3fb27SDimitry Andric // than existing function clones, which would have been assigned to an 2459*06c3fb27SDimitry Andric // earlier clone in the list (we assign callsite clones to function 2460*06c3fb27SDimitry Andric // clones greedily). 2461*06c3fb27SDimitry Andric if (FuncClonesToCallMap.size() < NodeCloneCount) { 2462*06c3fb27SDimitry Andric // If this is the first callsite copy, assign to original function. 2463*06c3fb27SDimitry Andric if (NodeCloneCount == 1) { 2464*06c3fb27SDimitry Andric // Since FuncClonesToCallMap is empty in this case, no clones have 2465*06c3fb27SDimitry Andric // been created for this function yet, and no callers should have 2466*06c3fb27SDimitry Andric // been assigned a function clone for this callee node yet. 
2467*06c3fb27SDimitry Andric assert(llvm::none_of( 2468*06c3fb27SDimitry Andric Clone->CallerEdges, [&](const std::shared_ptr<ContextEdge> &E) { 2469*06c3fb27SDimitry Andric return CallsiteToCalleeFuncCloneMap.count(E->Caller); 2470*06c3fb27SDimitry Andric })); 2471*06c3fb27SDimitry Andric // Initialize with empty call map, assign Clone to original function 2472*06c3fb27SDimitry Andric // and its callers, and skip to the next clone. 2473*06c3fb27SDimitry Andric FuncClonesToCallMap[OrigFunc] = {}; 2474*06c3fb27SDimitry Andric AssignCallsiteCloneToFuncClone( 2475*06c3fb27SDimitry Andric OrigFunc, Call, Clone, 2476*06c3fb27SDimitry Andric AllocationCallToContextNodeMap.count(Call)); 2477*06c3fb27SDimitry Andric for (auto &CE : Clone->CallerEdges) { 2478*06c3fb27SDimitry Andric // Ignore any caller that does not have a recorded callsite Call. 2479*06c3fb27SDimitry Andric if (!CE->Caller->hasCall()) 2480*06c3fb27SDimitry Andric continue; 2481*06c3fb27SDimitry Andric RecordCalleeFuncOfCallsite(CE->Caller, OrigFunc); 2482*06c3fb27SDimitry Andric } 2483*06c3fb27SDimitry Andric continue; 2484*06c3fb27SDimitry Andric } 2485*06c3fb27SDimitry Andric 2486*06c3fb27SDimitry Andric // First locate which copy of OrigFunc to clone again. If a caller 2487*06c3fb27SDimitry Andric // of this callsite clone was already assigned to call a particular 2488*06c3fb27SDimitry Andric // function clone, we need to redirect all of those callers to the 2489*06c3fb27SDimitry Andric // new function clone, and update their other callees within this 2490*06c3fb27SDimitry Andric // function. 
2491*06c3fb27SDimitry Andric FuncInfo PreviousAssignedFuncClone; 2492*06c3fb27SDimitry Andric auto EI = llvm::find_if( 2493*06c3fb27SDimitry Andric Clone->CallerEdges, [&](const std::shared_ptr<ContextEdge> &E) { 2494*06c3fb27SDimitry Andric return CallsiteToCalleeFuncCloneMap.count(E->Caller); 2495*06c3fb27SDimitry Andric }); 2496*06c3fb27SDimitry Andric bool CallerAssignedToCloneOfFunc = false; 2497*06c3fb27SDimitry Andric if (EI != Clone->CallerEdges.end()) { 2498*06c3fb27SDimitry Andric const std::shared_ptr<ContextEdge> &Edge = *EI; 2499*06c3fb27SDimitry Andric PreviousAssignedFuncClone = 2500*06c3fb27SDimitry Andric CallsiteToCalleeFuncCloneMap[Edge->Caller]; 2501*06c3fb27SDimitry Andric CallerAssignedToCloneOfFunc = true; 2502*06c3fb27SDimitry Andric } 2503*06c3fb27SDimitry Andric 2504*06c3fb27SDimitry Andric // Clone function and save it along with the CallInfo map created 2505*06c3fb27SDimitry Andric // during cloning in the FuncClonesToCallMap. 2506*06c3fb27SDimitry Andric std::map<CallInfo, CallInfo> NewCallMap; 2507*06c3fb27SDimitry Andric unsigned CloneNo = FuncClonesToCallMap.size(); 2508*06c3fb27SDimitry Andric assert(CloneNo > 0 && "Clone 0 is the original function, which " 2509*06c3fb27SDimitry Andric "should already exist in the map"); 2510*06c3fb27SDimitry Andric FuncInfo NewFuncClone = cloneFunctionForCallsite( 2511*06c3fb27SDimitry Andric OrigFunc, Call, NewCallMap, CallsWithMetadata, CloneNo); 2512*06c3fb27SDimitry Andric FuncClonesToCallMap.emplace(NewFuncClone, std::move(NewCallMap)); 2513*06c3fb27SDimitry Andric FunctionClonesAnalysis++; 2514*06c3fb27SDimitry Andric Changed = true; 2515*06c3fb27SDimitry Andric 2516*06c3fb27SDimitry Andric // If no caller callsites were already assigned to a clone of this 2517*06c3fb27SDimitry Andric // function, we can simply assign this clone to the new func clone 2518*06c3fb27SDimitry Andric // and update all callers to it, then skip to the next clone. 
2519*06c3fb27SDimitry Andric if (!CallerAssignedToCloneOfFunc) { 2520*06c3fb27SDimitry Andric AssignCallsiteCloneToFuncClone( 2521*06c3fb27SDimitry Andric NewFuncClone, Call, Clone, 2522*06c3fb27SDimitry Andric AllocationCallToContextNodeMap.count(Call)); 2523*06c3fb27SDimitry Andric for (auto &CE : Clone->CallerEdges) { 2524*06c3fb27SDimitry Andric // Ignore any caller that does not have a recorded callsite Call. 2525*06c3fb27SDimitry Andric if (!CE->Caller->hasCall()) 2526*06c3fb27SDimitry Andric continue; 2527*06c3fb27SDimitry Andric RecordCalleeFuncOfCallsite(CE->Caller, NewFuncClone); 2528*06c3fb27SDimitry Andric } 2529*06c3fb27SDimitry Andric continue; 2530*06c3fb27SDimitry Andric } 2531*06c3fb27SDimitry Andric 2532*06c3fb27SDimitry Andric // We may need to do additional node cloning in this case. 2533*06c3fb27SDimitry Andric // Reset the CallsiteToCalleeFuncCloneMap entry for any callers 2534*06c3fb27SDimitry Andric // that were previously assigned to call PreviousAssignedFuncClone, 2535*06c3fb27SDimitry Andric // to record that they now call NewFuncClone. 2536*06c3fb27SDimitry Andric for (auto CE : Clone->CallerEdges) { 2537*06c3fb27SDimitry Andric // Ignore any caller that does not have a recorded callsite Call. 2538*06c3fb27SDimitry Andric if (!CE->Caller->hasCall()) 2539*06c3fb27SDimitry Andric continue; 2540*06c3fb27SDimitry Andric 2541*06c3fb27SDimitry Andric if (!CallsiteToCalleeFuncCloneMap.count(CE->Caller) || 2542*06c3fb27SDimitry Andric // We subsequently fall through to later handling that 2543*06c3fb27SDimitry Andric // will perform any additional cloning required for 2544*06c3fb27SDimitry Andric // callers that were calling other function clones. 
2545*06c3fb27SDimitry Andric CallsiteToCalleeFuncCloneMap[CE->Caller] != 2546*06c3fb27SDimitry Andric PreviousAssignedFuncClone) 2547*06c3fb27SDimitry Andric continue; 2548*06c3fb27SDimitry Andric 2549*06c3fb27SDimitry Andric RecordCalleeFuncOfCallsite(CE->Caller, NewFuncClone); 2550*06c3fb27SDimitry Andric 2551*06c3fb27SDimitry Andric // If we are cloning a function that was already assigned to some 2552*06c3fb27SDimitry Andric // callers, then essentially we are creating new callsite clones 2553*06c3fb27SDimitry Andric // of the other callsites in that function that are reached by those 2554*06c3fb27SDimitry Andric // callers. Clone the other callees of the current callsite's caller 2555*06c3fb27SDimitry Andric // that were already assigned to PreviousAssignedFuncClone 2556*06c3fb27SDimitry Andric // accordingly. This is important since we subsequently update the 2557*06c3fb27SDimitry Andric // calls from the nodes in the graph and their assignments to callee 2558*06c3fb27SDimitry Andric // functions recorded in CallsiteToCalleeFuncCloneMap. 2559*06c3fb27SDimitry Andric for (auto CalleeEdge : CE->Caller->CalleeEdges) { 2560*06c3fb27SDimitry Andric // Skip any that have been removed on an earlier iteration when 2561*06c3fb27SDimitry Andric // cleaning up newly None type callee edges. 2562*06c3fb27SDimitry Andric if (!CalleeEdge) 2563*06c3fb27SDimitry Andric continue; 2564*06c3fb27SDimitry Andric ContextNode *Callee = CalleeEdge->Callee; 2565*06c3fb27SDimitry Andric // Skip the current callsite, we are looking for other 2566*06c3fb27SDimitry Andric // callsites Caller calls, as well as any that does not have a 2567*06c3fb27SDimitry Andric // recorded callsite Call. 
2568*06c3fb27SDimitry Andric if (Callee == Clone || !Callee->hasCall()) 2569*06c3fb27SDimitry Andric continue; 2570*06c3fb27SDimitry Andric ContextNode *NewClone = moveEdgeToNewCalleeClone(CalleeEdge); 2571*06c3fb27SDimitry Andric removeNoneTypeCalleeEdges(NewClone); 2572*06c3fb27SDimitry Andric // Moving the edge may have resulted in some none type 2573*06c3fb27SDimitry Andric // callee edges on the original Callee. 2574*06c3fb27SDimitry Andric removeNoneTypeCalleeEdges(Callee); 2575*06c3fb27SDimitry Andric assert(NewClone->AllocTypes != (uint8_t)AllocationType::None); 2576*06c3fb27SDimitry Andric // If the Callee node was already assigned to call a specific 2577*06c3fb27SDimitry Andric // function version, make sure its new clone is assigned to call 2578*06c3fb27SDimitry Andric // that same function clone. 2579*06c3fb27SDimitry Andric if (CallsiteToCalleeFuncCloneMap.count(Callee)) 2580*06c3fb27SDimitry Andric RecordCalleeFuncOfCallsite( 2581*06c3fb27SDimitry Andric NewClone, CallsiteToCalleeFuncCloneMap[Callee]); 2582*06c3fb27SDimitry Andric // Update NewClone with the new Call clone of this callsite's Call 2583*06c3fb27SDimitry Andric // created for the new function clone created earlier. 2584*06c3fb27SDimitry Andric // Recall that we have already ensured when building the graph 2585*06c3fb27SDimitry Andric // that each caller can only call callsites within the same 2586*06c3fb27SDimitry Andric // function, so we are guaranteed that Callee Call is in the 2587*06c3fb27SDimitry Andric // current OrigFunc. 2588*06c3fb27SDimitry Andric // CallMap is set up as indexed by original Call at clone 0. 
2589*06c3fb27SDimitry Andric CallInfo OrigCall(Callee->getOrigNode()->Call); 2590*06c3fb27SDimitry Andric OrigCall.setCloneNo(0); 2591*06c3fb27SDimitry Andric std::map<CallInfo, CallInfo> &CallMap = 2592*06c3fb27SDimitry Andric FuncClonesToCallMap[NewFuncClone]; 2593*06c3fb27SDimitry Andric assert(CallMap.count(OrigCall)); 2594*06c3fb27SDimitry Andric CallInfo NewCall(CallMap[OrigCall]); 2595*06c3fb27SDimitry Andric assert(NewCall); 2596*06c3fb27SDimitry Andric NewClone->setCall(NewCall); 2597*06c3fb27SDimitry Andric } 2598*06c3fb27SDimitry Andric } 2599*06c3fb27SDimitry Andric // Fall through to handling below to perform the recording of the 2600*06c3fb27SDimitry Andric // function for this callsite clone. This enables handling of cases 2601*06c3fb27SDimitry Andric // where the callers were assigned to different clones of a function. 2602*06c3fb27SDimitry Andric } 2603*06c3fb27SDimitry Andric 2604*06c3fb27SDimitry Andric // See if we can use existing function clone. Walk through 2605*06c3fb27SDimitry Andric // all caller edges to see if any have already been assigned to 2606*06c3fb27SDimitry Andric // a clone of this callsite's function. If we can use it, do so. If not, 2607*06c3fb27SDimitry Andric // because that function clone is already assigned to a different clone 2608*06c3fb27SDimitry Andric // of this callsite, then we need to clone again. 2609*06c3fb27SDimitry Andric // Basically, this checking is needed to handle the case where different 2610*06c3fb27SDimitry Andric // caller functions/callsites may need versions of this function 2611*06c3fb27SDimitry Andric // containing different mixes of callsite clones across the different 2612*06c3fb27SDimitry Andric // callsites within the function. If that happens, we need to create 2613*06c3fb27SDimitry Andric // additional function clones to handle the various combinations. 
2614*06c3fb27SDimitry Andric // 2615*06c3fb27SDimitry Andric // Keep track of any new clones of this callsite created by the 2616*06c3fb27SDimitry Andric // following loop, as well as any existing clone that we decided to 2617*06c3fb27SDimitry Andric // assign this clone to. 2618*06c3fb27SDimitry Andric std::map<FuncInfo, ContextNode *> FuncCloneToNewCallsiteCloneMap; 2619*06c3fb27SDimitry Andric FuncInfo FuncCloneAssignedToCurCallsiteClone; 2620*06c3fb27SDimitry Andric // We need to be able to remove Edge from CallerEdges, so need to adjust 2621*06c3fb27SDimitry Andric // iterator in the loop. 2622*06c3fb27SDimitry Andric for (auto EI = Clone->CallerEdges.begin(); 2623*06c3fb27SDimitry Andric EI != Clone->CallerEdges.end();) { 2624*06c3fb27SDimitry Andric auto Edge = *EI; 2625*06c3fb27SDimitry Andric // Ignore any caller that does not have a recorded callsite Call. 2626*06c3fb27SDimitry Andric if (!Edge->Caller->hasCall()) { 2627*06c3fb27SDimitry Andric EI++; 2628*06c3fb27SDimitry Andric continue; 2629*06c3fb27SDimitry Andric } 2630*06c3fb27SDimitry Andric // If this caller already assigned to call a version of OrigFunc, need 2631*06c3fb27SDimitry Andric // to ensure we can assign this callsite clone to that function clone. 2632*06c3fb27SDimitry Andric if (CallsiteToCalleeFuncCloneMap.count(Edge->Caller)) { 2633*06c3fb27SDimitry Andric FuncInfo FuncCloneCalledByCaller = 2634*06c3fb27SDimitry Andric CallsiteToCalleeFuncCloneMap[Edge->Caller]; 2635*06c3fb27SDimitry Andric // First we need to confirm that this function clone is available 2636*06c3fb27SDimitry Andric // for use by this callsite node clone. 
2637*06c3fb27SDimitry Andric // 2638*06c3fb27SDimitry Andric // While FuncCloneToCurNodeCloneMap is built only for this Node and 2639*06c3fb27SDimitry Andric // its callsite clones, one of those callsite clones X could have 2640*06c3fb27SDimitry Andric // been assigned to the same function clone called by Edge's caller 2641*06c3fb27SDimitry Andric // - if Edge's caller calls another callsite within Node's original 2642*06c3fb27SDimitry Andric // function, and that callsite has another caller reaching clone X. 2643*06c3fb27SDimitry Andric // We need to clone Node again in this case. 2644*06c3fb27SDimitry Andric if ((FuncCloneToCurNodeCloneMap.count(FuncCloneCalledByCaller) && 2645*06c3fb27SDimitry Andric FuncCloneToCurNodeCloneMap[FuncCloneCalledByCaller] != 2646*06c3fb27SDimitry Andric Clone) || 2647*06c3fb27SDimitry Andric // Detect when we have multiple callers of this callsite that 2648*06c3fb27SDimitry Andric // have already been assigned to specific, and different, clones 2649*06c3fb27SDimitry Andric // of OrigFunc (due to other unrelated callsites in Func they 2650*06c3fb27SDimitry Andric // reach via call contexts). Is this Clone of callsite Node 2651*06c3fb27SDimitry Andric // assigned to a different clone of OrigFunc? If so, clone Node 2652*06c3fb27SDimitry Andric // again. 2653*06c3fb27SDimitry Andric (FuncCloneAssignedToCurCallsiteClone && 2654*06c3fb27SDimitry Andric FuncCloneAssignedToCurCallsiteClone != 2655*06c3fb27SDimitry Andric FuncCloneCalledByCaller)) { 2656*06c3fb27SDimitry Andric // We need to use a different newly created callsite clone, in 2657*06c3fb27SDimitry Andric // order to assign it to another new function clone on a 2658*06c3fb27SDimitry Andric // subsequent iteration over the Clones array (adjusted below). 
2659*06c3fb27SDimitry Andric // Note we specifically do not reset the 2660*06c3fb27SDimitry Andric // CallsiteToCalleeFuncCloneMap entry for this caller, so that 2661*06c3fb27SDimitry Andric // when this new clone is processed later we know which version of 2662*06c3fb27SDimitry Andric // the function to copy (so that other callsite clones we have 2663*06c3fb27SDimitry Andric // assigned to that function clone are properly cloned over). See 2664*06c3fb27SDimitry Andric // comments in the function cloning handling earlier. 2665*06c3fb27SDimitry Andric 2666*06c3fb27SDimitry Andric // Check if we already have cloned this callsite again while 2667*06c3fb27SDimitry Andric // walking through caller edges, for a caller calling the same 2668*06c3fb27SDimitry Andric // function clone. If so, we can move this edge to that new clone 2669*06c3fb27SDimitry Andric // rather than creating yet another new clone. 2670*06c3fb27SDimitry Andric if (FuncCloneToNewCallsiteCloneMap.count( 2671*06c3fb27SDimitry Andric FuncCloneCalledByCaller)) { 2672*06c3fb27SDimitry Andric ContextNode *NewClone = 2673*06c3fb27SDimitry Andric FuncCloneToNewCallsiteCloneMap[FuncCloneCalledByCaller]; 2674*06c3fb27SDimitry Andric moveEdgeToExistingCalleeClone(Edge, NewClone, &EI); 2675*06c3fb27SDimitry Andric // Cleanup any none type edges cloned over. 2676*06c3fb27SDimitry Andric removeNoneTypeCalleeEdges(NewClone); 2677*06c3fb27SDimitry Andric } else { 2678*06c3fb27SDimitry Andric // Create a new callsite clone. 2679*06c3fb27SDimitry Andric ContextNode *NewClone = moveEdgeToNewCalleeClone(Edge, &EI); 2680*06c3fb27SDimitry Andric removeNoneTypeCalleeEdges(NewClone); 2681*06c3fb27SDimitry Andric FuncCloneToNewCallsiteCloneMap[FuncCloneCalledByCaller] = 2682*06c3fb27SDimitry Andric NewClone; 2683*06c3fb27SDimitry Andric // Add to list of clones and process later. 
2684*06c3fb27SDimitry Andric ClonesWorklist.push_back(NewClone); 2685*06c3fb27SDimitry Andric assert(EI == Clone->CallerEdges.end() || 2686*06c3fb27SDimitry Andric Clone->AllocTypes != (uint8_t)AllocationType::None); 2687*06c3fb27SDimitry Andric assert(NewClone->AllocTypes != (uint8_t)AllocationType::None); 2688*06c3fb27SDimitry Andric } 2689*06c3fb27SDimitry Andric // Moving the caller edge may have resulted in some none type 2690*06c3fb27SDimitry Andric // callee edges. 2691*06c3fb27SDimitry Andric removeNoneTypeCalleeEdges(Clone); 2692*06c3fb27SDimitry Andric // We will handle the newly created callsite clone in a subsequent 2693*06c3fb27SDimitry Andric // iteration over this Node's Clones. Continue here since we 2694*06c3fb27SDimitry Andric // already adjusted iterator EI while moving the edge. 2695*06c3fb27SDimitry Andric continue; 2696*06c3fb27SDimitry Andric } 2697*06c3fb27SDimitry Andric 2698*06c3fb27SDimitry Andric // Otherwise, we can use the function clone already assigned to this 2699*06c3fb27SDimitry Andric // caller. 2700*06c3fb27SDimitry Andric if (!FuncCloneAssignedToCurCallsiteClone) { 2701*06c3fb27SDimitry Andric FuncCloneAssignedToCurCallsiteClone = FuncCloneCalledByCaller; 2702*06c3fb27SDimitry Andric // Assign Clone to FuncCloneCalledByCaller 2703*06c3fb27SDimitry Andric AssignCallsiteCloneToFuncClone( 2704*06c3fb27SDimitry Andric FuncCloneCalledByCaller, Call, Clone, 2705*06c3fb27SDimitry Andric AllocationCallToContextNodeMap.count(Call)); 2706*06c3fb27SDimitry Andric } else 2707*06c3fb27SDimitry Andric // Don't need to do anything - callsite is already calling this 2708*06c3fb27SDimitry Andric // function clone. 2709*06c3fb27SDimitry Andric assert(FuncCloneAssignedToCurCallsiteClone == 2710*06c3fb27SDimitry Andric FuncCloneCalledByCaller); 2711*06c3fb27SDimitry Andric 2712*06c3fb27SDimitry Andric } else { 2713*06c3fb27SDimitry Andric // We have not already assigned this caller to a version of 2714*06c3fb27SDimitry Andric // OrigFunc. 
Do the assignment now. 2715*06c3fb27SDimitry Andric 2716*06c3fb27SDimitry Andric // First check if we have already assigned this callsite clone to a 2717*06c3fb27SDimitry Andric // clone of OrigFunc for another caller during this iteration over 2718*06c3fb27SDimitry Andric // its caller edges. 2719*06c3fb27SDimitry Andric if (!FuncCloneAssignedToCurCallsiteClone) { 2720*06c3fb27SDimitry Andric // Find first function in FuncClonesToCallMap without an assigned 2721*06c3fb27SDimitry Andric // clone of this callsite Node. We should always have one 2722*06c3fb27SDimitry Andric // available at this point due to the earlier cloning when the 2723*06c3fb27SDimitry Andric // FuncClonesToCallMap size was smaller than the clone number. 2724*06c3fb27SDimitry Andric for (auto &CF : FuncClonesToCallMap) { 2725*06c3fb27SDimitry Andric if (!FuncCloneToCurNodeCloneMap.count(CF.first)) { 2726*06c3fb27SDimitry Andric FuncCloneAssignedToCurCallsiteClone = CF.first; 2727*06c3fb27SDimitry Andric break; 2728*06c3fb27SDimitry Andric } 2729*06c3fb27SDimitry Andric } 2730*06c3fb27SDimitry Andric assert(FuncCloneAssignedToCurCallsiteClone); 2731*06c3fb27SDimitry Andric // Assign Clone to FuncCloneAssignedToCurCallsiteClone 2732*06c3fb27SDimitry Andric AssignCallsiteCloneToFuncClone( 2733*06c3fb27SDimitry Andric FuncCloneAssignedToCurCallsiteClone, Call, Clone, 2734*06c3fb27SDimitry Andric AllocationCallToContextNodeMap.count(Call)); 2735*06c3fb27SDimitry Andric } else 2736*06c3fb27SDimitry Andric assert(FuncCloneToCurNodeCloneMap 2737*06c3fb27SDimitry Andric [FuncCloneAssignedToCurCallsiteClone] == Clone); 2738*06c3fb27SDimitry Andric // Update callers to record function version called. 
2739*06c3fb27SDimitry Andric RecordCalleeFuncOfCallsite(Edge->Caller, 2740*06c3fb27SDimitry Andric FuncCloneAssignedToCurCallsiteClone); 2741*06c3fb27SDimitry Andric } 2742*06c3fb27SDimitry Andric 2743*06c3fb27SDimitry Andric EI++; 2744*06c3fb27SDimitry Andric } 2745*06c3fb27SDimitry Andric } 2746*06c3fb27SDimitry Andric if (VerifyCCG) { 2747*06c3fb27SDimitry Andric checkNode<DerivedCCG, FuncTy, CallTy>(Node); 2748*06c3fb27SDimitry Andric for (const auto &PE : Node->CalleeEdges) 2749*06c3fb27SDimitry Andric checkNode<DerivedCCG, FuncTy, CallTy>(PE->Callee); 2750*06c3fb27SDimitry Andric for (const auto &CE : Node->CallerEdges) 2751*06c3fb27SDimitry Andric checkNode<DerivedCCG, FuncTy, CallTy>(CE->Caller); 2752*06c3fb27SDimitry Andric for (auto *Clone : Node->Clones) { 2753*06c3fb27SDimitry Andric checkNode<DerivedCCG, FuncTy, CallTy>(Clone); 2754*06c3fb27SDimitry Andric for (const auto &PE : Clone->CalleeEdges) 2755*06c3fb27SDimitry Andric checkNode<DerivedCCG, FuncTy, CallTy>(PE->Callee); 2756*06c3fb27SDimitry Andric for (const auto &CE : Clone->CallerEdges) 2757*06c3fb27SDimitry Andric checkNode<DerivedCCG, FuncTy, CallTy>(CE->Caller); 2758*06c3fb27SDimitry Andric } 2759*06c3fb27SDimitry Andric } 2760*06c3fb27SDimitry Andric } 2761*06c3fb27SDimitry Andric } 2762*06c3fb27SDimitry Andric 2763*06c3fb27SDimitry Andric auto UpdateCalls = [&](ContextNode *Node, 2764*06c3fb27SDimitry Andric DenseSet<const ContextNode *> &Visited, 2765*06c3fb27SDimitry Andric auto &&UpdateCalls) { 2766*06c3fb27SDimitry Andric auto Inserted = Visited.insert(Node); 2767*06c3fb27SDimitry Andric if (!Inserted.second) 2768*06c3fb27SDimitry Andric return; 2769*06c3fb27SDimitry Andric 2770*06c3fb27SDimitry Andric for (auto *Clone : Node->Clones) 2771*06c3fb27SDimitry Andric UpdateCalls(Clone, Visited, UpdateCalls); 2772*06c3fb27SDimitry Andric 2773*06c3fb27SDimitry Andric for (auto &Edge : Node->CallerEdges) 2774*06c3fb27SDimitry Andric UpdateCalls(Edge->Caller, Visited, UpdateCalls); 
2775*06c3fb27SDimitry Andric 2776*06c3fb27SDimitry Andric // Skip if either no call to update, or if we ended up with no context ids 2777*06c3fb27SDimitry Andric // (we moved all edges onto other clones). 2778*06c3fb27SDimitry Andric if (!Node->hasCall() || Node->ContextIds.empty()) 2779*06c3fb27SDimitry Andric return; 2780*06c3fb27SDimitry Andric 2781*06c3fb27SDimitry Andric if (Node->IsAllocation) { 2782*06c3fb27SDimitry Andric updateAllocationCall(Node->Call, allocTypeToUse(Node->AllocTypes)); 2783*06c3fb27SDimitry Andric return; 2784*06c3fb27SDimitry Andric } 2785*06c3fb27SDimitry Andric 2786*06c3fb27SDimitry Andric if (!CallsiteToCalleeFuncCloneMap.count(Node)) 2787*06c3fb27SDimitry Andric return; 2788*06c3fb27SDimitry Andric 2789*06c3fb27SDimitry Andric auto CalleeFunc = CallsiteToCalleeFuncCloneMap[Node]; 2790*06c3fb27SDimitry Andric updateCall(Node->Call, CalleeFunc); 2791*06c3fb27SDimitry Andric }; 2792*06c3fb27SDimitry Andric 2793*06c3fb27SDimitry Andric // Performs DFS traversal starting from allocation nodes to update calls to 2794*06c3fb27SDimitry Andric // reflect cloning decisions recorded earlier. For regular LTO this will 2795*06c3fb27SDimitry Andric // update the actual calls in the IR to call the appropriate function clone 2796*06c3fb27SDimitry Andric // (and add attributes to allocation calls), whereas for ThinLTO the decisions 2797*06c3fb27SDimitry Andric // are recorded in the summary entries. 
2798*06c3fb27SDimitry Andric DenseSet<const ContextNode *> Visited; 2799*06c3fb27SDimitry Andric for (auto &Entry : AllocationCallToContextNodeMap) 2800*06c3fb27SDimitry Andric UpdateCalls(Entry.second, Visited, UpdateCalls); 2801*06c3fb27SDimitry Andric 2802*06c3fb27SDimitry Andric return Changed; 2803*06c3fb27SDimitry Andric } 2804*06c3fb27SDimitry Andric 2805*06c3fb27SDimitry Andric static SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> createFunctionClones( 2806*06c3fb27SDimitry Andric Function &F, unsigned NumClones, Module &M, OptimizationRemarkEmitter &ORE, 2807*06c3fb27SDimitry Andric std::map<const Function *, SmallPtrSet<const GlobalAlias *, 1>> 2808*06c3fb27SDimitry Andric &FuncToAliasMap) { 2809*06c3fb27SDimitry Andric // The first "clone" is the original copy, we should only call this if we 2810*06c3fb27SDimitry Andric // needed to create new clones. 2811*06c3fb27SDimitry Andric assert(NumClones > 1); 2812*06c3fb27SDimitry Andric SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> VMaps; 2813*06c3fb27SDimitry Andric VMaps.reserve(NumClones - 1); 2814*06c3fb27SDimitry Andric FunctionsClonedThinBackend++; 2815*06c3fb27SDimitry Andric for (unsigned I = 1; I < NumClones; I++) { 2816*06c3fb27SDimitry Andric VMaps.emplace_back(std::make_unique<ValueToValueMapTy>()); 2817*06c3fb27SDimitry Andric auto *NewF = CloneFunction(&F, *VMaps.back()); 2818*06c3fb27SDimitry Andric FunctionClonesThinBackend++; 2819*06c3fb27SDimitry Andric // Strip memprof and callsite metadata from clone as they are no longer 2820*06c3fb27SDimitry Andric // needed. 
2821*06c3fb27SDimitry Andric for (auto &BB : *NewF) { 2822*06c3fb27SDimitry Andric for (auto &Inst : BB) { 2823*06c3fb27SDimitry Andric Inst.setMetadata(LLVMContext::MD_memprof, nullptr); 2824*06c3fb27SDimitry Andric Inst.setMetadata(LLVMContext::MD_callsite, nullptr); 2825*06c3fb27SDimitry Andric } 2826*06c3fb27SDimitry Andric } 2827*06c3fb27SDimitry Andric std::string Name = getMemProfFuncName(F.getName(), I); 2828*06c3fb27SDimitry Andric auto *PrevF = M.getFunction(Name); 2829*06c3fb27SDimitry Andric if (PrevF) { 2830*06c3fb27SDimitry Andric // We might have created this when adjusting callsite in another 2831*06c3fb27SDimitry Andric // function. It should be a declaration. 2832*06c3fb27SDimitry Andric assert(PrevF->isDeclaration()); 2833*06c3fb27SDimitry Andric NewF->takeName(PrevF); 2834*06c3fb27SDimitry Andric PrevF->replaceAllUsesWith(NewF); 2835*06c3fb27SDimitry Andric PrevF->eraseFromParent(); 2836*06c3fb27SDimitry Andric } else 2837*06c3fb27SDimitry Andric NewF->setName(Name); 2838*06c3fb27SDimitry Andric ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofClone", &F) 2839*06c3fb27SDimitry Andric << "created clone " << ore::NV("NewFunction", NewF)); 2840*06c3fb27SDimitry Andric 2841*06c3fb27SDimitry Andric // Now handle aliases to this function, and clone those as well. 
2842*06c3fb27SDimitry Andric if (!FuncToAliasMap.count(&F)) 2843*06c3fb27SDimitry Andric continue; 2844*06c3fb27SDimitry Andric for (auto *A : FuncToAliasMap[&F]) { 2845*06c3fb27SDimitry Andric std::string Name = getMemProfFuncName(A->getName(), I); 2846*06c3fb27SDimitry Andric auto *PrevA = M.getNamedAlias(Name); 2847*06c3fb27SDimitry Andric auto *NewA = GlobalAlias::create(A->getValueType(), 2848*06c3fb27SDimitry Andric A->getType()->getPointerAddressSpace(), 2849*06c3fb27SDimitry Andric A->getLinkage(), Name, NewF); 2850*06c3fb27SDimitry Andric NewA->copyAttributesFrom(A); 2851*06c3fb27SDimitry Andric if (PrevA) { 2852*06c3fb27SDimitry Andric // We might have created this when adjusting callsite in another 2853*06c3fb27SDimitry Andric // function. It should be a declaration. 2854*06c3fb27SDimitry Andric assert(PrevA->isDeclaration()); 2855*06c3fb27SDimitry Andric NewA->takeName(PrevA); 2856*06c3fb27SDimitry Andric PrevA->replaceAllUsesWith(NewA); 2857*06c3fb27SDimitry Andric PrevA->eraseFromParent(); 2858*06c3fb27SDimitry Andric } 2859*06c3fb27SDimitry Andric } 2860*06c3fb27SDimitry Andric } 2861*06c3fb27SDimitry Andric return VMaps; 2862*06c3fb27SDimitry Andric } 2863*06c3fb27SDimitry Andric 2864*06c3fb27SDimitry Andric // Locate the summary for F. This is complicated by the fact that it might 2865*06c3fb27SDimitry Andric // have been internalized or promoted. 2866*06c3fb27SDimitry Andric static ValueInfo findValueInfoForFunc(const Function &F, const Module &M, 2867*06c3fb27SDimitry Andric const ModuleSummaryIndex *ImportSummary) { 2868*06c3fb27SDimitry Andric // FIXME: Ideally we would retain the original GUID in some fashion on the 2869*06c3fb27SDimitry Andric // function (e.g. as metadata), but for now do our best to locate the 2870*06c3fb27SDimitry Andric // summary without that information. 
2871*06c3fb27SDimitry Andric ValueInfo TheFnVI = ImportSummary->getValueInfo(F.getGUID()); 2872*06c3fb27SDimitry Andric if (!TheFnVI) 2873*06c3fb27SDimitry Andric // See if theFn was internalized, by checking index directly with 2874*06c3fb27SDimitry Andric // original name (this avoids the name adjustment done by getGUID() for 2875*06c3fb27SDimitry Andric // internal symbols). 2876*06c3fb27SDimitry Andric TheFnVI = ImportSummary->getValueInfo(GlobalValue::getGUID(F.getName())); 2877*06c3fb27SDimitry Andric if (TheFnVI) 2878*06c3fb27SDimitry Andric return TheFnVI; 2879*06c3fb27SDimitry Andric // Now query with the original name before any promotion was performed. 2880*06c3fb27SDimitry Andric StringRef OrigName = 2881*06c3fb27SDimitry Andric ModuleSummaryIndex::getOriginalNameBeforePromote(F.getName()); 2882*06c3fb27SDimitry Andric std::string OrigId = GlobalValue::getGlobalIdentifier( 2883*06c3fb27SDimitry Andric OrigName, GlobalValue::InternalLinkage, M.getSourceFileName()); 2884*06c3fb27SDimitry Andric TheFnVI = ImportSummary->getValueInfo(GlobalValue::getGUID(OrigId)); 2885*06c3fb27SDimitry Andric if (TheFnVI) 2886*06c3fb27SDimitry Andric return TheFnVI; 2887*06c3fb27SDimitry Andric // Could be a promoted local imported from another module. We need to pass 2888*06c3fb27SDimitry Andric // down more info here to find the original module id. For now, try with 2889*06c3fb27SDimitry Andric // the OrigName which might have been stored in the OidGuidMap in the 2890*06c3fb27SDimitry Andric // index. This would not work if there were same-named locals in multiple 2891*06c3fb27SDimitry Andric // modules, however. 
2892*06c3fb27SDimitry Andric auto OrigGUID = 2893*06c3fb27SDimitry Andric ImportSummary->getGUIDFromOriginalID(GlobalValue::getGUID(OrigName)); 2894*06c3fb27SDimitry Andric if (OrigGUID) 2895*06c3fb27SDimitry Andric TheFnVI = ImportSummary->getValueInfo(OrigGUID); 2896*06c3fb27SDimitry Andric return TheFnVI; 2897*06c3fb27SDimitry Andric } 2898*06c3fb27SDimitry Andric 2899*06c3fb27SDimitry Andric bool MemProfContextDisambiguation::applyImport(Module &M) { 2900*06c3fb27SDimitry Andric assert(ImportSummary); 2901*06c3fb27SDimitry Andric bool Changed = false; 2902*06c3fb27SDimitry Andric 2903*06c3fb27SDimitry Andric auto IsMemProfClone = [](const Function &F) { 2904*06c3fb27SDimitry Andric return F.getName().contains(MemProfCloneSuffix); 2905*06c3fb27SDimitry Andric }; 2906*06c3fb27SDimitry Andric 2907*06c3fb27SDimitry Andric // We also need to clone any aliases that reference cloned functions, because 2908*06c3fb27SDimitry Andric // the modified callsites may invoke via the alias. Keep track of the aliases 2909*06c3fb27SDimitry Andric // for each function. 
2910*06c3fb27SDimitry Andric std::map<const Function *, SmallPtrSet<const GlobalAlias *, 1>> 2911*06c3fb27SDimitry Andric FuncToAliasMap; 2912*06c3fb27SDimitry Andric for (auto &A : M.aliases()) { 2913*06c3fb27SDimitry Andric auto *Aliasee = A.getAliaseeObject(); 2914*06c3fb27SDimitry Andric if (auto *F = dyn_cast<Function>(Aliasee)) 2915*06c3fb27SDimitry Andric FuncToAliasMap[F].insert(&A); 2916*06c3fb27SDimitry Andric } 2917*06c3fb27SDimitry Andric 2918*06c3fb27SDimitry Andric for (auto &F : M) { 2919*06c3fb27SDimitry Andric if (F.isDeclaration() || IsMemProfClone(F)) 2920*06c3fb27SDimitry Andric continue; 2921*06c3fb27SDimitry Andric 2922*06c3fb27SDimitry Andric OptimizationRemarkEmitter ORE(&F); 2923*06c3fb27SDimitry Andric 2924*06c3fb27SDimitry Andric SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> VMaps; 2925*06c3fb27SDimitry Andric bool ClonesCreated = false; 2926*06c3fb27SDimitry Andric unsigned NumClonesCreated = 0; 2927*06c3fb27SDimitry Andric auto CloneFuncIfNeeded = [&](unsigned NumClones) { 2928*06c3fb27SDimitry Andric // We should at least have version 0 which is the original copy. 2929*06c3fb27SDimitry Andric assert(NumClones > 0); 2930*06c3fb27SDimitry Andric // If only one copy needed use original. 2931*06c3fb27SDimitry Andric if (NumClones == 1) 2932*06c3fb27SDimitry Andric return; 2933*06c3fb27SDimitry Andric // If we already performed cloning of this function, confirm that the 2934*06c3fb27SDimitry Andric // requested number of clones matches (the thin link should ensure the 2935*06c3fb27SDimitry Andric // number of clones for each constituent callsite is consistent within 2936*06c3fb27SDimitry Andric // each function), before returning. 
2937*06c3fb27SDimitry Andric if (ClonesCreated) { 2938*06c3fb27SDimitry Andric assert(NumClonesCreated == NumClones); 2939*06c3fb27SDimitry Andric return; 2940*06c3fb27SDimitry Andric } 2941*06c3fb27SDimitry Andric VMaps = createFunctionClones(F, NumClones, M, ORE, FuncToAliasMap); 2942*06c3fb27SDimitry Andric // The first "clone" is the original copy, which doesn't have a VMap. 2943*06c3fb27SDimitry Andric assert(VMaps.size() == NumClones - 1); 2944*06c3fb27SDimitry Andric Changed = true; 2945*06c3fb27SDimitry Andric ClonesCreated = true; 2946*06c3fb27SDimitry Andric NumClonesCreated = NumClones; 2947*06c3fb27SDimitry Andric }; 2948*06c3fb27SDimitry Andric 2949*06c3fb27SDimitry Andric // Locate the summary for F. 2950*06c3fb27SDimitry Andric ValueInfo TheFnVI = findValueInfoForFunc(F, M, ImportSummary); 2951*06c3fb27SDimitry Andric // If not found, this could be an imported local (see comment in 2952*06c3fb27SDimitry Andric // findValueInfoForFunc). Skip for now as it will be cloned in its original 2953*06c3fb27SDimitry Andric // module (where it would have been promoted to global scope so should 2954*06c3fb27SDimitry Andric // satisfy any reference in this module). 2955*06c3fb27SDimitry Andric if (!TheFnVI) 2956*06c3fb27SDimitry Andric continue; 2957*06c3fb27SDimitry Andric 2958*06c3fb27SDimitry Andric auto *GVSummary = 2959*06c3fb27SDimitry Andric ImportSummary->findSummaryInModule(TheFnVI, M.getModuleIdentifier()); 2960*06c3fb27SDimitry Andric if (!GVSummary) 2961*06c3fb27SDimitry Andric // Must have been imported, use the first summary (might be multiple if 2962*06c3fb27SDimitry Andric // this was a linkonce_odr). 2963*06c3fb27SDimitry Andric GVSummary = TheFnVI.getSummaryList().front().get(); 2964*06c3fb27SDimitry Andric 2965*06c3fb27SDimitry Andric // If this was an imported alias skip it as we won't have the function 2966*06c3fb27SDimitry Andric // summary, and it should be cloned in the original module. 
2967*06c3fb27SDimitry Andric if (isa<AliasSummary>(GVSummary)) 2968*06c3fb27SDimitry Andric continue; 2969*06c3fb27SDimitry Andric 2970*06c3fb27SDimitry Andric auto *FS = cast<FunctionSummary>(GVSummary->getBaseObject()); 2971*06c3fb27SDimitry Andric 2972*06c3fb27SDimitry Andric if (FS->allocs().empty() && FS->callsites().empty()) 2973*06c3fb27SDimitry Andric continue; 2974*06c3fb27SDimitry Andric 2975*06c3fb27SDimitry Andric auto SI = FS->callsites().begin(); 2976*06c3fb27SDimitry Andric auto AI = FS->allocs().begin(); 2977*06c3fb27SDimitry Andric 2978*06c3fb27SDimitry Andric // Assume for now that the instructions are in the exact same order 2979*06c3fb27SDimitry Andric // as when the summary was created, but confirm this is correct by 2980*06c3fb27SDimitry Andric // matching the stack ids. 2981*06c3fb27SDimitry Andric for (auto &BB : F) { 2982*06c3fb27SDimitry Andric for (auto &I : BB) { 2983*06c3fb27SDimitry Andric auto *CB = dyn_cast<CallBase>(&I); 2984*06c3fb27SDimitry Andric // Same handling as when creating module summary. 2985*06c3fb27SDimitry Andric if (!mayHaveMemprofSummary(CB)) 2986*06c3fb27SDimitry Andric continue; 2987*06c3fb27SDimitry Andric 2988*06c3fb27SDimitry Andric CallStack<MDNode, MDNode::op_iterator> CallsiteContext( 2989*06c3fb27SDimitry Andric I.getMetadata(LLVMContext::MD_callsite)); 2990*06c3fb27SDimitry Andric auto *MemProfMD = I.getMetadata(LLVMContext::MD_memprof); 2991*06c3fb27SDimitry Andric 2992*06c3fb27SDimitry Andric // Include allocs that were already assigned a memprof function 2993*06c3fb27SDimitry Andric // attribute in the statistics. 2994*06c3fb27SDimitry Andric if (CB->getAttributes().hasFnAttr("memprof")) { 2995*06c3fb27SDimitry Andric assert(!MemProfMD); 2996*06c3fb27SDimitry Andric CB->getAttributes().getFnAttr("memprof").getValueAsString() == "cold" 2997*06c3fb27SDimitry Andric ? 
AllocTypeColdThinBackend++ 2998*06c3fb27SDimitry Andric : AllocTypeNotColdThinBackend++; 2999*06c3fb27SDimitry Andric OrigAllocsThinBackend++; 3000*06c3fb27SDimitry Andric AllocVersionsThinBackend++; 3001*06c3fb27SDimitry Andric if (!MaxAllocVersionsThinBackend) 3002*06c3fb27SDimitry Andric MaxAllocVersionsThinBackend = 1; 3003*06c3fb27SDimitry Andric // Remove any remaining callsite metadata and we can skip the rest of 3004*06c3fb27SDimitry Andric // the handling for this instruction, since no cloning needed. 3005*06c3fb27SDimitry Andric I.setMetadata(LLVMContext::MD_callsite, nullptr); 3006*06c3fb27SDimitry Andric continue; 3007*06c3fb27SDimitry Andric } 3008*06c3fb27SDimitry Andric 3009*06c3fb27SDimitry Andric if (MemProfMD) { 3010*06c3fb27SDimitry Andric // Consult the next alloc node. 3011*06c3fb27SDimitry Andric assert(AI != FS->allocs().end()); 3012*06c3fb27SDimitry Andric auto &AllocNode = *(AI++); 3013*06c3fb27SDimitry Andric 3014*06c3fb27SDimitry Andric // Sanity check that the MIB stack ids match between the summary and 3015*06c3fb27SDimitry Andric // instruction metadata. 
3016*06c3fb27SDimitry Andric auto MIBIter = AllocNode.MIBs.begin(); 3017*06c3fb27SDimitry Andric for (auto &MDOp : MemProfMD->operands()) { 3018*06c3fb27SDimitry Andric assert(MIBIter != AllocNode.MIBs.end()); 3019*06c3fb27SDimitry Andric LLVM_ATTRIBUTE_UNUSED auto StackIdIndexIter = 3020*06c3fb27SDimitry Andric MIBIter->StackIdIndices.begin(); 3021*06c3fb27SDimitry Andric auto *MIBMD = cast<const MDNode>(MDOp); 3022*06c3fb27SDimitry Andric MDNode *StackMDNode = getMIBStackNode(MIBMD); 3023*06c3fb27SDimitry Andric assert(StackMDNode); 3024*06c3fb27SDimitry Andric SmallVector<unsigned> StackIdsFromMetadata; 3025*06c3fb27SDimitry Andric CallStack<MDNode, MDNode::op_iterator> StackContext(StackMDNode); 3026*06c3fb27SDimitry Andric for (auto ContextIter = 3027*06c3fb27SDimitry Andric StackContext.beginAfterSharedPrefix(CallsiteContext); 3028*06c3fb27SDimitry Andric ContextIter != StackContext.end(); ++ContextIter) { 3029*06c3fb27SDimitry Andric // If this is a direct recursion, simply skip the duplicate 3030*06c3fb27SDimitry Andric // entries, to be consistent with how the summary ids were 3031*06c3fb27SDimitry Andric // generated during ModuleSummaryAnalysis. 3032*06c3fb27SDimitry Andric if (!StackIdsFromMetadata.empty() && 3033*06c3fb27SDimitry Andric StackIdsFromMetadata.back() == *ContextIter) 3034*06c3fb27SDimitry Andric continue; 3035*06c3fb27SDimitry Andric assert(StackIdIndexIter != MIBIter->StackIdIndices.end()); 3036*06c3fb27SDimitry Andric assert(ImportSummary->getStackIdAtIndex(*StackIdIndexIter) == 3037*06c3fb27SDimitry Andric *ContextIter); 3038*06c3fb27SDimitry Andric StackIdIndexIter++; 3039*06c3fb27SDimitry Andric } 3040*06c3fb27SDimitry Andric MIBIter++; 3041*06c3fb27SDimitry Andric } 3042*06c3fb27SDimitry Andric 3043*06c3fb27SDimitry Andric // Perform cloning if not yet done. 
3044*06c3fb27SDimitry Andric CloneFuncIfNeeded(/*NumClones=*/AllocNode.Versions.size()); 3045*06c3fb27SDimitry Andric 3046*06c3fb27SDimitry Andric OrigAllocsThinBackend++; 3047*06c3fb27SDimitry Andric AllocVersionsThinBackend += AllocNode.Versions.size(); 3048*06c3fb27SDimitry Andric if (MaxAllocVersionsThinBackend < AllocNode.Versions.size()) 3049*06c3fb27SDimitry Andric MaxAllocVersionsThinBackend = AllocNode.Versions.size(); 3050*06c3fb27SDimitry Andric 3051*06c3fb27SDimitry Andric // If there is only one version that means we didn't end up 3052*06c3fb27SDimitry Andric // considering this function for cloning, and in that case the alloc 3053*06c3fb27SDimitry Andric // will still be none type or should have gotten the default NotCold. 3054*06c3fb27SDimitry Andric // Skip that after calling clone helper since that does some sanity 3055*06c3fb27SDimitry Andric // checks that confirm we haven't decided yet that we need cloning. 3056*06c3fb27SDimitry Andric if (AllocNode.Versions.size() == 1) { 3057*06c3fb27SDimitry Andric assert((AllocationType)AllocNode.Versions[0] == 3058*06c3fb27SDimitry Andric AllocationType::NotCold || 3059*06c3fb27SDimitry Andric (AllocationType)AllocNode.Versions[0] == 3060*06c3fb27SDimitry Andric AllocationType::None); 3061*06c3fb27SDimitry Andric UnclonableAllocsThinBackend++; 3062*06c3fb27SDimitry Andric continue; 3063*06c3fb27SDimitry Andric } 3064*06c3fb27SDimitry Andric 3065*06c3fb27SDimitry Andric // All versions should have a singular allocation type. 3066*06c3fb27SDimitry Andric assert(llvm::none_of(AllocNode.Versions, [](uint8_t Type) { 3067*06c3fb27SDimitry Andric return Type == ((uint8_t)AllocationType::NotCold | 3068*06c3fb27SDimitry Andric (uint8_t)AllocationType::Cold); 3069*06c3fb27SDimitry Andric })); 3070*06c3fb27SDimitry Andric 3071*06c3fb27SDimitry Andric // Update the allocation types per the summary info. 
3072*06c3fb27SDimitry Andric for (unsigned J = 0; J < AllocNode.Versions.size(); J++) { 3073*06c3fb27SDimitry Andric // Ignore any that didn't get an assigned allocation type. 3074*06c3fb27SDimitry Andric if (AllocNode.Versions[J] == (uint8_t)AllocationType::None) 3075*06c3fb27SDimitry Andric continue; 3076*06c3fb27SDimitry Andric AllocationType AllocTy = (AllocationType)AllocNode.Versions[J]; 3077*06c3fb27SDimitry Andric AllocTy == AllocationType::Cold ? AllocTypeColdThinBackend++ 3078*06c3fb27SDimitry Andric : AllocTypeNotColdThinBackend++; 3079*06c3fb27SDimitry Andric std::string AllocTypeString = getAllocTypeAttributeString(AllocTy); 3080*06c3fb27SDimitry Andric auto A = llvm::Attribute::get(F.getContext(), "memprof", 3081*06c3fb27SDimitry Andric AllocTypeString); 3082*06c3fb27SDimitry Andric CallBase *CBClone; 3083*06c3fb27SDimitry Andric // Copy 0 is the original function. 3084*06c3fb27SDimitry Andric if (!J) 3085*06c3fb27SDimitry Andric CBClone = CB; 3086*06c3fb27SDimitry Andric else 3087*06c3fb27SDimitry Andric // Since VMaps are only created for new clones, we index with 3088*06c3fb27SDimitry Andric // clone J-1 (J==0 is the original clone and does not have a VMaps 3089*06c3fb27SDimitry Andric // entry). 3090*06c3fb27SDimitry Andric CBClone = cast<CallBase>((*VMaps[J - 1])[CB]); 3091*06c3fb27SDimitry Andric CBClone->addFnAttr(A); 3092*06c3fb27SDimitry Andric ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofAttribute", CBClone) 3093*06c3fb27SDimitry Andric << ore::NV("AllocationCall", CBClone) << " in clone " 3094*06c3fb27SDimitry Andric << ore::NV("Caller", CBClone->getFunction()) 3095*06c3fb27SDimitry Andric << " marked with memprof allocation attribute " 3096*06c3fb27SDimitry Andric << ore::NV("Attribute", AllocTypeString)); 3097*06c3fb27SDimitry Andric } 3098*06c3fb27SDimitry Andric } else if (!CallsiteContext.empty()) { 3099*06c3fb27SDimitry Andric // Consult the next callsite node. 
3100*06c3fb27SDimitry Andric assert(SI != FS->callsites().end()); 3101*06c3fb27SDimitry Andric auto &StackNode = *(SI++); 3102*06c3fb27SDimitry Andric 3103*06c3fb27SDimitry Andric #ifndef NDEBUG 3104*06c3fb27SDimitry Andric // Sanity check that the stack ids match between the summary and 3105*06c3fb27SDimitry Andric // instruction metadata. 3106*06c3fb27SDimitry Andric auto StackIdIndexIter = StackNode.StackIdIndices.begin(); 3107*06c3fb27SDimitry Andric for (auto StackId : CallsiteContext) { 3108*06c3fb27SDimitry Andric assert(StackIdIndexIter != StackNode.StackIdIndices.end()); 3109*06c3fb27SDimitry Andric assert(ImportSummary->getStackIdAtIndex(*StackIdIndexIter) == 3110*06c3fb27SDimitry Andric StackId); 3111*06c3fb27SDimitry Andric StackIdIndexIter++; 3112*06c3fb27SDimitry Andric } 3113*06c3fb27SDimitry Andric #endif 3114*06c3fb27SDimitry Andric 3115*06c3fb27SDimitry Andric // Perform cloning if not yet done. 3116*06c3fb27SDimitry Andric CloneFuncIfNeeded(/*NumClones=*/StackNode.Clones.size()); 3117*06c3fb27SDimitry Andric 3118*06c3fb27SDimitry Andric // Should have skipped indirect calls via mayHaveMemprofSummary. 3119*06c3fb27SDimitry Andric assert(CB->getCalledFunction()); 3120*06c3fb27SDimitry Andric assert(!IsMemProfClone(*CB->getCalledFunction())); 3121*06c3fb27SDimitry Andric 3122*06c3fb27SDimitry Andric // Update the calls per the summary info. 3123*06c3fb27SDimitry Andric // Save orig name since it gets updated in the first iteration 3124*06c3fb27SDimitry Andric // below. 3125*06c3fb27SDimitry Andric auto CalleeOrigName = CB->getCalledFunction()->getName(); 3126*06c3fb27SDimitry Andric for (unsigned J = 0; J < StackNode.Clones.size(); J++) { 3127*06c3fb27SDimitry Andric // Do nothing if this version calls the original version of its 3128*06c3fb27SDimitry Andric // callee. 
3129*06c3fb27SDimitry Andric if (!StackNode.Clones[J]) 3130*06c3fb27SDimitry Andric continue; 3131*06c3fb27SDimitry Andric auto NewF = M.getOrInsertFunction( 3132*06c3fb27SDimitry Andric getMemProfFuncName(CalleeOrigName, StackNode.Clones[J]), 3133*06c3fb27SDimitry Andric CB->getCalledFunction()->getFunctionType()); 3134*06c3fb27SDimitry Andric CallBase *CBClone; 3135*06c3fb27SDimitry Andric // Copy 0 is the original function. 3136*06c3fb27SDimitry Andric if (!J) 3137*06c3fb27SDimitry Andric CBClone = CB; 3138*06c3fb27SDimitry Andric else 3139*06c3fb27SDimitry Andric CBClone = cast<CallBase>((*VMaps[J - 1])[CB]); 3140*06c3fb27SDimitry Andric CBClone->setCalledFunction(NewF); 3141*06c3fb27SDimitry Andric ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofCall", CBClone) 3142*06c3fb27SDimitry Andric << ore::NV("Call", CBClone) << " in clone " 3143*06c3fb27SDimitry Andric << ore::NV("Caller", CBClone->getFunction()) 3144*06c3fb27SDimitry Andric << " assigned to call function clone " 3145*06c3fb27SDimitry Andric << ore::NV("Callee", NewF.getCallee())); 3146*06c3fb27SDimitry Andric } 3147*06c3fb27SDimitry Andric } 3148*06c3fb27SDimitry Andric // Memprof and callsite metadata on memory allocations no longer needed. 
3149*06c3fb27SDimitry Andric I.setMetadata(LLVMContext::MD_memprof, nullptr); 3150*06c3fb27SDimitry Andric I.setMetadata(LLVMContext::MD_callsite, nullptr); 3151*06c3fb27SDimitry Andric } 3152*06c3fb27SDimitry Andric } 3153*06c3fb27SDimitry Andric } 3154*06c3fb27SDimitry Andric 3155*06c3fb27SDimitry Andric return Changed; 3156*06c3fb27SDimitry Andric } 3157*06c3fb27SDimitry Andric 3158*06c3fb27SDimitry Andric template <typename DerivedCCG, typename FuncTy, typename CallTy> 3159*06c3fb27SDimitry Andric bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::process() { 3160*06c3fb27SDimitry Andric if (DumpCCG) { 3161*06c3fb27SDimitry Andric dbgs() << "CCG before cloning:\n"; 3162*06c3fb27SDimitry Andric dbgs() << *this; 3163*06c3fb27SDimitry Andric } 3164*06c3fb27SDimitry Andric if (ExportToDot) 3165*06c3fb27SDimitry Andric exportToDot("postbuild"); 3166*06c3fb27SDimitry Andric 3167*06c3fb27SDimitry Andric if (VerifyCCG) { 3168*06c3fb27SDimitry Andric check(); 3169*06c3fb27SDimitry Andric } 3170*06c3fb27SDimitry Andric 3171*06c3fb27SDimitry Andric identifyClones(); 3172*06c3fb27SDimitry Andric 3173*06c3fb27SDimitry Andric if (VerifyCCG) { 3174*06c3fb27SDimitry Andric check(); 3175*06c3fb27SDimitry Andric } 3176*06c3fb27SDimitry Andric 3177*06c3fb27SDimitry Andric if (DumpCCG) { 3178*06c3fb27SDimitry Andric dbgs() << "CCG after cloning:\n"; 3179*06c3fb27SDimitry Andric dbgs() << *this; 3180*06c3fb27SDimitry Andric } 3181*06c3fb27SDimitry Andric if (ExportToDot) 3182*06c3fb27SDimitry Andric exportToDot("cloned"); 3183*06c3fb27SDimitry Andric 3184*06c3fb27SDimitry Andric bool Changed = assignFunctions(); 3185*06c3fb27SDimitry Andric 3186*06c3fb27SDimitry Andric if (DumpCCG) { 3187*06c3fb27SDimitry Andric dbgs() << "CCG after assigning function clones:\n"; 3188*06c3fb27SDimitry Andric dbgs() << *this; 3189*06c3fb27SDimitry Andric } 3190*06c3fb27SDimitry Andric if (ExportToDot) 3191*06c3fb27SDimitry Andric exportToDot("clonefuncassign"); 3192*06c3fb27SDimitry Andric 
3193*06c3fb27SDimitry Andric return Changed; 3194*06c3fb27SDimitry Andric } 3195*06c3fb27SDimitry Andric 3196*06c3fb27SDimitry Andric bool MemProfContextDisambiguation::processModule( 3197*06c3fb27SDimitry Andric Module &M, 3198*06c3fb27SDimitry Andric function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter) { 3199*06c3fb27SDimitry Andric 3200*06c3fb27SDimitry Andric // If we have an import summary, then the cloning decisions were made during 3201*06c3fb27SDimitry Andric // the thin link on the index. Apply them and return. 3202*06c3fb27SDimitry Andric if (ImportSummary) 3203*06c3fb27SDimitry Andric return applyImport(M); 3204*06c3fb27SDimitry Andric 3205*06c3fb27SDimitry Andric // TODO: If/when other types of memprof cloning are enabled beyond just for 3206*06c3fb27SDimitry Andric // hot and cold, we will need to change this to individually control the 3207*06c3fb27SDimitry Andric // AllocationType passed to addStackNodesForMIB during CCG construction. 3208*06c3fb27SDimitry Andric // Note that we specifically check this after applying imports above, so that 3209*06c3fb27SDimitry Andric // the option isn't needed to be passed to distributed ThinLTO backend 3210*06c3fb27SDimitry Andric // clang processes, which won't necessarily have visibility into the linker 3211*06c3fb27SDimitry Andric // dependences. Instead the information is communicated from the LTO link to 3212*06c3fb27SDimitry Andric // the backends via the combined summary index. 
3213*06c3fb27SDimitry Andric if (!SupportsHotColdNew) 3214*06c3fb27SDimitry Andric return false; 3215*06c3fb27SDimitry Andric 3216*06c3fb27SDimitry Andric ModuleCallsiteContextGraph CCG(M, OREGetter); 3217*06c3fb27SDimitry Andric return CCG.process(); 3218*06c3fb27SDimitry Andric } 3219*06c3fb27SDimitry Andric 3220*06c3fb27SDimitry Andric MemProfContextDisambiguation::MemProfContextDisambiguation( 3221*06c3fb27SDimitry Andric const ModuleSummaryIndex *Summary) 3222*06c3fb27SDimitry Andric : ImportSummary(Summary) { 3223*06c3fb27SDimitry Andric if (ImportSummary) { 3224*06c3fb27SDimitry Andric // The MemProfImportSummary should only be used for testing ThinLTO 3225*06c3fb27SDimitry Andric // distributed backend handling via opt, in which case we don't have a 3226*06c3fb27SDimitry Andric // summary from the pass pipeline. 3227*06c3fb27SDimitry Andric assert(MemProfImportSummary.empty()); 3228*06c3fb27SDimitry Andric return; 3229*06c3fb27SDimitry Andric } 3230*06c3fb27SDimitry Andric if (MemProfImportSummary.empty()) 3231*06c3fb27SDimitry Andric return; 3232*06c3fb27SDimitry Andric 3233*06c3fb27SDimitry Andric auto ReadSummaryFile = 3234*06c3fb27SDimitry Andric errorOrToExpected(MemoryBuffer::getFile(MemProfImportSummary)); 3235*06c3fb27SDimitry Andric if (!ReadSummaryFile) { 3236*06c3fb27SDimitry Andric logAllUnhandledErrors(ReadSummaryFile.takeError(), errs(), 3237*06c3fb27SDimitry Andric "Error loading file '" + MemProfImportSummary + 3238*06c3fb27SDimitry Andric "': "); 3239*06c3fb27SDimitry Andric return; 3240*06c3fb27SDimitry Andric } 3241*06c3fb27SDimitry Andric auto ImportSummaryForTestingOrErr = getModuleSummaryIndex(**ReadSummaryFile); 3242*06c3fb27SDimitry Andric if (!ImportSummaryForTestingOrErr) { 3243*06c3fb27SDimitry Andric logAllUnhandledErrors(ImportSummaryForTestingOrErr.takeError(), errs(), 3244*06c3fb27SDimitry Andric "Error parsing file '" + MemProfImportSummary + 3245*06c3fb27SDimitry Andric "': "); 3246*06c3fb27SDimitry Andric return; 
3247*06c3fb27SDimitry Andric } 3248*06c3fb27SDimitry Andric ImportSummaryForTesting = std::move(*ImportSummaryForTestingOrErr); 3249*06c3fb27SDimitry Andric ImportSummary = ImportSummaryForTesting.get(); 3250*06c3fb27SDimitry Andric } 3251*06c3fb27SDimitry Andric 3252*06c3fb27SDimitry Andric PreservedAnalyses MemProfContextDisambiguation::run(Module &M, 3253*06c3fb27SDimitry Andric ModuleAnalysisManager &AM) { 3254*06c3fb27SDimitry Andric auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 3255*06c3fb27SDimitry Andric auto OREGetter = [&](Function *F) -> OptimizationRemarkEmitter & { 3256*06c3fb27SDimitry Andric return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*F); 3257*06c3fb27SDimitry Andric }; 3258*06c3fb27SDimitry Andric if (!processModule(M, OREGetter)) 3259*06c3fb27SDimitry Andric return PreservedAnalyses::all(); 3260*06c3fb27SDimitry Andric return PreservedAnalyses::none(); 3261*06c3fb27SDimitry Andric } 3262*06c3fb27SDimitry Andric 3263*06c3fb27SDimitry Andric void MemProfContextDisambiguation::run( 3264*06c3fb27SDimitry Andric ModuleSummaryIndex &Index, 3265*06c3fb27SDimitry Andric function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)> 3266*06c3fb27SDimitry Andric isPrevailing) { 3267*06c3fb27SDimitry Andric // TODO: If/when other types of memprof cloning are enabled beyond just for 3268*06c3fb27SDimitry Andric // hot and cold, we will need to change this to individually control the 3269*06c3fb27SDimitry Andric // AllocationType passed to addStackNodesForMIB during CCG construction. 3270*06c3fb27SDimitry Andric // The index was set from the option, so these should be in sync. 
3271*06c3fb27SDimitry Andric assert(Index.withSupportsHotColdNew() == SupportsHotColdNew); 3272*06c3fb27SDimitry Andric if (!SupportsHotColdNew) 3273*06c3fb27SDimitry Andric return; 3274*06c3fb27SDimitry Andric 3275*06c3fb27SDimitry Andric IndexCallsiteContextGraph CCG(Index, isPrevailing); 3276*06c3fb27SDimitry Andric CCG.process(); 3277*06c3fb27SDimitry Andric } 3278