Lines Matching +full:node +full:- +full:version
1 //==-- MemProfContextDisambiguation.cpp - Disambiguate contexts -------------=//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
21 //===----------------------------------------------------------------------===//
54 #define DEBUG_TYPE "memprof-context-disambiguation"
94 "memprof-dot-file-path-prefix", cl::init(""), cl::Hidden,
98 static cl::opt<bool> ExportToDot("memprof-export-to-dot", cl::init(false),
103 DumpCCG("memprof-dump-ccg", cl::init(false), cl::Hidden,
107 VerifyCCG("memprof-verify-ccg", cl::init(false), cl::Hidden,
111 VerifyNodes("memprof-verify-nodes", cl::init(false), cl::Hidden,
115 "memprof-import-summary",
120 TailCallSearchDepth("memprof-tail-call-search-depth", cl::init(5),
127 "memprof-allow-recursive-callsites", cl::init(false), cl::Hidden,
134 // hinted bytes reporting a bit when -memprof-report-hinted-sizes is enabled.
136 "memprof-allow-recursive-contexts", cl::init(true), cl::Hidden,
141 "enable-memprof-context-disambiguation", cl::init(false), cl::Hidden,
147 "supports-hot-cold-new", cl::init(false), cl::Hidden,
151 "memprof-require-definition-for-promotion", cl::init(false), cl::Hidden,
218 explicit operator bool() const { return this->first != nullptr; }
219 FuncTy *func() const { return this->first; }
220 unsigned cloneNo() const { return this->second; }
229 explicit operator bool() const { return (bool)this->first; }
230 CallTy call() const { return this->first; }
231 unsigned cloneNo() const { return this->second; }
232 void setCloneNo(unsigned N) { this->second = N; }
239 call()->print(OS);
254 /// Node in the Callsite Context Graph
266 // for contexts including this node.
270 // for which we have created this node.
280 // callsite stack nodes it is the original stack id when the node is
289 // TODO: Should this be a map (from Callee node) for more efficient lookup?
293 // TODO: Should this be a map (from Caller node) for more efficient lookup?
297 // such as the context ids and allocation type of this node.
300 // If node has any callees, compute from those, otherwise compute from
301 // callers (i.e. if this is the leaf allocation node).
305 // A node with caller edges but no callee edges must be the allocation
306 // node.
313 // Compute the context ids for this node from the union of its edge context
322 Count += Edge->getContextIds().size();
325 ContextIds.insert(Edge->getContextIds().begin(),
326 Edge->getContextIds().end());
330 // Compute the allocation type for this node from the OR of its edge
340 AllocType |= Edge->AllocTypes;
348 // The context ids set for this node is empty if its edge context ids are
355 if (!Edge->getContextIds().empty())
364 // If a clone, points to the original uncloned node.
374 CloneOf->Clones.push_back(Clone);
375 Clone->CloneOf = CloneOf;
378 assert(!Clone->CloneOf);
379 Clone->CloneOf = this;
403 // True if this node was effectively removed from the graph, in which case
414 friend raw_ostream &operator<<(raw_ostream &OS, const ContextNode &Node) {
415 Node.print(OS);
474 void removeNoneTypeCalleeEdges(ContextNode *Node);
475 void removeNoneTypeCallerEdges(ContextNode *Node);
477 recursivelyRemoveNoneTypeCalleeEdges(ContextNode *Node,
508 // Try to partition calls on the given node (already placed into the AllCalls
509 // array) by callee function, creating new copies of Node as needed to hold
513 ContextNode *Node, ArrayRef<CallInfo> AllCalls,
520 /// Map from callsite node to the enclosing caller function.
527 // non-allocation callsites onto context nodes created from the allocation
532 // The callsites stack ids that have a context node in the graph.
537 // ids for use in a new context node created for this callsite.
546 /// enable removal while iterating over a copy of a node's edge list).
550 /// Assigns the given Node to calls at or inlined into the location with
551 /// the Node's stack id, after post order traversing and processing its
557 ContextNode *Node, DenseSet<const ContextNode *> &Visited,
583 return static_cast<const DerivedCCG *>(this)->getStackId(IdOrIndex);
599 return static_cast<DerivedCCG *>(this)->getCalleeFunc(Call);
608 return static_cast<DerivedCCG *>(this)->calleeMatchesFunc(
614 return static_cast<DerivedCCG *>(this)->sameCallee(Call1, Call2);
620 return static_cast<DerivedCCG *>(this)->getStackIdsWithContextNodesForCall(
626 return static_cast<DerivedCCG *>(this)->getLastStackId(Call);
632 static_cast<DerivedCCG *>(this)->updateAllocationCall(Call, AllocType);
637 return static_cast<const DerivedCCG *>(this)->getAllocationCallType(Call);
640 /// Update non-allocation call to invoke (possibly cloned) function
643 static_cast<DerivedCCG *>(this)->updateCall(CallerCall, CalleeFunc);
652 return static_cast<DerivedCCG *>(this)->cloneFunctionForCallsite(
660 return static_cast<const DerivedCCG *>(this)->getLabel(Func, Call, CloneNo);
673 /// Helpers to find the node corresponding to the given call or stackid.
693 /// Create a clone of Edge's callee and move Edge to that new callee node,
695 /// If ContextIdsToMove is non-empty, only that subset of Edge's ids are
703 /// If ContextIdsToMove is non-empty, only that subset of Edge's ids are
713 /// a simplified version of it as we always move the given edge and all of its
718 /// Recursively perform cloning on the graph for the given Node and its
722 void identifyClones(ContextNode *Node, DenseSet<const ContextNode *> &Visited,
734 /// Identifies the context node created for a stack id when adding the MIB
809 /// allocation or an interior callsite node in an allocation's context.
818 IndexCall *operator->() { return this; }
863 FS->addCallsite(*Callsite.second);
932 // If we can't clone a node that has NotCold+Cold alloc type, we will fall
959 // Can share if one of the edges is None type - don't
963 r->AllocTypes == (uint8_t)AllocationType::None)
965 return allocTypeToUse(l) == allocTypeToUse(r->AllocTypes);
971 // clone. Because the InAllocTypes were computed from the original node's callee
978 const ContextNode<DerivedCCG, FuncTy, CallTy> *Node = Clone->CloneOf;
979 assert(Node);
980 // InAllocTypes should have been computed for the original node's callee
982 assert(InAllocTypes.size() == Node->CalleeEdges.size());
986 for (const auto &E : Clone->CalleeEdges) {
987 assert(!EdgeCalleeMap.contains(E->Callee));
988 EdgeCalleeMap[E->Callee] = E->AllocTypes;
990 // Next, walk the original node's callees, and look for the corresponding
992 for (unsigned I = 0; I < Node->CalleeEdges.size(); I++) {
993 auto Iter = EdgeCalleeMap.find(Node->CalleeEdges[I]->Callee);
997 // Can share if one of the edges is None type - don't
1001 Iter->second == (uint8_t)AllocationType::None)
1003 if (allocTypeToUse(Iter->second) != allocTypeToUse(InAllocTypes[I]))
1015 ContextNode *Node = getNodeForAlloc(C);
1016 if (Node)
1017 return Node;
1035 return StackEntryNode->second;
1044 if (Edge->Caller == Caller) {
1045 Edge->AllocTypes |= (uint8_t)AllocType;
1046 Edge->getContextIds().insert(ContextId);
1053 Caller->CalleeEdges.push_back(Edge);
1059 assert(!EI || (*EI)->get() == Edge);
1064 auto *Callee = Edge->Callee;
1065 auto *Caller = Edge->Caller;
1070 Edge->clear();
1073 Callee->eraseCallerEdge(Edge);
1074 Caller->eraseCalleeEdge(Edge);
1076 Callee->eraseCallerEdge(Edge);
1077 *EI = Caller->CalleeEdges.erase(*EI);
1079 Caller->eraseCalleeEdge(Edge);
1080 *EI = Callee->CallerEdges.erase(*EI);
1086 DerivedCCG, FuncTy, CallTy>::removeNoneTypeCalleeEdges(ContextNode *Node) {
1087 for (auto EI = Node->CalleeEdges.begin(); EI != Node->CalleeEdges.end();) {
1089 if (Edge->AllocTypes == (uint8_t)AllocationType::None) {
1090 assert(Edge->ContextIds.empty());
1099 DerivedCCG, FuncTy, CallTy>::removeNoneTypeCallerEdges(ContextNode *Node) {
1100 for (auto EI = Node->CallerEdges.begin(); EI != Node->CallerEdges.end();) {
1102 if (Edge->AllocTypes == (uint8_t)AllocationType::None) {
1103 assert(Edge->ContextIds.empty());
1104 Edge->Caller->eraseCalleeEdge(Edge.get());
1105 EI = Node->CallerEdges.erase(EI);
1116 if (Edge->Callee == Callee)
1126 if (Edge->Caller == Caller)
1205 AllocNode->OrigStackOrAllocId = LastContextId;
1208 AllocNode->AllocTypes = (uint8_t)AllocationType::None;
1243 AllocNode->AllocTypes |= (uint8_t)AllocType;
1246 // Later when processing the stack ids on non-alloc callsites we will adjust
1261 StackNode->OrigStackOrAllocId = StackId;
1263 // Marking a node recursive will prevent its cloning completely, even for
1264 // non-recursive contexts flowing through it.
1268 StackNode->Recursive = true;
1270 StackNode->AllocTypes |= (uint8_t)AllocType;
1271 PrevNode->addOrUpdateCallerEdge(StackNode, AllocType, LastContextId);
1302 NewIds.insert(NewId->second.begin(), NewId->second.end());
1307 auto UpdateCallers = [&](ContextNode *Node,
1309 auto &&UpdateCallers) -> void {
1310 for (const auto &Edge : Node->CallerEdges) {
1314 ContextNode *NextNode = Edge->Caller;
1315 DenseSet<uint32_t> NewIdsToAdd = GetNewIds(Edge->getContextIds());
1319 Edge->getContextIds().insert(NewIdsToAdd.begin(), NewIdsToAdd.end());
1327 auto *Node = Entry.second;
1328 UpdateCallers(Node, Visited, UpdateCallers);
1339 TowardsCallee ? OrigNode->CalleeEdges : OrigNode->CallerEdges;
1347 set_subtract(Edge->getContextIds(), RemainingContextIds, NewEdgeContextIds,
1358 Edge->Callee, NewNode, NewAllocType, std::move(NewEdgeContextIds));
1359 NewNode->CalleeEdges.push_back(NewEdge);
1360 NewEdge->Callee->CallerEdges.push_back(NewEdge);
1364 NewNode, Edge->Caller, NewAllocType, std::move(NewEdgeContextIds));
1365 NewNode->CallerEdges.push_back(NewEdge);
1366 NewEdge->Caller->CalleeEdges.push_back(NewEdge);
1369 if (Edge->getContextIds().empty()) {
1382 assert(Edge->AllocTypes != (uint8_t)AllocationType::None);
1383 assert(!Edge->ContextIds.empty());
1387 static void checkNode(const ContextNode<DerivedCCG, FuncTy, CallTy> *Node,
1389 if (Node->isRemoved())
1392 // Compute node's context ids once for use in asserts.
1393 auto NodeContextIds = Node->getContextIds();
1395 // Node's context ids should be the union of both its callee and caller edge
1397 if (Node->CallerEdges.size()) {
1399 Node->CallerEdges.front()->ContextIds);
1400 for (const auto &Edge : llvm::drop_begin(Node->CallerEdges)) {
1403 set_union(CallerEdgeContextIds, Edge->ContextIds);
1405 // Node can have more context ids than callers if some contexts terminate at
1406 // node and some are longer. If we are allowing recursive callsites but
1413 if (Node->CalleeEdges.size()) {
1415 Node->CalleeEdges.front()->ContextIds);
1416 for (const auto &Edge : llvm::drop_begin(Node->CalleeEdges)) {
1419 set_union(CalleeEdgeContextIds, Edge->getContextIds());
1430 for (const auto &E : Node->CalleeEdges)
1431 NodeSet.insert(E->Callee);
1432 assert(NodeSet.size() == Node->CalleeEdges.size());
1439 ContextNode *Node, DenseSet<const ContextNode *> &Visited,
1443 auto Inserted = Visited.insert(Node);
1450 auto CallerEdges = Node->CallerEdges;
1453 if (Edge->isRemoved()) {
1454 assert(!is_contained(Node->CallerEdges, Edge));
1457 assignStackNodesPostOrder(Edge->Caller, Visited, StackIdToMatchingCalls,
1461 // If this node's stack id is in the map, update the graph to contain new
1465 // Ignore this node if it is for an allocation or we didn't record any
1467 if (Node->IsAllocation ||
1468 !StackIdToMatchingCalls.count(Node->OrigStackOrAllocId))
1471 auto &Calls = StackIdToMatchingCalls[Node->OrigStackOrAllocId];
1474 // assign the context node for stack id to this Call.
1479 // It should be this Node
1480 assert(Node == getNodeForStackId(Ids[0]));
1481 if (Node->Recursive)
1483 Node->setCall(Call);
1484 NonAllocationCallToContextNodeMap[Call] = Node;
1485 NodeToCallingFunc[Node] = Func;
1491 // Find the node for the last stack id, which should be the same
1492 // across all calls recorded for this id, and is this node's id.
1493 uint64_t LastId = Node->OrigStackOrAllocId;
1497 assert(LastNode == Node);
1499 ContextNode *LastNode = Node;
1502 // Compute the last node's context ids once, as it is shared by all calls in
1504 DenseSet<uint32_t> LastNodeContextIds = LastNode->getContextIds();
1512 // Skip any for which we didn't assign any ids, these don't get a node in
1516 // having the same stack ids), simply add it to the context node created
1524 // create a node because of the below recomputation of context ids
1529 NonAllocationCallToContextNodeMap[MatchingCall]->MatchingCalls.push_back(
1553 assert(!CurNode->Recursive);
1555 auto *Edge = CurNode->findEdgeFromCaller(PrevNode);
1564 set_intersect(SavedContextIds, Edge->getContextIds());
1575 // Create new context node.
1579 NewNode->AllocTypes = computeAllocType(SavedContextIds);
1595 // Last Node.
1603 // edge from the prior node.
1605 auto *PrevEdge = CurNode->findEdgeFromCallee(PrevNode);
1607 set_subtract(PrevEdge->getContextIds(), SavedContextIds);
1608 if (PrevEdge->getContextIds().empty())
1612 // edges. This isn't an alloc node, so if there are no callee edges, the
1614 CurNode->AllocTypes = CurNode->CalleeEdges.empty()
1616 : CurNode->computeAllocType();
1634 // callsite id that has a context node (some might not due to pruning
1638 // the analysis will eventually identify for use in any new node created
1653 // (outermost caller) stack id with a node.
1672 // Skip single calls with a single stack id. These don't need a new node.
1679 // node sequences we will sort the vectors of stack ids in descending order
1684 // facilitate efficiently mapping them to the same context node.
1701 // Find the node for the last stack id, which should be the same
1709 if (LastNode->Recursive)
1712 // Initialize the context ids with the last node's. We will subsequently
1714 DenseSet<uint32_t> LastNodeContextIds = LastNode->getContextIds();
1721 // all such matching calls onto the same context node.
1733 if (I > 0 && Ids != Calls[I - 1].StackIds)
1739 // Start with the remaining saved ids for the last node.
1755 if (CurNode->Recursive) {
1760 auto *Edge = CurNode->findEdgeFromCaller(PrevNode);
1763 // particular inlined context may include stack ids A->B, and we may
1778 set_intersect(StackSequenceContextIds, Edge->getContextIds());
1793 // found in caller nodes of the last node found above.
1795 for (const auto &PE : LastNode->CallerEdges) {
1796 set_subtract(StackSequenceContextIds, PE->getContextIds());
1819 // assigned to the same context node, and skip them.
1829 // node, must duplicate ids for it.
1840 // context ids computed for the original node sequence to this call.
1854 // Update saved last node's context ids to remove those that are
1870 // Now perform a post-order traversal over the graph, starting with the
1885 Call->getMetadata(LLVMContext::MD_callsite));
1900 // We use CloneNo == 0 to refer to the original version, which doesn't get
1914 return (Twine(Call->getFunction()->getName()) + " -> " +
1915 cast<CallBase>(Call)->getCalledFunction()->getName())
1925 return (VI->second.name() + " -> alloc").str();
1928 return (VI->second.name() + " -> " +
1929 getMemProfFuncName(Callsite->Callee.name(),
1930 Callsite->Clones[CloneNo]))
1939 Call->getMetadata(LLVMContext::MD_callsite));
1962 ContextNode *Node = getNodeForStackId(StackId);
1963 if (!Node)
1987 for (auto &MDOp : MemProfMD->operands()) {
1991 if (MIBMD->getNumOperands() > 2) {
1992 for (unsigned I = 2; I < MIBMD->getNumOperands(); I++) {
1994 dyn_cast<MDNode>(MIBMD->getOperand(I));
1995 assert(ContextSizePair->getNumOperands() == 2);
1997 ContextSizePair->getOperand(0))
1998 ->getZExtValue();
2000 ContextSizePair->getOperand(1))
2001 ->getZExtValue();
2012 assert(AllocNode->AllocTypes != (uint8_t)AllocationType::None);
2043 Call.call()->setMetadata(LLVMContext::MD_callsite, nullptr);
2056 // prevailing version).
2061 if (!GlobalValue::isLocalLinkage(S->linkage()) &&
2068 if (!FS->allocs().empty()) {
2069 for (auto &AN : FS->mutableAllocs()) {
2102 assert(AllocNode->AllocTypes != (uint8_t)AllocationType::None);
2103 // Initialize version 0 on the summary alloc node to the current alloc
2105 // that in the case where we aren't able to clone the original version
2107 AN.Versions[0] = (uint8_t)allocTypeToUse(AllocNode->AllocTypes);
2111 if (!FS->callsites().empty())
2112 for (auto &SN : FS->mutableCallsites()) {
2120 if (!FS->allocs().empty() || !FS->callsites().empty())
2157 auto *Node = Entry.second;
2158 assert(Node->Clones.empty());
2159 // Check all node callees and see if in the same function.
2160 // We need to check all of the calls recorded in this Node, because in some
2163 // constructed in the parameter list - the destructor call of the object has
2167 AllCalls.reserve(Node->MatchingCalls.size() + 1);
2168 AllCalls.push_back(Node->Call);
2169 AllCalls.insert(AllCalls.end(), Node->MatchingCalls.begin(),
2170 Node->MatchingCalls.end());
2176 // same callsite stack ids and would be sharing a context node at this
2182 // struct with list of matching calls, assigned node.
2183 if (partitionCallsByCallee(Node, AllCalls, NewCallToNode))
2191 for (auto EI = Node->CalleeEdges.begin(); EI != Node->CalleeEdges.end();
2194 if (!Edge->Callee->hasCall())
2196 assert(NodeToCallingFunc.count(Edge->Callee));
2197 // Check if the called function matches that of the callee node.
2205 // If the first match is not the primary call on the Node, update it
2207 if (Node->Call != ThisCall) {
2208 Node->setCall(ThisCall);
2212 NewCallToNode.push_back({ThisCall, Node});
2219 Node->MatchingCalls.clear();
2221 // nodes was found, clear the call information in the node.
2224 // Work around by setting Node to have a null call, so it gets
2227 Node->setCall(CallInfo());
2231 // matching primary call on Node.
2234 if (!sameCallee(Node->Call.call(), ThisCall.call()))
2236 Node->MatchingCalls.push_back(ThisCall);
2240 // Remove all mismatched nodes identified in the above loop from the node map
2244 // Also remove any entries if we updated the node's primary call above.
2246 return !it.second->hasCall() || it.second->Call != it.first;
2250 for (auto &[Call, Node] : NewCallToNode)
2251 NonAllocationCallToContextNodeMap[Call] = Node;
2255 for (auto &[Call, Node] : TailCallToContextNodeMap)
2256 NonAllocationCallToContextNodeMap[Call] = Node;
2261 ContextNode *Node, ArrayRef<CallInfo> AllCalls,
2264 // and the node we eventually assign to them. Eventually we will record the
2265 // context node assigned to this group of calls.
2268 ContextNode *Node = nullptr;
2280 // Next, walk through all callee edges. For each callee node, get its
2282 // have at least one matching call). Build another map from each callee node
2286 for (const auto &Edge : Node->CalleeEdges) {
2287 if (!Edge->Callee->hasCall())
2289 const FuncTy *ProfiledCalleeFunc = NodeToCallingFunc[Edge->Callee];
2291 CalleeNodeToCallInfo[Edge->Callee] =
2303 // new caller node (UnmatchedCalleesNode) which gets a null call so that it is
2304 // ignored during cloning. If it is in the map, then we use the node recorded
2306 // The first callee will use the original node instead of creating a new one.
2307 // Note that any of the original calls on this node (in AllCalls) that didn't
2308 // have a callee function automatically get dropped from the node as part of
2311 // Track whether we already assigned original node to a callee.
2313 assert(NodeToCallingFunc[Node]);
2314 // Iterate over a copy of Node's callee edges, since we may need to remove
2316 // makes it less error-prone.
2317 auto CalleeEdges = Node->CalleeEdges;
2319 if (!Edge->Callee->hasCall())
2322 // Will be updated below to point to whatever (caller) node this callee edge
2328 if (!CalleeNodeToCallInfo.contains(Edge->Callee)) {
2331 createNewNode(/*IsAllocation=*/false, NodeToCallingFunc[Node]);
2334 // Look up the information recorded for this callee node, and use the
2335 // recorded caller node (creating it if needed).
2336 auto *Info = CalleeNodeToCallInfo[Edge->Callee];
2337 if (!Info->Node) {
2338 // If we haven't assigned any callees to the original node use it.
2340 Info->Node = Node;
2342 Node->MatchingCalls.clear();
2345 Info->Node =
2346 createNewNode(/*IsAllocation=*/false, NodeToCallingFunc[Node]);
2347 assert(!Info->Calls.empty());
2348 // The first call becomes the primary call for this caller node, and the
2350 Info->Node->setCall(Info->Calls.front());
2351 Info->Node->MatchingCalls.insert(Info->Node->MatchingCalls.end(),
2352 Info->Calls.begin() + 1,
2353 Info->Calls.end());
2354 // Save the primary call to node correspondence so that we can update
2357 NewCallToNode.push_back({Info->Node->Call, Info->Node});
2359 CallerNodeToUse = Info->Node;
2362 // Don't need to move edge if we are using the original node;
2363 if (CallerNodeToUse == Node)
2370 // caller edges from Node are replicated onto the new callers, and it
2374 removeNoneTypeCallerEdges(I.second->Node);
2377 removeNoneTypeCallerEdges(Node);
2398 const FuncTy *ProfiledCalleeFunc = NodeToCallingFunc[Edge->Callee];
2399 const FuncTy *CallerFunc = NodeToCallingFunc[Edge->Caller];
2412 auto *CurEdge = Callee->findEdgeFromCaller(Caller);
2416 CurEdge->ContextIds.insert(Edge->ContextIds.begin(),
2417 Edge->ContextIds.end());
2418 CurEdge->AllocTypes |= Edge->AllocTypes;
2424 Callee, Caller, Edge->AllocTypes, Edge->ContextIds);
2425 Callee->CallerEdges.push_back(NewEdge);
2426 if (Caller == Edge->Caller) {
2430 EI = Caller->CalleeEdges.insert(EI, NewEdge);
2435 Caller->CalleeEdges.push_back(NewEdge);
2440 auto *CurCalleeNode = Edge->Callee;
2443 // First check if we have already synthesized a node for this tail call.
2446 NewNode->AllocTypes |= Edge->AllocTypes;
2449 // Create Node and record node info.
2452 NewNode->AllocTypes = Edge->AllocTypes;
2455 // Hook up node to its callee node
2461 // Hook up edge's original caller to new callee node.
2462 AddEdge(Edge->Caller, CurCalleeNode);
2466 auto *Caller = Edge->Caller;
2474 // to Edge->Caller, or found an existing one. Either way we are guaranteed
2476 assert(!Caller->CalleeEdges.empty());
2477 --EI;
2499 CalleeFunc = dyn_cast<Function>(Alias->getAliasee());
2511 if (!CB || !CB->isTailCall())
2513 auto *CalledValue = CB->getCalledOperand();
2514 auto *CalledFunction = CB->getCalledFunction();
2516 CalledValue = CalledValue->stripPointerCasts();
2525 CalledFunction = dyn_cast<Function>(GA->getAliaseeObject());
2562 if (!CB->getCalledOperand() || CB->isIndirectCall())
2564 auto *CalleeVal = CB->getCalledOperand()->stripPointerCasts();
2567 return dyn_cast<Function>(Alias->getAliasee());
2575 if (!CB->getCalledOperand() || CB->isIndirectCall())
2577 auto *CalleeVal = CB->getCalledOperand()->stripPointerCasts();
2582 if (Alias && Alias->getAliasee() == Func)
2598 << Func->getName() << " from " << CallerFunc->getName()
2599 << " that actually called " << CalleeVal->getName()
2615 if (!CB1->getCalledOperand() || CB1->isIndirectCall())
2617 auto *CalleeVal1 = CB1->getCalledOperand()->stripPointerCasts();
2620 if (!CB2->getCalledOperand() || CB2->isIndirectCall())
2622 auto *CalleeVal2 = CB2->getCalledOperand()->stripPointerCasts();
2656 if (!GlobalValue::isLocalLinkage(S->linkage()) &&
2659 auto *FS = dyn_cast<FunctionSummary>(S->getBaseObject());
2665 FSVI = AS->getAliaseeVI();
2666 for (auto &CallEdge : FS->calls()) {
2708 ValueInfo Callee = dyn_cast_if_present<CallsiteInfo *>(Call)->Callee;
2711 return dyn_cast<FunctionSummary>(Callee.getSummaryList()[0]->getBaseObject());
2718 ValueInfo Callee = dyn_cast_if_present<CallsiteInfo *>(Call)->Callee;
2729 // summary base objects will contain the stack node summaries and thus
2730 // get a context node.
2731 (Alias && Alias->getAliaseeVI() == FuncVI))
2761 ValueInfo Callee1 = dyn_cast_if_present<CallsiteInfo *>(Call1)->Callee;
2762 ValueInfo Callee2 = dyn_cast_if_present<CallsiteInfo *>(Call2)->Callee;
2776 OS << "Node " << this << "\n";
2845 for (const auto Node : nodes<GraphType>(this)) {
2846 if (Node->isRemoved())
2848 Node->print(OS);
2857 for (const auto Node : nodes<GraphType>(this)) {
2858 if (Node->isRemoved())
2860 if (!Node->IsAllocation)
2862 DenseSet<uint32_t> ContextIds = Node->getContextIds();
2863 auto AllocTypeFromCall = getAllocationCallType(Node->Call);
2871 for (auto &Info : CSI->second) {
2873 << getAllocTypeString((uint8_t)TypeI->second)
2876 << getAllocTypeString(Node->AllocTypes) << " after cloning";
2877 if (allocTypeToUse(Node->AllocTypes) != AllocTypeFromCall)
2890 for (const auto Node : nodes<GraphType>(this)) {
2891 checkNode<DerivedCCG, FuncTy, CallTy>(Node, /*CheckEdges=*/false);
2892 for (auto &Edge : Node->CallerEdges)
2910 return nodes_iterator(G->NodeOwner.begin(), &getNode);
2914 return nodes_iterator(G->NodeOwner.end(), &getNode);
2918 return G->NodeOwner.begin()->get();
2924 return P->Callee;
2933 return ChildIteratorType(N->CalleeEdges.begin(), &GetCallee);
2937 return ChildIteratorType(N->CalleeEdges.end(), &GetCallee);
2951 static std::string getNodeLabel(NodeRef Node, GraphType G) {
2953 (Twine("OrigId: ") + (Node->IsAllocation ? "Alloc" : "") +
2954 Twine(Node->OrigStackOrAllocId))
2957 if (Node->hasCall()) {
2958 auto Func = G->NodeToCallingFunc.find(Node);
2959 assert(Func != G->NodeToCallingFunc.end());
2961 G->getLabel(Func->second, Node->Call.call(), Node->Call.cloneNo());
2964 if (Node->Recursive)
2972 static std::string getNodeAttributes(NodeRef Node, GraphType) {
2973 std::string AttributeString = (Twine("tooltip=\"") + getNodeId(Node) + " " +
2974 getContextIds(Node->getContextIds()) + "\"")
2977 (Twine(",fillcolor=\"") + getColor(Node->AllocTypes) + "\"").str();
2979 if (Node->CloneOf) {
2990 return (Twine("tooltip=\"") + getContextIds(Edge->ContextIds) + "\"" +
2991 Twine(",fillcolor=\"") + getColor(Edge->AllocTypes) + "\"")
2997 static bool isNodeHidden(NodeRef Node, GraphType) {
2998 return Node->isRemoved();
3028 static std::string getNodeId(NodeRef Node) {
3030 SStream << std::hex << "N0x" << (unsigned long long)Node;
3048 ContextNode *Node = Edge->Callee;
3049 assert(NodeToCallingFunc.count(Node));
3051 createNewNode(Node->IsAllocation, NodeToCallingFunc[Node], Node->Call);
3052 Node->addClone(Clone);
3053 Clone->MatchingCalls = Node->MatchingCalls;
3065 // node (Edge's current callee may be the original node too).
3066 assert(NewCallee->getOrigNode() == Edge->Callee->getOrigNode());
3068 ContextNode *OldCallee = Edge->Callee;
3072 auto ExistingEdgeToNewCallee = NewCallee->findEdgeFromCaller(Edge->Caller);
3077 ContextIdsToMove = Edge->getContextIds();
3081 if (Edge->getContextIds().size() == ContextIdsToMove.size()) {
3084 NewCallee->AllocTypes |= Edge->AllocTypes;
3089 ExistingEdgeToNewCallee->getContextIds().insert(ContextIdsToMove.begin(),
3091 ExistingEdgeToNewCallee->AllocTypes |= Edge->AllocTypes;
3092 assert(Edge->ContextIds == ContextIdsToMove);
3096 Edge->Callee = NewCallee;
3097 NewCallee->CallerEdges.push_back(Edge);
3099 OldCallee->eraseCallerEdge(Edge.get());
3110 ExistingEdgeToNewCallee->getContextIds().insert(ContextIdsToMove.begin(),
3112 ExistingEdgeToNewCallee->AllocTypes |= CallerEdgeAllocType;
3116 NewCallee, Edge->Caller, CallerEdgeAllocType, ContextIdsToMove);
3117 Edge->Caller->CalleeEdges.push_back(NewEdge);
3118 NewCallee->CallerEdges.push_back(NewEdge);
3122 NewCallee->AllocTypes |= CallerEdgeAllocType;
3123 set_subtract(Edge->ContextIds, ContextIdsToMove);
3124 Edge->AllocTypes = computeAllocType(Edge->ContextIds);
3126 // Now walk the old callee node's callee edges and move Edge's context ids
3129 for (auto &OldCalleeEdge : OldCallee->CalleeEdges) {
3133 set_intersection(OldCalleeEdge->getContextIds(), ContextIdsToMove);
3134 set_subtract(OldCalleeEdge->getContextIds(), EdgeContextIdsToMove);
3135 OldCalleeEdge->AllocTypes =
3136 computeAllocType(OldCalleeEdge->getContextIds());
3144 NewCallee->findEdgeFromCallee(OldCalleeEdge->Callee)) {
3145 NewCalleeEdge->getContextIds().insert(EdgeContextIdsToMove.begin(),
3147 NewCalleeEdge->AllocTypes |= computeAllocType(EdgeContextIdsToMove);
3152 OldCalleeEdge->Callee, NewCallee,
3154 NewCallee->CalleeEdges.push_back(NewEdge);
3155 NewEdge->Callee->CallerEdges.push_back(NewEdge);
3157 // Recompute the node alloc type now that its callee edges have been
3159 OldCallee->AllocTypes = OldCallee->computeAllocType();
3161 assert((OldCallee->AllocTypes == (uint8_t)AllocationType::None) ==
3162 OldCallee->emptyContextIds());
3166 for (const auto &OldCalleeEdge : OldCallee->CalleeEdges)
3167 checkNode<DerivedCCG, FuncTy, CallTy>(OldCalleeEdge->Callee,
3169 for (const auto &NewCalleeEdge : NewCallee->CalleeEdges)
3170 checkNode<DerivedCCG, FuncTy, CallTy>(NewCalleeEdge->Callee,
3180 ContextNode *OldCaller = Edge->Caller;
3181 OldCaller->eraseCalleeEdge(Edge.get());
3185 auto ExistingEdgeToNewCaller = NewCaller->findEdgeFromCallee(Edge->Callee);
3190 ExistingEdgeToNewCaller->getContextIds().insert(
3191 Edge->getContextIds().begin(), Edge->getContextIds().end());
3192 ExistingEdgeToNewCaller->AllocTypes |= Edge->AllocTypes;
3193 Edge->ContextIds.clear();
3194 Edge->AllocTypes = (uint8_t)AllocationType::None;
3195 Edge->Callee->eraseCallerEdge(Edge.get());
3198 Edge->Caller = NewCaller;
3199 NewCaller->CalleeEdges.push_back(Edge);
3204 NewCaller->AllocTypes |= Edge->AllocTypes;
3206 // Now walk the old caller node's caller edges and move Edge's context ids
3207 // over to the corresponding edge into the node (which is created here if
3208 // this is a newly created node). We can tell whether this is a newly created
3209 // node by seeing if it has any caller edges yet.
3211 bool IsNewNode = NewCaller->CallerEdges.empty();
3213 for (auto &OldCallerEdge : OldCaller->CallerEdges) {
3217 set_intersection(OldCallerEdge->getContextIds(), Edge->getContextIds());
3218 set_subtract(OldCallerEdge->getContextIds(), EdgeContextIdsToMove);
3219 OldCallerEdge->AllocTypes =
3220 computeAllocType(OldCallerEdge->getContextIds());
3221 // In this function we expect that any pre-existing node already has edges
3222 // from the same callers as the old node. That should be true in the current
3223 // use case, where we will remove None-type edges after copying over all
3226 NewCaller->findEdgeFromCaller(OldCallerEdge->Caller);
3229 ExistingCallerEdge->getContextIds().insert(EdgeContextIdsToMove.begin(),
3231 ExistingCallerEdge->AllocTypes |= computeAllocType(EdgeContextIdsToMove);
3235 NewCaller, OldCallerEdge->Caller,
3237 NewCaller->CallerEdges.push_back(NewEdge);
3238 NewEdge->Caller->CalleeEdges.push_back(NewEdge);
3240 // Recompute the node alloc type now that its caller edges have been
3242 OldCaller->AllocTypes = OldCaller->computeAllocType();
3244 assert((OldCaller->AllocTypes == (uint8_t)AllocationType::None) ==
3245 OldCaller->emptyContextIds());
3249 for (const auto &OldCallerEdge : OldCaller->CallerEdges)
3250 checkNode<DerivedCCG, FuncTy, CallTy>(OldCallerEdge->Caller,
3252 for (const auto &NewCallerEdge : NewCaller->CallerEdges)
3253 checkNode<DerivedCCG, FuncTy, CallTy>(NewCallerEdge->Caller,
3261 ContextNode *Node, DenseSet<const ContextNode *> &Visited) {
3262 auto Inserted = Visited.insert(Node);
3266 removeNoneTypeCalleeEdges(Node);
3268 for (auto *Clone : Node->Clones)
3271 // The recursive call may remove some of this Node's caller edges.
3273 auto CallerEdges = Node->CallerEdges;
3276 if (Edge->isRemoved()) {
3277 assert(!is_contained(Node->CallerEdges, Edge));
3280 recursivelyRemoveNoneTypeCalleeEdges(Edge->Caller, Visited);
3289 identifyClones(Entry.second, Visited, Entry.second->getContextIds());
3308 ContextNode *Node, DenseSet<const ContextNode *> &Visited,
3311 checkNode<DerivedCCG, FuncTy, CallTy>(Node, /*CheckEdges=*/false);
3312 assert(!Node->CloneOf);
3314 // If Node has a null call, then either it wasn't found in the module (regular
3318 // isn't useful at least for this node.
3319 if (!Node->hasCall())
3325 Visited.insert(Node);
3326 // We should not have visited this node yet.
3334 auto CallerEdges = Node->CallerEdges;
3337 if (Edge->isRemoved()) {
3338 assert(!is_contained(Node->CallerEdges, Edge));
3342 if (!Visited.count(Edge->Caller) && !Edge->Caller->CloneOf) {
3343 identifyClones(Edge->Caller, Visited, AllocContextIds);
3349 if (hasSingleAllocType(Node->AllocTypes) || Node->CallerEdges.size() <= 1)
3354 // Try to keep the original version as alloc type NotCold. This will make
3357 // CallerEdges of the Node we will clone by alloc type.
3360 // the caller edges vector, and stay on the original version (since the below
3362 // and leaves the remaining ones on the original Node).
3369 std::stable_sort(Node->CallerEdges.begin(), Node->CallerEdges.end(),
3372 // Nodes with non-empty context ids should be sorted before
3374 if (A->ContextIds.empty())
3375 // Either B ContextIds are non-empty (in which case we
3380 if (B->ContextIds.empty())
3383 if (A->AllocTypes == B->AllocTypes)
3385 // tie-breaker.
3386 return *A->ContextIds.begin() < *B->ContextIds.begin();
3387 return AllocTypeCloningPriority[A->AllocTypes] <
3388 AllocTypeCloningPriority[B->AllocTypes];
3391 assert(Node->AllocTypes != (uint8_t)AllocationType::None);
3398 for (auto &CE : Node->CallerEdges) {
3401 AllCallerContextIds.reserve(CE->getContextIds().size());
3402 for (auto Id : CE->getContextIds())
3409 // types via cloning. In most cases this loop will terminate once the Node
3411 // Iterate over a copy of Node's caller edges, since we may need to remove
3413 // makes it less error-prone.
3414 auto CallerEdges = Node->CallerEdges;
3416 // See if cloning the prior caller edge left this node with a single alloc
3417 // type or a single caller. In that case no more cloning of Node is needed.
3418 if (hasSingleAllocType(Node->AllocTypes) || Node->CallerEdges.size() <= 1)
3423 if (!CallerEdge->Caller->hasCall())
3429 set_intersection(CallerEdge->getContextIds(), AllocContextIds);
3438 // Compute the node callee edge alloc types corresponding to the context ids
3441 CalleeEdgeAllocTypesForCallerEdge.reserve(Node->CalleeEdges.size());
3442 for (auto &CalleeEdge : Node->CalleeEdges)
3444 CalleeEdge->getContextIds(), CallerEdgeContextsForAlloc));
3451 // type from node's allocation type. Query allocTypeToUse so that we don't
3455 // Then check if by cloning node at least one of the callee edges will be
3457 assert(CallerEdge->AllocTypes != (uint8_t)AllocationType::None);
3458 assert(Node->AllocTypes != (uint8_t)AllocationType::None);
3460 allocTypeToUse(Node->AllocTypes) &&
3462 CalleeEdgeAllocTypesForCallerEdge, Node->CalleeEdges))
3468 for (auto *CurClone : Node->Clones) {
3469 if (allocTypeToUse(CurClone->AllocTypes) !=
3473 bool BothSingleAlloc = hasSingleAllocType(CurClone->AllocTypes) &&
3478 CurClone->AllocTypes == CallerAllocTypeForAlloc);
3482 // allocation on Node's callee edges (CalleeEdgeAllocTypesForCallerEdge),
3499 assert(Clone->AllocTypes != (uint8_t)AllocationType::None);
3502 // We should still have some context ids on the original Node.
3503 assert(!Node->emptyContextIds());
3505 // Sanity check that no alloc types on node or edges are None.
3506 assert(Node->AllocTypes != (uint8_t)AllocationType::None);
3509 checkNode<DerivedCCG, FuncTy, CallTy>(Node, /*CheckEdges=*/false);
3515 auto A = llvm::Attribute::get(Call.call()->getFunction()->getContext(),
3517 cast<CallBase>(Call.call())->addFnAttr(A);
3518 OREGetter(Call.call()->getFunction())
3521 << ore::NV("Caller", Call.call()->getFunction())
3530 assert(AI->Versions.size() > Call.cloneNo());
3531 AI->Versions[Call.cloneNo()] = (uint8_t)AllocType;
3537 if (!CB->getAttributes().hasFnAttr("memprof"))
3539 return CB->getAttributes().getFnAttr("memprof").getValueAsString() == "cold"
3547 assert(AI->Versions.size() > Call.cloneNo());
3548 return (AllocationType)AI->Versions[Call.cloneNo()];
3554 cast<CallBase>(CallerCall.call())->setCalledFunction(CalleeFunc.func());
3555 OREGetter(CallerCall.call()->getFunction())
3558 << ore::NV("Caller", CallerCall.call()->getFunction())
3568 assert(CI->Clones.size() > CallerCall.cloneNo());
3569 CI->Clones[CallerCall.cloneNo()] = CalleeFunc.cloneNo();
3580 std::string Name = getMemProfFuncName(Func.func()->getName(), CloneNo);
3581 assert(!Func.func()->getParent()->getFunction(Name));
3582 NewFunc->setName(Name);
3584 // This map always has the initial version in it.
3604 ? cast<AllocInfo *>(Call.call())->Versions.size()
3605 : cast<CallsiteInfo *>(Call.call())->Clones.size()));
3606 // Walk all the instructions in this function. Create a new version for
3608 // over the version being called for the function clone being cloned here.
3613 // This map always has the initial version in it.
3616 assert(AI->Versions.size() == CloneNo);
3619 AI->Versions.push_back(0);
3622 assert(CI && CI->Clones.size() == CloneNo);
3625 CI->Clones.push_back(0);
3636 // For each call with graph Node having clones:
3637 // Initialize ClonesWorklist to Node and its clones
3650 // Assign any other caller calling the cloned version to new clone
3671 // Update caller node to call function version CalleeFunc, by recording the
3675 assert(Caller->hasCall());
3688 ContextNode *Node = getNodeForInst(Call);
3689 // Skip call if we do not have a node for it (all uses of its stack ids
3692 if (!Node || Node->Clones.empty())
3694 assert(Node->hasCall() &&
3698 // callsite Node being handled.
3701 // Assign callsite version CallsiteClone to function version FuncClone,
3707 // Record the clone of callsite node assigned to this function clone.
3715 CallsiteClone->setCall(CallClone);
3717 for (auto &MatchingCall : Node->MatchingCalls) {
3726 // Keep track of the clones of callsite Node that need to be assigned to
3730 // Ignore original Node if we moved all of its contexts to clones.
3731 if (!Node->emptyContextIds())
3732 ClonesWorklist.push_back(Node);
3733 ClonesWorklist.insert(ClonesWorklist.end(), Node->Clones.begin(),
3734 Node->Clones.end());
3736 // Now walk through all of the clones of this callsite Node that we need,
3756 // been assigned a function clone for this callee node yet.
3758 Clone->CallerEdges, [&](const std::shared_ptr<ContextEdge> &E) {
3759 return CallsiteToCalleeFuncCloneMap.count(E->Caller);
3767 for (auto &CE : Clone->CallerEdges) {
3769 if (!CE->Caller->hasCall())
3771 RecordCalleeFuncOfCallsite(CE->Caller, OrigFunc);
3783 Clone->CallerEdges, [&](const std::shared_ptr<ContextEdge> &E) {
3784 return CallsiteToCalleeFuncCloneMap.count(E->Caller);
3787 if (EI != Clone->CallerEdges.end()) {
3790 CallsiteToCalleeFuncCloneMap[Edge->Caller];
3813 for (auto &CE : Clone->CallerEdges) {
3815 if (!CE->Caller->hasCall())
3817 RecordCalleeFuncOfCallsite(CE->Caller, NewFuncClone);
3822 // We may need to do additional node cloning in this case.
3829 auto CallerEdges = Clone->CallerEdges;
3832 if (CE->isRemoved()) {
3833 assert(!is_contained(Clone->CallerEdges, CE));
3838 if (!CE->Caller->hasCall())
3841 if (!CallsiteToCalleeFuncCloneMap.count(CE->Caller) ||
3845 CallsiteToCalleeFuncCloneMap[CE->Caller] !=
3849 RecordCalleeFuncOfCallsite(CE->Caller, NewFuncClone);
3862 auto CalleeEdges = CE->Caller->CalleeEdges;
3866 if (CalleeEdge->isRemoved()) {
3867 assert(!is_contained(CE->Caller->CalleeEdges, CalleeEdge));
3871 ContextNode *Callee = CalleeEdge->Callee;
3875 if (Callee == Clone || !Callee->hasCall())
3882 assert(NewClone->AllocTypes != (uint8_t)AllocationType::None);
3883 // If the Callee node was already assigned to call a specific
3884 // function version, make sure its new clone is assigned to call
3896 CallInfo OrigCall(Callee->getOrigNode()->Call);
3903 NewClone->setCall(NewCall);
3905 for (auto &MatchingCall : NewClone->MatchingCalls) {
3939 // handling and makes it less error-prone.
3940 auto CloneCallerEdges = Clone->CallerEdges;
3943 if (!Edge->Caller->hasCall())
3945 // If this caller already assigned to call a version of OrigFunc, need
3947 if (CallsiteToCalleeFuncCloneMap.count(Edge->Caller)) {
3949 CallsiteToCalleeFuncCloneMap[Edge->Caller];
3951 // for use by this callsite node clone.
3953 // While FuncCloneToCurNodeCloneMap is built only for this Node and
3956 // - if Edge's caller calls another callsite within Node's original
3958 // We need to clone Node again in this case.
3965 // reach via call contexts). Is this Clone of callsite Node
3966 // assigned to a different clone of OrigFunc? If so, clone Node
3976 // when this new clone is processed later we know which version of
4000 assert(NewClone->AllocTypes != (uint8_t)AllocationType::None);
4006 // iteration over this Node's Clones.
4019 // Don't need to do anything - callsite is already calling this
4025 // We have not already assigned this caller to a version of
4033 // clone of this callsite Node. We should always have one
4050 // Update callers to record function version called.
4051 RecordCalleeFuncOfCallsite(Edge->Caller,
4057 checkNode<DerivedCCG, FuncTy, CallTy>(Node);
4058 for (const auto &PE : Node->CalleeEdges)
4059 checkNode<DerivedCCG, FuncTy, CallTy>(PE->Callee);
4060 for (const auto &CE : Node->CallerEdges)
4061 checkNode<DerivedCCG, FuncTy, CallTy>(CE->Caller);
4062 for (auto *Clone : Node->Clones) {
4064 for (const auto &PE : Clone->CalleeEdges)
4065 checkNode<DerivedCCG, FuncTy, CallTy>(PE->Callee);
4066 for (const auto &CE : Clone->CallerEdges)
4067 checkNode<DerivedCCG, FuncTy, CallTy>(CE->Caller);
4076 auto UpdateCalls = [&](ContextNode *Node,
4079 auto Inserted = Visited.insert(Node);
4083 for (auto *Clone : Node->Clones)
4086 for (auto &Edge : Node->CallerEdges)
4087 UpdateCalls(Edge->Caller, Visited, UpdateCalls);
4091 if (!Node->hasCall() || Node->emptyContextIds())
4094 if (Node->IsAllocation) {
4095 auto AT = allocTypeToUse(Node->AllocTypes);
4100 if (Node->AllocTypes == BothTypes && MinClonedColdBytePercent < 100 &&
4104 for (auto Id : Node->getContextIds()) {
4109 for (auto &Info : CSI->second) {
4111 if (TypeI->second == AllocationType::Cold)
4119 updateAllocationCall(Node->Call, AT);
4120 assert(Node->MatchingCalls.empty());
4124 if (!CallsiteToCalleeFuncCloneMap.count(Node))
4127 auto CalleeFunc = CallsiteToCalleeFuncCloneMap[Node];
4128 updateCall(Node->Call, CalleeFunc);
4130 for (auto &Call : Node->MatchingCalls)
4154 VMaps.reserve(NumClones - 1);
4173 assert(PrevF->isDeclaration());
4174 NewF->takeName(PrevF);
4175 PrevF->replaceAllUsesWith(NewF);
4176 PrevF->eraseFromParent();
4178 NewF->setName(Name);
4186 std::string Name = getMemProfFuncName(A->getName(), I);
4188 auto *NewA = GlobalAlias::create(A->getValueType(),
4189 A->getType()->getPointerAddressSpace(),
4190 A->getLinkage(), Name, NewF);
4191 NewA->copyAttributesFrom(A);
4195 assert(PrevA->isDeclaration());
4196 NewA->takeName(PrevA);
4197 PrevA->replaceAllUsesWith(NewA);
4198 PrevA->eraseFromParent();
4212 ValueInfo TheFnVI = ImportSummary->getValueInfo(F.getGUID());
4217 TheFnVI = ImportSummary->getValueInfo(GlobalValue::getGUID(F.getName()));
4225 TheFnVI = ImportSummary->getValueInfo(GlobalValue::getGUID(OrigId));
4231 // index. This would not work if there were same-named locals in multiple
4234 ImportSummary->getGUIDFromOriginalID(GlobalValue::getGUID(OrigName));
4236 TheFnVI = ImportSummary->getValueInfo(OrigGUID);
4254 if (Error E = Symtab->create(M, /*InLTO=*/true, /*AddCanonical=*/false)) {
4290 // We should at least have version 0 which is the original copy.
4305 assert(VMaps.size() == NumClones - 1);
4321 auto CalleeOrigName = CalledFunction->getName();
4323 // Do nothing if this version calls the original version of its
4329 CalledFunction->getFunctionType());
4335 CBClone = cast<CallBase>((*VMaps[J - 1])[CB]);
4336 CBClone->setCalledFunction(NewF);
4339 << ore::NV("Caller", CBClone->getFunction())
4355 ImportSummary->findSummaryInModule(TheFnVI, M.getModuleIdentifier());
4361 "enable-import-metadata is needed to emit thinlto_src_module");
4363 dyn_cast<MDString>(SrcModuleMD->getOperand(0))->getString();
4365 if (GVS->modulePath() == SrcModule) {
4370 assert(GVSummary && GVSummary->modulePath() == SrcModule);
4378 auto *FS = cast<FunctionSummary>(GVSummary->getBaseObject());
4380 if (FS->allocs().empty() && FS->callsites().empty())
4383 auto SI = FS->callsites().begin();
4384 auto AI = FS->allocs().begin();
4392 for (auto CallsiteIt = FS->callsites().rbegin();
4393 CallsiteIt != FS->callsites().rend(); CallsiteIt++) {
4395 // Stop as soon as we see a non-synthesized callsite info (see comment
4416 auto *CalledValue = CB->getCalledOperand();
4417 auto *CalledFunction = CB->getCalledFunction();
4419 CalledValue = CalledValue->stripPointerCasts();
4428 CalledFunction = dyn_cast<Function>(GA->getAliaseeObject());
4437 if (CB->getAttributes().hasFnAttr("memprof")) {
4439 CB->getAttributes().getFnAttr("memprof").getValueAsString() == "cold"
4450 // Consult the next alloc node.
4451 assert(AI != FS->allocs().end());
4457 for (auto &MDOp : MemProfMD->operands()) {
4460 MIBIter->StackIdIndices.begin();
4481 assert(StackIdIndexIter != MIBIter->StackIdIndices.end());
4482 assert(ImportSummary->getStackIdAtIndex(*StackIdIndexIter) ==
4497 // If there is only one version that means we didn't end up
4502 // We might have a single version that is cold due to the
4538 // clone J-1 (J==0 is the original clone and does not have a VMaps
4540 CBClone = cast<CallBase>((*VMaps[J - 1])[CB]);
4541 CBClone->addFnAttr(A);
4544 << ore::NV("Caller", CBClone->getFunction())
4553 assert(!CI || !CI->isInlineAsm());
4563 recordICPInfo(CB, FS->callsites(), SI, ICallAnalysisInfo);
4573 // Consult the next callsite node.
4574 assert(SI != FS->callsites().end());
4583 assert(ImportSummary->getStackIdAtIndex(*StackIdIndexIter) ==
4591 } else if (CB->isTailCall() && CalledFunction) {
4599 CloneCallsite(Callsite->second, CB, CalledFunction);
4606 performICP(M, FS->callsites(), VMaps, ICallAnalysisInfo, ORE);
4637 ICallAnalysis->getPromotionCandidatesForInstruction(CB, TotalCount,
4652 ImportSummary->getValueInfo(Candidate.Value);
4655 assert(!CalleeValueInfo || SI->Callee == CalleeValueInfo);
4659 // profiled target should call a cloned version of the profiled
4687 // version of the target, which is what is in the vtable.
4704 // target (or version of the code), and we need to be conservative
4706 Function *TargetFunction = Symtab->getFunction(Candidate.Value);
4713 TargetFunction->isDeclaration())) {
4747 CBClone = cast<CallBase>((*VMaps[J - 1])[CB]);
4755 // Call original if this version calls the original version of its
4760 getMemProfFuncName(TargetFunction->getName(),
4762 TargetFunction->getFunctionType())
4768 << ore::NV("Caller", CBClone->getFunction())
4774 TotalCount -= Candidate.Count;
4783 CBClone = cast<CallBase>((*VMaps[J - 1])[CB]);
4785 CBClone->setMetadata(LLVMContext::MD_prof, nullptr);
4787 // Otherwise we need to update with the un-promoted records back.
4896 auto OREGetter = [&](Function *F) -> OptimizationRemarkEmitter & {