11d99d7a6SLei Wang //===- SampleProfileMatcher.cpp - Sampling-based Stale Profile Matcher ----===// 21d99d7a6SLei Wang // 31d99d7a6SLei Wang // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 41d99d7a6SLei Wang // See https://llvm.org/LICENSE.txt for license information. 51d99d7a6SLei Wang // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 61d99d7a6SLei Wang // 71d99d7a6SLei Wang //===----------------------------------------------------------------------===// 81d99d7a6SLei Wang // 91d99d7a6SLei Wang // This file implements the SampleProfileMatcher used for stale 101d99d7a6SLei Wang // profile matching. 111d99d7a6SLei Wang // 121d99d7a6SLei Wang //===----------------------------------------------------------------------===// 131d99d7a6SLei Wang 141d99d7a6SLei Wang #include "llvm/Transforms/IPO/SampleProfileMatcher.h" 151d99d7a6SLei Wang #include "llvm/IR/IntrinsicInst.h" 161d99d7a6SLei Wang #include "llvm/IR/MDBuilder.h" 1721ba91c4SKrzysztof Pszeniczny #include "llvm/Support/CommandLine.h" 18*3a5791e7SKazu Hirata #include "llvm/Transforms/Utils/LongestCommonSequence.h" 191d99d7a6SLei Wang 201d99d7a6SLei Wang using namespace llvm; 211d99d7a6SLei Wang using namespace sampleprof; 221d99d7a6SLei Wang 231d99d7a6SLei Wang #define DEBUG_TYPE "sample-profile-matcher" 241d99d7a6SLei Wang 2518cdfa72SLei Wang static cl::opt<unsigned> FuncProfileSimilarityThreshold( 2618cdfa72SLei Wang "func-profile-similarity-threshold", cl::Hidden, cl::init(80), 2718cdfa72SLei Wang cl::desc("Consider a profile matches a function if the similarity of their " 2818cdfa72SLei Wang "callee sequences is above the specified percentile.")); 2918cdfa72SLei Wang 3018cdfa72SLei Wang static cl::opt<unsigned> MinFuncCountForCGMatching( 3118cdfa72SLei Wang "min-func-count-for-cg-matching", cl::Hidden, cl::init(5), 3218cdfa72SLei Wang cl::desc("The minimum number of basic blocks required for a function to " 3318cdfa72SLei Wang "run stale profile call graph matching.")); 3418cdfa72SLei Wang 3518cdfa72SLei Wang static cl::opt<unsigned> MinCallCountForCGMatching( 3618cdfa72SLei Wang "min-call-count-for-cg-matching", cl::Hidden, cl::init(3), 3718cdfa72SLei Wang cl::desc("The minimum number of call anchors required for a function to " 3818cdfa72SLei Wang "run stale profile call graph matching.")); 3918cdfa72SLei Wang 406e60330aSLei Wang static cl::opt<bool> LoadFuncProfileforCGMatching( 416e60330aSLei Wang "load-func-profile-for-cg-matching", cl::Hidden, cl::init(false), 426e60330aSLei Wang cl::desc( 436e60330aSLei Wang "Load top-level profiles that the sample reader initially skipped for " 446e60330aSLei Wang "the call-graph matching (only meaningful for extended binary " 456e60330aSLei Wang "format)")); 466e60330aSLei Wang 471d99d7a6SLei Wang extern cl::opt<bool> SalvageStaleProfile; 4818cdfa72SLei Wang extern cl::opt<bool> SalvageUnusedProfile; 491d99d7a6SLei Wang extern cl::opt<bool> PersistProfileStaleness; 501d99d7a6SLei Wang extern cl::opt<bool> ReportProfileStaleness; 511d99d7a6SLei Wang 5221ba91c4SKrzysztof Pszeniczny static cl::opt<unsigned> SalvageStaleProfileMaxCallsites( 5321ba91c4SKrzysztof Pszeniczny "salvage-stale-profile-max-callsites", cl::Hidden, cl::init(UINT_MAX), 5421ba91c4SKrzysztof Pszeniczny cl::desc("The maximum number of callsites in a function, above which stale " 5521ba91c4SKrzysztof Pszeniczny "profile matching will be skipped.")); 5621ba91c4SKrzysztof Pszeniczny 575b6f1511SLei Wang void SampleProfileMatcher::findIRAnchors(const Function &F, 5818cdfa72SLei Wang AnchorMap &IRAnchors) const { 591d99d7a6SLei Wang // For inlined code, recover the original callsite and callee by finding the 601d99d7a6SLei Wang // top-level inline frame. e.g. For frame stack "main:1 @ foo:2 @ bar:3", the 611d99d7a6SLei Wang // top-level frame is "main:1", the callsite is "1" and the callee is "foo". 621d99d7a6SLei Wang auto FindTopLevelInlinedCallsite = [](const DILocation *DIL) { 631d99d7a6SLei Wang assert((DIL && DIL->getInlinedAt()) && "No inlined callsite"); 641d99d7a6SLei Wang const DILocation *PrevDIL = nullptr; 651d99d7a6SLei Wang do { 661d99d7a6SLei Wang PrevDIL = DIL; 671d99d7a6SLei Wang DIL = DIL->getInlinedAt(); 681d99d7a6SLei Wang } while (DIL->getInlinedAt()); 691d99d7a6SLei Wang 70e06d6ed1SKrzysztof Pszeniczny LineLocation Callsite = FunctionSamples::getCallSiteIdentifier( 71e06d6ed1SKrzysztof Pszeniczny DIL, FunctionSamples::ProfileIsFS); 721d99d7a6SLei Wang StringRef CalleeName = PrevDIL->getSubprogramLinkageName(); 735b6f1511SLei Wang return std::make_pair(Callsite, FunctionId(CalleeName)); 741d99d7a6SLei Wang }; 751d99d7a6SLei Wang 761d99d7a6SLei Wang auto GetCanonicalCalleeName = [](const CallBase *CB) { 771d99d7a6SLei Wang StringRef CalleeName = UnknownIndirectCallee; 781d99d7a6SLei Wang if (Function *Callee = CB->getCalledFunction()) 791d99d7a6SLei Wang CalleeName = FunctionSamples::getCanonicalFnName(Callee->getName()); 801d99d7a6SLei Wang return CalleeName; 811d99d7a6SLei Wang }; 821d99d7a6SLei Wang 831d99d7a6SLei Wang // Extract profile matching anchors in the IR. 841d99d7a6SLei Wang for (auto &BB : F) { 851d99d7a6SLei Wang for (auto &I : BB) { 861d99d7a6SLei Wang DILocation *DIL = I.getDebugLoc(); 871d99d7a6SLei Wang if (!DIL) 881d99d7a6SLei Wang continue; 891d99d7a6SLei Wang 901d99d7a6SLei Wang if (FunctionSamples::ProfileIsProbeBased) { 911d99d7a6SLei Wang if (auto Probe = extractProbe(I)) { 921d99d7a6SLei Wang // Flatten inlined IR for the matching. 931d99d7a6SLei Wang if (DIL->getInlinedAt()) { 941d99d7a6SLei Wang IRAnchors.emplace(FindTopLevelInlinedCallsite(DIL)); 951d99d7a6SLei Wang } else { 961d99d7a6SLei Wang // Use empty StringRef for basic block probe. 971d99d7a6SLei Wang StringRef CalleeName; 981d99d7a6SLei Wang if (const auto *CB = dyn_cast<CallBase>(&I)) { 991d99d7a6SLei Wang // Skip the probe inst whose callee name is "llvm.pseudoprobe". 1001d99d7a6SLei Wang if (!isa<IntrinsicInst>(&I)) 1011d99d7a6SLei Wang CalleeName = GetCanonicalCalleeName(CB); 1021d99d7a6SLei Wang } 1035b6f1511SLei Wang LineLocation Loc = LineLocation(Probe->Id, 0); 1045b6f1511SLei Wang IRAnchors.emplace(Loc, FunctionId(CalleeName)); 1051d99d7a6SLei Wang } 1061d99d7a6SLei Wang } 1071d99d7a6SLei Wang } else { 1081d99d7a6SLei Wang // TODO: For line-number based profile(AutoFDO), currently only support 1091d99d7a6SLei Wang // find callsite anchors. In future, we need to parse all the non-call 1101d99d7a6SLei Wang // instructions to extract the line locations for profile matching. 1111d99d7a6SLei Wang if (!isa<CallBase>(&I) || isa<IntrinsicInst>(&I)) 1121d99d7a6SLei Wang continue; 1131d99d7a6SLei Wang 1141d99d7a6SLei Wang if (DIL->getInlinedAt()) { 1151d99d7a6SLei Wang IRAnchors.emplace(FindTopLevelInlinedCallsite(DIL)); 1161d99d7a6SLei Wang } else { 117e06d6ed1SKrzysztof Pszeniczny LineLocation Callsite = FunctionSamples::getCallSiteIdentifier( 118e06d6ed1SKrzysztof Pszeniczny DIL, FunctionSamples::ProfileIsFS); 1191d99d7a6SLei Wang StringRef CalleeName = GetCanonicalCalleeName(dyn_cast<CallBase>(&I)); 1205b6f1511SLei Wang IRAnchors.emplace(Callsite, FunctionId(CalleeName)); 1211d99d7a6SLei Wang } 1221d99d7a6SLei Wang } 1231d99d7a6SLei Wang } 1241d99d7a6SLei Wang } 1251d99d7a6SLei Wang } 1261d99d7a6SLei Wang 1275b6f1511SLei Wang void SampleProfileMatcher::findProfileAnchors(const FunctionSamples &FS, 12818cdfa72SLei Wang AnchorMap &ProfileAnchors) const { 1291d99d7a6SLei Wang auto isInvalidLineOffset = [](uint32_t LineOffset) { 1301d99d7a6SLei Wang return LineOffset & 0x8000; 1311d99d7a6SLei Wang }; 1321d99d7a6SLei Wang 1335b6f1511SLei Wang auto InsertAnchor = [](const LineLocation &Loc, const FunctionId &CalleeName, 1345b6f1511SLei Wang AnchorMap &ProfileAnchors) { 1355b6f1511SLei Wang auto Ret = ProfileAnchors.try_emplace(Loc, CalleeName); 1365b6f1511SLei Wang if (!Ret.second) { 1375b6f1511SLei Wang // For multiple callees, which indicates it's an indirect call, we use a 1385b6f1511SLei Wang // dummy name(UnknownIndirectCallee) as the indrect callee name. 1395b6f1511SLei Wang Ret.first->second = FunctionId(UnknownIndirectCallee); 1405b6f1511SLei Wang } 1415b6f1511SLei Wang }; 1425b6f1511SLei Wang 1431d99d7a6SLei Wang for (const auto &I : FS.getBodySamples()) { 1441d99d7a6SLei Wang const LineLocation &Loc = I.first; 1451d99d7a6SLei Wang if (isInvalidLineOffset(Loc.LineOffset)) 1461d99d7a6SLei Wang continue; 1475b6f1511SLei Wang for (const auto &C : I.second.getCallTargets()) 1485b6f1511SLei Wang InsertAnchor(Loc, C.first, ProfileAnchors); 1491d99d7a6SLei Wang } 1501d99d7a6SLei Wang 1511d99d7a6SLei Wang for (const auto &I : FS.getCallsiteSamples()) { 1521d99d7a6SLei Wang const LineLocation &Loc = I.first; 1531d99d7a6SLei Wang if (isInvalidLineOffset(Loc.LineOffset)) 1541d99d7a6SLei Wang continue; 1555b6f1511SLei Wang for (const auto &C : I.second) 1565b6f1511SLei Wang InsertAnchor(Loc, C.first, ProfileAnchors); 1571d99d7a6SLei Wang } 1581d99d7a6SLei Wang } 1591d99d7a6SLei Wang 16018cdfa72SLei Wang bool SampleProfileMatcher::functionHasProfile(const FunctionId &IRFuncName, 16118cdfa72SLei Wang Function *&FuncWithoutProfile) { 16218cdfa72SLei Wang FuncWithoutProfile = nullptr; 16318cdfa72SLei Wang auto R = FunctionsWithoutProfile.find(IRFuncName); 16418cdfa72SLei Wang if (R != FunctionsWithoutProfile.end()) 16518cdfa72SLei Wang FuncWithoutProfile = R->second; 16618cdfa72SLei Wang return !FuncWithoutProfile; 16718cdfa72SLei Wang } 16818cdfa72SLei Wang 16918cdfa72SLei Wang bool SampleProfileMatcher::isProfileUnused(const FunctionId &ProfileFuncName) { 17018cdfa72SLei Wang return SymbolMap->find(ProfileFuncName) == SymbolMap->end(); 17118cdfa72SLei Wang } 17218cdfa72SLei Wang 17318cdfa72SLei Wang bool SampleProfileMatcher::functionMatchesProfile( 17418cdfa72SLei Wang const FunctionId &IRFuncName, const FunctionId &ProfileFuncName, 17518cdfa72SLei Wang bool FindMatchedProfileOnly) { 17618cdfa72SLei Wang if (IRFuncName == ProfileFuncName) 17718cdfa72SLei Wang return true; 17818cdfa72SLei Wang if (!SalvageUnusedProfile) 17918cdfa72SLei Wang return false; 18018cdfa72SLei Wang 18118cdfa72SLei Wang // If IR function doesn't have profile and the profile is unused, try 18218cdfa72SLei Wang // matching them. 18318cdfa72SLei Wang Function *IRFunc = nullptr; 18418cdfa72SLei Wang if (functionHasProfile(IRFuncName, IRFunc) || 18518cdfa72SLei Wang !isProfileUnused(ProfileFuncName)) 18618cdfa72SLei Wang return false; 18718cdfa72SLei Wang 18818cdfa72SLei Wang assert(FunctionId(IRFunc->getName()) != ProfileFuncName && 18918cdfa72SLei Wang "IR function should be different from profile function to match"); 19018cdfa72SLei Wang return functionMatchesProfile(*IRFunc, ProfileFuncName, 19118cdfa72SLei Wang FindMatchedProfileOnly); 19218cdfa72SLei Wang } 19318cdfa72SLei Wang 19418cdfa72SLei Wang LocToLocMap 19518cdfa72SLei Wang SampleProfileMatcher::longestCommonSequence(const AnchorList &AnchorList1, 19618cdfa72SLei Wang const AnchorList &AnchorList2, 19718cdfa72SLei Wang bool MatchUnusedFunction) { 198*3a5791e7SKazu Hirata LocToLocMap MatchedAnchors; 199*3a5791e7SKazu Hirata llvm::longestCommonSequence<LineLocation, FunctionId>( 200*3a5791e7SKazu Hirata AnchorList1, AnchorList2, 201*3a5791e7SKazu Hirata [&](const FunctionId &A, const FunctionId &B) { 202*3a5791e7SKazu Hirata return functionMatchesProfile( 203*3a5791e7SKazu Hirata A, B, 204*3a5791e7SKazu Hirata !MatchUnusedFunction // Find matched function only 205*3a5791e7SKazu Hirata ); 206*3a5791e7SKazu Hirata }, 207*3a5791e7SKazu Hirata [&](LineLocation A, LineLocation B) { 208*3a5791e7SKazu Hirata MatchedAnchors.try_emplace(A, B); 209*3a5791e7SKazu Hirata }); 210*3a5791e7SKazu Hirata return MatchedAnchors; 2115b6f1511SLei Wang } 2125b6f1511SLei Wang 2135b6f1511SLei Wang void SampleProfileMatcher::matchNonCallsiteLocs( 2145b6f1511SLei Wang const LocToLocMap &MatchedAnchors, const AnchorMap &IRAnchors, 2151d99d7a6SLei Wang LocToLocMap &IRToProfileLocationMap) { 2161d99d7a6SLei Wang auto InsertMatching = [&](const LineLocation &From, const LineLocation &To) { 2171d99d7a6SLei Wang // Skip the unchanged location mapping to save memory. 2181d99d7a6SLei Wang if (From != To) 2191d99d7a6SLei Wang IRToProfileLocationMap.insert({From, To}); 2201d99d7a6SLei Wang }; 2211d99d7a6SLei Wang 2221d99d7a6SLei Wang // Use function's beginning location as the initial anchor. 2231d99d7a6SLei Wang int32_t LocationDelta = 0; 2241d99d7a6SLei Wang SmallVector<LineLocation> LastMatchedNonAnchors; 2251d99d7a6SLei Wang for (const auto &IR : IRAnchors) { 2261d99d7a6SLei Wang const auto &Loc = IR.first; 2271d99d7a6SLei Wang bool IsMatchedAnchor = false; 2281d99d7a6SLei Wang // Match the anchor location in lexical order. 2295b6f1511SLei Wang auto R = MatchedAnchors.find(Loc); 2305b6f1511SLei Wang if (R != MatchedAnchors.end()) { 2315b6f1511SLei Wang const auto &Candidate = R->second; 2321d99d7a6SLei Wang InsertMatching(Loc, Candidate); 2335b6f1511SLei Wang LLVM_DEBUG(dbgs() << "Callsite with callee:" << IR.second.stringRef() 2341d99d7a6SLei Wang << " is matched from " << Loc << " to " << Candidate 2351d99d7a6SLei Wang << "\n"); 2361d99d7a6SLei Wang LocationDelta = Candidate.LineOffset - Loc.LineOffset; 2371d99d7a6SLei Wang 2381d99d7a6SLei Wang // Match backwards for non-anchor locations. 2391d99d7a6SLei Wang // The locations in LastMatchedNonAnchors have been matched forwards 2401d99d7a6SLei Wang // based on the previous anchor, spilt it evenly and overwrite the 2411d99d7a6SLei Wang // second half based on the current anchor. 2421d99d7a6SLei Wang for (size_t I = (LastMatchedNonAnchors.size() + 1) / 2; 2431d99d7a6SLei Wang I < LastMatchedNonAnchors.size(); I++) { 2441d99d7a6SLei Wang const auto &L = LastMatchedNonAnchors[I]; 2451d99d7a6SLei Wang uint32_t CandidateLineOffset = L.LineOffset + LocationDelta; 2461d99d7a6SLei Wang LineLocation Candidate(CandidateLineOffset, L.Discriminator); 2471d99d7a6SLei Wang InsertMatching(L, Candidate); 2481d99d7a6SLei Wang LLVM_DEBUG(dbgs() << "Location is rematched backwards from " << L 2491d99d7a6SLei Wang << " to " << Candidate << "\n"); 2501d99d7a6SLei Wang } 2511d99d7a6SLei Wang 2521d99d7a6SLei Wang IsMatchedAnchor = true; 2531d99d7a6SLei Wang LastMatchedNonAnchors.clear(); 2541d99d7a6SLei Wang } 2551d99d7a6SLei Wang 2561d99d7a6SLei Wang // Match forwards for non-anchor locations. 2571d99d7a6SLei Wang if (!IsMatchedAnchor) { 2581d99d7a6SLei Wang uint32_t CandidateLineOffset = Loc.LineOffset + LocationDelta; 2591d99d7a6SLei Wang LineLocation Candidate(CandidateLineOffset, Loc.Discriminator); 2601d99d7a6SLei Wang InsertMatching(Loc, Candidate); 2611d99d7a6SLei Wang LLVM_DEBUG(dbgs() << "Location is matched from " << Loc << " to " 2621d99d7a6SLei Wang << Candidate << "\n"); 2631d99d7a6SLei Wang LastMatchedNonAnchors.emplace_back(Loc); 2641d99d7a6SLei Wang } 2651d99d7a6SLei Wang } 2661d99d7a6SLei Wang } 2671d99d7a6SLei Wang 26818cdfa72SLei Wang // Filter the non-call locations from IRAnchors and ProfileAnchors and write 26918cdfa72SLei Wang // them into a list for random access later. 27018cdfa72SLei Wang void SampleProfileMatcher::getFilteredAnchorList( 27118cdfa72SLei Wang const AnchorMap &IRAnchors, const AnchorMap &ProfileAnchors, 27218cdfa72SLei Wang AnchorList &FilteredIRAnchorsList, AnchorList &FilteredProfileAnchorList) { 27318cdfa72SLei Wang for (const auto &I : IRAnchors) { 27418cdfa72SLei Wang if (I.second.stringRef().empty()) 27518cdfa72SLei Wang continue; 27618cdfa72SLei Wang FilteredIRAnchorsList.emplace_back(I); 27718cdfa72SLei Wang } 27818cdfa72SLei Wang 27918cdfa72SLei Wang for (const auto &I : ProfileAnchors) 28018cdfa72SLei Wang FilteredProfileAnchorList.emplace_back(I); 28118cdfa72SLei Wang } 28218cdfa72SLei Wang 2835b6f1511SLei Wang // Call target name anchor based profile fuzzy matching. 2845b6f1511SLei Wang // Input: 2855b6f1511SLei Wang // For IR locations, the anchor is the callee name of direct callsite; For 2865b6f1511SLei Wang // profile locations, it's the call target name for BodySamples or inlinee's 2875b6f1511SLei Wang // profile name for CallsiteSamples. 2885b6f1511SLei Wang // Matching heuristic: 2895b6f1511SLei Wang // First match all the anchors using the diff algorithm, then split the 2905b6f1511SLei Wang // non-anchor locations between the two anchors evenly, first half are matched 2915b6f1511SLei Wang // based on the start anchor, second half are matched based on the end anchor. 2925b6f1511SLei Wang // For example, given: 2935b6f1511SLei Wang // IR locations: [1, 2(foo), 3, 5, 6(bar), 7] 2945b6f1511SLei Wang // Profile locations: [1, 2, 3(foo), 4, 7, 8(bar), 9] 2955b6f1511SLei Wang // The matching gives: 2965b6f1511SLei Wang // [1, 2(foo), 3, 5, 6(bar), 7] 2975b6f1511SLei Wang // | | | | | | 2985b6f1511SLei Wang // [1, 2, 3(foo), 4, 7, 8(bar), 9] 2995b6f1511SLei Wang // The output mapping: [2->3, 3->4, 5->7, 6->8, 7->9]. 3005b6f1511SLei Wang void SampleProfileMatcher::runStaleProfileMatching( 3015b6f1511SLei Wang const Function &F, const AnchorMap &IRAnchors, 30218cdfa72SLei Wang const AnchorMap &ProfileAnchors, LocToLocMap &IRToProfileLocationMap, 30318cdfa72SLei Wang bool RunCFGMatching, bool RunCGMatching) { 30418cdfa72SLei Wang if (!RunCFGMatching && !RunCGMatching) 30518cdfa72SLei Wang return; 3065b6f1511SLei Wang LLVM_DEBUG(dbgs() << "Run stale profile matching for " << F.getName() 3075b6f1511SLei Wang << "\n"); 3085b6f1511SLei Wang assert(IRToProfileLocationMap.empty() && 3095b6f1511SLei Wang "Run stale profile matching only once per function"); 3105b6f1511SLei Wang 3115b6f1511SLei Wang AnchorList FilteredProfileAnchorList; 3125b6f1511SLei Wang AnchorList FilteredIRAnchorsList; 31318cdfa72SLei Wang getFilteredAnchorList(IRAnchors, ProfileAnchors, FilteredIRAnchorsList, 31418cdfa72SLei Wang FilteredProfileAnchorList); 3155b6f1511SLei Wang 3165b6f1511SLei Wang if (FilteredIRAnchorsList.empty() || FilteredProfileAnchorList.empty()) 3175b6f1511SLei Wang return; 3185b6f1511SLei Wang 31921ba91c4SKrzysztof Pszeniczny if (FilteredIRAnchorsList.size() > SalvageStaleProfileMaxCallsites || 32021ba91c4SKrzysztof Pszeniczny FilteredProfileAnchorList.size() > SalvageStaleProfileMaxCallsites) { 32121ba91c4SKrzysztof Pszeniczny LLVM_DEBUG(dbgs() << "Skip stale profile matching for " << F.getName() 32221ba91c4SKrzysztof Pszeniczny << " because the number of callsites in the IR is " 32321ba91c4SKrzysztof Pszeniczny << FilteredIRAnchorsList.size() 32421ba91c4SKrzysztof Pszeniczny << " and in the profile is " 32521ba91c4SKrzysztof Pszeniczny << FilteredProfileAnchorList.size() << "\n"); 32621ba91c4SKrzysztof Pszeniczny return; 32721ba91c4SKrzysztof Pszeniczny } 32821ba91c4SKrzysztof Pszeniczny 3295b6f1511SLei Wang // Match the callsite anchors by finding the longest common subsequence 33018cdfa72SLei Wang // between IR and profile. 33118cdfa72SLei Wang // Define a match between two anchors as follows: 33218cdfa72SLei Wang // 1) The function names of anchors are the same. 33318cdfa72SLei Wang // 2) The similarity between the anchor functions is above a threshold if 33418cdfa72SLei Wang // RunCGMatching is set. 33518cdfa72SLei Wang // For 2), we only consider the anchor functions from IR and profile don't 33618cdfa72SLei Wang // appear on either side to reduce the matching scope. Note that we need to 33718cdfa72SLei Wang // use IR anchor as base(A side) to align with the order of 3385b6f1511SLei Wang // IRToProfileLocationMap. 33918cdfa72SLei Wang LocToLocMap MatchedAnchors = 34018cdfa72SLei Wang longestCommonSequence(FilteredIRAnchorsList, FilteredProfileAnchorList, 34118cdfa72SLei Wang RunCGMatching /* Match unused functions */); 34218cdfa72SLei Wang 34318cdfa72SLei Wang // CFG level matching: 34418cdfa72SLei Wang // Apply the callsite matchings to infer matching for the basic 34518cdfa72SLei Wang // block(non-callsite) locations and write the result to 34618cdfa72SLei Wang // IRToProfileLocationMap. 34718cdfa72SLei Wang if (RunCFGMatching) 3485b6f1511SLei Wang matchNonCallsiteLocs(MatchedAnchors, IRAnchors, IRToProfileLocationMap); 3495b6f1511SLei Wang } 3505b6f1511SLei Wang 3511d99d7a6SLei Wang void SampleProfileMatcher::runOnFunction(Function &F) { 3521d99d7a6SLei Wang // We need to use flattened function samples for matching. 3531d99d7a6SLei Wang // Unlike IR, which includes all callsites from the source code, the callsites 3541d99d7a6SLei Wang // in profile only show up when they are hit by samples, i,e. the profile 3551d99d7a6SLei Wang // callsites in one context may differ from those in another context. To get 3561d99d7a6SLei Wang // the maximum number of callsites, we merge the function profiles from all 3571d99d7a6SLei Wang // contexts, aka, the flattened profile to find profile anchors. 3586e60330aSLei Wang const auto *FSForMatching = getFlattenedSamplesFor(F); 3596e60330aSLei Wang if (SalvageUnusedProfile && !FSForMatching) { 36018cdfa72SLei Wang // Apply the matching in place to find the new function's matched profile. 36118cdfa72SLei Wang auto R = FuncToProfileNameMap.find(&F); 3626e60330aSLei Wang if (R != FuncToProfileNameMap.end()) { 3636e60330aSLei Wang FSForMatching = getFlattenedSamplesFor(R->second); 3646e60330aSLei Wang // Try to find the salvaged top-level profiles that are explicitly loaded 3656e60330aSLei Wang // for the matching, see "functionMatchesProfileHelper" for the details. 3666e60330aSLei Wang if (!FSForMatching && LoadFuncProfileforCGMatching) 3676e60330aSLei Wang FSForMatching = Reader.getSamplesFor(R->second.stringRef()); 36818cdfa72SLei Wang } 3696e60330aSLei Wang } 3706e60330aSLei Wang if (!FSForMatching) 3711d99d7a6SLei Wang return; 3721d99d7a6SLei Wang 3731d99d7a6SLei Wang // Anchors for IR. It's a map from IR location to callee name, callee name is 3741d99d7a6SLei Wang // empty for non-call instruction and use a dummy name(UnknownIndirectCallee) 3751d99d7a6SLei Wang // for unknown indrect callee name. 3765b6f1511SLei Wang AnchorMap IRAnchors; 3771d99d7a6SLei Wang findIRAnchors(F, IRAnchors); 3781d99d7a6SLei Wang // Anchors for profile. It's a map from callsite location to a set of callee 3791d99d7a6SLei Wang // name. 3805b6f1511SLei Wang AnchorMap ProfileAnchors; 3816e60330aSLei Wang findProfileAnchors(*FSForMatching, ProfileAnchors); 3821d99d7a6SLei Wang 3831d99d7a6SLei Wang // Compute the callsite match states for profile staleness report. 3841d99d7a6SLei Wang if (ReportProfileStaleness || PersistProfileStaleness) 3851d99d7a6SLei Wang recordCallsiteMatchStates(F, IRAnchors, ProfileAnchors, nullptr); 3861d99d7a6SLei Wang 38718cdfa72SLei Wang if (!SalvageStaleProfile) 38818cdfa72SLei Wang return; 38918cdfa72SLei Wang // For probe-based profiles, run matching only when profile checksum is 39018cdfa72SLei Wang // mismatched. 39118cdfa72SLei Wang bool ChecksumMismatch = FunctionSamples::ProfileIsProbeBased && 3926e60330aSLei Wang !ProbeManager->profileIsValid(F, *FSForMatching); 39318cdfa72SLei Wang bool RunCFGMatching = 39418cdfa72SLei Wang !FunctionSamples::ProfileIsProbeBased || ChecksumMismatch; 39518cdfa72SLei Wang bool RunCGMatching = SalvageUnusedProfile; 3961d99d7a6SLei Wang // For imported functions, the checksum metadata(pseudo_probe_desc) are 3971d99d7a6SLei Wang // dropped, so we leverage function attribute(profile-checksum-mismatch) to 3981d99d7a6SLei Wang // transfer the info: add the attribute during pre-link phase and check it 3991d99d7a6SLei Wang // during post-link phase(see "profileIsValid"). 40018cdfa72SLei Wang if (ChecksumMismatch && LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) 4011d99d7a6SLei Wang F.addFnAttr("profile-checksum-mismatch"); 4021d99d7a6SLei Wang 4031d99d7a6SLei Wang // The matching result will be saved to IRToProfileLocationMap, create a 4041d99d7a6SLei Wang // new map for each function. 4051d99d7a6SLei Wang auto &IRToProfileLocationMap = getIRToProfileLocationMap(F); 40618cdfa72SLei Wang runStaleProfileMatching(F, IRAnchors, ProfileAnchors, IRToProfileLocationMap, 40718cdfa72SLei Wang RunCFGMatching, RunCGMatching); 4081d99d7a6SLei Wang // Find and update callsite match states after matching. 40918cdfa72SLei Wang if (RunCFGMatching && (ReportProfileStaleness || PersistProfileStaleness)) 4101d99d7a6SLei Wang recordCallsiteMatchStates(F, IRAnchors, ProfileAnchors, 4111d99d7a6SLei Wang &IRToProfileLocationMap); 4121d99d7a6SLei Wang } 4131d99d7a6SLei Wang 4141d99d7a6SLei Wang void SampleProfileMatcher::recordCallsiteMatchStates( 4155b6f1511SLei Wang const Function &F, const AnchorMap &IRAnchors, 4165b6f1511SLei Wang const AnchorMap &ProfileAnchors, 4171d99d7a6SLei Wang const LocToLocMap *IRToProfileLocationMap) { 4181d99d7a6SLei Wang bool IsPostMatch = IRToProfileLocationMap != nullptr; 4191d99d7a6SLei Wang auto &CallsiteMatchStates = 4201d99d7a6SLei Wang FuncCallsiteMatchStates[FunctionSamples::getCanonicalFnName(F.getName())]; 4211d99d7a6SLei Wang 4221d99d7a6SLei Wang auto MapIRLocToProfileLoc = [&](const LineLocation &IRLoc) { 4231d99d7a6SLei Wang // IRToProfileLocationMap is null in pre-match phrase. 4241d99d7a6SLei Wang if (!IRToProfileLocationMap) 4251d99d7a6SLei Wang return IRLoc; 4261d99d7a6SLei Wang const auto &ProfileLoc = IRToProfileLocationMap->find(IRLoc); 4271d99d7a6SLei Wang if (ProfileLoc != IRToProfileLocationMap->end()) 4281d99d7a6SLei Wang return ProfileLoc->second; 4291d99d7a6SLei Wang else 4301d99d7a6SLei Wang return IRLoc; 4311d99d7a6SLei Wang }; 4321d99d7a6SLei Wang 4331d99d7a6SLei Wang for (const auto &I : IRAnchors) { 4341d99d7a6SLei Wang // After fuzzy profile matching, use the matching result to remap the 4351d99d7a6SLei Wang // current IR callsite. 4361d99d7a6SLei Wang const auto &ProfileLoc = MapIRLocToProfileLoc(I.first); 4375b6f1511SLei Wang const auto &IRCalleeId = I.second; 4381d99d7a6SLei Wang const auto &It = ProfileAnchors.find(ProfileLoc); 4391d99d7a6SLei Wang if (It == ProfileAnchors.end()) 4401d99d7a6SLei Wang continue; 4415b6f1511SLei Wang const auto &ProfCalleeId = It->second; 4425b6f1511SLei Wang if (IRCalleeId == ProfCalleeId) { 4431d99d7a6SLei Wang auto It = CallsiteMatchStates.find(ProfileLoc); 4441d99d7a6SLei Wang if (It == CallsiteMatchStates.end()) 4451d99d7a6SLei Wang CallsiteMatchStates.emplace(ProfileLoc, MatchState::InitialMatch); 4461d99d7a6SLei Wang else if (IsPostMatch) { 4471d99d7a6SLei Wang if (It->second == MatchState::InitialMatch) 4481d99d7a6SLei Wang It->second = MatchState::UnchangedMatch; 4491d99d7a6SLei Wang else if (It->second == MatchState::InitialMismatch) 4501d99d7a6SLei Wang It->second = MatchState::RecoveredMismatch; 4511d99d7a6SLei Wang } 4521d99d7a6SLei Wang } 4531d99d7a6SLei Wang } 4541d99d7a6SLei Wang 4551d99d7a6SLei Wang // Check if there are any callsites in the profile that does not match to any 4561d99d7a6SLei Wang // IR callsites. 4571d99d7a6SLei Wang for (const auto &I : ProfileAnchors) { 4581d99d7a6SLei Wang const auto &Loc = I.first; 4595b6f1511SLei Wang assert(!I.second.stringRef().empty() && "Callees should not be empty"); 4601d99d7a6SLei Wang auto It = CallsiteMatchStates.find(Loc); 4611d99d7a6SLei Wang if (It == CallsiteMatchStates.end()) 4621d99d7a6SLei Wang CallsiteMatchStates.emplace(Loc, MatchState::InitialMismatch); 4631d99d7a6SLei Wang else if (IsPostMatch) { 4641d99d7a6SLei Wang // Update the state if it's not matched(UnchangedMatch or 4651d99d7a6SLei Wang // RecoveredMismatch). 4661d99d7a6SLei Wang if (It->second == MatchState::InitialMismatch) 4671d99d7a6SLei Wang It->second = MatchState::UnchangedMismatch; 4681d99d7a6SLei Wang else if (It->second == MatchState::InitialMatch) 4691d99d7a6SLei Wang It->second = MatchState::RemovedMatch; 4701d99d7a6SLei Wang } 4711d99d7a6SLei Wang } 4721d99d7a6SLei Wang } 4731d99d7a6SLei Wang 4741d99d7a6SLei Wang void SampleProfileMatcher::countMismatchedFuncSamples(const FunctionSamples &FS, 4751d99d7a6SLei Wang bool IsTopLevel) { 4761d99d7a6SLei Wang const auto *FuncDesc = ProbeManager->getDesc(FS.getGUID()); 4771d99d7a6SLei Wang // Skip the function that is external or renamed. 4781d99d7a6SLei Wang if (!FuncDesc) 4791d99d7a6SLei Wang return; 4801d99d7a6SLei Wang 4811d99d7a6SLei Wang if (ProbeManager->profileIsHashMismatched(*FuncDesc, FS)) { 4821d99d7a6SLei Wang if (IsTopLevel) 4831d99d7a6SLei Wang NumStaleProfileFunc++; 4841d99d7a6SLei Wang // Given currently all probe ids are after block probe ids, once the 4851d99d7a6SLei Wang // checksum is mismatched, it's likely all the callites are mismatched and 4861d99d7a6SLei Wang // dropped. We conservatively count all the samples as mismatched and stop 4871d99d7a6SLei Wang // counting the inlinees' profiles. 4881d99d7a6SLei Wang MismatchedFunctionSamples += FS.getTotalSamples(); 4891d99d7a6SLei Wang return; 4901d99d7a6SLei Wang } 4911d99d7a6SLei Wang 4921d99d7a6SLei Wang // Even the current-level function checksum is matched, it's possible that the 4931d99d7a6SLei Wang // nested inlinees' checksums are mismatched that affect the inlinee's sample 4941d99d7a6SLei Wang // loading, we need to go deeper to check the inlinees' function samples. 4951d99d7a6SLei Wang // Similarly, count all the samples as mismatched if the inlinee's checksum is 4961d99d7a6SLei Wang // mismatched using this recursive function. 4971d99d7a6SLei Wang for (const auto &I : FS.getCallsiteSamples()) 4981d99d7a6SLei Wang for (const auto &CS : I.second) 4991d99d7a6SLei Wang countMismatchedFuncSamples(CS.second, false); 5001d99d7a6SLei Wang } 5011d99d7a6SLei Wang 5021d99d7a6SLei Wang void SampleProfileMatcher::countMismatchedCallsiteSamples( 5031d99d7a6SLei Wang const FunctionSamples &FS) { 5041d99d7a6SLei Wang auto It = FuncCallsiteMatchStates.find(FS.getFuncName()); 5051d99d7a6SLei Wang // Skip it if no mismatched callsite or this is an external function. 5061d99d7a6SLei Wang if (It == FuncCallsiteMatchStates.end() || It->second.empty()) 5071d99d7a6SLei Wang return; 5081d99d7a6SLei Wang const auto &CallsiteMatchStates = It->second; 5091d99d7a6SLei Wang 5101d99d7a6SLei Wang auto findMatchState = [&](const LineLocation &Loc) { 5111d99d7a6SLei Wang auto It = CallsiteMatchStates.find(Loc); 5121d99d7a6SLei Wang if (It == CallsiteMatchStates.end()) 5131d99d7a6SLei Wang return MatchState::Unknown; 5141d99d7a6SLei Wang return It->second; 5151d99d7a6SLei Wang }; 5161d99d7a6SLei Wang 5171d99d7a6SLei Wang auto AttributeMismatchedSamples = [&](const enum MatchState &State, 5181d99d7a6SLei Wang uint64_t Samples) { 5191d99d7a6SLei Wang if (isMismatchState(State)) 5201d99d7a6SLei Wang MismatchedCallsiteSamples += Samples; 5211d99d7a6SLei Wang else if (State == MatchState::RecoveredMismatch) 5221d99d7a6SLei Wang RecoveredCallsiteSamples += Samples; 5231d99d7a6SLei Wang }; 5241d99d7a6SLei Wang 5251d99d7a6SLei Wang // The non-inlined callsites are saved in the body samples of function 5261d99d7a6SLei Wang // profile, go through it to count the non-inlined callsite samples. 5271d99d7a6SLei Wang for (const auto &I : FS.getBodySamples()) 5281d99d7a6SLei Wang AttributeMismatchedSamples(findMatchState(I.first), I.second.getSamples()); 5291d99d7a6SLei Wang 5301d99d7a6SLei Wang // Count the inlined callsite samples. 5311d99d7a6SLei Wang for (const auto &I : FS.getCallsiteSamples()) { 5321d99d7a6SLei Wang auto State = findMatchState(I.first); 5331d99d7a6SLei Wang uint64_t CallsiteSamples = 0; 5341d99d7a6SLei Wang for (const auto &CS : I.second) 5351d99d7a6SLei Wang CallsiteSamples += CS.second.getTotalSamples(); 5361d99d7a6SLei Wang AttributeMismatchedSamples(State, CallsiteSamples); 5371d99d7a6SLei Wang 5381d99d7a6SLei Wang if (isMismatchState(State)) 5391d99d7a6SLei Wang continue; 5401d99d7a6SLei Wang 5411d99d7a6SLei Wang // When the current level of inlined call site matches the profiled call 5421d99d7a6SLei Wang // site, we need to go deeper along the inline tree to count mismatches from 5431d99d7a6SLei Wang // lower level inlinees. 5441d99d7a6SLei Wang for (const auto &CS : I.second) 5451d99d7a6SLei Wang countMismatchedCallsiteSamples(CS.second); 5461d99d7a6SLei Wang } 5471d99d7a6SLei Wang } 5481d99d7a6SLei Wang 5491d99d7a6SLei Wang void SampleProfileMatcher::countMismatchCallsites(const FunctionSamples &FS) { 5501d99d7a6SLei Wang auto It = FuncCallsiteMatchStates.find(FS.getFuncName()); 5511d99d7a6SLei Wang // Skip it if no mismatched callsite or this is an external function. 5521d99d7a6SLei Wang if (It == FuncCallsiteMatchStates.end() || It->second.empty()) 5531d99d7a6SLei Wang return; 5541d99d7a6SLei Wang const auto &MatchStates = It->second; 5551d99d7a6SLei Wang [[maybe_unused]] bool OnInitialState = 5561d99d7a6SLei Wang isInitialState(MatchStates.begin()->second); 5571d99d7a6SLei Wang for (const auto &I : MatchStates) { 5581d99d7a6SLei Wang TotalProfiledCallsites++; 5591d99d7a6SLei Wang assert( 5601d99d7a6SLei Wang (OnInitialState ? isInitialState(I.second) : isFinalState(I.second)) && 5611d99d7a6SLei Wang "Profile matching state is inconsistent"); 5621d99d7a6SLei Wang 5631d99d7a6SLei Wang if (isMismatchState(I.second)) 5641d99d7a6SLei Wang NumMismatchedCallsites++; 5651d99d7a6SLei Wang else if (I.second == MatchState::RecoveredMismatch) 5661d99d7a6SLei Wang NumRecoveredCallsites++; 5671d99d7a6SLei Wang } 5681d99d7a6SLei Wang } 5691d99d7a6SLei Wang 57018cdfa72SLei Wang void SampleProfileMatcher::countCallGraphRecoveredSamples( 57118cdfa72SLei Wang const FunctionSamples &FS, 57218cdfa72SLei Wang std::unordered_set<FunctionId> &CallGraphRecoveredProfiles) { 57318cdfa72SLei Wang if (CallGraphRecoveredProfiles.count(FS.getFunction())) { 57418cdfa72SLei Wang NumCallGraphRecoveredFuncSamples += FS.getTotalSamples(); 57518cdfa72SLei Wang return; 57618cdfa72SLei Wang } 57718cdfa72SLei Wang 57818cdfa72SLei Wang for (const auto &CM : FS.getCallsiteSamples()) { 57918cdfa72SLei Wang for (const auto &CS : CM.second) { 58018cdfa72SLei Wang countCallGraphRecoveredSamples(CS.second, CallGraphRecoveredProfiles); 58118cdfa72SLei Wang } 58218cdfa72SLei Wang } 58318cdfa72SLei Wang } 58418cdfa72SLei Wang 5851d99d7a6SLei Wang void SampleProfileMatcher::computeAndReportProfileStaleness() { 5861d99d7a6SLei Wang if (!ReportProfileStaleness && !PersistProfileStaleness) 5871d99d7a6SLei Wang return; 5881d99d7a6SLei Wang 58918cdfa72SLei Wang std::unordered_set<FunctionId> CallGraphRecoveredProfiles; 59018cdfa72SLei Wang if (SalvageUnusedProfile) { 59118cdfa72SLei Wang for (const auto &I : FuncToProfileNameMap) { 59218cdfa72SLei Wang CallGraphRecoveredProfiles.insert(I.second); 59318cdfa72SLei Wang if (GlobalValue::isAvailableExternallyLinkage(I.first->getLinkage())) 59418cdfa72SLei Wang continue; 59518cdfa72SLei Wang NumCallGraphRecoveredProfiledFunc++; 59618cdfa72SLei Wang } 59718cdfa72SLei Wang } 59818cdfa72SLei Wang 5991d99d7a6SLei Wang // Count profile mismatches for profile staleness report. 6001d99d7a6SLei Wang for (const auto &F : M) { 6011d99d7a6SLei Wang if (skipProfileForFunction(F)) 6021d99d7a6SLei Wang continue; 6031d99d7a6SLei Wang // As the stats will be merged by linker, skip reporting the metrics for 6041d99d7a6SLei Wang // imported functions to avoid repeated counting. 6051d99d7a6SLei Wang if (GlobalValue::isAvailableExternallyLinkage(F.getLinkage())) 6061d99d7a6SLei Wang continue; 6071d99d7a6SLei Wang const auto *FS = Reader.getSamplesFor(F); 6081d99d7a6SLei Wang if (!FS) 6091d99d7a6SLei Wang continue; 6101d99d7a6SLei Wang TotalProfiledFunc++; 6111d99d7a6SLei Wang TotalFunctionSamples += FS->getTotalSamples(); 6121d99d7a6SLei Wang 61318cdfa72SLei Wang if (SalvageUnusedProfile && !CallGraphRecoveredProfiles.empty()) 61418cdfa72SLei Wang countCallGraphRecoveredSamples(*FS, CallGraphRecoveredProfiles); 61518cdfa72SLei Wang 6161d99d7a6SLei Wang // Checksum mismatch is only used in pseudo-probe mode. 6171d99d7a6SLei Wang if (FunctionSamples::ProfileIsProbeBased) 6181d99d7a6SLei Wang countMismatchedFuncSamples(*FS, true); 6191d99d7a6SLei Wang 6201d99d7a6SLei Wang // Count mismatches and samples for calliste. 6211d99d7a6SLei Wang countMismatchCallsites(*FS); 6221d99d7a6SLei Wang countMismatchedCallsiteSamples(*FS); 6231d99d7a6SLei Wang } 6241d99d7a6SLei Wang 6251d99d7a6SLei Wang if (ReportProfileStaleness) { 6261d99d7a6SLei Wang if (FunctionSamples::ProfileIsProbeBased) { 6271d99d7a6SLei Wang errs() << "(" << NumStaleProfileFunc << "/" << TotalProfiledFunc 6281d99d7a6SLei Wang << ") of functions' profile are invalid and (" 6291d99d7a6SLei Wang << MismatchedFunctionSamples << "/" << TotalFunctionSamples 6301d99d7a6SLei Wang << ") of samples are discarded due to function hash mismatch.\n"; 6311d99d7a6SLei Wang } 63218cdfa72SLei Wang if (SalvageUnusedProfile) { 63318cdfa72SLei Wang errs() << "(" << NumCallGraphRecoveredProfiledFunc << "/" 63418cdfa72SLei Wang << TotalProfiledFunc << ") of functions' profile are matched and (" 63518cdfa72SLei Wang << NumCallGraphRecoveredFuncSamples << "/" << TotalFunctionSamples 63618cdfa72SLei Wang << ") of samples are reused by call graph matching.\n"; 63718cdfa72SLei Wang } 63818cdfa72SLei Wang 6391d99d7a6SLei Wang errs() << "(" << (NumMismatchedCallsites + NumRecoveredCallsites) << "/" 6401d99d7a6SLei Wang << TotalProfiledCallsites 6411d99d7a6SLei Wang << ") of callsites' profile are invalid and (" 6421d99d7a6SLei Wang << (MismatchedCallsiteSamples + RecoveredCallsiteSamples) << "/" 6431d99d7a6SLei Wang << TotalFunctionSamples 6441d99d7a6SLei Wang << ") of samples are discarded due to callsite location mismatch.\n"; 6451d99d7a6SLei Wang errs() << "(" << NumRecoveredCallsites << "/" 6461d99d7a6SLei Wang << (NumRecoveredCallsites + NumMismatchedCallsites) 6471d99d7a6SLei Wang << ") of callsites and (" << RecoveredCallsiteSamples << "/" 6481d99d7a6SLei Wang << (RecoveredCallsiteSamples + MismatchedCallsiteSamples) 6491d99d7a6SLei Wang << ") of samples are recovered by stale profile matching.\n"; 6501d99d7a6SLei Wang } 6511d99d7a6SLei Wang 6521d99d7a6SLei Wang if (PersistProfileStaleness) { 6531d99d7a6SLei Wang LLVMContext &Ctx = M.getContext(); 6541d99d7a6SLei Wang MDBuilder MDB(Ctx); 6551d99d7a6SLei Wang 6561d99d7a6SLei Wang SmallVector<std::pair<StringRef, uint64_t>> ProfStatsVec; 6571d99d7a6SLei Wang if (FunctionSamples::ProfileIsProbeBased) { 6581d99d7a6SLei Wang ProfStatsVec.emplace_back("NumStaleProfileFunc", NumStaleProfileFunc); 6591d99d7a6SLei Wang ProfStatsVec.emplace_back("TotalProfiledFunc", TotalProfiledFunc); 6601d99d7a6SLei Wang ProfStatsVec.emplace_back("MismatchedFunctionSamples", 6611d99d7a6SLei Wang MismatchedFunctionSamples); 6621d99d7a6SLei Wang ProfStatsVec.emplace_back("TotalFunctionSamples", TotalFunctionSamples); 6631d99d7a6SLei Wang } 6641d99d7a6SLei Wang 66518cdfa72SLei Wang if (SalvageUnusedProfile) { 66618cdfa72SLei Wang ProfStatsVec.emplace_back("NumCallGraphRecoveredProfiledFunc", 66718cdfa72SLei Wang NumCallGraphRecoveredProfiledFunc); 66818cdfa72SLei Wang ProfStatsVec.emplace_back("NumCallGraphRecoveredFuncSamples", 66918cdfa72SLei Wang NumCallGraphRecoveredFuncSamples); 67018cdfa72SLei Wang } 67118cdfa72SLei Wang 6721d99d7a6SLei Wang ProfStatsVec.emplace_back("NumMismatchedCallsites", NumMismatchedCallsites); 6731d99d7a6SLei Wang ProfStatsVec.emplace_back("NumRecoveredCallsites", NumRecoveredCallsites); 6741d99d7a6SLei Wang ProfStatsVec.emplace_back("TotalProfiledCallsites", TotalProfiledCallsites); 6751d99d7a6SLei Wang ProfStatsVec.emplace_back("MismatchedCallsiteSamples", 6761d99d7a6SLei Wang MismatchedCallsiteSamples); 6771d99d7a6SLei Wang ProfStatsVec.emplace_back("RecoveredCallsiteSamples", 6781d99d7a6SLei Wang RecoveredCallsiteSamples); 6791d99d7a6SLei Wang 6801d99d7a6SLei Wang auto *MD = MDB.createLLVMStats(ProfStatsVec); 6811d99d7a6SLei Wang auto *NMD = M.getOrInsertNamedMetadata("llvm.stats"); 6821d99d7a6SLei Wang NMD->addOperand(MD); 6831d99d7a6SLei Wang } 6841d99d7a6SLei Wang } 6851d99d7a6SLei Wang 68618cdfa72SLei Wang void SampleProfileMatcher::findFunctionsWithoutProfile() { 68718cdfa72SLei Wang // TODO: Support MD5 profile. 68818cdfa72SLei Wang if (FunctionSamples::UseMD5) 68918cdfa72SLei Wang return; 69018cdfa72SLei Wang StringSet<> NamesInProfile; 69118cdfa72SLei Wang if (auto NameTable = Reader.getNameTable()) { 69218cdfa72SLei Wang for (auto Name : *NameTable) 69318cdfa72SLei Wang NamesInProfile.insert(Name.stringRef()); 69418cdfa72SLei Wang } 69518cdfa72SLei Wang 69618cdfa72SLei Wang for (auto &F : M) { 69718cdfa72SLei Wang // Skip declarations, as even if the function can be matched, we have 69818cdfa72SLei Wang // nothing to do with it. 69918cdfa72SLei Wang if (F.isDeclaration()) 70018cdfa72SLei Wang continue; 70118cdfa72SLei Wang 70218cdfa72SLei Wang StringRef CanonFName = FunctionSamples::getCanonicalFnName(F.getName()); 70318cdfa72SLei Wang const auto *FS = getFlattenedSamplesFor(F); 70418cdfa72SLei Wang if (FS) 70518cdfa72SLei Wang continue; 70618cdfa72SLei Wang 70718cdfa72SLei Wang // For extended binary, functions fully inlined may not be loaded in the 70818cdfa72SLei Wang // top-level profile, so check the NameTable which has the all symbol names 70918cdfa72SLei Wang // in profile. 71018cdfa72SLei Wang if (NamesInProfile.count(CanonFName)) 71118cdfa72SLei Wang continue; 71218cdfa72SLei Wang 71318cdfa72SLei Wang // For extended binary, non-profiled function symbols are in the profile 71418cdfa72SLei Wang // symbol list table. 71518cdfa72SLei Wang if (PSL && PSL->contains(CanonFName)) 71618cdfa72SLei Wang continue; 71718cdfa72SLei Wang 71818cdfa72SLei Wang LLVM_DEBUG(dbgs() << "Function " << CanonFName 71918cdfa72SLei Wang << " is not in profile or profile symbol list.\n"); 72018cdfa72SLei Wang FunctionsWithoutProfile[FunctionId(CanonFName)] = &F; 72118cdfa72SLei Wang } 72218cdfa72SLei Wang } 72318cdfa72SLei Wang 72418cdfa72SLei Wang bool SampleProfileMatcher::functionMatchesProfileHelper( 72518cdfa72SLei Wang const Function &IRFunc, const FunctionId &ProfFunc) { 72618cdfa72SLei Wang // The value is in the range [0, 1]. The bigger the value is, the more similar 72718cdfa72SLei Wang // two sequences are. 72818cdfa72SLei Wang float Similarity = 0.0; 72918cdfa72SLei Wang 7306e60330aSLei Wang const auto *FSForMatching = getFlattenedSamplesFor(ProfFunc); 7316e60330aSLei Wang // With extbinary profile format, initial profile loading only reads profile 7326e60330aSLei Wang // based on current function names in the module. 7336e60330aSLei Wang // However, if a function is renamed, sample loader skips to load its original 7346e60330aSLei Wang // profile(which has a different name), we will miss this case. To address 7356e60330aSLei Wang // this, we load the top-level profile candidate explicitly for the matching. 7366e60330aSLei Wang if (!FSForMatching && LoadFuncProfileforCGMatching) { 7376e60330aSLei Wang DenseSet<StringRef> TopLevelFunc({ProfFunc.stringRef()}); 7386e60330aSLei Wang if (std::error_code EC = Reader.read(TopLevelFunc)) 7396e60330aSLei Wang return false; 7406e60330aSLei Wang FSForMatching = Reader.getSamplesFor(ProfFunc.stringRef()); 7416e60330aSLei Wang LLVM_DEBUG({ 7426e60330aSLei Wang if (FSForMatching) 7436e60330aSLei Wang dbgs() << "Read top-level function " << ProfFunc 7446e60330aSLei Wang << " for call-graph matching\n"; 7456e60330aSLei Wang }); 7466e60330aSLei Wang } 7476e60330aSLei Wang if (!FSForMatching) 74818cdfa72SLei Wang return false; 74918cdfa72SLei Wang // The check for similarity or checksum may not be reliable if the function is 75018cdfa72SLei Wang // tiny, we use the number of basic block as a proxy for the function 75118cdfa72SLei Wang // complexity and skip the matching if it's too small. 75218cdfa72SLei Wang if (IRFunc.size() < MinFuncCountForCGMatching || 7536e60330aSLei Wang FSForMatching->getBodySamples().size() < MinFuncCountForCGMatching) 75418cdfa72SLei Wang return false; 75518cdfa72SLei Wang 75618cdfa72SLei Wang // For probe-based function, we first trust the checksum info. If the checksum 75718cdfa72SLei Wang // doesn't match, we continue checking for similarity. 75818cdfa72SLei Wang if (FunctionSamples::ProfileIsProbeBased) { 75918cdfa72SLei Wang const auto *FuncDesc = ProbeManager->getDesc(IRFunc); 76018cdfa72SLei Wang if (FuncDesc && 7616e60330aSLei Wang !ProbeManager->profileIsHashMismatched(*FuncDesc, *FSForMatching)) { 76218cdfa72SLei Wang LLVM_DEBUG(dbgs() << "The checksums for " << IRFunc.getName() 76318cdfa72SLei Wang << "(IR) and " << ProfFunc << "(Profile) match.\n"); 76418cdfa72SLei Wang 76518cdfa72SLei Wang return true; 76618cdfa72SLei Wang } 76718cdfa72SLei Wang } 76818cdfa72SLei Wang 76918cdfa72SLei Wang AnchorMap IRAnchors; 77018cdfa72SLei Wang findIRAnchors(IRFunc, IRAnchors); 77118cdfa72SLei Wang AnchorMap ProfileAnchors; 7726e60330aSLei Wang findProfileAnchors(*FSForMatching, ProfileAnchors); 77318cdfa72SLei Wang 77418cdfa72SLei Wang AnchorList FilteredIRAnchorsList; 77518cdfa72SLei Wang AnchorList FilteredProfileAnchorList; 77618cdfa72SLei Wang getFilteredAnchorList(IRAnchors, ProfileAnchors, FilteredIRAnchorsList, 77718cdfa72SLei Wang FilteredProfileAnchorList); 77818cdfa72SLei Wang 77918cdfa72SLei Wang // Similarly skip the matching if the num of anchors is not enough. 78018cdfa72SLei Wang if (FilteredIRAnchorsList.size() < MinCallCountForCGMatching || 78118cdfa72SLei Wang FilteredProfileAnchorList.size() < MinCallCountForCGMatching) 78218cdfa72SLei Wang return false; 78318cdfa72SLei Wang 78418cdfa72SLei Wang // Use the diff algorithm to find the LCS between IR and profile. 78518cdfa72SLei Wang 78618cdfa72SLei Wang // Don't recursively match the callee function to avoid infinite matching, 78718cdfa72SLei Wang // callee functions will be handled later since it's processed in top-down 78818cdfa72SLei Wang // order . 78918cdfa72SLei Wang LocToLocMap MatchedAnchors = 79018cdfa72SLei Wang longestCommonSequence(FilteredIRAnchorsList, FilteredProfileAnchorList, 79118cdfa72SLei Wang false /* Match unused functions */); 79218cdfa72SLei Wang 79318cdfa72SLei Wang Similarity = 79418cdfa72SLei Wang static_cast<float>(MatchedAnchors.size()) * 2 / 79518cdfa72SLei Wang (FilteredIRAnchorsList.size() + FilteredProfileAnchorList.size()); 79618cdfa72SLei Wang 79718cdfa72SLei Wang LLVM_DEBUG(dbgs() << "The similarity between " << IRFunc.getName() 79818cdfa72SLei Wang << "(IR) and " << ProfFunc << "(profile) is " 79918cdfa72SLei Wang << format("%.2f", Similarity) << "\n"); 80018cdfa72SLei Wang assert((Similarity >= 0 && Similarity <= 1.0) && 80118cdfa72SLei Wang "Similarity value should be in [0, 1]"); 80218cdfa72SLei Wang return Similarity * 100 > FuncProfileSimilarityThreshold; 80318cdfa72SLei Wang } 80418cdfa72SLei Wang 80518cdfa72SLei Wang // If FindMatchedProfileOnly is set to true, only use the processed function 80618cdfa72SLei Wang // results. This is used for skipping the repeated recursive matching. 80718cdfa72SLei Wang bool SampleProfileMatcher::functionMatchesProfile(Function &IRFunc, 80818cdfa72SLei Wang const FunctionId &ProfFunc, 80918cdfa72SLei Wang bool FindMatchedProfileOnly) { 81018cdfa72SLei Wang auto R = FuncProfileMatchCache.find({&IRFunc, ProfFunc}); 81118cdfa72SLei Wang if (R != FuncProfileMatchCache.end()) 81218cdfa72SLei Wang return R->second; 81318cdfa72SLei Wang 81418cdfa72SLei Wang if (FindMatchedProfileOnly) 81518cdfa72SLei Wang return false; 81618cdfa72SLei Wang 81718cdfa72SLei Wang bool Matched = functionMatchesProfileHelper(IRFunc, ProfFunc); 81818cdfa72SLei Wang FuncProfileMatchCache[{&IRFunc, ProfFunc}] = Matched; 81918cdfa72SLei Wang if (Matched) { 82018cdfa72SLei Wang FuncToProfileNameMap[&IRFunc] = ProfFunc; 82118cdfa72SLei Wang LLVM_DEBUG(dbgs() << "Function:" << IRFunc.getName() 82218cdfa72SLei Wang << " matches profile:" << ProfFunc << "\n"); 82318cdfa72SLei Wang } 82418cdfa72SLei Wang 82518cdfa72SLei Wang return Matched; 82618cdfa72SLei Wang } 82718cdfa72SLei Wang 8286e60330aSLei Wang void SampleProfileMatcher::UpdateWithSalvagedProfiles() { 8296e60330aSLei Wang DenseSet<StringRef> ProfileSalvagedFuncs; 8306e60330aSLei Wang // Update FuncNameToProfNameMap and SymbolMap. 8316e60330aSLei Wang for (auto &I : FuncToProfileNameMap) { 8326e60330aSLei Wang assert(I.first && "New function is null"); 8336e60330aSLei Wang FunctionId FuncName(I.first->getName()); 8346e60330aSLei Wang ProfileSalvagedFuncs.insert(I.second.stringRef()); 8356e60330aSLei Wang FuncNameToProfNameMap->emplace(FuncName, I.second); 8366e60330aSLei Wang 8376e60330aSLei Wang // We need to remove the old entry to avoid duplicating the function 8386e60330aSLei Wang // processing. 8396e60330aSLei Wang SymbolMap->erase(FuncName); 8406e60330aSLei Wang SymbolMap->emplace(I.second, I.first); 8416e60330aSLei Wang } 8426e60330aSLei Wang 8436e60330aSLei Wang // With extbinary profile format, initial profile loading only reads profile 8446e60330aSLei Wang // based on current function names in the module, so we need to load top-level 8456e60330aSLei Wang // profiles for functions with different profile name explicitly after 8466e60330aSLei Wang // function-profile name map is established with stale profile matching. 8476e60330aSLei Wang Reader.read(ProfileSalvagedFuncs); 8486e60330aSLei Wang Reader.setFuncNameToProfNameMap(*FuncNameToProfNameMap); 8496e60330aSLei Wang } 8506e60330aSLei Wang 8511d99d7a6SLei Wang void SampleProfileMatcher::runOnModule() { 8521d99d7a6SLei Wang ProfileConverter::flattenProfile(Reader.getProfiles(), FlattenedProfiles, 8531d99d7a6SLei Wang FunctionSamples::ProfileIsCS); 85418cdfa72SLei Wang if (SalvageUnusedProfile) 85518cdfa72SLei Wang findFunctionsWithoutProfile(); 85618cdfa72SLei Wang 85718cdfa72SLei Wang // Process the matching in top-down order so that the caller matching result 85818cdfa72SLei Wang // can be used to the callee matching. 85918cdfa72SLei Wang std::vector<Function *> TopDownFunctionList; 86018cdfa72SLei Wang TopDownFunctionList.reserve(M.size()); 86118cdfa72SLei Wang buildTopDownFuncOrder(CG, TopDownFunctionList); 86218cdfa72SLei Wang for (auto *F : TopDownFunctionList) { 86318cdfa72SLei Wang if (skipProfileForFunction(*F)) 8641d99d7a6SLei Wang continue; 86518cdfa72SLei Wang runOnFunction(*F); 8661d99d7a6SLei Wang } 86718cdfa72SLei Wang 86818cdfa72SLei Wang if (SalvageUnusedProfile) 8696e60330aSLei Wang UpdateWithSalvagedProfiles(); 87018cdfa72SLei Wang 8711d99d7a6SLei Wang if (SalvageStaleProfile) 8721d99d7a6SLei Wang distributeIRToProfileLocationMap(); 8731d99d7a6SLei Wang 8741d99d7a6SLei Wang computeAndReportProfileStaleness(); 8751d99d7a6SLei Wang } 8761d99d7a6SLei Wang 8771d99d7a6SLei Wang void SampleProfileMatcher::distributeIRToProfileLocationMap( 8781d99d7a6SLei Wang FunctionSamples &FS) { 8791d99d7a6SLei Wang const auto ProfileMappings = FuncMappings.find(FS.getFuncName()); 8801d99d7a6SLei Wang if (ProfileMappings != FuncMappings.end()) { 8811d99d7a6SLei Wang FS.setIRToProfileLocationMap(&(ProfileMappings->second)); 8821d99d7a6SLei Wang } 8831d99d7a6SLei Wang 8841d99d7a6SLei Wang for (auto &Callees : 8851d99d7a6SLei Wang const_cast<CallsiteSampleMap &>(FS.getCallsiteSamples())) { 8861d99d7a6SLei Wang for (auto &FS : Callees.second) { 8871d99d7a6SLei Wang distributeIRToProfileLocationMap(FS.second); 8881d99d7a6SLei Wang } 8891d99d7a6SLei Wang } 8901d99d7a6SLei Wang } 8911d99d7a6SLei Wang 8921d99d7a6SLei Wang // Use a central place to distribute the matching results. Outlined and inlined 8931d99d7a6SLei Wang // profile with the function name will be set to the same pointer. 8941d99d7a6SLei Wang void SampleProfileMatcher::distributeIRToProfileLocationMap() { 8951d99d7a6SLei Wang for (auto &I : Reader.getProfiles()) { 8961d99d7a6SLei Wang distributeIRToProfileLocationMap(I.second); 8971d99d7a6SLei Wang } 8981d99d7a6SLei Wang } 899