xref: /llvm-project/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp (revision 3a5791e75c53234bb25cb7a427c990d3cb6a0883)
11d99d7a6SLei Wang //===- SampleProfileMatcher.cpp - Sampling-based Stale Profile Matcher ----===//
21d99d7a6SLei Wang //
31d99d7a6SLei Wang // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
41d99d7a6SLei Wang // See https://llvm.org/LICENSE.txt for license information.
51d99d7a6SLei Wang // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
61d99d7a6SLei Wang //
71d99d7a6SLei Wang //===----------------------------------------------------------------------===//
81d99d7a6SLei Wang //
91d99d7a6SLei Wang // This file implements the SampleProfileMatcher used for stale
101d99d7a6SLei Wang // profile matching.
111d99d7a6SLei Wang //
121d99d7a6SLei Wang //===----------------------------------------------------------------------===//
131d99d7a6SLei Wang 
141d99d7a6SLei Wang #include "llvm/Transforms/IPO/SampleProfileMatcher.h"
151d99d7a6SLei Wang #include "llvm/IR/IntrinsicInst.h"
161d99d7a6SLei Wang #include "llvm/IR/MDBuilder.h"
1721ba91c4SKrzysztof Pszeniczny #include "llvm/Support/CommandLine.h"
18*3a5791e7SKazu Hirata #include "llvm/Transforms/Utils/LongestCommonSequence.h"
191d99d7a6SLei Wang 
201d99d7a6SLei Wang using namespace llvm;
211d99d7a6SLei Wang using namespace sampleprof;
221d99d7a6SLei Wang 
231d99d7a6SLei Wang #define DEBUG_TYPE "sample-profile-matcher"
241d99d7a6SLei Wang 
2518cdfa72SLei Wang static cl::opt<unsigned> FuncProfileSimilarityThreshold(
2618cdfa72SLei Wang     "func-profile-similarity-threshold", cl::Hidden, cl::init(80),
2718cdfa72SLei Wang     cl::desc("Consider a profile matches a function if the similarity of their "
2818cdfa72SLei Wang              "callee sequences is above the specified percentile."));
2918cdfa72SLei Wang 
3018cdfa72SLei Wang static cl::opt<unsigned> MinFuncCountForCGMatching(
3118cdfa72SLei Wang     "min-func-count-for-cg-matching", cl::Hidden, cl::init(5),
3218cdfa72SLei Wang     cl::desc("The minimum number of basic blocks required for a function to "
3318cdfa72SLei Wang              "run stale profile call graph matching."));
3418cdfa72SLei Wang 
3518cdfa72SLei Wang static cl::opt<unsigned> MinCallCountForCGMatching(
3618cdfa72SLei Wang     "min-call-count-for-cg-matching", cl::Hidden, cl::init(3),
3718cdfa72SLei Wang     cl::desc("The minimum number of call anchors required for a function to "
3818cdfa72SLei Wang              "run stale profile call graph matching."));
3918cdfa72SLei Wang 
406e60330aSLei Wang static cl::opt<bool> LoadFuncProfileforCGMatching(
416e60330aSLei Wang     "load-func-profile-for-cg-matching", cl::Hidden, cl::init(false),
426e60330aSLei Wang     cl::desc(
436e60330aSLei Wang         "Load top-level profiles that the sample reader initially skipped for "
446e60330aSLei Wang         "the call-graph matching (only meaningful for extended binary "
456e60330aSLei Wang         "format)"));
466e60330aSLei Wang 
471d99d7a6SLei Wang extern cl::opt<bool> SalvageStaleProfile;
4818cdfa72SLei Wang extern cl::opt<bool> SalvageUnusedProfile;
491d99d7a6SLei Wang extern cl::opt<bool> PersistProfileStaleness;
501d99d7a6SLei Wang extern cl::opt<bool> ReportProfileStaleness;
511d99d7a6SLei Wang 
5221ba91c4SKrzysztof Pszeniczny static cl::opt<unsigned> SalvageStaleProfileMaxCallsites(
5321ba91c4SKrzysztof Pszeniczny     "salvage-stale-profile-max-callsites", cl::Hidden, cl::init(UINT_MAX),
5421ba91c4SKrzysztof Pszeniczny     cl::desc("The maximum number of callsites in a function, above which stale "
5521ba91c4SKrzysztof Pszeniczny              "profile matching will be skipped."));
5621ba91c4SKrzysztof Pszeniczny 
575b6f1511SLei Wang void SampleProfileMatcher::findIRAnchors(const Function &F,
5818cdfa72SLei Wang                                          AnchorMap &IRAnchors) const {
591d99d7a6SLei Wang   // For inlined code, recover the original callsite and callee by finding the
601d99d7a6SLei Wang   // top-level inline frame. e.g. For frame stack "main:1 @ foo:2 @ bar:3", the
611d99d7a6SLei Wang   // top-level frame is "main:1", the callsite is "1" and the callee is "foo".
621d99d7a6SLei Wang   auto FindTopLevelInlinedCallsite = [](const DILocation *DIL) {
631d99d7a6SLei Wang     assert((DIL && DIL->getInlinedAt()) && "No inlined callsite");
641d99d7a6SLei Wang     const DILocation *PrevDIL = nullptr;
651d99d7a6SLei Wang     do {
661d99d7a6SLei Wang       PrevDIL = DIL;
671d99d7a6SLei Wang       DIL = DIL->getInlinedAt();
681d99d7a6SLei Wang     } while (DIL->getInlinedAt());
691d99d7a6SLei Wang 
70e06d6ed1SKrzysztof Pszeniczny     LineLocation Callsite = FunctionSamples::getCallSiteIdentifier(
71e06d6ed1SKrzysztof Pszeniczny         DIL, FunctionSamples::ProfileIsFS);
721d99d7a6SLei Wang     StringRef CalleeName = PrevDIL->getSubprogramLinkageName();
735b6f1511SLei Wang     return std::make_pair(Callsite, FunctionId(CalleeName));
741d99d7a6SLei Wang   };
751d99d7a6SLei Wang 
761d99d7a6SLei Wang   auto GetCanonicalCalleeName = [](const CallBase *CB) {
771d99d7a6SLei Wang     StringRef CalleeName = UnknownIndirectCallee;
781d99d7a6SLei Wang     if (Function *Callee = CB->getCalledFunction())
791d99d7a6SLei Wang       CalleeName = FunctionSamples::getCanonicalFnName(Callee->getName());
801d99d7a6SLei Wang     return CalleeName;
811d99d7a6SLei Wang   };
821d99d7a6SLei Wang 
831d99d7a6SLei Wang   // Extract profile matching anchors in the IR.
841d99d7a6SLei Wang   for (auto &BB : F) {
851d99d7a6SLei Wang     for (auto &I : BB) {
861d99d7a6SLei Wang       DILocation *DIL = I.getDebugLoc();
871d99d7a6SLei Wang       if (!DIL)
881d99d7a6SLei Wang         continue;
891d99d7a6SLei Wang 
901d99d7a6SLei Wang       if (FunctionSamples::ProfileIsProbeBased) {
911d99d7a6SLei Wang         if (auto Probe = extractProbe(I)) {
921d99d7a6SLei Wang           // Flatten inlined IR for the matching.
931d99d7a6SLei Wang           if (DIL->getInlinedAt()) {
941d99d7a6SLei Wang             IRAnchors.emplace(FindTopLevelInlinedCallsite(DIL));
951d99d7a6SLei Wang           } else {
961d99d7a6SLei Wang             // Use empty StringRef for basic block probe.
971d99d7a6SLei Wang             StringRef CalleeName;
981d99d7a6SLei Wang             if (const auto *CB = dyn_cast<CallBase>(&I)) {
991d99d7a6SLei Wang               // Skip the probe inst whose callee name is "llvm.pseudoprobe".
1001d99d7a6SLei Wang               if (!isa<IntrinsicInst>(&I))
1011d99d7a6SLei Wang                 CalleeName = GetCanonicalCalleeName(CB);
1021d99d7a6SLei Wang             }
1035b6f1511SLei Wang             LineLocation Loc = LineLocation(Probe->Id, 0);
1045b6f1511SLei Wang             IRAnchors.emplace(Loc, FunctionId(CalleeName));
1051d99d7a6SLei Wang           }
1061d99d7a6SLei Wang         }
1071d99d7a6SLei Wang       } else {
1081d99d7a6SLei Wang         // TODO: For line-number based profile(AutoFDO), currently only support
1091d99d7a6SLei Wang         // find callsite anchors. In future, we need to parse all the non-call
1101d99d7a6SLei Wang         // instructions to extract the line locations for profile matching.
1111d99d7a6SLei Wang         if (!isa<CallBase>(&I) || isa<IntrinsicInst>(&I))
1121d99d7a6SLei Wang           continue;
1131d99d7a6SLei Wang 
1141d99d7a6SLei Wang         if (DIL->getInlinedAt()) {
1151d99d7a6SLei Wang           IRAnchors.emplace(FindTopLevelInlinedCallsite(DIL));
1161d99d7a6SLei Wang         } else {
117e06d6ed1SKrzysztof Pszeniczny           LineLocation Callsite = FunctionSamples::getCallSiteIdentifier(
118e06d6ed1SKrzysztof Pszeniczny               DIL, FunctionSamples::ProfileIsFS);
1191d99d7a6SLei Wang           StringRef CalleeName = GetCanonicalCalleeName(dyn_cast<CallBase>(&I));
1205b6f1511SLei Wang           IRAnchors.emplace(Callsite, FunctionId(CalleeName));
1211d99d7a6SLei Wang         }
1221d99d7a6SLei Wang       }
1231d99d7a6SLei Wang     }
1241d99d7a6SLei Wang   }
1251d99d7a6SLei Wang }
1261d99d7a6SLei Wang 
1275b6f1511SLei Wang void SampleProfileMatcher::findProfileAnchors(const FunctionSamples &FS,
12818cdfa72SLei Wang                                               AnchorMap &ProfileAnchors) const {
1291d99d7a6SLei Wang   auto isInvalidLineOffset = [](uint32_t LineOffset) {
1301d99d7a6SLei Wang     return LineOffset & 0x8000;
1311d99d7a6SLei Wang   };
1321d99d7a6SLei Wang 
1335b6f1511SLei Wang   auto InsertAnchor = [](const LineLocation &Loc, const FunctionId &CalleeName,
1345b6f1511SLei Wang                          AnchorMap &ProfileAnchors) {
1355b6f1511SLei Wang     auto Ret = ProfileAnchors.try_emplace(Loc, CalleeName);
1365b6f1511SLei Wang     if (!Ret.second) {
1375b6f1511SLei Wang       // For multiple callees, which indicates it's an indirect call, we use a
1385b6f1511SLei Wang       // dummy name(UnknownIndirectCallee) as the indrect callee name.
1395b6f1511SLei Wang       Ret.first->second = FunctionId(UnknownIndirectCallee);
1405b6f1511SLei Wang     }
1415b6f1511SLei Wang   };
1425b6f1511SLei Wang 
1431d99d7a6SLei Wang   for (const auto &I : FS.getBodySamples()) {
1441d99d7a6SLei Wang     const LineLocation &Loc = I.first;
1451d99d7a6SLei Wang     if (isInvalidLineOffset(Loc.LineOffset))
1461d99d7a6SLei Wang       continue;
1475b6f1511SLei Wang     for (const auto &C : I.second.getCallTargets())
1485b6f1511SLei Wang       InsertAnchor(Loc, C.first, ProfileAnchors);
1491d99d7a6SLei Wang   }
1501d99d7a6SLei Wang 
1511d99d7a6SLei Wang   for (const auto &I : FS.getCallsiteSamples()) {
1521d99d7a6SLei Wang     const LineLocation &Loc = I.first;
1531d99d7a6SLei Wang     if (isInvalidLineOffset(Loc.LineOffset))
1541d99d7a6SLei Wang       continue;
1555b6f1511SLei Wang     for (const auto &C : I.second)
1565b6f1511SLei Wang       InsertAnchor(Loc, C.first, ProfileAnchors);
1571d99d7a6SLei Wang   }
1581d99d7a6SLei Wang }
1591d99d7a6SLei Wang 
16018cdfa72SLei Wang bool SampleProfileMatcher::functionHasProfile(const FunctionId &IRFuncName,
16118cdfa72SLei Wang                                               Function *&FuncWithoutProfile) {
16218cdfa72SLei Wang   FuncWithoutProfile = nullptr;
16318cdfa72SLei Wang   auto R = FunctionsWithoutProfile.find(IRFuncName);
16418cdfa72SLei Wang   if (R != FunctionsWithoutProfile.end())
16518cdfa72SLei Wang     FuncWithoutProfile = R->second;
16618cdfa72SLei Wang   return !FuncWithoutProfile;
16718cdfa72SLei Wang }
16818cdfa72SLei Wang 
16918cdfa72SLei Wang bool SampleProfileMatcher::isProfileUnused(const FunctionId &ProfileFuncName) {
17018cdfa72SLei Wang   return SymbolMap->find(ProfileFuncName) == SymbolMap->end();
17118cdfa72SLei Wang }
17218cdfa72SLei Wang 
17318cdfa72SLei Wang bool SampleProfileMatcher::functionMatchesProfile(
17418cdfa72SLei Wang     const FunctionId &IRFuncName, const FunctionId &ProfileFuncName,
17518cdfa72SLei Wang     bool FindMatchedProfileOnly) {
17618cdfa72SLei Wang   if (IRFuncName == ProfileFuncName)
17718cdfa72SLei Wang     return true;
17818cdfa72SLei Wang   if (!SalvageUnusedProfile)
17918cdfa72SLei Wang     return false;
18018cdfa72SLei Wang 
18118cdfa72SLei Wang   // If IR function doesn't have profile and the profile is unused, try
18218cdfa72SLei Wang   // matching them.
18318cdfa72SLei Wang   Function *IRFunc = nullptr;
18418cdfa72SLei Wang   if (functionHasProfile(IRFuncName, IRFunc) ||
18518cdfa72SLei Wang       !isProfileUnused(ProfileFuncName))
18618cdfa72SLei Wang     return false;
18718cdfa72SLei Wang 
18818cdfa72SLei Wang   assert(FunctionId(IRFunc->getName()) != ProfileFuncName &&
18918cdfa72SLei Wang          "IR function should be different from profile function to match");
19018cdfa72SLei Wang   return functionMatchesProfile(*IRFunc, ProfileFuncName,
19118cdfa72SLei Wang                                 FindMatchedProfileOnly);
19218cdfa72SLei Wang }
19318cdfa72SLei Wang 
19418cdfa72SLei Wang LocToLocMap
19518cdfa72SLei Wang SampleProfileMatcher::longestCommonSequence(const AnchorList &AnchorList1,
19618cdfa72SLei Wang                                             const AnchorList &AnchorList2,
19718cdfa72SLei Wang                                             bool MatchUnusedFunction) {
198*3a5791e7SKazu Hirata   LocToLocMap MatchedAnchors;
199*3a5791e7SKazu Hirata   llvm::longestCommonSequence<LineLocation, FunctionId>(
200*3a5791e7SKazu Hirata       AnchorList1, AnchorList2,
201*3a5791e7SKazu Hirata       [&](const FunctionId &A, const FunctionId &B) {
202*3a5791e7SKazu Hirata         return functionMatchesProfile(
203*3a5791e7SKazu Hirata             A, B,
204*3a5791e7SKazu Hirata             !MatchUnusedFunction // Find matched function only
205*3a5791e7SKazu Hirata         );
206*3a5791e7SKazu Hirata       },
207*3a5791e7SKazu Hirata       [&](LineLocation A, LineLocation B) {
208*3a5791e7SKazu Hirata         MatchedAnchors.try_emplace(A, B);
209*3a5791e7SKazu Hirata       });
210*3a5791e7SKazu Hirata   return MatchedAnchors;
2115b6f1511SLei Wang }
2125b6f1511SLei Wang 
2135b6f1511SLei Wang void SampleProfileMatcher::matchNonCallsiteLocs(
2145b6f1511SLei Wang     const LocToLocMap &MatchedAnchors, const AnchorMap &IRAnchors,
2151d99d7a6SLei Wang     LocToLocMap &IRToProfileLocationMap) {
2161d99d7a6SLei Wang   auto InsertMatching = [&](const LineLocation &From, const LineLocation &To) {
2171d99d7a6SLei Wang     // Skip the unchanged location mapping to save memory.
2181d99d7a6SLei Wang     if (From != To)
2191d99d7a6SLei Wang       IRToProfileLocationMap.insert({From, To});
2201d99d7a6SLei Wang   };
2211d99d7a6SLei Wang 
2221d99d7a6SLei Wang   // Use function's beginning location as the initial anchor.
2231d99d7a6SLei Wang   int32_t LocationDelta = 0;
2241d99d7a6SLei Wang   SmallVector<LineLocation> LastMatchedNonAnchors;
2251d99d7a6SLei Wang   for (const auto &IR : IRAnchors) {
2261d99d7a6SLei Wang     const auto &Loc = IR.first;
2271d99d7a6SLei Wang     bool IsMatchedAnchor = false;
2281d99d7a6SLei Wang     // Match the anchor location in lexical order.
2295b6f1511SLei Wang     auto R = MatchedAnchors.find(Loc);
2305b6f1511SLei Wang     if (R != MatchedAnchors.end()) {
2315b6f1511SLei Wang       const auto &Candidate = R->second;
2321d99d7a6SLei Wang       InsertMatching(Loc, Candidate);
2335b6f1511SLei Wang       LLVM_DEBUG(dbgs() << "Callsite with callee:" << IR.second.stringRef()
2341d99d7a6SLei Wang                         << " is matched from " << Loc << " to " << Candidate
2351d99d7a6SLei Wang                         << "\n");
2361d99d7a6SLei Wang       LocationDelta = Candidate.LineOffset - Loc.LineOffset;
2371d99d7a6SLei Wang 
2381d99d7a6SLei Wang       // Match backwards for non-anchor locations.
2391d99d7a6SLei Wang       // The locations in LastMatchedNonAnchors have been matched forwards
2401d99d7a6SLei Wang       // based on the previous anchor, spilt it evenly and overwrite the
2411d99d7a6SLei Wang       // second half based on the current anchor.
2421d99d7a6SLei Wang       for (size_t I = (LastMatchedNonAnchors.size() + 1) / 2;
2431d99d7a6SLei Wang            I < LastMatchedNonAnchors.size(); I++) {
2441d99d7a6SLei Wang         const auto &L = LastMatchedNonAnchors[I];
2451d99d7a6SLei Wang         uint32_t CandidateLineOffset = L.LineOffset + LocationDelta;
2461d99d7a6SLei Wang         LineLocation Candidate(CandidateLineOffset, L.Discriminator);
2471d99d7a6SLei Wang         InsertMatching(L, Candidate);
2481d99d7a6SLei Wang         LLVM_DEBUG(dbgs() << "Location is rematched backwards from " << L
2491d99d7a6SLei Wang                           << " to " << Candidate << "\n");
2501d99d7a6SLei Wang       }
2511d99d7a6SLei Wang 
2521d99d7a6SLei Wang       IsMatchedAnchor = true;
2531d99d7a6SLei Wang       LastMatchedNonAnchors.clear();
2541d99d7a6SLei Wang     }
2551d99d7a6SLei Wang 
2561d99d7a6SLei Wang     // Match forwards for non-anchor locations.
2571d99d7a6SLei Wang     if (!IsMatchedAnchor) {
2581d99d7a6SLei Wang       uint32_t CandidateLineOffset = Loc.LineOffset + LocationDelta;
2591d99d7a6SLei Wang       LineLocation Candidate(CandidateLineOffset, Loc.Discriminator);
2601d99d7a6SLei Wang       InsertMatching(Loc, Candidate);
2611d99d7a6SLei Wang       LLVM_DEBUG(dbgs() << "Location is matched from " << Loc << " to "
2621d99d7a6SLei Wang                         << Candidate << "\n");
2631d99d7a6SLei Wang       LastMatchedNonAnchors.emplace_back(Loc);
2641d99d7a6SLei Wang     }
2651d99d7a6SLei Wang   }
2661d99d7a6SLei Wang }
2671d99d7a6SLei Wang 
26818cdfa72SLei Wang // Filter the non-call locations from IRAnchors and ProfileAnchors and write
26918cdfa72SLei Wang // them into a list for random access later.
27018cdfa72SLei Wang void SampleProfileMatcher::getFilteredAnchorList(
27118cdfa72SLei Wang     const AnchorMap &IRAnchors, const AnchorMap &ProfileAnchors,
27218cdfa72SLei Wang     AnchorList &FilteredIRAnchorsList, AnchorList &FilteredProfileAnchorList) {
27318cdfa72SLei Wang   for (const auto &I : IRAnchors) {
27418cdfa72SLei Wang     if (I.second.stringRef().empty())
27518cdfa72SLei Wang       continue;
27618cdfa72SLei Wang     FilteredIRAnchorsList.emplace_back(I);
27718cdfa72SLei Wang   }
27818cdfa72SLei Wang 
27918cdfa72SLei Wang   for (const auto &I : ProfileAnchors)
28018cdfa72SLei Wang     FilteredProfileAnchorList.emplace_back(I);
28118cdfa72SLei Wang }
28218cdfa72SLei Wang 
2835b6f1511SLei Wang // Call target name anchor based profile fuzzy matching.
2845b6f1511SLei Wang // Input:
2855b6f1511SLei Wang // For IR locations, the anchor is the callee name of direct callsite; For
2865b6f1511SLei Wang // profile locations, it's the call target name for BodySamples or inlinee's
2875b6f1511SLei Wang // profile name for CallsiteSamples.
2885b6f1511SLei Wang // Matching heuristic:
2895b6f1511SLei Wang // First match all the anchors using the diff algorithm, then split the
2905b6f1511SLei Wang // non-anchor locations between the two anchors evenly, first half are matched
2915b6f1511SLei Wang // based on the start anchor, second half are matched based on the end anchor.
2925b6f1511SLei Wang // For example, given:
2935b6f1511SLei Wang // IR locations:      [1, 2(foo), 3, 5, 6(bar), 7]
2945b6f1511SLei Wang // Profile locations: [1, 2, 3(foo), 4, 7, 8(bar), 9]
2955b6f1511SLei Wang // The matching gives:
2965b6f1511SLei Wang //   [1,    2(foo), 3,  5,  6(bar), 7]
2975b6f1511SLei Wang //    |     |       |   |     |     |
2985b6f1511SLei Wang //   [1, 2, 3(foo), 4,  7,  8(bar), 9]
2995b6f1511SLei Wang // The output mapping: [2->3, 3->4, 5->7, 6->8, 7->9].
3005b6f1511SLei Wang void SampleProfileMatcher::runStaleProfileMatching(
3015b6f1511SLei Wang     const Function &F, const AnchorMap &IRAnchors,
30218cdfa72SLei Wang     const AnchorMap &ProfileAnchors, LocToLocMap &IRToProfileLocationMap,
30318cdfa72SLei Wang     bool RunCFGMatching, bool RunCGMatching) {
30418cdfa72SLei Wang   if (!RunCFGMatching && !RunCGMatching)
30518cdfa72SLei Wang     return;
3065b6f1511SLei Wang   LLVM_DEBUG(dbgs() << "Run stale profile matching for " << F.getName()
3075b6f1511SLei Wang                     << "\n");
3085b6f1511SLei Wang   assert(IRToProfileLocationMap.empty() &&
3095b6f1511SLei Wang          "Run stale profile matching only once per function");
3105b6f1511SLei Wang 
3115b6f1511SLei Wang   AnchorList FilteredProfileAnchorList;
3125b6f1511SLei Wang   AnchorList FilteredIRAnchorsList;
31318cdfa72SLei Wang   getFilteredAnchorList(IRAnchors, ProfileAnchors, FilteredIRAnchorsList,
31418cdfa72SLei Wang                         FilteredProfileAnchorList);
3155b6f1511SLei Wang 
3165b6f1511SLei Wang   if (FilteredIRAnchorsList.empty() || FilteredProfileAnchorList.empty())
3175b6f1511SLei Wang     return;
3185b6f1511SLei Wang 
31921ba91c4SKrzysztof Pszeniczny   if (FilteredIRAnchorsList.size() > SalvageStaleProfileMaxCallsites ||
32021ba91c4SKrzysztof Pszeniczny       FilteredProfileAnchorList.size() > SalvageStaleProfileMaxCallsites) {
32121ba91c4SKrzysztof Pszeniczny     LLVM_DEBUG(dbgs() << "Skip stale profile matching for " << F.getName()
32221ba91c4SKrzysztof Pszeniczny                       << " because the number of callsites in the IR is "
32321ba91c4SKrzysztof Pszeniczny                       << FilteredIRAnchorsList.size()
32421ba91c4SKrzysztof Pszeniczny                       << " and in the profile is "
32521ba91c4SKrzysztof Pszeniczny                       << FilteredProfileAnchorList.size() << "\n");
32621ba91c4SKrzysztof Pszeniczny     return;
32721ba91c4SKrzysztof Pszeniczny   }
32821ba91c4SKrzysztof Pszeniczny 
3295b6f1511SLei Wang   // Match the callsite anchors by finding the longest common subsequence
33018cdfa72SLei Wang   // between IR and profile.
33118cdfa72SLei Wang   // Define a match between two anchors as follows:
33218cdfa72SLei Wang   // 1) The function names of anchors are the same.
33318cdfa72SLei Wang   // 2) The similarity between the anchor functions is above a threshold if
33418cdfa72SLei Wang   // RunCGMatching is set.
33518cdfa72SLei Wang   // For 2), we only consider the anchor functions from IR and profile don't
33618cdfa72SLei Wang   // appear on either side to reduce the matching scope. Note that we need to
33718cdfa72SLei Wang   // use IR anchor as base(A side) to align with the order of
3385b6f1511SLei Wang   // IRToProfileLocationMap.
33918cdfa72SLei Wang   LocToLocMap MatchedAnchors =
34018cdfa72SLei Wang       longestCommonSequence(FilteredIRAnchorsList, FilteredProfileAnchorList,
34118cdfa72SLei Wang                             RunCGMatching /* Match unused functions */);
34218cdfa72SLei Wang 
34318cdfa72SLei Wang   // CFG level matching:
34418cdfa72SLei Wang   // Apply the callsite matchings to infer matching for the basic
34518cdfa72SLei Wang   // block(non-callsite) locations and write the result to
34618cdfa72SLei Wang   // IRToProfileLocationMap.
34718cdfa72SLei Wang   if (RunCFGMatching)
3485b6f1511SLei Wang     matchNonCallsiteLocs(MatchedAnchors, IRAnchors, IRToProfileLocationMap);
3495b6f1511SLei Wang }
3505b6f1511SLei Wang 
3511d99d7a6SLei Wang void SampleProfileMatcher::runOnFunction(Function &F) {
3521d99d7a6SLei Wang   // We need to use flattened function samples for matching.
3531d99d7a6SLei Wang   // Unlike IR, which includes all callsites from the source code, the callsites
3541d99d7a6SLei Wang   // in profile only show up when they are hit by samples, i,e. the profile
3551d99d7a6SLei Wang   // callsites in one context may differ from those in another context. To get
3561d99d7a6SLei Wang   // the maximum number of callsites, we merge the function profiles from all
3571d99d7a6SLei Wang   // contexts, aka, the flattened profile to find profile anchors.
3586e60330aSLei Wang   const auto *FSForMatching = getFlattenedSamplesFor(F);
3596e60330aSLei Wang   if (SalvageUnusedProfile && !FSForMatching) {
36018cdfa72SLei Wang     // Apply the matching in place to find the new function's matched profile.
36118cdfa72SLei Wang     auto R = FuncToProfileNameMap.find(&F);
3626e60330aSLei Wang     if (R != FuncToProfileNameMap.end()) {
3636e60330aSLei Wang       FSForMatching = getFlattenedSamplesFor(R->second);
3646e60330aSLei Wang       // Try to find the salvaged top-level profiles that are explicitly loaded
3656e60330aSLei Wang       // for the matching, see "functionMatchesProfileHelper" for the details.
3666e60330aSLei Wang       if (!FSForMatching && LoadFuncProfileforCGMatching)
3676e60330aSLei Wang         FSForMatching = Reader.getSamplesFor(R->second.stringRef());
36818cdfa72SLei Wang     }
3696e60330aSLei Wang   }
3706e60330aSLei Wang   if (!FSForMatching)
3711d99d7a6SLei Wang     return;
3721d99d7a6SLei Wang 
3731d99d7a6SLei Wang   // Anchors for IR. It's a map from IR location to callee name, callee name is
3741d99d7a6SLei Wang   // empty for non-call instruction and use a dummy name(UnknownIndirectCallee)
3751d99d7a6SLei Wang   // for unknown indrect callee name.
3765b6f1511SLei Wang   AnchorMap IRAnchors;
3771d99d7a6SLei Wang   findIRAnchors(F, IRAnchors);
3781d99d7a6SLei Wang   // Anchors for profile. It's a map from callsite location to a set of callee
3791d99d7a6SLei Wang   // name.
3805b6f1511SLei Wang   AnchorMap ProfileAnchors;
3816e60330aSLei Wang   findProfileAnchors(*FSForMatching, ProfileAnchors);
3821d99d7a6SLei Wang 
3831d99d7a6SLei Wang   // Compute the callsite match states for profile staleness report.
3841d99d7a6SLei Wang   if (ReportProfileStaleness || PersistProfileStaleness)
3851d99d7a6SLei Wang     recordCallsiteMatchStates(F, IRAnchors, ProfileAnchors, nullptr);
3861d99d7a6SLei Wang 
38718cdfa72SLei Wang   if (!SalvageStaleProfile)
38818cdfa72SLei Wang     return;
38918cdfa72SLei Wang   // For probe-based profiles, run matching only when profile checksum is
39018cdfa72SLei Wang   // mismatched.
39118cdfa72SLei Wang   bool ChecksumMismatch = FunctionSamples::ProfileIsProbeBased &&
3926e60330aSLei Wang                           !ProbeManager->profileIsValid(F, *FSForMatching);
39318cdfa72SLei Wang   bool RunCFGMatching =
39418cdfa72SLei Wang       !FunctionSamples::ProfileIsProbeBased || ChecksumMismatch;
39518cdfa72SLei Wang   bool RunCGMatching = SalvageUnusedProfile;
3961d99d7a6SLei Wang   // For imported functions, the checksum metadata(pseudo_probe_desc) are
3971d99d7a6SLei Wang   // dropped, so we leverage function attribute(profile-checksum-mismatch) to
3981d99d7a6SLei Wang   // transfer the info: add the attribute during pre-link phase and check it
3991d99d7a6SLei Wang   // during post-link phase(see "profileIsValid").
40018cdfa72SLei Wang   if (ChecksumMismatch && LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink)
4011d99d7a6SLei Wang     F.addFnAttr("profile-checksum-mismatch");
4021d99d7a6SLei Wang 
4031d99d7a6SLei Wang   // The matching result will be saved to IRToProfileLocationMap, create a
4041d99d7a6SLei Wang   // new map for each function.
4051d99d7a6SLei Wang   auto &IRToProfileLocationMap = getIRToProfileLocationMap(F);
40618cdfa72SLei Wang   runStaleProfileMatching(F, IRAnchors, ProfileAnchors, IRToProfileLocationMap,
40718cdfa72SLei Wang                           RunCFGMatching, RunCGMatching);
4081d99d7a6SLei Wang   // Find and update callsite match states after matching.
40918cdfa72SLei Wang   if (RunCFGMatching && (ReportProfileStaleness || PersistProfileStaleness))
4101d99d7a6SLei Wang     recordCallsiteMatchStates(F, IRAnchors, ProfileAnchors,
4111d99d7a6SLei Wang                               &IRToProfileLocationMap);
4121d99d7a6SLei Wang }
4131d99d7a6SLei Wang 
4141d99d7a6SLei Wang void SampleProfileMatcher::recordCallsiteMatchStates(
4155b6f1511SLei Wang     const Function &F, const AnchorMap &IRAnchors,
4165b6f1511SLei Wang     const AnchorMap &ProfileAnchors,
4171d99d7a6SLei Wang     const LocToLocMap *IRToProfileLocationMap) {
4181d99d7a6SLei Wang   bool IsPostMatch = IRToProfileLocationMap != nullptr;
4191d99d7a6SLei Wang   auto &CallsiteMatchStates =
4201d99d7a6SLei Wang       FuncCallsiteMatchStates[FunctionSamples::getCanonicalFnName(F.getName())];
4211d99d7a6SLei Wang 
4221d99d7a6SLei Wang   auto MapIRLocToProfileLoc = [&](const LineLocation &IRLoc) {
4231d99d7a6SLei Wang     // IRToProfileLocationMap is null in pre-match phrase.
4241d99d7a6SLei Wang     if (!IRToProfileLocationMap)
4251d99d7a6SLei Wang       return IRLoc;
4261d99d7a6SLei Wang     const auto &ProfileLoc = IRToProfileLocationMap->find(IRLoc);
4271d99d7a6SLei Wang     if (ProfileLoc != IRToProfileLocationMap->end())
4281d99d7a6SLei Wang       return ProfileLoc->second;
4291d99d7a6SLei Wang     else
4301d99d7a6SLei Wang       return IRLoc;
4311d99d7a6SLei Wang   };
4321d99d7a6SLei Wang 
4331d99d7a6SLei Wang   for (const auto &I : IRAnchors) {
4341d99d7a6SLei Wang     // After fuzzy profile matching, use the matching result to remap the
4351d99d7a6SLei Wang     // current IR callsite.
4361d99d7a6SLei Wang     const auto &ProfileLoc = MapIRLocToProfileLoc(I.first);
4375b6f1511SLei Wang     const auto &IRCalleeId = I.second;
4381d99d7a6SLei Wang     const auto &It = ProfileAnchors.find(ProfileLoc);
4391d99d7a6SLei Wang     if (It == ProfileAnchors.end())
4401d99d7a6SLei Wang       continue;
4415b6f1511SLei Wang     const auto &ProfCalleeId = It->second;
4425b6f1511SLei Wang     if (IRCalleeId == ProfCalleeId) {
4431d99d7a6SLei Wang       auto It = CallsiteMatchStates.find(ProfileLoc);
4441d99d7a6SLei Wang       if (It == CallsiteMatchStates.end())
4451d99d7a6SLei Wang         CallsiteMatchStates.emplace(ProfileLoc, MatchState::InitialMatch);
4461d99d7a6SLei Wang       else if (IsPostMatch) {
4471d99d7a6SLei Wang         if (It->second == MatchState::InitialMatch)
4481d99d7a6SLei Wang           It->second = MatchState::UnchangedMatch;
4491d99d7a6SLei Wang         else if (It->second == MatchState::InitialMismatch)
4501d99d7a6SLei Wang           It->second = MatchState::RecoveredMismatch;
4511d99d7a6SLei Wang       }
4521d99d7a6SLei Wang     }
4531d99d7a6SLei Wang   }
4541d99d7a6SLei Wang 
4551d99d7a6SLei Wang   // Check if there are any callsites in the profile that does not match to any
4561d99d7a6SLei Wang   // IR callsites.
4571d99d7a6SLei Wang   for (const auto &I : ProfileAnchors) {
4581d99d7a6SLei Wang     const auto &Loc = I.first;
4595b6f1511SLei Wang     assert(!I.second.stringRef().empty() && "Callees should not be empty");
4601d99d7a6SLei Wang     auto It = CallsiteMatchStates.find(Loc);
4611d99d7a6SLei Wang     if (It == CallsiteMatchStates.end())
4621d99d7a6SLei Wang       CallsiteMatchStates.emplace(Loc, MatchState::InitialMismatch);
4631d99d7a6SLei Wang     else if (IsPostMatch) {
4641d99d7a6SLei Wang       // Update the state if it's not matched(UnchangedMatch or
4651d99d7a6SLei Wang       // RecoveredMismatch).
4661d99d7a6SLei Wang       if (It->second == MatchState::InitialMismatch)
4671d99d7a6SLei Wang         It->second = MatchState::UnchangedMismatch;
4681d99d7a6SLei Wang       else if (It->second == MatchState::InitialMatch)
4691d99d7a6SLei Wang         It->second = MatchState::RemovedMatch;
4701d99d7a6SLei Wang     }
4711d99d7a6SLei Wang   }
4721d99d7a6SLei Wang }
4731d99d7a6SLei Wang 
4741d99d7a6SLei Wang void SampleProfileMatcher::countMismatchedFuncSamples(const FunctionSamples &FS,
4751d99d7a6SLei Wang                                                       bool IsTopLevel) {
4761d99d7a6SLei Wang   const auto *FuncDesc = ProbeManager->getDesc(FS.getGUID());
4771d99d7a6SLei Wang   // Skip the function that is external or renamed.
4781d99d7a6SLei Wang   if (!FuncDesc)
4791d99d7a6SLei Wang     return;
4801d99d7a6SLei Wang 
4811d99d7a6SLei Wang   if (ProbeManager->profileIsHashMismatched(*FuncDesc, FS)) {
4821d99d7a6SLei Wang     if (IsTopLevel)
4831d99d7a6SLei Wang       NumStaleProfileFunc++;
4841d99d7a6SLei Wang     // Given currently all probe ids are after block probe ids, once the
4851d99d7a6SLei Wang     // checksum is mismatched, it's likely all the callites are mismatched and
4861d99d7a6SLei Wang     // dropped. We conservatively count all the samples as mismatched and stop
4871d99d7a6SLei Wang     // counting the inlinees' profiles.
4881d99d7a6SLei Wang     MismatchedFunctionSamples += FS.getTotalSamples();
4891d99d7a6SLei Wang     return;
4901d99d7a6SLei Wang   }
4911d99d7a6SLei Wang 
4921d99d7a6SLei Wang   // Even the current-level function checksum is matched, it's possible that the
4931d99d7a6SLei Wang   // nested inlinees' checksums are mismatched that affect the inlinee's sample
4941d99d7a6SLei Wang   // loading, we need to go deeper to check the inlinees' function samples.
4951d99d7a6SLei Wang   // Similarly, count all the samples as mismatched if the inlinee's checksum is
4961d99d7a6SLei Wang   // mismatched using this recursive function.
4971d99d7a6SLei Wang   for (const auto &I : FS.getCallsiteSamples())
4981d99d7a6SLei Wang     for (const auto &CS : I.second)
4991d99d7a6SLei Wang       countMismatchedFuncSamples(CS.second, false);
5001d99d7a6SLei Wang }
5011d99d7a6SLei Wang 
5021d99d7a6SLei Wang void SampleProfileMatcher::countMismatchedCallsiteSamples(
5031d99d7a6SLei Wang     const FunctionSamples &FS) {
5041d99d7a6SLei Wang   auto It = FuncCallsiteMatchStates.find(FS.getFuncName());
5051d99d7a6SLei Wang   // Skip it if no mismatched callsite or this is an external function.
5061d99d7a6SLei Wang   if (It == FuncCallsiteMatchStates.end() || It->second.empty())
5071d99d7a6SLei Wang     return;
5081d99d7a6SLei Wang   const auto &CallsiteMatchStates = It->second;
5091d99d7a6SLei Wang 
5101d99d7a6SLei Wang   auto findMatchState = [&](const LineLocation &Loc) {
5111d99d7a6SLei Wang     auto It = CallsiteMatchStates.find(Loc);
5121d99d7a6SLei Wang     if (It == CallsiteMatchStates.end())
5131d99d7a6SLei Wang       return MatchState::Unknown;
5141d99d7a6SLei Wang     return It->second;
5151d99d7a6SLei Wang   };
5161d99d7a6SLei Wang 
5171d99d7a6SLei Wang   auto AttributeMismatchedSamples = [&](const enum MatchState &State,
5181d99d7a6SLei Wang                                         uint64_t Samples) {
5191d99d7a6SLei Wang     if (isMismatchState(State))
5201d99d7a6SLei Wang       MismatchedCallsiteSamples += Samples;
5211d99d7a6SLei Wang     else if (State == MatchState::RecoveredMismatch)
5221d99d7a6SLei Wang       RecoveredCallsiteSamples += Samples;
5231d99d7a6SLei Wang   };
5241d99d7a6SLei Wang 
5251d99d7a6SLei Wang   // The non-inlined callsites are saved in the body samples of function
5261d99d7a6SLei Wang   // profile, go through it to count the non-inlined callsite samples.
5271d99d7a6SLei Wang   for (const auto &I : FS.getBodySamples())
5281d99d7a6SLei Wang     AttributeMismatchedSamples(findMatchState(I.first), I.second.getSamples());
5291d99d7a6SLei Wang 
5301d99d7a6SLei Wang   // Count the inlined callsite samples.
5311d99d7a6SLei Wang   for (const auto &I : FS.getCallsiteSamples()) {
5321d99d7a6SLei Wang     auto State = findMatchState(I.first);
5331d99d7a6SLei Wang     uint64_t CallsiteSamples = 0;
5341d99d7a6SLei Wang     for (const auto &CS : I.second)
5351d99d7a6SLei Wang       CallsiteSamples += CS.second.getTotalSamples();
5361d99d7a6SLei Wang     AttributeMismatchedSamples(State, CallsiteSamples);
5371d99d7a6SLei Wang 
5381d99d7a6SLei Wang     if (isMismatchState(State))
5391d99d7a6SLei Wang       continue;
5401d99d7a6SLei Wang 
5411d99d7a6SLei Wang     // When the current level of inlined call site matches the profiled call
5421d99d7a6SLei Wang     // site, we need to go deeper along the inline tree to count mismatches from
5431d99d7a6SLei Wang     // lower level inlinees.
5441d99d7a6SLei Wang     for (const auto &CS : I.second)
5451d99d7a6SLei Wang       countMismatchedCallsiteSamples(CS.second);
5461d99d7a6SLei Wang   }
5471d99d7a6SLei Wang }
5481d99d7a6SLei Wang 
5491d99d7a6SLei Wang void SampleProfileMatcher::countMismatchCallsites(const FunctionSamples &FS) {
5501d99d7a6SLei Wang   auto It = FuncCallsiteMatchStates.find(FS.getFuncName());
5511d99d7a6SLei Wang   // Skip it if no mismatched callsite or this is an external function.
5521d99d7a6SLei Wang   if (It == FuncCallsiteMatchStates.end() || It->second.empty())
5531d99d7a6SLei Wang     return;
5541d99d7a6SLei Wang   const auto &MatchStates = It->second;
5551d99d7a6SLei Wang   [[maybe_unused]] bool OnInitialState =
5561d99d7a6SLei Wang       isInitialState(MatchStates.begin()->second);
5571d99d7a6SLei Wang   for (const auto &I : MatchStates) {
5581d99d7a6SLei Wang     TotalProfiledCallsites++;
5591d99d7a6SLei Wang     assert(
5601d99d7a6SLei Wang         (OnInitialState ? isInitialState(I.second) : isFinalState(I.second)) &&
5611d99d7a6SLei Wang         "Profile matching state is inconsistent");
5621d99d7a6SLei Wang 
5631d99d7a6SLei Wang     if (isMismatchState(I.second))
5641d99d7a6SLei Wang       NumMismatchedCallsites++;
5651d99d7a6SLei Wang     else if (I.second == MatchState::RecoveredMismatch)
5661d99d7a6SLei Wang       NumRecoveredCallsites++;
5671d99d7a6SLei Wang   }
5681d99d7a6SLei Wang }
5691d99d7a6SLei Wang 
57018cdfa72SLei Wang void SampleProfileMatcher::countCallGraphRecoveredSamples(
57118cdfa72SLei Wang     const FunctionSamples &FS,
57218cdfa72SLei Wang     std::unordered_set<FunctionId> &CallGraphRecoveredProfiles) {
57318cdfa72SLei Wang   if (CallGraphRecoveredProfiles.count(FS.getFunction())) {
57418cdfa72SLei Wang     NumCallGraphRecoveredFuncSamples += FS.getTotalSamples();
57518cdfa72SLei Wang     return;
57618cdfa72SLei Wang   }
57718cdfa72SLei Wang 
57818cdfa72SLei Wang   for (const auto &CM : FS.getCallsiteSamples()) {
57918cdfa72SLei Wang     for (const auto &CS : CM.second) {
58018cdfa72SLei Wang       countCallGraphRecoveredSamples(CS.second, CallGraphRecoveredProfiles);
58118cdfa72SLei Wang     }
58218cdfa72SLei Wang   }
58318cdfa72SLei Wang }
58418cdfa72SLei Wang 
5851d99d7a6SLei Wang void SampleProfileMatcher::computeAndReportProfileStaleness() {
5861d99d7a6SLei Wang   if (!ReportProfileStaleness && !PersistProfileStaleness)
5871d99d7a6SLei Wang     return;
5881d99d7a6SLei Wang 
58918cdfa72SLei Wang   std::unordered_set<FunctionId> CallGraphRecoveredProfiles;
59018cdfa72SLei Wang   if (SalvageUnusedProfile) {
59118cdfa72SLei Wang     for (const auto &I : FuncToProfileNameMap) {
59218cdfa72SLei Wang       CallGraphRecoveredProfiles.insert(I.second);
59318cdfa72SLei Wang       if (GlobalValue::isAvailableExternallyLinkage(I.first->getLinkage()))
59418cdfa72SLei Wang         continue;
59518cdfa72SLei Wang       NumCallGraphRecoveredProfiledFunc++;
59618cdfa72SLei Wang     }
59718cdfa72SLei Wang   }
59818cdfa72SLei Wang 
5991d99d7a6SLei Wang   // Count profile mismatches for profile staleness report.
6001d99d7a6SLei Wang   for (const auto &F : M) {
6011d99d7a6SLei Wang     if (skipProfileForFunction(F))
6021d99d7a6SLei Wang       continue;
6031d99d7a6SLei Wang     // As the stats will be merged by linker, skip reporting the metrics for
6041d99d7a6SLei Wang     // imported functions to avoid repeated counting.
6051d99d7a6SLei Wang     if (GlobalValue::isAvailableExternallyLinkage(F.getLinkage()))
6061d99d7a6SLei Wang       continue;
6071d99d7a6SLei Wang     const auto *FS = Reader.getSamplesFor(F);
6081d99d7a6SLei Wang     if (!FS)
6091d99d7a6SLei Wang       continue;
6101d99d7a6SLei Wang     TotalProfiledFunc++;
6111d99d7a6SLei Wang     TotalFunctionSamples += FS->getTotalSamples();
6121d99d7a6SLei Wang 
61318cdfa72SLei Wang     if (SalvageUnusedProfile && !CallGraphRecoveredProfiles.empty())
61418cdfa72SLei Wang       countCallGraphRecoveredSamples(*FS, CallGraphRecoveredProfiles);
61518cdfa72SLei Wang 
6161d99d7a6SLei Wang     // Checksum mismatch is only used in pseudo-probe mode.
6171d99d7a6SLei Wang     if (FunctionSamples::ProfileIsProbeBased)
6181d99d7a6SLei Wang       countMismatchedFuncSamples(*FS, true);
6191d99d7a6SLei Wang 
6201d99d7a6SLei Wang     // Count mismatches and samples for calliste.
6211d99d7a6SLei Wang     countMismatchCallsites(*FS);
6221d99d7a6SLei Wang     countMismatchedCallsiteSamples(*FS);
6231d99d7a6SLei Wang   }
6241d99d7a6SLei Wang 
6251d99d7a6SLei Wang   if (ReportProfileStaleness) {
6261d99d7a6SLei Wang     if (FunctionSamples::ProfileIsProbeBased) {
6271d99d7a6SLei Wang       errs() << "(" << NumStaleProfileFunc << "/" << TotalProfiledFunc
6281d99d7a6SLei Wang              << ") of functions' profile are invalid and ("
6291d99d7a6SLei Wang              << MismatchedFunctionSamples << "/" << TotalFunctionSamples
6301d99d7a6SLei Wang              << ") of samples are discarded due to function hash mismatch.\n";
6311d99d7a6SLei Wang     }
63218cdfa72SLei Wang     if (SalvageUnusedProfile) {
63318cdfa72SLei Wang       errs() << "(" << NumCallGraphRecoveredProfiledFunc << "/"
63418cdfa72SLei Wang              << TotalProfiledFunc << ") of functions' profile are matched and ("
63518cdfa72SLei Wang              << NumCallGraphRecoveredFuncSamples << "/" << TotalFunctionSamples
63618cdfa72SLei Wang              << ") of samples are reused by call graph matching.\n";
63718cdfa72SLei Wang     }
63818cdfa72SLei Wang 
6391d99d7a6SLei Wang     errs() << "(" << (NumMismatchedCallsites + NumRecoveredCallsites) << "/"
6401d99d7a6SLei Wang            << TotalProfiledCallsites
6411d99d7a6SLei Wang            << ") of callsites' profile are invalid and ("
6421d99d7a6SLei Wang            << (MismatchedCallsiteSamples + RecoveredCallsiteSamples) << "/"
6431d99d7a6SLei Wang            << TotalFunctionSamples
6441d99d7a6SLei Wang            << ") of samples are discarded due to callsite location mismatch.\n";
6451d99d7a6SLei Wang     errs() << "(" << NumRecoveredCallsites << "/"
6461d99d7a6SLei Wang            << (NumRecoveredCallsites + NumMismatchedCallsites)
6471d99d7a6SLei Wang            << ") of callsites and (" << RecoveredCallsiteSamples << "/"
6481d99d7a6SLei Wang            << (RecoveredCallsiteSamples + MismatchedCallsiteSamples)
6491d99d7a6SLei Wang            << ") of samples are recovered by stale profile matching.\n";
6501d99d7a6SLei Wang   }
6511d99d7a6SLei Wang 
6521d99d7a6SLei Wang   if (PersistProfileStaleness) {
6531d99d7a6SLei Wang     LLVMContext &Ctx = M.getContext();
6541d99d7a6SLei Wang     MDBuilder MDB(Ctx);
6551d99d7a6SLei Wang 
6561d99d7a6SLei Wang     SmallVector<std::pair<StringRef, uint64_t>> ProfStatsVec;
6571d99d7a6SLei Wang     if (FunctionSamples::ProfileIsProbeBased) {
6581d99d7a6SLei Wang       ProfStatsVec.emplace_back("NumStaleProfileFunc", NumStaleProfileFunc);
6591d99d7a6SLei Wang       ProfStatsVec.emplace_back("TotalProfiledFunc", TotalProfiledFunc);
6601d99d7a6SLei Wang       ProfStatsVec.emplace_back("MismatchedFunctionSamples",
6611d99d7a6SLei Wang                                 MismatchedFunctionSamples);
6621d99d7a6SLei Wang       ProfStatsVec.emplace_back("TotalFunctionSamples", TotalFunctionSamples);
6631d99d7a6SLei Wang     }
6641d99d7a6SLei Wang 
66518cdfa72SLei Wang     if (SalvageUnusedProfile) {
66618cdfa72SLei Wang       ProfStatsVec.emplace_back("NumCallGraphRecoveredProfiledFunc",
66718cdfa72SLei Wang                                 NumCallGraphRecoveredProfiledFunc);
66818cdfa72SLei Wang       ProfStatsVec.emplace_back("NumCallGraphRecoveredFuncSamples",
66918cdfa72SLei Wang                                 NumCallGraphRecoveredFuncSamples);
67018cdfa72SLei Wang     }
67118cdfa72SLei Wang 
6721d99d7a6SLei Wang     ProfStatsVec.emplace_back("NumMismatchedCallsites", NumMismatchedCallsites);
6731d99d7a6SLei Wang     ProfStatsVec.emplace_back("NumRecoveredCallsites", NumRecoveredCallsites);
6741d99d7a6SLei Wang     ProfStatsVec.emplace_back("TotalProfiledCallsites", TotalProfiledCallsites);
6751d99d7a6SLei Wang     ProfStatsVec.emplace_back("MismatchedCallsiteSamples",
6761d99d7a6SLei Wang                               MismatchedCallsiteSamples);
6771d99d7a6SLei Wang     ProfStatsVec.emplace_back("RecoveredCallsiteSamples",
6781d99d7a6SLei Wang                               RecoveredCallsiteSamples);
6791d99d7a6SLei Wang 
6801d99d7a6SLei Wang     auto *MD = MDB.createLLVMStats(ProfStatsVec);
6811d99d7a6SLei Wang     auto *NMD = M.getOrInsertNamedMetadata("llvm.stats");
6821d99d7a6SLei Wang     NMD->addOperand(MD);
6831d99d7a6SLei Wang   }
6841d99d7a6SLei Wang }
6851d99d7a6SLei Wang 
68618cdfa72SLei Wang void SampleProfileMatcher::findFunctionsWithoutProfile() {
68718cdfa72SLei Wang   // TODO: Support MD5 profile.
68818cdfa72SLei Wang   if (FunctionSamples::UseMD5)
68918cdfa72SLei Wang     return;
69018cdfa72SLei Wang   StringSet<> NamesInProfile;
69118cdfa72SLei Wang   if (auto NameTable = Reader.getNameTable()) {
69218cdfa72SLei Wang     for (auto Name : *NameTable)
69318cdfa72SLei Wang       NamesInProfile.insert(Name.stringRef());
69418cdfa72SLei Wang   }
69518cdfa72SLei Wang 
69618cdfa72SLei Wang   for (auto &F : M) {
69718cdfa72SLei Wang     // Skip declarations, as even if the function can be matched, we have
69818cdfa72SLei Wang     // nothing to do with it.
69918cdfa72SLei Wang     if (F.isDeclaration())
70018cdfa72SLei Wang       continue;
70118cdfa72SLei Wang 
70218cdfa72SLei Wang     StringRef CanonFName = FunctionSamples::getCanonicalFnName(F.getName());
70318cdfa72SLei Wang     const auto *FS = getFlattenedSamplesFor(F);
70418cdfa72SLei Wang     if (FS)
70518cdfa72SLei Wang       continue;
70618cdfa72SLei Wang 
70718cdfa72SLei Wang     // For extended binary, functions fully inlined may not be loaded in the
70818cdfa72SLei Wang     // top-level profile, so check the NameTable which has the all symbol names
70918cdfa72SLei Wang     // in profile.
71018cdfa72SLei Wang     if (NamesInProfile.count(CanonFName))
71118cdfa72SLei Wang       continue;
71218cdfa72SLei Wang 
71318cdfa72SLei Wang     // For extended binary, non-profiled function symbols are in the profile
71418cdfa72SLei Wang     // symbol list table.
71518cdfa72SLei Wang     if (PSL && PSL->contains(CanonFName))
71618cdfa72SLei Wang       continue;
71718cdfa72SLei Wang 
71818cdfa72SLei Wang     LLVM_DEBUG(dbgs() << "Function " << CanonFName
71918cdfa72SLei Wang                       << " is not in profile or profile symbol list.\n");
72018cdfa72SLei Wang     FunctionsWithoutProfile[FunctionId(CanonFName)] = &F;
72118cdfa72SLei Wang   }
72218cdfa72SLei Wang }
72318cdfa72SLei Wang 
72418cdfa72SLei Wang bool SampleProfileMatcher::functionMatchesProfileHelper(
72518cdfa72SLei Wang     const Function &IRFunc, const FunctionId &ProfFunc) {
72618cdfa72SLei Wang   // The value is in the range [0, 1]. The bigger the value is, the more similar
72718cdfa72SLei Wang   // two sequences are.
72818cdfa72SLei Wang   float Similarity = 0.0;
72918cdfa72SLei Wang 
7306e60330aSLei Wang   const auto *FSForMatching = getFlattenedSamplesFor(ProfFunc);
7316e60330aSLei Wang   // With extbinary profile format, initial profile loading only reads profile
7326e60330aSLei Wang   // based on current function names in the module.
7336e60330aSLei Wang   // However, if a function is renamed, sample loader skips to load its original
7346e60330aSLei Wang   // profile(which has a different name), we will miss this case. To address
7356e60330aSLei Wang   // this, we load the top-level profile candidate explicitly for the matching.
7366e60330aSLei Wang   if (!FSForMatching && LoadFuncProfileforCGMatching) {
7376e60330aSLei Wang     DenseSet<StringRef> TopLevelFunc({ProfFunc.stringRef()});
7386e60330aSLei Wang     if (std::error_code EC = Reader.read(TopLevelFunc))
7396e60330aSLei Wang       return false;
7406e60330aSLei Wang     FSForMatching = Reader.getSamplesFor(ProfFunc.stringRef());
7416e60330aSLei Wang     LLVM_DEBUG({
7426e60330aSLei Wang       if (FSForMatching)
7436e60330aSLei Wang         dbgs() << "Read top-level function " << ProfFunc
7446e60330aSLei Wang                << " for call-graph matching\n";
7456e60330aSLei Wang     });
7466e60330aSLei Wang   }
7476e60330aSLei Wang   if (!FSForMatching)
74818cdfa72SLei Wang     return false;
74918cdfa72SLei Wang   // The check for similarity or checksum may not be reliable if the function is
75018cdfa72SLei Wang   // tiny, we use the number of basic block as a proxy for the function
75118cdfa72SLei Wang   // complexity and skip the matching if it's too small.
75218cdfa72SLei Wang   if (IRFunc.size() < MinFuncCountForCGMatching ||
7536e60330aSLei Wang       FSForMatching->getBodySamples().size() < MinFuncCountForCGMatching)
75418cdfa72SLei Wang     return false;
75518cdfa72SLei Wang 
75618cdfa72SLei Wang   // For probe-based function, we first trust the checksum info. If the checksum
75718cdfa72SLei Wang   // doesn't match, we continue checking for similarity.
75818cdfa72SLei Wang   if (FunctionSamples::ProfileIsProbeBased) {
75918cdfa72SLei Wang     const auto *FuncDesc = ProbeManager->getDesc(IRFunc);
76018cdfa72SLei Wang     if (FuncDesc &&
7616e60330aSLei Wang         !ProbeManager->profileIsHashMismatched(*FuncDesc, *FSForMatching)) {
76218cdfa72SLei Wang       LLVM_DEBUG(dbgs() << "The checksums for " << IRFunc.getName()
76318cdfa72SLei Wang                         << "(IR) and " << ProfFunc << "(Profile) match.\n");
76418cdfa72SLei Wang 
76518cdfa72SLei Wang       return true;
76618cdfa72SLei Wang     }
76718cdfa72SLei Wang   }
76818cdfa72SLei Wang 
76918cdfa72SLei Wang   AnchorMap IRAnchors;
77018cdfa72SLei Wang   findIRAnchors(IRFunc, IRAnchors);
77118cdfa72SLei Wang   AnchorMap ProfileAnchors;
7726e60330aSLei Wang   findProfileAnchors(*FSForMatching, ProfileAnchors);
77318cdfa72SLei Wang 
77418cdfa72SLei Wang   AnchorList FilteredIRAnchorsList;
77518cdfa72SLei Wang   AnchorList FilteredProfileAnchorList;
77618cdfa72SLei Wang   getFilteredAnchorList(IRAnchors, ProfileAnchors, FilteredIRAnchorsList,
77718cdfa72SLei Wang                         FilteredProfileAnchorList);
77818cdfa72SLei Wang 
77918cdfa72SLei Wang   // Similarly skip the matching if the num of anchors is not enough.
78018cdfa72SLei Wang   if (FilteredIRAnchorsList.size() < MinCallCountForCGMatching ||
78118cdfa72SLei Wang       FilteredProfileAnchorList.size() < MinCallCountForCGMatching)
78218cdfa72SLei Wang     return false;
78318cdfa72SLei Wang 
78418cdfa72SLei Wang   // Use the diff algorithm to find the LCS between IR and profile.
78518cdfa72SLei Wang 
78618cdfa72SLei Wang   // Don't recursively match the callee function to avoid infinite matching,
78718cdfa72SLei Wang   // callee functions will be handled later since it's processed in top-down
78818cdfa72SLei Wang   // order .
78918cdfa72SLei Wang   LocToLocMap MatchedAnchors =
79018cdfa72SLei Wang       longestCommonSequence(FilteredIRAnchorsList, FilteredProfileAnchorList,
79118cdfa72SLei Wang                             false /* Match unused functions */);
79218cdfa72SLei Wang 
79318cdfa72SLei Wang   Similarity =
79418cdfa72SLei Wang       static_cast<float>(MatchedAnchors.size()) * 2 /
79518cdfa72SLei Wang       (FilteredIRAnchorsList.size() + FilteredProfileAnchorList.size());
79618cdfa72SLei Wang 
79718cdfa72SLei Wang   LLVM_DEBUG(dbgs() << "The similarity between " << IRFunc.getName()
79818cdfa72SLei Wang                     << "(IR) and " << ProfFunc << "(profile) is "
79918cdfa72SLei Wang                     << format("%.2f", Similarity) << "\n");
80018cdfa72SLei Wang   assert((Similarity >= 0 && Similarity <= 1.0) &&
80118cdfa72SLei Wang          "Similarity value should be in [0, 1]");
80218cdfa72SLei Wang   return Similarity * 100 > FuncProfileSimilarityThreshold;
80318cdfa72SLei Wang }
80418cdfa72SLei Wang 
80518cdfa72SLei Wang // If FindMatchedProfileOnly is set to true, only use the processed function
80618cdfa72SLei Wang // results. This is used for skipping the repeated recursive matching.
80718cdfa72SLei Wang bool SampleProfileMatcher::functionMatchesProfile(Function &IRFunc,
80818cdfa72SLei Wang                                                   const FunctionId &ProfFunc,
80918cdfa72SLei Wang                                                   bool FindMatchedProfileOnly) {
81018cdfa72SLei Wang   auto R = FuncProfileMatchCache.find({&IRFunc, ProfFunc});
81118cdfa72SLei Wang   if (R != FuncProfileMatchCache.end())
81218cdfa72SLei Wang     return R->second;
81318cdfa72SLei Wang 
81418cdfa72SLei Wang   if (FindMatchedProfileOnly)
81518cdfa72SLei Wang     return false;
81618cdfa72SLei Wang 
81718cdfa72SLei Wang   bool Matched = functionMatchesProfileHelper(IRFunc, ProfFunc);
81818cdfa72SLei Wang   FuncProfileMatchCache[{&IRFunc, ProfFunc}] = Matched;
81918cdfa72SLei Wang   if (Matched) {
82018cdfa72SLei Wang     FuncToProfileNameMap[&IRFunc] = ProfFunc;
82118cdfa72SLei Wang     LLVM_DEBUG(dbgs() << "Function:" << IRFunc.getName()
82218cdfa72SLei Wang                       << " matches profile:" << ProfFunc << "\n");
82318cdfa72SLei Wang   }
82418cdfa72SLei Wang 
82518cdfa72SLei Wang   return Matched;
82618cdfa72SLei Wang }
82718cdfa72SLei Wang 
8286e60330aSLei Wang void SampleProfileMatcher::UpdateWithSalvagedProfiles() {
8296e60330aSLei Wang   DenseSet<StringRef> ProfileSalvagedFuncs;
8306e60330aSLei Wang   // Update FuncNameToProfNameMap and SymbolMap.
8316e60330aSLei Wang   for (auto &I : FuncToProfileNameMap) {
8326e60330aSLei Wang     assert(I.first && "New function is null");
8336e60330aSLei Wang     FunctionId FuncName(I.first->getName());
8346e60330aSLei Wang     ProfileSalvagedFuncs.insert(I.second.stringRef());
8356e60330aSLei Wang     FuncNameToProfNameMap->emplace(FuncName, I.second);
8366e60330aSLei Wang 
8376e60330aSLei Wang     // We need to remove the old entry to avoid duplicating the function
8386e60330aSLei Wang     // processing.
8396e60330aSLei Wang     SymbolMap->erase(FuncName);
8406e60330aSLei Wang     SymbolMap->emplace(I.second, I.first);
8416e60330aSLei Wang   }
8426e60330aSLei Wang 
8436e60330aSLei Wang   // With extbinary profile format, initial profile loading only reads profile
8446e60330aSLei Wang   // based on current function names in the module, so we need to load top-level
8456e60330aSLei Wang   // profiles for functions with different profile name explicitly after
8466e60330aSLei Wang   // function-profile name map is established with stale profile matching.
8476e60330aSLei Wang   Reader.read(ProfileSalvagedFuncs);
8486e60330aSLei Wang   Reader.setFuncNameToProfNameMap(*FuncNameToProfNameMap);
8496e60330aSLei Wang }
8506e60330aSLei Wang 
8511d99d7a6SLei Wang void SampleProfileMatcher::runOnModule() {
8521d99d7a6SLei Wang   ProfileConverter::flattenProfile(Reader.getProfiles(), FlattenedProfiles,
8531d99d7a6SLei Wang                                    FunctionSamples::ProfileIsCS);
85418cdfa72SLei Wang   if (SalvageUnusedProfile)
85518cdfa72SLei Wang     findFunctionsWithoutProfile();
85618cdfa72SLei Wang 
85718cdfa72SLei Wang   // Process the matching in top-down order so that the caller matching result
85818cdfa72SLei Wang   // can be used to the callee matching.
85918cdfa72SLei Wang   std::vector<Function *> TopDownFunctionList;
86018cdfa72SLei Wang   TopDownFunctionList.reserve(M.size());
86118cdfa72SLei Wang   buildTopDownFuncOrder(CG, TopDownFunctionList);
86218cdfa72SLei Wang   for (auto *F : TopDownFunctionList) {
86318cdfa72SLei Wang     if (skipProfileForFunction(*F))
8641d99d7a6SLei Wang       continue;
86518cdfa72SLei Wang     runOnFunction(*F);
8661d99d7a6SLei Wang   }
86718cdfa72SLei Wang 
86818cdfa72SLei Wang   if (SalvageUnusedProfile)
8696e60330aSLei Wang     UpdateWithSalvagedProfiles();
87018cdfa72SLei Wang 
8711d99d7a6SLei Wang   if (SalvageStaleProfile)
8721d99d7a6SLei Wang     distributeIRToProfileLocationMap();
8731d99d7a6SLei Wang 
8741d99d7a6SLei Wang   computeAndReportProfileStaleness();
8751d99d7a6SLei Wang }
8761d99d7a6SLei Wang 
8771d99d7a6SLei Wang void SampleProfileMatcher::distributeIRToProfileLocationMap(
8781d99d7a6SLei Wang     FunctionSamples &FS) {
8791d99d7a6SLei Wang   const auto ProfileMappings = FuncMappings.find(FS.getFuncName());
8801d99d7a6SLei Wang   if (ProfileMappings != FuncMappings.end()) {
8811d99d7a6SLei Wang     FS.setIRToProfileLocationMap(&(ProfileMappings->second));
8821d99d7a6SLei Wang   }
8831d99d7a6SLei Wang 
8841d99d7a6SLei Wang   for (auto &Callees :
8851d99d7a6SLei Wang        const_cast<CallsiteSampleMap &>(FS.getCallsiteSamples())) {
8861d99d7a6SLei Wang     for (auto &FS : Callees.second) {
8871d99d7a6SLei Wang       distributeIRToProfileLocationMap(FS.second);
8881d99d7a6SLei Wang     }
8891d99d7a6SLei Wang   }
8901d99d7a6SLei Wang }
8911d99d7a6SLei Wang 
8921d99d7a6SLei Wang // Use a central place to distribute the matching results. Outlined and inlined
8931d99d7a6SLei Wang // profile with the function name will be set to the same pointer.
8941d99d7a6SLei Wang void SampleProfileMatcher::distributeIRToProfileLocationMap() {
8951d99d7a6SLei Wang   for (auto &I : Reader.getProfiles()) {
8961d99d7a6SLei Wang     distributeIRToProfileLocationMap(I.second);
8971d99d7a6SLei Wang   }
8981d99d7a6SLei Wang }
899