xref: /llvm-project/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp (revision 3a5791e75c53234bb25cb7a427c990d3cb6a0883)
1 //===- SampleProfileMatcher.cpp - Sampling-based Stale Profile Matcher ----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the SampleProfileMatcher used for stale
10 // profile matching.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/Transforms/IPO/SampleProfileMatcher.h"
15 #include "llvm/IR/IntrinsicInst.h"
16 #include "llvm/IR/MDBuilder.h"
17 #include "llvm/Support/CommandLine.h"
18 #include "llvm/Transforms/Utils/LongestCommonSequence.h"
19 
20 using namespace llvm;
21 using namespace sampleprof;
22 
23 #define DEBUG_TYPE "sample-profile-matcher"
24 
25 static cl::opt<unsigned> FuncProfileSimilarityThreshold(
26     "func-profile-similarity-threshold", cl::Hidden, cl::init(80),
27     cl::desc("Consider a profile matches a function if the similarity of their "
28              "callee sequences is above the specified percentile."));
29 
30 static cl::opt<unsigned> MinFuncCountForCGMatching(
31     "min-func-count-for-cg-matching", cl::Hidden, cl::init(5),
32     cl::desc("The minimum number of basic blocks required for a function to "
33              "run stale profile call graph matching."));
34 
35 static cl::opt<unsigned> MinCallCountForCGMatching(
36     "min-call-count-for-cg-matching", cl::Hidden, cl::init(3),
37     cl::desc("The minimum number of call anchors required for a function to "
38              "run stale profile call graph matching."));
39 
40 static cl::opt<bool> LoadFuncProfileforCGMatching(
41     "load-func-profile-for-cg-matching", cl::Hidden, cl::init(false),
42     cl::desc(
43         "Load top-level profiles that the sample reader initially skipped for "
44         "the call-graph matching (only meaningful for extended binary "
45         "format)"));
46 
47 extern cl::opt<bool> SalvageStaleProfile;
48 extern cl::opt<bool> SalvageUnusedProfile;
49 extern cl::opt<bool> PersistProfileStaleness;
50 extern cl::opt<bool> ReportProfileStaleness;
51 
52 static cl::opt<unsigned> SalvageStaleProfileMaxCallsites(
53     "salvage-stale-profile-max-callsites", cl::Hidden, cl::init(UINT_MAX),
54     cl::desc("The maximum number of callsites in a function, above which stale "
55              "profile matching will be skipped."));
56 
57 void SampleProfileMatcher::findIRAnchors(const Function &F,
58                                          AnchorMap &IRAnchors) const {
59   // For inlined code, recover the original callsite and callee by finding the
60   // top-level inline frame. e.g. For frame stack "main:1 @ foo:2 @ bar:3", the
61   // top-level frame is "main:1", the callsite is "1" and the callee is "foo".
62   auto FindTopLevelInlinedCallsite = [](const DILocation *DIL) {
63     assert((DIL && DIL->getInlinedAt()) && "No inlined callsite");
64     const DILocation *PrevDIL = nullptr;
65     do {
66       PrevDIL = DIL;
67       DIL = DIL->getInlinedAt();
68     } while (DIL->getInlinedAt());
69 
70     LineLocation Callsite = FunctionSamples::getCallSiteIdentifier(
71         DIL, FunctionSamples::ProfileIsFS);
72     StringRef CalleeName = PrevDIL->getSubprogramLinkageName();
73     return std::make_pair(Callsite, FunctionId(CalleeName));
74   };
75 
76   auto GetCanonicalCalleeName = [](const CallBase *CB) {
77     StringRef CalleeName = UnknownIndirectCallee;
78     if (Function *Callee = CB->getCalledFunction())
79       CalleeName = FunctionSamples::getCanonicalFnName(Callee->getName());
80     return CalleeName;
81   };
82 
83   // Extract profile matching anchors in the IR.
84   for (auto &BB : F) {
85     for (auto &I : BB) {
86       DILocation *DIL = I.getDebugLoc();
87       if (!DIL)
88         continue;
89 
90       if (FunctionSamples::ProfileIsProbeBased) {
91         if (auto Probe = extractProbe(I)) {
92           // Flatten inlined IR for the matching.
93           if (DIL->getInlinedAt()) {
94             IRAnchors.emplace(FindTopLevelInlinedCallsite(DIL));
95           } else {
96             // Use empty StringRef for basic block probe.
97             StringRef CalleeName;
98             if (const auto *CB = dyn_cast<CallBase>(&I)) {
99               // Skip the probe inst whose callee name is "llvm.pseudoprobe".
100               if (!isa<IntrinsicInst>(&I))
101                 CalleeName = GetCanonicalCalleeName(CB);
102             }
103             LineLocation Loc = LineLocation(Probe->Id, 0);
104             IRAnchors.emplace(Loc, FunctionId(CalleeName));
105           }
106         }
107       } else {
108         // TODO: For line-number based profile(AutoFDO), currently only support
109         // find callsite anchors. In future, we need to parse all the non-call
110         // instructions to extract the line locations for profile matching.
111         if (!isa<CallBase>(&I) || isa<IntrinsicInst>(&I))
112           continue;
113 
114         if (DIL->getInlinedAt()) {
115           IRAnchors.emplace(FindTopLevelInlinedCallsite(DIL));
116         } else {
117           LineLocation Callsite = FunctionSamples::getCallSiteIdentifier(
118               DIL, FunctionSamples::ProfileIsFS);
119           StringRef CalleeName = GetCanonicalCalleeName(dyn_cast<CallBase>(&I));
120           IRAnchors.emplace(Callsite, FunctionId(CalleeName));
121         }
122       }
123     }
124   }
125 }
126 
127 void SampleProfileMatcher::findProfileAnchors(const FunctionSamples &FS,
128                                               AnchorMap &ProfileAnchors) const {
129   auto isInvalidLineOffset = [](uint32_t LineOffset) {
130     return LineOffset & 0x8000;
131   };
132 
133   auto InsertAnchor = [](const LineLocation &Loc, const FunctionId &CalleeName,
134                          AnchorMap &ProfileAnchors) {
135     auto Ret = ProfileAnchors.try_emplace(Loc, CalleeName);
136     if (!Ret.second) {
137       // For multiple callees, which indicates it's an indirect call, we use a
138       // dummy name(UnknownIndirectCallee) as the indrect callee name.
139       Ret.first->second = FunctionId(UnknownIndirectCallee);
140     }
141   };
142 
143   for (const auto &I : FS.getBodySamples()) {
144     const LineLocation &Loc = I.first;
145     if (isInvalidLineOffset(Loc.LineOffset))
146       continue;
147     for (const auto &C : I.second.getCallTargets())
148       InsertAnchor(Loc, C.first, ProfileAnchors);
149   }
150 
151   for (const auto &I : FS.getCallsiteSamples()) {
152     const LineLocation &Loc = I.first;
153     if (isInvalidLineOffset(Loc.LineOffset))
154       continue;
155     for (const auto &C : I.second)
156       InsertAnchor(Loc, C.first, ProfileAnchors);
157   }
158 }
159 
160 bool SampleProfileMatcher::functionHasProfile(const FunctionId &IRFuncName,
161                                               Function *&FuncWithoutProfile) {
162   FuncWithoutProfile = nullptr;
163   auto R = FunctionsWithoutProfile.find(IRFuncName);
164   if (R != FunctionsWithoutProfile.end())
165     FuncWithoutProfile = R->second;
166   return !FuncWithoutProfile;
167 }
168 
169 bool SampleProfileMatcher::isProfileUnused(const FunctionId &ProfileFuncName) {
170   return SymbolMap->find(ProfileFuncName) == SymbolMap->end();
171 }
172 
173 bool SampleProfileMatcher::functionMatchesProfile(
174     const FunctionId &IRFuncName, const FunctionId &ProfileFuncName,
175     bool FindMatchedProfileOnly) {
176   if (IRFuncName == ProfileFuncName)
177     return true;
178   if (!SalvageUnusedProfile)
179     return false;
180 
181   // If IR function doesn't have profile and the profile is unused, try
182   // matching them.
183   Function *IRFunc = nullptr;
184   if (functionHasProfile(IRFuncName, IRFunc) ||
185       !isProfileUnused(ProfileFuncName))
186     return false;
187 
188   assert(FunctionId(IRFunc->getName()) != ProfileFuncName &&
189          "IR function should be different from profile function to match");
190   return functionMatchesProfile(*IRFunc, ProfileFuncName,
191                                 FindMatchedProfileOnly);
192 }
193 
194 LocToLocMap
195 SampleProfileMatcher::longestCommonSequence(const AnchorList &AnchorList1,
196                                             const AnchorList &AnchorList2,
197                                             bool MatchUnusedFunction) {
198   LocToLocMap MatchedAnchors;
199   llvm::longestCommonSequence<LineLocation, FunctionId>(
200       AnchorList1, AnchorList2,
201       [&](const FunctionId &A, const FunctionId &B) {
202         return functionMatchesProfile(
203             A, B,
204             !MatchUnusedFunction // Find matched function only
205         );
206       },
207       [&](LineLocation A, LineLocation B) {
208         MatchedAnchors.try_emplace(A, B);
209       });
210   return MatchedAnchors;
211 }
212 
213 void SampleProfileMatcher::matchNonCallsiteLocs(
214     const LocToLocMap &MatchedAnchors, const AnchorMap &IRAnchors,
215     LocToLocMap &IRToProfileLocationMap) {
216   auto InsertMatching = [&](const LineLocation &From, const LineLocation &To) {
217     // Skip the unchanged location mapping to save memory.
218     if (From != To)
219       IRToProfileLocationMap.insert({From, To});
220   };
221 
222   // Use function's beginning location as the initial anchor.
223   int32_t LocationDelta = 0;
224   SmallVector<LineLocation> LastMatchedNonAnchors;
225   for (const auto &IR : IRAnchors) {
226     const auto &Loc = IR.first;
227     bool IsMatchedAnchor = false;
228     // Match the anchor location in lexical order.
229     auto R = MatchedAnchors.find(Loc);
230     if (R != MatchedAnchors.end()) {
231       const auto &Candidate = R->second;
232       InsertMatching(Loc, Candidate);
233       LLVM_DEBUG(dbgs() << "Callsite with callee:" << IR.second.stringRef()
234                         << " is matched from " << Loc << " to " << Candidate
235                         << "\n");
236       LocationDelta = Candidate.LineOffset - Loc.LineOffset;
237 
238       // Match backwards for non-anchor locations.
239       // The locations in LastMatchedNonAnchors have been matched forwards
240       // based on the previous anchor, spilt it evenly and overwrite the
241       // second half based on the current anchor.
242       for (size_t I = (LastMatchedNonAnchors.size() + 1) / 2;
243            I < LastMatchedNonAnchors.size(); I++) {
244         const auto &L = LastMatchedNonAnchors[I];
245         uint32_t CandidateLineOffset = L.LineOffset + LocationDelta;
246         LineLocation Candidate(CandidateLineOffset, L.Discriminator);
247         InsertMatching(L, Candidate);
248         LLVM_DEBUG(dbgs() << "Location is rematched backwards from " << L
249                           << " to " << Candidate << "\n");
250       }
251 
252       IsMatchedAnchor = true;
253       LastMatchedNonAnchors.clear();
254     }
255 
256     // Match forwards for non-anchor locations.
257     if (!IsMatchedAnchor) {
258       uint32_t CandidateLineOffset = Loc.LineOffset + LocationDelta;
259       LineLocation Candidate(CandidateLineOffset, Loc.Discriminator);
260       InsertMatching(Loc, Candidate);
261       LLVM_DEBUG(dbgs() << "Location is matched from " << Loc << " to "
262                         << Candidate << "\n");
263       LastMatchedNonAnchors.emplace_back(Loc);
264     }
265   }
266 }
267 
268 // Filter the non-call locations from IRAnchors and ProfileAnchors and write
269 // them into a list for random access later.
270 void SampleProfileMatcher::getFilteredAnchorList(
271     const AnchorMap &IRAnchors, const AnchorMap &ProfileAnchors,
272     AnchorList &FilteredIRAnchorsList, AnchorList &FilteredProfileAnchorList) {
273   for (const auto &I : IRAnchors) {
274     if (I.second.stringRef().empty())
275       continue;
276     FilteredIRAnchorsList.emplace_back(I);
277   }
278 
279   for (const auto &I : ProfileAnchors)
280     FilteredProfileAnchorList.emplace_back(I);
281 }
282 
283 // Call target name anchor based profile fuzzy matching.
284 // Input:
285 // For IR locations, the anchor is the callee name of direct callsite; For
286 // profile locations, it's the call target name for BodySamples or inlinee's
287 // profile name for CallsiteSamples.
288 // Matching heuristic:
289 // First match all the anchors using the diff algorithm, then split the
290 // non-anchor locations between the two anchors evenly, first half are matched
291 // based on the start anchor, second half are matched based on the end anchor.
292 // For example, given:
293 // IR locations:      [1, 2(foo), 3, 5, 6(bar), 7]
294 // Profile locations: [1, 2, 3(foo), 4, 7, 8(bar), 9]
295 // The matching gives:
296 //   [1,    2(foo), 3,  5,  6(bar), 7]
297 //    |     |       |   |     |     |
298 //   [1, 2, 3(foo), 4,  7,  8(bar), 9]
299 // The output mapping: [2->3, 3->4, 5->7, 6->8, 7->9].
300 void SampleProfileMatcher::runStaleProfileMatching(
301     const Function &F, const AnchorMap &IRAnchors,
302     const AnchorMap &ProfileAnchors, LocToLocMap &IRToProfileLocationMap,
303     bool RunCFGMatching, bool RunCGMatching) {
304   if (!RunCFGMatching && !RunCGMatching)
305     return;
306   LLVM_DEBUG(dbgs() << "Run stale profile matching for " << F.getName()
307                     << "\n");
308   assert(IRToProfileLocationMap.empty() &&
309          "Run stale profile matching only once per function");
310 
311   AnchorList FilteredProfileAnchorList;
312   AnchorList FilteredIRAnchorsList;
313   getFilteredAnchorList(IRAnchors, ProfileAnchors, FilteredIRAnchorsList,
314                         FilteredProfileAnchorList);
315 
316   if (FilteredIRAnchorsList.empty() || FilteredProfileAnchorList.empty())
317     return;
318 
319   if (FilteredIRAnchorsList.size() > SalvageStaleProfileMaxCallsites ||
320       FilteredProfileAnchorList.size() > SalvageStaleProfileMaxCallsites) {
321     LLVM_DEBUG(dbgs() << "Skip stale profile matching for " << F.getName()
322                       << " because the number of callsites in the IR is "
323                       << FilteredIRAnchorsList.size()
324                       << " and in the profile is "
325                       << FilteredProfileAnchorList.size() << "\n");
326     return;
327   }
328 
329   // Match the callsite anchors by finding the longest common subsequence
330   // between IR and profile.
331   // Define a match between two anchors as follows:
332   // 1) The function names of anchors are the same.
333   // 2) The similarity between the anchor functions is above a threshold if
334   // RunCGMatching is set.
335   // For 2), we only consider the anchor functions from IR and profile don't
336   // appear on either side to reduce the matching scope. Note that we need to
337   // use IR anchor as base(A side) to align with the order of
338   // IRToProfileLocationMap.
339   LocToLocMap MatchedAnchors =
340       longestCommonSequence(FilteredIRAnchorsList, FilteredProfileAnchorList,
341                             RunCGMatching /* Match unused functions */);
342 
343   // CFG level matching:
344   // Apply the callsite matchings to infer matching for the basic
345   // block(non-callsite) locations and write the result to
346   // IRToProfileLocationMap.
347   if (RunCFGMatching)
348     matchNonCallsiteLocs(MatchedAnchors, IRAnchors, IRToProfileLocationMap);
349 }
350 
351 void SampleProfileMatcher::runOnFunction(Function &F) {
352   // We need to use flattened function samples for matching.
353   // Unlike IR, which includes all callsites from the source code, the callsites
354   // in profile only show up when they are hit by samples, i,e. the profile
355   // callsites in one context may differ from those in another context. To get
356   // the maximum number of callsites, we merge the function profiles from all
357   // contexts, aka, the flattened profile to find profile anchors.
358   const auto *FSForMatching = getFlattenedSamplesFor(F);
359   if (SalvageUnusedProfile && !FSForMatching) {
360     // Apply the matching in place to find the new function's matched profile.
361     auto R = FuncToProfileNameMap.find(&F);
362     if (R != FuncToProfileNameMap.end()) {
363       FSForMatching = getFlattenedSamplesFor(R->second);
364       // Try to find the salvaged top-level profiles that are explicitly loaded
365       // for the matching, see "functionMatchesProfileHelper" for the details.
366       if (!FSForMatching && LoadFuncProfileforCGMatching)
367         FSForMatching = Reader.getSamplesFor(R->second.stringRef());
368     }
369   }
370   if (!FSForMatching)
371     return;
372 
373   // Anchors for IR. It's a map from IR location to callee name, callee name is
374   // empty for non-call instruction and use a dummy name(UnknownIndirectCallee)
375   // for unknown indrect callee name.
376   AnchorMap IRAnchors;
377   findIRAnchors(F, IRAnchors);
378   // Anchors for profile. It's a map from callsite location to a set of callee
379   // name.
380   AnchorMap ProfileAnchors;
381   findProfileAnchors(*FSForMatching, ProfileAnchors);
382 
383   // Compute the callsite match states for profile staleness report.
384   if (ReportProfileStaleness || PersistProfileStaleness)
385     recordCallsiteMatchStates(F, IRAnchors, ProfileAnchors, nullptr);
386 
387   if (!SalvageStaleProfile)
388     return;
389   // For probe-based profiles, run matching only when profile checksum is
390   // mismatched.
391   bool ChecksumMismatch = FunctionSamples::ProfileIsProbeBased &&
392                           !ProbeManager->profileIsValid(F, *FSForMatching);
393   bool RunCFGMatching =
394       !FunctionSamples::ProfileIsProbeBased || ChecksumMismatch;
395   bool RunCGMatching = SalvageUnusedProfile;
396   // For imported functions, the checksum metadata(pseudo_probe_desc) are
397   // dropped, so we leverage function attribute(profile-checksum-mismatch) to
398   // transfer the info: add the attribute during pre-link phase and check it
399   // during post-link phase(see "profileIsValid").
400   if (ChecksumMismatch && LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink)
401     F.addFnAttr("profile-checksum-mismatch");
402 
403   // The matching result will be saved to IRToProfileLocationMap, create a
404   // new map for each function.
405   auto &IRToProfileLocationMap = getIRToProfileLocationMap(F);
406   runStaleProfileMatching(F, IRAnchors, ProfileAnchors, IRToProfileLocationMap,
407                           RunCFGMatching, RunCGMatching);
408   // Find and update callsite match states after matching.
409   if (RunCFGMatching && (ReportProfileStaleness || PersistProfileStaleness))
410     recordCallsiteMatchStates(F, IRAnchors, ProfileAnchors,
411                               &IRToProfileLocationMap);
412 }
413 
414 void SampleProfileMatcher::recordCallsiteMatchStates(
415     const Function &F, const AnchorMap &IRAnchors,
416     const AnchorMap &ProfileAnchors,
417     const LocToLocMap *IRToProfileLocationMap) {
418   bool IsPostMatch = IRToProfileLocationMap != nullptr;
419   auto &CallsiteMatchStates =
420       FuncCallsiteMatchStates[FunctionSamples::getCanonicalFnName(F.getName())];
421 
422   auto MapIRLocToProfileLoc = [&](const LineLocation &IRLoc) {
423     // IRToProfileLocationMap is null in pre-match phrase.
424     if (!IRToProfileLocationMap)
425       return IRLoc;
426     const auto &ProfileLoc = IRToProfileLocationMap->find(IRLoc);
427     if (ProfileLoc != IRToProfileLocationMap->end())
428       return ProfileLoc->second;
429     else
430       return IRLoc;
431   };
432 
433   for (const auto &I : IRAnchors) {
434     // After fuzzy profile matching, use the matching result to remap the
435     // current IR callsite.
436     const auto &ProfileLoc = MapIRLocToProfileLoc(I.first);
437     const auto &IRCalleeId = I.second;
438     const auto &It = ProfileAnchors.find(ProfileLoc);
439     if (It == ProfileAnchors.end())
440       continue;
441     const auto &ProfCalleeId = It->second;
442     if (IRCalleeId == ProfCalleeId) {
443       auto It = CallsiteMatchStates.find(ProfileLoc);
444       if (It == CallsiteMatchStates.end())
445         CallsiteMatchStates.emplace(ProfileLoc, MatchState::InitialMatch);
446       else if (IsPostMatch) {
447         if (It->second == MatchState::InitialMatch)
448           It->second = MatchState::UnchangedMatch;
449         else if (It->second == MatchState::InitialMismatch)
450           It->second = MatchState::RecoveredMismatch;
451       }
452     }
453   }
454 
455   // Check if there are any callsites in the profile that does not match to any
456   // IR callsites.
457   for (const auto &I : ProfileAnchors) {
458     const auto &Loc = I.first;
459     assert(!I.second.stringRef().empty() && "Callees should not be empty");
460     auto It = CallsiteMatchStates.find(Loc);
461     if (It == CallsiteMatchStates.end())
462       CallsiteMatchStates.emplace(Loc, MatchState::InitialMismatch);
463     else if (IsPostMatch) {
464       // Update the state if it's not matched(UnchangedMatch or
465       // RecoveredMismatch).
466       if (It->second == MatchState::InitialMismatch)
467         It->second = MatchState::UnchangedMismatch;
468       else if (It->second == MatchState::InitialMatch)
469         It->second = MatchState::RemovedMatch;
470     }
471   }
472 }
473 
474 void SampleProfileMatcher::countMismatchedFuncSamples(const FunctionSamples &FS,
475                                                       bool IsTopLevel) {
476   const auto *FuncDesc = ProbeManager->getDesc(FS.getGUID());
477   // Skip the function that is external or renamed.
478   if (!FuncDesc)
479     return;
480 
481   if (ProbeManager->profileIsHashMismatched(*FuncDesc, FS)) {
482     if (IsTopLevel)
483       NumStaleProfileFunc++;
484     // Given currently all probe ids are after block probe ids, once the
485     // checksum is mismatched, it's likely all the callites are mismatched and
486     // dropped. We conservatively count all the samples as mismatched and stop
487     // counting the inlinees' profiles.
488     MismatchedFunctionSamples += FS.getTotalSamples();
489     return;
490   }
491 
492   // Even the current-level function checksum is matched, it's possible that the
493   // nested inlinees' checksums are mismatched that affect the inlinee's sample
494   // loading, we need to go deeper to check the inlinees' function samples.
495   // Similarly, count all the samples as mismatched if the inlinee's checksum is
496   // mismatched using this recursive function.
497   for (const auto &I : FS.getCallsiteSamples())
498     for (const auto &CS : I.second)
499       countMismatchedFuncSamples(CS.second, false);
500 }
501 
502 void SampleProfileMatcher::countMismatchedCallsiteSamples(
503     const FunctionSamples &FS) {
504   auto It = FuncCallsiteMatchStates.find(FS.getFuncName());
505   // Skip it if no mismatched callsite or this is an external function.
506   if (It == FuncCallsiteMatchStates.end() || It->second.empty())
507     return;
508   const auto &CallsiteMatchStates = It->second;
509 
510   auto findMatchState = [&](const LineLocation &Loc) {
511     auto It = CallsiteMatchStates.find(Loc);
512     if (It == CallsiteMatchStates.end())
513       return MatchState::Unknown;
514     return It->second;
515   };
516 
517   auto AttributeMismatchedSamples = [&](const enum MatchState &State,
518                                         uint64_t Samples) {
519     if (isMismatchState(State))
520       MismatchedCallsiteSamples += Samples;
521     else if (State == MatchState::RecoveredMismatch)
522       RecoveredCallsiteSamples += Samples;
523   };
524 
525   // The non-inlined callsites are saved in the body samples of function
526   // profile, go through it to count the non-inlined callsite samples.
527   for (const auto &I : FS.getBodySamples())
528     AttributeMismatchedSamples(findMatchState(I.first), I.second.getSamples());
529 
530   // Count the inlined callsite samples.
531   for (const auto &I : FS.getCallsiteSamples()) {
532     auto State = findMatchState(I.first);
533     uint64_t CallsiteSamples = 0;
534     for (const auto &CS : I.second)
535       CallsiteSamples += CS.second.getTotalSamples();
536     AttributeMismatchedSamples(State, CallsiteSamples);
537 
538     if (isMismatchState(State))
539       continue;
540 
541     // When the current level of inlined call site matches the profiled call
542     // site, we need to go deeper along the inline tree to count mismatches from
543     // lower level inlinees.
544     for (const auto &CS : I.second)
545       countMismatchedCallsiteSamples(CS.second);
546   }
547 }
548 
549 void SampleProfileMatcher::countMismatchCallsites(const FunctionSamples &FS) {
550   auto It = FuncCallsiteMatchStates.find(FS.getFuncName());
551   // Skip it if no mismatched callsite or this is an external function.
552   if (It == FuncCallsiteMatchStates.end() || It->second.empty())
553     return;
554   const auto &MatchStates = It->second;
555   [[maybe_unused]] bool OnInitialState =
556       isInitialState(MatchStates.begin()->second);
557   for (const auto &I : MatchStates) {
558     TotalProfiledCallsites++;
559     assert(
560         (OnInitialState ? isInitialState(I.second) : isFinalState(I.second)) &&
561         "Profile matching state is inconsistent");
562 
563     if (isMismatchState(I.second))
564       NumMismatchedCallsites++;
565     else if (I.second == MatchState::RecoveredMismatch)
566       NumRecoveredCallsites++;
567   }
568 }
569 
570 void SampleProfileMatcher::countCallGraphRecoveredSamples(
571     const FunctionSamples &FS,
572     std::unordered_set<FunctionId> &CallGraphRecoveredProfiles) {
573   if (CallGraphRecoveredProfiles.count(FS.getFunction())) {
574     NumCallGraphRecoveredFuncSamples += FS.getTotalSamples();
575     return;
576   }
577 
578   for (const auto &CM : FS.getCallsiteSamples()) {
579     for (const auto &CS : CM.second) {
580       countCallGraphRecoveredSamples(CS.second, CallGraphRecoveredProfiles);
581     }
582   }
583 }
584 
585 void SampleProfileMatcher::computeAndReportProfileStaleness() {
586   if (!ReportProfileStaleness && !PersistProfileStaleness)
587     return;
588 
589   std::unordered_set<FunctionId> CallGraphRecoveredProfiles;
590   if (SalvageUnusedProfile) {
591     for (const auto &I : FuncToProfileNameMap) {
592       CallGraphRecoveredProfiles.insert(I.second);
593       if (GlobalValue::isAvailableExternallyLinkage(I.first->getLinkage()))
594         continue;
595       NumCallGraphRecoveredProfiledFunc++;
596     }
597   }
598 
599   // Count profile mismatches for profile staleness report.
600   for (const auto &F : M) {
601     if (skipProfileForFunction(F))
602       continue;
603     // As the stats will be merged by linker, skip reporting the metrics for
604     // imported functions to avoid repeated counting.
605     if (GlobalValue::isAvailableExternallyLinkage(F.getLinkage()))
606       continue;
607     const auto *FS = Reader.getSamplesFor(F);
608     if (!FS)
609       continue;
610     TotalProfiledFunc++;
611     TotalFunctionSamples += FS->getTotalSamples();
612 
613     if (SalvageUnusedProfile && !CallGraphRecoveredProfiles.empty())
614       countCallGraphRecoveredSamples(*FS, CallGraphRecoveredProfiles);
615 
616     // Checksum mismatch is only used in pseudo-probe mode.
617     if (FunctionSamples::ProfileIsProbeBased)
618       countMismatchedFuncSamples(*FS, true);
619 
620     // Count mismatches and samples for calliste.
621     countMismatchCallsites(*FS);
622     countMismatchedCallsiteSamples(*FS);
623   }
624 
625   if (ReportProfileStaleness) {
626     if (FunctionSamples::ProfileIsProbeBased) {
627       errs() << "(" << NumStaleProfileFunc << "/" << TotalProfiledFunc
628              << ") of functions' profile are invalid and ("
629              << MismatchedFunctionSamples << "/" << TotalFunctionSamples
630              << ") of samples are discarded due to function hash mismatch.\n";
631     }
632     if (SalvageUnusedProfile) {
633       errs() << "(" << NumCallGraphRecoveredProfiledFunc << "/"
634              << TotalProfiledFunc << ") of functions' profile are matched and ("
635              << NumCallGraphRecoveredFuncSamples << "/" << TotalFunctionSamples
636              << ") of samples are reused by call graph matching.\n";
637     }
638 
639     errs() << "(" << (NumMismatchedCallsites + NumRecoveredCallsites) << "/"
640            << TotalProfiledCallsites
641            << ") of callsites' profile are invalid and ("
642            << (MismatchedCallsiteSamples + RecoveredCallsiteSamples) << "/"
643            << TotalFunctionSamples
644            << ") of samples are discarded due to callsite location mismatch.\n";
645     errs() << "(" << NumRecoveredCallsites << "/"
646            << (NumRecoveredCallsites + NumMismatchedCallsites)
647            << ") of callsites and (" << RecoveredCallsiteSamples << "/"
648            << (RecoveredCallsiteSamples + MismatchedCallsiteSamples)
649            << ") of samples are recovered by stale profile matching.\n";
650   }
651 
652   if (PersistProfileStaleness) {
653     LLVMContext &Ctx = M.getContext();
654     MDBuilder MDB(Ctx);
655 
656     SmallVector<std::pair<StringRef, uint64_t>> ProfStatsVec;
657     if (FunctionSamples::ProfileIsProbeBased) {
658       ProfStatsVec.emplace_back("NumStaleProfileFunc", NumStaleProfileFunc);
659       ProfStatsVec.emplace_back("TotalProfiledFunc", TotalProfiledFunc);
660       ProfStatsVec.emplace_back("MismatchedFunctionSamples",
661                                 MismatchedFunctionSamples);
662       ProfStatsVec.emplace_back("TotalFunctionSamples", TotalFunctionSamples);
663     }
664 
665     if (SalvageUnusedProfile) {
666       ProfStatsVec.emplace_back("NumCallGraphRecoveredProfiledFunc",
667                                 NumCallGraphRecoveredProfiledFunc);
668       ProfStatsVec.emplace_back("NumCallGraphRecoveredFuncSamples",
669                                 NumCallGraphRecoveredFuncSamples);
670     }
671 
672     ProfStatsVec.emplace_back("NumMismatchedCallsites", NumMismatchedCallsites);
673     ProfStatsVec.emplace_back("NumRecoveredCallsites", NumRecoveredCallsites);
674     ProfStatsVec.emplace_back("TotalProfiledCallsites", TotalProfiledCallsites);
675     ProfStatsVec.emplace_back("MismatchedCallsiteSamples",
676                               MismatchedCallsiteSamples);
677     ProfStatsVec.emplace_back("RecoveredCallsiteSamples",
678                               RecoveredCallsiteSamples);
679 
680     auto *MD = MDB.createLLVMStats(ProfStatsVec);
681     auto *NMD = M.getOrInsertNamedMetadata("llvm.stats");
682     NMD->addOperand(MD);
683   }
684 }
685 
686 void SampleProfileMatcher::findFunctionsWithoutProfile() {
687   // TODO: Support MD5 profile.
688   if (FunctionSamples::UseMD5)
689     return;
690   StringSet<> NamesInProfile;
691   if (auto NameTable = Reader.getNameTable()) {
692     for (auto Name : *NameTable)
693       NamesInProfile.insert(Name.stringRef());
694   }
695 
696   for (auto &F : M) {
697     // Skip declarations, as even if the function can be matched, we have
698     // nothing to do with it.
699     if (F.isDeclaration())
700       continue;
701 
702     StringRef CanonFName = FunctionSamples::getCanonicalFnName(F.getName());
703     const auto *FS = getFlattenedSamplesFor(F);
704     if (FS)
705       continue;
706 
707     // For extended binary, functions fully inlined may not be loaded in the
708     // top-level profile, so check the NameTable which has the all symbol names
709     // in profile.
710     if (NamesInProfile.count(CanonFName))
711       continue;
712 
713     // For extended binary, non-profiled function symbols are in the profile
714     // symbol list table.
715     if (PSL && PSL->contains(CanonFName))
716       continue;
717 
718     LLVM_DEBUG(dbgs() << "Function " << CanonFName
719                       << " is not in profile or profile symbol list.\n");
720     FunctionsWithoutProfile[FunctionId(CanonFName)] = &F;
721   }
722 }
723 
724 bool SampleProfileMatcher::functionMatchesProfileHelper(
725     const Function &IRFunc, const FunctionId &ProfFunc) {
726   // The value is in the range [0, 1]. The bigger the value is, the more similar
727   // two sequences are.
728   float Similarity = 0.0;
729 
730   const auto *FSForMatching = getFlattenedSamplesFor(ProfFunc);
731   // With extbinary profile format, initial profile loading only reads profile
732   // based on current function names in the module.
733   // However, if a function is renamed, sample loader skips to load its original
734   // profile(which has a different name), we will miss this case. To address
735   // this, we load the top-level profile candidate explicitly for the matching.
736   if (!FSForMatching && LoadFuncProfileforCGMatching) {
737     DenseSet<StringRef> TopLevelFunc({ProfFunc.stringRef()});
738     if (std::error_code EC = Reader.read(TopLevelFunc))
739       return false;
740     FSForMatching = Reader.getSamplesFor(ProfFunc.stringRef());
741     LLVM_DEBUG({
742       if (FSForMatching)
743         dbgs() << "Read top-level function " << ProfFunc
744                << " for call-graph matching\n";
745     });
746   }
747   if (!FSForMatching)
748     return false;
749   // The check for similarity or checksum may not be reliable if the function is
750   // tiny, we use the number of basic block as a proxy for the function
751   // complexity and skip the matching if it's too small.
752   if (IRFunc.size() < MinFuncCountForCGMatching ||
753       FSForMatching->getBodySamples().size() < MinFuncCountForCGMatching)
754     return false;
755 
756   // For probe-based function, we first trust the checksum info. If the checksum
757   // doesn't match, we continue checking for similarity.
758   if (FunctionSamples::ProfileIsProbeBased) {
759     const auto *FuncDesc = ProbeManager->getDesc(IRFunc);
760     if (FuncDesc &&
761         !ProbeManager->profileIsHashMismatched(*FuncDesc, *FSForMatching)) {
762       LLVM_DEBUG(dbgs() << "The checksums for " << IRFunc.getName()
763                         << "(IR) and " << ProfFunc << "(Profile) match.\n");
764 
765       return true;
766     }
767   }
768 
769   AnchorMap IRAnchors;
770   findIRAnchors(IRFunc, IRAnchors);
771   AnchorMap ProfileAnchors;
772   findProfileAnchors(*FSForMatching, ProfileAnchors);
773 
774   AnchorList FilteredIRAnchorsList;
775   AnchorList FilteredProfileAnchorList;
776   getFilteredAnchorList(IRAnchors, ProfileAnchors, FilteredIRAnchorsList,
777                         FilteredProfileAnchorList);
778 
779   // Similarly skip the matching if the num of anchors is not enough.
780   if (FilteredIRAnchorsList.size() < MinCallCountForCGMatching ||
781       FilteredProfileAnchorList.size() < MinCallCountForCGMatching)
782     return false;
783 
784   // Use the diff algorithm to find the LCS between IR and profile.
785 
786   // Don't recursively match the callee function to avoid infinite matching,
787   // callee functions will be handled later since it's processed in top-down
788   // order .
789   LocToLocMap MatchedAnchors =
790       longestCommonSequence(FilteredIRAnchorsList, FilteredProfileAnchorList,
791                             false /* Match unused functions */);
792 
793   Similarity =
794       static_cast<float>(MatchedAnchors.size()) * 2 /
795       (FilteredIRAnchorsList.size() + FilteredProfileAnchorList.size());
796 
797   LLVM_DEBUG(dbgs() << "The similarity between " << IRFunc.getName()
798                     << "(IR) and " << ProfFunc << "(profile) is "
799                     << format("%.2f", Similarity) << "\n");
800   assert((Similarity >= 0 && Similarity <= 1.0) &&
801          "Similarity value should be in [0, 1]");
802   return Similarity * 100 > FuncProfileSimilarityThreshold;
803 }
804 
805 // If FindMatchedProfileOnly is set to true, only use the processed function
806 // results. This is used for skipping the repeated recursive matching.
807 bool SampleProfileMatcher::functionMatchesProfile(Function &IRFunc,
808                                                   const FunctionId &ProfFunc,
809                                                   bool FindMatchedProfileOnly) {
810   auto R = FuncProfileMatchCache.find({&IRFunc, ProfFunc});
811   if (R != FuncProfileMatchCache.end())
812     return R->second;
813 
814   if (FindMatchedProfileOnly)
815     return false;
816 
817   bool Matched = functionMatchesProfileHelper(IRFunc, ProfFunc);
818   FuncProfileMatchCache[{&IRFunc, ProfFunc}] = Matched;
819   if (Matched) {
820     FuncToProfileNameMap[&IRFunc] = ProfFunc;
821     LLVM_DEBUG(dbgs() << "Function:" << IRFunc.getName()
822                       << " matches profile:" << ProfFunc << "\n");
823   }
824 
825   return Matched;
826 }
827 
828 void SampleProfileMatcher::UpdateWithSalvagedProfiles() {
829   DenseSet<StringRef> ProfileSalvagedFuncs;
830   // Update FuncNameToProfNameMap and SymbolMap.
831   for (auto &I : FuncToProfileNameMap) {
832     assert(I.first && "New function is null");
833     FunctionId FuncName(I.first->getName());
834     ProfileSalvagedFuncs.insert(I.second.stringRef());
835     FuncNameToProfNameMap->emplace(FuncName, I.second);
836 
837     // We need to remove the old entry to avoid duplicating the function
838     // processing.
839     SymbolMap->erase(FuncName);
840     SymbolMap->emplace(I.second, I.first);
841   }
842 
843   // With extbinary profile format, initial profile loading only reads profile
844   // based on current function names in the module, so we need to load top-level
845   // profiles for functions with different profile name explicitly after
846   // function-profile name map is established with stale profile matching.
847   Reader.read(ProfileSalvagedFuncs);
848   Reader.setFuncNameToProfNameMap(*FuncNameToProfNameMap);
849 }
850 
851 void SampleProfileMatcher::runOnModule() {
852   ProfileConverter::flattenProfile(Reader.getProfiles(), FlattenedProfiles,
853                                    FunctionSamples::ProfileIsCS);
854   if (SalvageUnusedProfile)
855     findFunctionsWithoutProfile();
856 
857   // Process the matching in top-down order so that the caller matching result
858   // can be used to the callee matching.
859   std::vector<Function *> TopDownFunctionList;
860   TopDownFunctionList.reserve(M.size());
861   buildTopDownFuncOrder(CG, TopDownFunctionList);
862   for (auto *F : TopDownFunctionList) {
863     if (skipProfileForFunction(*F))
864       continue;
865     runOnFunction(*F);
866   }
867 
868   if (SalvageUnusedProfile)
869     UpdateWithSalvagedProfiles();
870 
871   if (SalvageStaleProfile)
872     distributeIRToProfileLocationMap();
873 
874   computeAndReportProfileStaleness();
875 }
876 
877 void SampleProfileMatcher::distributeIRToProfileLocationMap(
878     FunctionSamples &FS) {
879   const auto ProfileMappings = FuncMappings.find(FS.getFuncName());
880   if (ProfileMappings != FuncMappings.end()) {
881     FS.setIRToProfileLocationMap(&(ProfileMappings->second));
882   }
883 
884   for (auto &Callees :
885        const_cast<CallsiteSampleMap &>(FS.getCallsiteSamples())) {
886     for (auto &FS : Callees.second) {
887       distributeIRToProfileLocationMap(FS.second);
888     }
889   }
890 }
891 
892 // Use a central place to distribute the matching results. Outlined and inlined
893 // profile with the function name will be set to the same pointer.
894 void SampleProfileMatcher::distributeIRToProfileLocationMap() {
895   for (auto &I : Reader.getProfiles()) {
896     distributeIRToProfileLocationMap(I.second);
897   }
898 }
899