1 //===- SampleProfileMatcher.cpp - Sampling-based Stale Profile Matcher ----===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the SampleProfileMatcher used for stale 10 // profile matching. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/Transforms/IPO/SampleProfileMatcher.h" 15 #include "llvm/IR/IntrinsicInst.h" 16 #include "llvm/IR/MDBuilder.h" 17 #include "llvm/Support/CommandLine.h" 18 #include "llvm/Transforms/Utils/LongestCommonSequence.h" 19 20 using namespace llvm; 21 using namespace sampleprof; 22 23 #define DEBUG_TYPE "sample-profile-matcher" 24 25 static cl::opt<unsigned> FuncProfileSimilarityThreshold( 26 "func-profile-similarity-threshold", cl::Hidden, cl::init(80), 27 cl::desc("Consider a profile matches a function if the similarity of their " 28 "callee sequences is above the specified percentile.")); 29 30 static cl::opt<unsigned> MinFuncCountForCGMatching( 31 "min-func-count-for-cg-matching", cl::Hidden, cl::init(5), 32 cl::desc("The minimum number of basic blocks required for a function to " 33 "run stale profile call graph matching.")); 34 35 static cl::opt<unsigned> MinCallCountForCGMatching( 36 "min-call-count-for-cg-matching", cl::Hidden, cl::init(3), 37 cl::desc("The minimum number of call anchors required for a function to " 38 "run stale profile call graph matching.")); 39 40 static cl::opt<bool> LoadFuncProfileforCGMatching( 41 "load-func-profile-for-cg-matching", cl::Hidden, cl::init(false), 42 cl::desc( 43 "Load top-level profiles that the sample reader initially skipped for " 44 "the call-graph matching (only meaningful for extended binary " 45 "format)")); 46 47 extern cl::opt<bool> SalvageStaleProfile; 48 extern cl::opt<bool> SalvageUnusedProfile; 49 extern cl::opt<bool> PersistProfileStaleness; 50 extern cl::opt<bool> ReportProfileStaleness; 51 52 static cl::opt<unsigned> SalvageStaleProfileMaxCallsites( 53 "salvage-stale-profile-max-callsites", cl::Hidden, cl::init(UINT_MAX), 54 cl::desc("The maximum number of callsites in a function, above which stale " 55 "profile matching will be skipped.")); 56 57 void SampleProfileMatcher::findIRAnchors(const Function &F, 58 AnchorMap &IRAnchors) const { 59 // For inlined code, recover the original callsite and callee by finding the 60 // top-level inline frame. e.g. For frame stack "main:1 @ foo:2 @ bar:3", the 61 // top-level frame is "main:1", the callsite is "1" and the callee is "foo". 62 auto FindTopLevelInlinedCallsite = [](const DILocation *DIL) { 63 assert((DIL && DIL->getInlinedAt()) && "No inlined callsite"); 64 const DILocation *PrevDIL = nullptr; 65 do { 66 PrevDIL = DIL; 67 DIL = DIL->getInlinedAt(); 68 } while (DIL->getInlinedAt()); 69 70 LineLocation Callsite = FunctionSamples::getCallSiteIdentifier( 71 DIL, FunctionSamples::ProfileIsFS); 72 StringRef CalleeName = PrevDIL->getSubprogramLinkageName(); 73 return std::make_pair(Callsite, FunctionId(CalleeName)); 74 }; 75 76 auto GetCanonicalCalleeName = [](const CallBase *CB) { 77 StringRef CalleeName = UnknownIndirectCallee; 78 if (Function *Callee = CB->getCalledFunction()) 79 CalleeName = FunctionSamples::getCanonicalFnName(Callee->getName()); 80 return CalleeName; 81 }; 82 83 // Extract profile matching anchors in the IR. 84 for (auto &BB : F) { 85 for (auto &I : BB) { 86 DILocation *DIL = I.getDebugLoc(); 87 if (!DIL) 88 continue; 89 90 if (FunctionSamples::ProfileIsProbeBased) { 91 if (auto Probe = extractProbe(I)) { 92 // Flatten inlined IR for the matching. 93 if (DIL->getInlinedAt()) { 94 IRAnchors.emplace(FindTopLevelInlinedCallsite(DIL)); 95 } else { 96 // Use empty StringRef for basic block probe. 97 StringRef CalleeName; 98 if (const auto *CB = dyn_cast<CallBase>(&I)) { 99 // Skip the probe inst whose callee name is "llvm.pseudoprobe". 100 if (!isa<IntrinsicInst>(&I)) 101 CalleeName = GetCanonicalCalleeName(CB); 102 } 103 LineLocation Loc = LineLocation(Probe->Id, 0); 104 IRAnchors.emplace(Loc, FunctionId(CalleeName)); 105 } 106 } 107 } else { 108 // TODO: For line-number based profile(AutoFDO), currently only support 109 // find callsite anchors. In future, we need to parse all the non-call 110 // instructions to extract the line locations for profile matching. 111 if (!isa<CallBase>(&I) || isa<IntrinsicInst>(&I)) 112 continue; 113 114 if (DIL->getInlinedAt()) { 115 IRAnchors.emplace(FindTopLevelInlinedCallsite(DIL)); 116 } else { 117 LineLocation Callsite = FunctionSamples::getCallSiteIdentifier( 118 DIL, FunctionSamples::ProfileIsFS); 119 StringRef CalleeName = GetCanonicalCalleeName(dyn_cast<CallBase>(&I)); 120 IRAnchors.emplace(Callsite, FunctionId(CalleeName)); 121 } 122 } 123 } 124 } 125 } 126 127 void SampleProfileMatcher::findProfileAnchors(const FunctionSamples &FS, 128 AnchorMap &ProfileAnchors) const { 129 auto isInvalidLineOffset = [](uint32_t LineOffset) { 130 return LineOffset & 0x8000; 131 }; 132 133 auto InsertAnchor = [](const LineLocation &Loc, const FunctionId &CalleeName, 134 AnchorMap &ProfileAnchors) { 135 auto Ret = ProfileAnchors.try_emplace(Loc, CalleeName); 136 if (!Ret.second) { 137 // For multiple callees, which indicates it's an indirect call, we use a 138 // dummy name(UnknownIndirectCallee) as the indrect callee name. 139 Ret.first->second = FunctionId(UnknownIndirectCallee); 140 } 141 }; 142 143 for (const auto &I : FS.getBodySamples()) { 144 const LineLocation &Loc = I.first; 145 if (isInvalidLineOffset(Loc.LineOffset)) 146 continue; 147 for (const auto &C : I.second.getCallTargets()) 148 InsertAnchor(Loc, C.first, ProfileAnchors); 149 } 150 151 for (const auto &I : FS.getCallsiteSamples()) { 152 const LineLocation &Loc = I.first; 153 if (isInvalidLineOffset(Loc.LineOffset)) 154 continue; 155 for (const auto &C : I.second) 156 InsertAnchor(Loc, C.first, ProfileAnchors); 157 } 158 } 159 160 bool SampleProfileMatcher::functionHasProfile(const FunctionId &IRFuncName, 161 Function *&FuncWithoutProfile) { 162 FuncWithoutProfile = nullptr; 163 auto R = FunctionsWithoutProfile.find(IRFuncName); 164 if (R != FunctionsWithoutProfile.end()) 165 FuncWithoutProfile = R->second; 166 return !FuncWithoutProfile; 167 } 168 169 bool SampleProfileMatcher::isProfileUnused(const FunctionId &ProfileFuncName) { 170 return SymbolMap->find(ProfileFuncName) == SymbolMap->end(); 171 } 172 173 bool SampleProfileMatcher::functionMatchesProfile( 174 const FunctionId &IRFuncName, const FunctionId &ProfileFuncName, 175 bool FindMatchedProfileOnly) { 176 if (IRFuncName == ProfileFuncName) 177 return true; 178 if (!SalvageUnusedProfile) 179 return false; 180 181 // If IR function doesn't have profile and the profile is unused, try 182 // matching them. 183 Function *IRFunc = nullptr; 184 if (functionHasProfile(IRFuncName, IRFunc) || 185 !isProfileUnused(ProfileFuncName)) 186 return false; 187 188 assert(FunctionId(IRFunc->getName()) != ProfileFuncName && 189 "IR function should be different from profile function to match"); 190 return functionMatchesProfile(*IRFunc, ProfileFuncName, 191 FindMatchedProfileOnly); 192 } 193 194 LocToLocMap 195 SampleProfileMatcher::longestCommonSequence(const AnchorList &AnchorList1, 196 const AnchorList &AnchorList2, 197 bool MatchUnusedFunction) { 198 LocToLocMap MatchedAnchors; 199 llvm::longestCommonSequence<LineLocation, FunctionId>( 200 AnchorList1, AnchorList2, 201 [&](const FunctionId &A, const FunctionId &B) { 202 return functionMatchesProfile( 203 A, B, 204 !MatchUnusedFunction // Find matched function only 205 ); 206 }, 207 [&](LineLocation A, LineLocation B) { 208 MatchedAnchors.try_emplace(A, B); 209 }); 210 return MatchedAnchors; 211 } 212 213 void SampleProfileMatcher::matchNonCallsiteLocs( 214 const LocToLocMap &MatchedAnchors, const AnchorMap &IRAnchors, 215 LocToLocMap &IRToProfileLocationMap) { 216 auto InsertMatching = [&](const LineLocation &From, const LineLocation &To) { 217 // Skip the unchanged location mapping to save memory. 218 if (From != To) 219 IRToProfileLocationMap.insert({From, To}); 220 }; 221 222 // Use function's beginning location as the initial anchor. 223 int32_t LocationDelta = 0; 224 SmallVector<LineLocation> LastMatchedNonAnchors; 225 for (const auto &IR : IRAnchors) { 226 const auto &Loc = IR.first; 227 bool IsMatchedAnchor = false; 228 // Match the anchor location in lexical order. 229 auto R = MatchedAnchors.find(Loc); 230 if (R != MatchedAnchors.end()) { 231 const auto &Candidate = R->second; 232 InsertMatching(Loc, Candidate); 233 LLVM_DEBUG(dbgs() << "Callsite with callee:" << IR.second.stringRef() 234 << " is matched from " << Loc << " to " << Candidate 235 << "\n"); 236 LocationDelta = Candidate.LineOffset - Loc.LineOffset; 237 238 // Match backwards for non-anchor locations. 239 // The locations in LastMatchedNonAnchors have been matched forwards 240 // based on the previous anchor, spilt it evenly and overwrite the 241 // second half based on the current anchor. 242 for (size_t I = (LastMatchedNonAnchors.size() + 1) / 2; 243 I < LastMatchedNonAnchors.size(); I++) { 244 const auto &L = LastMatchedNonAnchors[I]; 245 uint32_t CandidateLineOffset = L.LineOffset + LocationDelta; 246 LineLocation Candidate(CandidateLineOffset, L.Discriminator); 247 InsertMatching(L, Candidate); 248 LLVM_DEBUG(dbgs() << "Location is rematched backwards from " << L 249 << " to " << Candidate << "\n"); 250 } 251 252 IsMatchedAnchor = true; 253 LastMatchedNonAnchors.clear(); 254 } 255 256 // Match forwards for non-anchor locations. 257 if (!IsMatchedAnchor) { 258 uint32_t CandidateLineOffset = Loc.LineOffset + LocationDelta; 259 LineLocation Candidate(CandidateLineOffset, Loc.Discriminator); 260 InsertMatching(Loc, Candidate); 261 LLVM_DEBUG(dbgs() << "Location is matched from " << Loc << " to " 262 << Candidate << "\n"); 263 LastMatchedNonAnchors.emplace_back(Loc); 264 } 265 } 266 } 267 268 // Filter the non-call locations from IRAnchors and ProfileAnchors and write 269 // them into a list for random access later. 270 void SampleProfileMatcher::getFilteredAnchorList( 271 const AnchorMap &IRAnchors, const AnchorMap &ProfileAnchors, 272 AnchorList &FilteredIRAnchorsList, AnchorList &FilteredProfileAnchorList) { 273 for (const auto &I : IRAnchors) { 274 if (I.second.stringRef().empty()) 275 continue; 276 FilteredIRAnchorsList.emplace_back(I); 277 } 278 279 for (const auto &I : ProfileAnchors) 280 FilteredProfileAnchorList.emplace_back(I); 281 } 282 283 // Call target name anchor based profile fuzzy matching. 284 // Input: 285 // For IR locations, the anchor is the callee name of direct callsite; For 286 // profile locations, it's the call target name for BodySamples or inlinee's 287 // profile name for CallsiteSamples. 288 // Matching heuristic: 289 // First match all the anchors using the diff algorithm, then split the 290 // non-anchor locations between the two anchors evenly, first half are matched 291 // based on the start anchor, second half are matched based on the end anchor. 292 // For example, given: 293 // IR locations: [1, 2(foo), 3, 5, 6(bar), 7] 294 // Profile locations: [1, 2, 3(foo), 4, 7, 8(bar), 9] 295 // The matching gives: 296 // [1, 2(foo), 3, 5, 6(bar), 7] 297 // | | | | | | 298 // [1, 2, 3(foo), 4, 7, 8(bar), 9] 299 // The output mapping: [2->3, 3->4, 5->7, 6->8, 7->9]. 300 void SampleProfileMatcher::runStaleProfileMatching( 301 const Function &F, const AnchorMap &IRAnchors, 302 const AnchorMap &ProfileAnchors, LocToLocMap &IRToProfileLocationMap, 303 bool RunCFGMatching, bool RunCGMatching) { 304 if (!RunCFGMatching && !RunCGMatching) 305 return; 306 LLVM_DEBUG(dbgs() << "Run stale profile matching for " << F.getName() 307 << "\n"); 308 assert(IRToProfileLocationMap.empty() && 309 "Run stale profile matching only once per function"); 310 311 AnchorList FilteredProfileAnchorList; 312 AnchorList FilteredIRAnchorsList; 313 getFilteredAnchorList(IRAnchors, ProfileAnchors, FilteredIRAnchorsList, 314 FilteredProfileAnchorList); 315 316 if (FilteredIRAnchorsList.empty() || FilteredProfileAnchorList.empty()) 317 return; 318 319 if (FilteredIRAnchorsList.size() > SalvageStaleProfileMaxCallsites || 320 FilteredProfileAnchorList.size() > SalvageStaleProfileMaxCallsites) { 321 LLVM_DEBUG(dbgs() << "Skip stale profile matching for " << F.getName() 322 << " because the number of callsites in the IR is " 323 << FilteredIRAnchorsList.size() 324 << " and in the profile is " 325 << FilteredProfileAnchorList.size() << "\n"); 326 return; 327 } 328 329 // Match the callsite anchors by finding the longest common subsequence 330 // between IR and profile. 331 // Define a match between two anchors as follows: 332 // 1) The function names of anchors are the same. 333 // 2) The similarity between the anchor functions is above a threshold if 334 // RunCGMatching is set. 335 // For 2), we only consider the anchor functions from IR and profile don't 336 // appear on either side to reduce the matching scope. Note that we need to 337 // use IR anchor as base(A side) to align with the order of 338 // IRToProfileLocationMap. 339 LocToLocMap MatchedAnchors = 340 longestCommonSequence(FilteredIRAnchorsList, FilteredProfileAnchorList, 341 RunCGMatching /* Match unused functions */); 342 343 // CFG level matching: 344 // Apply the callsite matchings to infer matching for the basic 345 // block(non-callsite) locations and write the result to 346 // IRToProfileLocationMap. 347 if (RunCFGMatching) 348 matchNonCallsiteLocs(MatchedAnchors, IRAnchors, IRToProfileLocationMap); 349 } 350 351 void SampleProfileMatcher::runOnFunction(Function &F) { 352 // We need to use flattened function samples for matching. 353 // Unlike IR, which includes all callsites from the source code, the callsites 354 // in profile only show up when they are hit by samples, i,e. the profile 355 // callsites in one context may differ from those in another context. To get 356 // the maximum number of callsites, we merge the function profiles from all 357 // contexts, aka, the flattened profile to find profile anchors. 358 const auto *FSForMatching = getFlattenedSamplesFor(F); 359 if (SalvageUnusedProfile && !FSForMatching) { 360 // Apply the matching in place to find the new function's matched profile. 361 auto R = FuncToProfileNameMap.find(&F); 362 if (R != FuncToProfileNameMap.end()) { 363 FSForMatching = getFlattenedSamplesFor(R->second); 364 // Try to find the salvaged top-level profiles that are explicitly loaded 365 // for the matching, see "functionMatchesProfileHelper" for the details. 366 if (!FSForMatching && LoadFuncProfileforCGMatching) 367 FSForMatching = Reader.getSamplesFor(R->second.stringRef()); 368 } 369 } 370 if (!FSForMatching) 371 return; 372 373 // Anchors for IR. It's a map from IR location to callee name, callee name is 374 // empty for non-call instruction and use a dummy name(UnknownIndirectCallee) 375 // for unknown indrect callee name. 376 AnchorMap IRAnchors; 377 findIRAnchors(F, IRAnchors); 378 // Anchors for profile. It's a map from callsite location to a set of callee 379 // name. 380 AnchorMap ProfileAnchors; 381 findProfileAnchors(*FSForMatching, ProfileAnchors); 382 383 // Compute the callsite match states for profile staleness report. 384 if (ReportProfileStaleness || PersistProfileStaleness) 385 recordCallsiteMatchStates(F, IRAnchors, ProfileAnchors, nullptr); 386 387 if (!SalvageStaleProfile) 388 return; 389 // For probe-based profiles, run matching only when profile checksum is 390 // mismatched. 391 bool ChecksumMismatch = FunctionSamples::ProfileIsProbeBased && 392 !ProbeManager->profileIsValid(F, *FSForMatching); 393 bool RunCFGMatching = 394 !FunctionSamples::ProfileIsProbeBased || ChecksumMismatch; 395 bool RunCGMatching = SalvageUnusedProfile; 396 // For imported functions, the checksum metadata(pseudo_probe_desc) are 397 // dropped, so we leverage function attribute(profile-checksum-mismatch) to 398 // transfer the info: add the attribute during pre-link phase and check it 399 // during post-link phase(see "profileIsValid"). 400 if (ChecksumMismatch && LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) 401 F.addFnAttr("profile-checksum-mismatch"); 402 403 // The matching result will be saved to IRToProfileLocationMap, create a 404 // new map for each function. 405 auto &IRToProfileLocationMap = getIRToProfileLocationMap(F); 406 runStaleProfileMatching(F, IRAnchors, ProfileAnchors, IRToProfileLocationMap, 407 RunCFGMatching, RunCGMatching); 408 // Find and update callsite match states after matching. 409 if (RunCFGMatching && (ReportProfileStaleness || PersistProfileStaleness)) 410 recordCallsiteMatchStates(F, IRAnchors, ProfileAnchors, 411 &IRToProfileLocationMap); 412 } 413 414 void SampleProfileMatcher::recordCallsiteMatchStates( 415 const Function &F, const AnchorMap &IRAnchors, 416 const AnchorMap &ProfileAnchors, 417 const LocToLocMap *IRToProfileLocationMap) { 418 bool IsPostMatch = IRToProfileLocationMap != nullptr; 419 auto &CallsiteMatchStates = 420 FuncCallsiteMatchStates[FunctionSamples::getCanonicalFnName(F.getName())]; 421 422 auto MapIRLocToProfileLoc = [&](const LineLocation &IRLoc) { 423 // IRToProfileLocationMap is null in pre-match phrase. 424 if (!IRToProfileLocationMap) 425 return IRLoc; 426 const auto &ProfileLoc = IRToProfileLocationMap->find(IRLoc); 427 if (ProfileLoc != IRToProfileLocationMap->end()) 428 return ProfileLoc->second; 429 else 430 return IRLoc; 431 }; 432 433 for (const auto &I : IRAnchors) { 434 // After fuzzy profile matching, use the matching result to remap the 435 // current IR callsite. 436 const auto &ProfileLoc = MapIRLocToProfileLoc(I.first); 437 const auto &IRCalleeId = I.second; 438 const auto &It = ProfileAnchors.find(ProfileLoc); 439 if (It == ProfileAnchors.end()) 440 continue; 441 const auto &ProfCalleeId = It->second; 442 if (IRCalleeId == ProfCalleeId) { 443 auto It = CallsiteMatchStates.find(ProfileLoc); 444 if (It == CallsiteMatchStates.end()) 445 CallsiteMatchStates.emplace(ProfileLoc, MatchState::InitialMatch); 446 else if (IsPostMatch) { 447 if (It->second == MatchState::InitialMatch) 448 It->second = MatchState::UnchangedMatch; 449 else if (It->second == MatchState::InitialMismatch) 450 It->second = MatchState::RecoveredMismatch; 451 } 452 } 453 } 454 455 // Check if there are any callsites in the profile that does not match to any 456 // IR callsites. 457 for (const auto &I : ProfileAnchors) { 458 const auto &Loc = I.first; 459 assert(!I.second.stringRef().empty() && "Callees should not be empty"); 460 auto It = CallsiteMatchStates.find(Loc); 461 if (It == CallsiteMatchStates.end()) 462 CallsiteMatchStates.emplace(Loc, MatchState::InitialMismatch); 463 else if (IsPostMatch) { 464 // Update the state if it's not matched(UnchangedMatch or 465 // RecoveredMismatch). 466 if (It->second == MatchState::InitialMismatch) 467 It->second = MatchState::UnchangedMismatch; 468 else if (It->second == MatchState::InitialMatch) 469 It->second = MatchState::RemovedMatch; 470 } 471 } 472 } 473 474 void SampleProfileMatcher::countMismatchedFuncSamples(const FunctionSamples &FS, 475 bool IsTopLevel) { 476 const auto *FuncDesc = ProbeManager->getDesc(FS.getGUID()); 477 // Skip the function that is external or renamed. 478 if (!FuncDesc) 479 return; 480 481 if (ProbeManager->profileIsHashMismatched(*FuncDesc, FS)) { 482 if (IsTopLevel) 483 NumStaleProfileFunc++; 484 // Given currently all probe ids are after block probe ids, once the 485 // checksum is mismatched, it's likely all the callites are mismatched and 486 // dropped. We conservatively count all the samples as mismatched and stop 487 // counting the inlinees' profiles. 488 MismatchedFunctionSamples += FS.getTotalSamples(); 489 return; 490 } 491 492 // Even the current-level function checksum is matched, it's possible that the 493 // nested inlinees' checksums are mismatched that affect the inlinee's sample 494 // loading, we need to go deeper to check the inlinees' function samples. 495 // Similarly, count all the samples as mismatched if the inlinee's checksum is 496 // mismatched using this recursive function. 497 for (const auto &I : FS.getCallsiteSamples()) 498 for (const auto &CS : I.second) 499 countMismatchedFuncSamples(CS.second, false); 500 } 501 502 void SampleProfileMatcher::countMismatchedCallsiteSamples( 503 const FunctionSamples &FS) { 504 auto It = FuncCallsiteMatchStates.find(FS.getFuncName()); 505 // Skip it if no mismatched callsite or this is an external function. 506 if (It == FuncCallsiteMatchStates.end() || It->second.empty()) 507 return; 508 const auto &CallsiteMatchStates = It->second; 509 510 auto findMatchState = [&](const LineLocation &Loc) { 511 auto It = CallsiteMatchStates.find(Loc); 512 if (It == CallsiteMatchStates.end()) 513 return MatchState::Unknown; 514 return It->second; 515 }; 516 517 auto AttributeMismatchedSamples = [&](const enum MatchState &State, 518 uint64_t Samples) { 519 if (isMismatchState(State)) 520 MismatchedCallsiteSamples += Samples; 521 else if (State == MatchState::RecoveredMismatch) 522 RecoveredCallsiteSamples += Samples; 523 }; 524 525 // The non-inlined callsites are saved in the body samples of function 526 // profile, go through it to count the non-inlined callsite samples. 527 for (const auto &I : FS.getBodySamples()) 528 AttributeMismatchedSamples(findMatchState(I.first), I.second.getSamples()); 529 530 // Count the inlined callsite samples. 531 for (const auto &I : FS.getCallsiteSamples()) { 532 auto State = findMatchState(I.first); 533 uint64_t CallsiteSamples = 0; 534 for (const auto &CS : I.second) 535 CallsiteSamples += CS.second.getTotalSamples(); 536 AttributeMismatchedSamples(State, CallsiteSamples); 537 538 if (isMismatchState(State)) 539 continue; 540 541 // When the current level of inlined call site matches the profiled call 542 // site, we need to go deeper along the inline tree to count mismatches from 543 // lower level inlinees. 544 for (const auto &CS : I.second) 545 countMismatchedCallsiteSamples(CS.second); 546 } 547 } 548 549 void SampleProfileMatcher::countMismatchCallsites(const FunctionSamples &FS) { 550 auto It = FuncCallsiteMatchStates.find(FS.getFuncName()); 551 // Skip it if no mismatched callsite or this is an external function. 552 if (It == FuncCallsiteMatchStates.end() || It->second.empty()) 553 return; 554 const auto &MatchStates = It->second; 555 [[maybe_unused]] bool OnInitialState = 556 isInitialState(MatchStates.begin()->second); 557 for (const auto &I : MatchStates) { 558 TotalProfiledCallsites++; 559 assert( 560 (OnInitialState ? isInitialState(I.second) : isFinalState(I.second)) && 561 "Profile matching state is inconsistent"); 562 563 if (isMismatchState(I.second)) 564 NumMismatchedCallsites++; 565 else if (I.second == MatchState::RecoveredMismatch) 566 NumRecoveredCallsites++; 567 } 568 } 569 570 void SampleProfileMatcher::countCallGraphRecoveredSamples( 571 const FunctionSamples &FS, 572 std::unordered_set<FunctionId> &CallGraphRecoveredProfiles) { 573 if (CallGraphRecoveredProfiles.count(FS.getFunction())) { 574 NumCallGraphRecoveredFuncSamples += FS.getTotalSamples(); 575 return; 576 } 577 578 for (const auto &CM : FS.getCallsiteSamples()) { 579 for (const auto &CS : CM.second) { 580 countCallGraphRecoveredSamples(CS.second, CallGraphRecoveredProfiles); 581 } 582 } 583 } 584 585 void SampleProfileMatcher::computeAndReportProfileStaleness() { 586 if (!ReportProfileStaleness && !PersistProfileStaleness) 587 return; 588 589 std::unordered_set<FunctionId> CallGraphRecoveredProfiles; 590 if (SalvageUnusedProfile) { 591 for (const auto &I : FuncToProfileNameMap) { 592 CallGraphRecoveredProfiles.insert(I.second); 593 if (GlobalValue::isAvailableExternallyLinkage(I.first->getLinkage())) 594 continue; 595 NumCallGraphRecoveredProfiledFunc++; 596 } 597 } 598 599 // Count profile mismatches for profile staleness report. 600 for (const auto &F : M) { 601 if (skipProfileForFunction(F)) 602 continue; 603 // As the stats will be merged by linker, skip reporting the metrics for 604 // imported functions to avoid repeated counting. 605 if (GlobalValue::isAvailableExternallyLinkage(F.getLinkage())) 606 continue; 607 const auto *FS = Reader.getSamplesFor(F); 608 if (!FS) 609 continue; 610 TotalProfiledFunc++; 611 TotalFunctionSamples += FS->getTotalSamples(); 612 613 if (SalvageUnusedProfile && !CallGraphRecoveredProfiles.empty()) 614 countCallGraphRecoveredSamples(*FS, CallGraphRecoveredProfiles); 615 616 // Checksum mismatch is only used in pseudo-probe mode. 617 if (FunctionSamples::ProfileIsProbeBased) 618 countMismatchedFuncSamples(*FS, true); 619 620 // Count mismatches and samples for calliste. 621 countMismatchCallsites(*FS); 622 countMismatchedCallsiteSamples(*FS); 623 } 624 625 if (ReportProfileStaleness) { 626 if (FunctionSamples::ProfileIsProbeBased) { 627 errs() << "(" << NumStaleProfileFunc << "/" << TotalProfiledFunc 628 << ") of functions' profile are invalid and (" 629 << MismatchedFunctionSamples << "/" << TotalFunctionSamples 630 << ") of samples are discarded due to function hash mismatch.\n"; 631 } 632 if (SalvageUnusedProfile) { 633 errs() << "(" << NumCallGraphRecoveredProfiledFunc << "/" 634 << TotalProfiledFunc << ") of functions' profile are matched and (" 635 << NumCallGraphRecoveredFuncSamples << "/" << TotalFunctionSamples 636 << ") of samples are reused by call graph matching.\n"; 637 } 638 639 errs() << "(" << (NumMismatchedCallsites + NumRecoveredCallsites) << "/" 640 << TotalProfiledCallsites 641 << ") of callsites' profile are invalid and (" 642 << (MismatchedCallsiteSamples + RecoveredCallsiteSamples) << "/" 643 << TotalFunctionSamples 644 << ") of samples are discarded due to callsite location mismatch.\n"; 645 errs() << "(" << NumRecoveredCallsites << "/" 646 << (NumRecoveredCallsites + NumMismatchedCallsites) 647 << ") of callsites and (" << RecoveredCallsiteSamples << "/" 648 << (RecoveredCallsiteSamples + MismatchedCallsiteSamples) 649 << ") of samples are recovered by stale profile matching.\n"; 650 } 651 652 if (PersistProfileStaleness) { 653 LLVMContext &Ctx = M.getContext(); 654 MDBuilder MDB(Ctx); 655 656 SmallVector<std::pair<StringRef, uint64_t>> ProfStatsVec; 657 if (FunctionSamples::ProfileIsProbeBased) { 658 ProfStatsVec.emplace_back("NumStaleProfileFunc", NumStaleProfileFunc); 659 ProfStatsVec.emplace_back("TotalProfiledFunc", TotalProfiledFunc); 660 ProfStatsVec.emplace_back("MismatchedFunctionSamples", 661 MismatchedFunctionSamples); 662 ProfStatsVec.emplace_back("TotalFunctionSamples", TotalFunctionSamples); 663 } 664 665 if (SalvageUnusedProfile) { 666 ProfStatsVec.emplace_back("NumCallGraphRecoveredProfiledFunc", 667 NumCallGraphRecoveredProfiledFunc); 668 ProfStatsVec.emplace_back("NumCallGraphRecoveredFuncSamples", 669 NumCallGraphRecoveredFuncSamples); 670 } 671 672 ProfStatsVec.emplace_back("NumMismatchedCallsites", NumMismatchedCallsites); 673 ProfStatsVec.emplace_back("NumRecoveredCallsites", NumRecoveredCallsites); 674 ProfStatsVec.emplace_back("TotalProfiledCallsites", TotalProfiledCallsites); 675 ProfStatsVec.emplace_back("MismatchedCallsiteSamples", 676 MismatchedCallsiteSamples); 677 ProfStatsVec.emplace_back("RecoveredCallsiteSamples", 678 RecoveredCallsiteSamples); 679 680 auto *MD = MDB.createLLVMStats(ProfStatsVec); 681 auto *NMD = M.getOrInsertNamedMetadata("llvm.stats"); 682 NMD->addOperand(MD); 683 } 684 } 685 686 void SampleProfileMatcher::findFunctionsWithoutProfile() { 687 // TODO: Support MD5 profile. 688 if (FunctionSamples::UseMD5) 689 return; 690 StringSet<> NamesInProfile; 691 if (auto NameTable = Reader.getNameTable()) { 692 for (auto Name : *NameTable) 693 NamesInProfile.insert(Name.stringRef()); 694 } 695 696 for (auto &F : M) { 697 // Skip declarations, as even if the function can be matched, we have 698 // nothing to do with it. 699 if (F.isDeclaration()) 700 continue; 701 702 StringRef CanonFName = FunctionSamples::getCanonicalFnName(F.getName()); 703 const auto *FS = getFlattenedSamplesFor(F); 704 if (FS) 705 continue; 706 707 // For extended binary, functions fully inlined may not be loaded in the 708 // top-level profile, so check the NameTable which has the all symbol names 709 // in profile. 710 if (NamesInProfile.count(CanonFName)) 711 continue; 712 713 // For extended binary, non-profiled function symbols are in the profile 714 // symbol list table. 715 if (PSL && PSL->contains(CanonFName)) 716 continue; 717 718 LLVM_DEBUG(dbgs() << "Function " << CanonFName 719 << " is not in profile or profile symbol list.\n"); 720 FunctionsWithoutProfile[FunctionId(CanonFName)] = &F; 721 } 722 } 723 724 bool SampleProfileMatcher::functionMatchesProfileHelper( 725 const Function &IRFunc, const FunctionId &ProfFunc) { 726 // The value is in the range [0, 1]. The bigger the value is, the more similar 727 // two sequences are. 728 float Similarity = 0.0; 729 730 const auto *FSForMatching = getFlattenedSamplesFor(ProfFunc); 731 // With extbinary profile format, initial profile loading only reads profile 732 // based on current function names in the module. 733 // However, if a function is renamed, sample loader skips to load its original 734 // profile(which has a different name), we will miss this case. To address 735 // this, we load the top-level profile candidate explicitly for the matching. 736 if (!FSForMatching && LoadFuncProfileforCGMatching) { 737 DenseSet<StringRef> TopLevelFunc({ProfFunc.stringRef()}); 738 if (std::error_code EC = Reader.read(TopLevelFunc)) 739 return false; 740 FSForMatching = Reader.getSamplesFor(ProfFunc.stringRef()); 741 LLVM_DEBUG({ 742 if (FSForMatching) 743 dbgs() << "Read top-level function " << ProfFunc 744 << " for call-graph matching\n"; 745 }); 746 } 747 if (!FSForMatching) 748 return false; 749 // The check for similarity or checksum may not be reliable if the function is 750 // tiny, we use the number of basic block as a proxy for the function 751 // complexity and skip the matching if it's too small. 752 if (IRFunc.size() < MinFuncCountForCGMatching || 753 FSForMatching->getBodySamples().size() < MinFuncCountForCGMatching) 754 return false; 755 756 // For probe-based function, we first trust the checksum info. If the checksum 757 // doesn't match, we continue checking for similarity. 758 if (FunctionSamples::ProfileIsProbeBased) { 759 const auto *FuncDesc = ProbeManager->getDesc(IRFunc); 760 if (FuncDesc && 761 !ProbeManager->profileIsHashMismatched(*FuncDesc, *FSForMatching)) { 762 LLVM_DEBUG(dbgs() << "The checksums for " << IRFunc.getName() 763 << "(IR) and " << ProfFunc << "(Profile) match.\n"); 764 765 return true; 766 } 767 } 768 769 AnchorMap IRAnchors; 770 findIRAnchors(IRFunc, IRAnchors); 771 AnchorMap ProfileAnchors; 772 findProfileAnchors(*FSForMatching, ProfileAnchors); 773 774 AnchorList FilteredIRAnchorsList; 775 AnchorList FilteredProfileAnchorList; 776 getFilteredAnchorList(IRAnchors, ProfileAnchors, FilteredIRAnchorsList, 777 FilteredProfileAnchorList); 778 779 // Similarly skip the matching if the num of anchors is not enough. 780 if (FilteredIRAnchorsList.size() < MinCallCountForCGMatching || 781 FilteredProfileAnchorList.size() < MinCallCountForCGMatching) 782 return false; 783 784 // Use the diff algorithm to find the LCS between IR and profile. 785 786 // Don't recursively match the callee function to avoid infinite matching, 787 // callee functions will be handled later since it's processed in top-down 788 // order . 789 LocToLocMap MatchedAnchors = 790 longestCommonSequence(FilteredIRAnchorsList, FilteredProfileAnchorList, 791 false /* Match unused functions */); 792 793 Similarity = 794 static_cast<float>(MatchedAnchors.size()) * 2 / 795 (FilteredIRAnchorsList.size() + FilteredProfileAnchorList.size()); 796 797 LLVM_DEBUG(dbgs() << "The similarity between " << IRFunc.getName() 798 << "(IR) and " << ProfFunc << "(profile) is " 799 << format("%.2f", Similarity) << "\n"); 800 assert((Similarity >= 0 && Similarity <= 1.0) && 801 "Similarity value should be in [0, 1]"); 802 return Similarity * 100 > FuncProfileSimilarityThreshold; 803 } 804 805 // If FindMatchedProfileOnly is set to true, only use the processed function 806 // results. This is used for skipping the repeated recursive matching. 807 bool SampleProfileMatcher::functionMatchesProfile(Function &IRFunc, 808 const FunctionId &ProfFunc, 809 bool FindMatchedProfileOnly) { 810 auto R = FuncProfileMatchCache.find({&IRFunc, ProfFunc}); 811 if (R != FuncProfileMatchCache.end()) 812 return R->second; 813 814 if (FindMatchedProfileOnly) 815 return false; 816 817 bool Matched = functionMatchesProfileHelper(IRFunc, ProfFunc); 818 FuncProfileMatchCache[{&IRFunc, ProfFunc}] = Matched; 819 if (Matched) { 820 FuncToProfileNameMap[&IRFunc] = ProfFunc; 821 LLVM_DEBUG(dbgs() << "Function:" << IRFunc.getName() 822 << " matches profile:" << ProfFunc << "\n"); 823 } 824 825 return Matched; 826 } 827 828 void SampleProfileMatcher::UpdateWithSalvagedProfiles() { 829 DenseSet<StringRef> ProfileSalvagedFuncs; 830 // Update FuncNameToProfNameMap and SymbolMap. 831 for (auto &I : FuncToProfileNameMap) { 832 assert(I.first && "New function is null"); 833 FunctionId FuncName(I.first->getName()); 834 ProfileSalvagedFuncs.insert(I.second.stringRef()); 835 FuncNameToProfNameMap->emplace(FuncName, I.second); 836 837 // We need to remove the old entry to avoid duplicating the function 838 // processing. 839 SymbolMap->erase(FuncName); 840 SymbolMap->emplace(I.second, I.first); 841 } 842 843 // With extbinary profile format, initial profile loading only reads profile 844 // based on current function names in the module, so we need to load top-level 845 // profiles for functions with different profile name explicitly after 846 // function-profile name map is established with stale profile matching. 847 Reader.read(ProfileSalvagedFuncs); 848 Reader.setFuncNameToProfNameMap(*FuncNameToProfNameMap); 849 } 850 851 void SampleProfileMatcher::runOnModule() { 852 ProfileConverter::flattenProfile(Reader.getProfiles(), FlattenedProfiles, 853 FunctionSamples::ProfileIsCS); 854 if (SalvageUnusedProfile) 855 findFunctionsWithoutProfile(); 856 857 // Process the matching in top-down order so that the caller matching result 858 // can be used to the callee matching. 859 std::vector<Function *> TopDownFunctionList; 860 TopDownFunctionList.reserve(M.size()); 861 buildTopDownFuncOrder(CG, TopDownFunctionList); 862 for (auto *F : TopDownFunctionList) { 863 if (skipProfileForFunction(*F)) 864 continue; 865 runOnFunction(*F); 866 } 867 868 if (SalvageUnusedProfile) 869 UpdateWithSalvagedProfiles(); 870 871 if (SalvageStaleProfile) 872 distributeIRToProfileLocationMap(); 873 874 computeAndReportProfileStaleness(); 875 } 876 877 void SampleProfileMatcher::distributeIRToProfileLocationMap( 878 FunctionSamples &FS) { 879 const auto ProfileMappings = FuncMappings.find(FS.getFuncName()); 880 if (ProfileMappings != FuncMappings.end()) { 881 FS.setIRToProfileLocationMap(&(ProfileMappings->second)); 882 } 883 884 for (auto &Callees : 885 const_cast<CallsiteSampleMap &>(FS.getCallsiteSamples())) { 886 for (auto &FS : Callees.second) { 887 distributeIRToProfileLocationMap(FS.second); 888 } 889 } 890 } 891 892 // Use a central place to distribute the matching results. Outlined and inlined 893 // profile with the function name will be set to the same pointer. 894 void SampleProfileMatcher::distributeIRToProfileLocationMap() { 895 for (auto &I : Reader.getProfiles()) { 896 distributeIRToProfileLocationMap(I.second); 897 } 898 } 899