1 //===-- ProfileGenerator.cpp - Profile Generator ---------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 #include "ProfileGenerator.h" 9 #include "ErrorHandling.h" 10 #include "MissingFrameInferrer.h" 11 #include "PerfReader.h" 12 #include "ProfiledBinary.h" 13 #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h" 14 #include "llvm/ProfileData/ProfileCommon.h" 15 #include <algorithm> 16 #include <float.h> 17 #include <unordered_set> 18 #include <utility> 19 20 cl::opt<std::string> OutputFilename("output", cl::value_desc("output"), 21 cl::Required, 22 cl::desc("Output profile file")); 23 static cl::alias OutputA("o", cl::desc("Alias for --output"), 24 cl::aliasopt(OutputFilename)); 25 26 static cl::opt<SampleProfileFormat> OutputFormat( 27 "format", cl::desc("Format of output profile"), cl::init(SPF_Ext_Binary), 28 cl::values( 29 clEnumValN(SPF_Binary, "binary", "Binary encoding (default)"), 30 clEnumValN(SPF_Ext_Binary, "extbinary", "Extensible binary encoding"), 31 clEnumValN(SPF_Text, "text", "Text encoding"), 32 clEnumValN(SPF_GCC, "gcc", 33 "GCC encoding (only meaningful for -sample)"))); 34 35 static cl::opt<bool> UseMD5( 36 "use-md5", cl::Hidden, 37 cl::desc("Use md5 to represent function names in the output profile (only " 38 "meaningful for -extbinary)")); 39 40 static cl::opt<bool> PopulateProfileSymbolList( 41 "populate-profile-symbol-list", cl::init(false), cl::Hidden, 42 cl::desc("Populate profile symbol list (only meaningful for -extbinary)")); 43 44 static cl::opt<bool> FillZeroForAllFuncs( 45 "fill-zero-for-all-funcs", cl::init(false), cl::Hidden, 46 cl::desc("Attribute all functions' range with zero count " 47 "even it's not hit by any samples.")); 48 49 static cl::opt<int32_t, true> RecursionCompression( 50 "compress-recursion", 51 cl::desc("Compressing recursion by deduplicating adjacent frame " 52 "sequences up to the specified size. -1 means no size limit."), 53 cl::Hidden, 54 cl::location(llvm::sampleprof::CSProfileGenerator::MaxCompressionSize)); 55 56 static cl::opt<bool> 57 TrimColdProfile("trim-cold-profile", 58 cl::desc("If the total count of the profile is smaller " 59 "than threshold, it will be trimmed.")); 60 61 static cl::opt<bool> CSProfMergeColdContext( 62 "csprof-merge-cold-context", cl::init(true), 63 cl::desc("If the total count of context profile is smaller than " 64 "the threshold, it will be merged into context-less base " 65 "profile.")); 66 67 static cl::opt<uint32_t> CSProfMaxColdContextDepth( 68 "csprof-max-cold-context-depth", cl::init(1), 69 cl::desc("Keep the last K contexts while merging cold profile. 1 means the " 70 "context-less base profile")); 71 72 static cl::opt<int, true> CSProfMaxContextDepth( 73 "csprof-max-context-depth", 74 cl::desc("Keep the last K contexts while merging profile. -1 means no " 75 "depth limit."), 76 cl::location(llvm::sampleprof::CSProfileGenerator::MaxContextDepth)); 77 78 static cl::opt<double> HotFunctionDensityThreshold( 79 "hot-function-density-threshold", llvm::cl::init(1000), 80 llvm::cl::desc( 81 "specify density threshold for hot functions (default: 1000)"), 82 llvm::cl::Optional); 83 static cl::opt<bool> ShowDensity("show-density", llvm::cl::init(false), 84 llvm::cl::desc("show profile density details"), 85 llvm::cl::Optional); 86 87 static cl::opt<bool> UpdateTotalSamples( 88 "update-total-samples", llvm::cl::init(false), 89 llvm::cl::desc( 90 "Update total samples by accumulating all its body samples."), 91 llvm::cl::Optional); 92 93 static cl::opt<bool> GenCSNestedProfile( 94 "gen-cs-nested-profile", cl::Hidden, cl::init(true), 95 cl::desc("Generate nested function profiles for CSSPGO")); 96 97 cl::opt<bool> InferMissingFrames( 98 "infer-missing-frames", llvm::cl::init(true), 99 llvm::cl::desc( 100 "Infer missing call frames due to compiler tail call elimination."), 101 llvm::cl::Optional); 102 103 using namespace llvm; 104 using namespace sampleprof; 105 106 namespace llvm { 107 extern cl::opt<int> ProfileSummaryCutoffHot; 108 extern cl::opt<bool> UseContextLessSummary; 109 110 namespace sampleprof { 111 112 // Initialize the MaxCompressionSize to -1 which means no size limit 113 int32_t CSProfileGenerator::MaxCompressionSize = -1; 114 115 int CSProfileGenerator::MaxContextDepth = -1; 116 117 bool ProfileGeneratorBase::UseFSDiscriminator = false; 118 119 std::unique_ptr<ProfileGeneratorBase> 120 ProfileGeneratorBase::create(ProfiledBinary *Binary, 121 const ContextSampleCounterMap *SampleCounters, 122 bool ProfileIsCS) { 123 std::unique_ptr<ProfileGeneratorBase> Generator; 124 if (ProfileIsCS) { 125 Generator.reset(new CSProfileGenerator(Binary, SampleCounters)); 126 } else { 127 Generator.reset(new ProfileGenerator(Binary, SampleCounters)); 128 } 129 ProfileGeneratorBase::UseFSDiscriminator = Binary->useFSDiscriminator(); 130 FunctionSamples::ProfileIsFS = Binary->useFSDiscriminator(); 131 132 return Generator; 133 } 134 135 std::unique_ptr<ProfileGeneratorBase> 136 ProfileGeneratorBase::create(ProfiledBinary *Binary, SampleProfileMap &Profiles, 137 bool ProfileIsCS) { 138 std::unique_ptr<ProfileGeneratorBase> Generator; 139 if (ProfileIsCS) { 140 Generator.reset(new CSProfileGenerator(Binary, Profiles)); 141 } else { 142 Generator.reset(new ProfileGenerator(Binary, std::move(Profiles))); 143 } 144 ProfileGeneratorBase::UseFSDiscriminator = Binary->useFSDiscriminator(); 145 FunctionSamples::ProfileIsFS = Binary->useFSDiscriminator(); 146 147 return Generator; 148 } 149 150 void ProfileGeneratorBase::write(std::unique_ptr<SampleProfileWriter> Writer, 151 SampleProfileMap &ProfileMap) { 152 // Populate profile symbol list if extended binary format is used. 153 ProfileSymbolList SymbolList; 154 155 if (PopulateProfileSymbolList && OutputFormat == SPF_Ext_Binary) { 156 Binary->populateSymbolListFromDWARF(SymbolList); 157 Writer->setProfileSymbolList(&SymbolList); 158 } 159 160 if (std::error_code EC = Writer->write(ProfileMap)) 161 exitWithError(std::move(EC)); 162 } 163 164 void ProfileGeneratorBase::write() { 165 auto WriterOrErr = SampleProfileWriter::create(OutputFilename, OutputFormat); 166 if (std::error_code EC = WriterOrErr.getError()) 167 exitWithError(EC, OutputFilename); 168 169 if (UseMD5) { 170 if (OutputFormat != SPF_Ext_Binary) 171 WithColor::warning() << "-use-md5 is ignored. Specify " 172 "--format=extbinary to enable it\n"; 173 else 174 WriterOrErr.get()->setUseMD5(); 175 } 176 177 write(std::move(WriterOrErr.get()), ProfileMap); 178 } 179 180 void ProfileGeneratorBase::showDensitySuggestion(double Density) { 181 if (Density == 0.0) 182 WithColor::warning() << "The --profile-summary-cutoff-hot option may be " 183 "set too low. Please check your command.\n"; 184 else if (Density < HotFunctionDensityThreshold) 185 WithColor::warning() 186 << "Sample PGO is estimated to optimize better with " 187 << format("%.1f", HotFunctionDensityThreshold / Density) 188 << "x more samples. Please consider increasing sampling rate or " 189 "profiling for longer duration to get more samples.\n"; 190 191 if (ShowDensity) 192 outs() << "Minimum profile density for hot functions with top " 193 << format("%.2f", 194 static_cast<double>(ProfileSummaryCutoffHot.getValue()) / 195 10000) 196 << "% total samples: " << format("%.1f", Density) << "\n"; 197 } 198 199 double ProfileGeneratorBase::calculateDensity(const SampleProfileMap &Profiles, 200 uint64_t HotCntThreshold) { 201 double Density = DBL_MAX; 202 std::vector<const FunctionSamples *> HotFuncs; 203 for (auto &I : Profiles) { 204 auto &FuncSamples = I.second; 205 if (FuncSamples.getTotalSamples() < HotCntThreshold) 206 continue; 207 HotFuncs.emplace_back(&FuncSamples); 208 } 209 210 for (auto *FuncSamples : HotFuncs) { 211 auto *Func = Binary->getBinaryFunction(FuncSamples->getName()); 212 if (!Func) 213 continue; 214 uint64_t FuncSize = Func->getFuncSize(); 215 if (FuncSize == 0) 216 continue; 217 Density = 218 std::min(Density, static_cast<double>(FuncSamples->getTotalSamples()) / 219 FuncSize); 220 } 221 222 return Density == DBL_MAX ? 0.0 : Density; 223 } 224 225 void ProfileGeneratorBase::findDisjointRanges(RangeSample &DisjointRanges, 226 const RangeSample &Ranges) { 227 228 /* 229 Regions may overlap with each other. Using the boundary info, find all 230 disjoint ranges and their sample count. BoundaryPoint contains the count 231 multiple samples begin/end at this points. 232 233 |<--100-->| Sample1 234 |<------200------>| Sample2 235 A B C 236 237 In the example above, 238 Sample1 begins at A, ends at B, its value is 100. 239 Sample2 beings at A, ends at C, its value is 200. 240 For A, BeginCount is the sum of sample begins at A, which is 300 and no 241 samples ends at A, so EndCount is 0. 242 Then boundary points A, B, and C with begin/end counts are: 243 A: (300, 0) 244 B: (0, 100) 245 C: (0, 200) 246 */ 247 struct BoundaryPoint { 248 // Sum of sample counts beginning at this point 249 uint64_t BeginCount = UINT64_MAX; 250 // Sum of sample counts ending at this point 251 uint64_t EndCount = UINT64_MAX; 252 // Is the begin point of a zero range. 253 bool IsZeroRangeBegin = false; 254 // Is the end point of a zero range. 255 bool IsZeroRangeEnd = false; 256 257 void addBeginCount(uint64_t Count) { 258 if (BeginCount == UINT64_MAX) 259 BeginCount = 0; 260 BeginCount += Count; 261 } 262 263 void addEndCount(uint64_t Count) { 264 if (EndCount == UINT64_MAX) 265 EndCount = 0; 266 EndCount += Count; 267 } 268 }; 269 270 /* 271 For the above example. With boundary points, follwing logic finds two 272 disjoint region of 273 274 [A,B]: 300 275 [B+1,C]: 200 276 277 If there is a boundary point that both begin and end, the point itself 278 becomes a separate disjoint region. For example, if we have original 279 ranges of 280 281 |<--- 100 --->| 282 |<--- 200 --->| 283 A B C 284 285 there are three boundary points with their begin/end counts of 286 287 A: (100, 0) 288 B: (200, 100) 289 C: (0, 200) 290 291 the disjoint ranges would be 292 293 [A, B-1]: 100 294 [B, B]: 300 295 [B+1, C]: 200. 296 297 Example for zero value range: 298 299 |<--- 100 --->| 300 |<--- 200 --->| 301 |<--------------- 0 ----------------->| 302 A B C D E F 303 304 [A, B-1] : 0 305 [B, C] : 100 306 [C+1, D-1]: 0 307 [D, E] : 200 308 [E+1, F] : 0 309 */ 310 std::map<uint64_t, BoundaryPoint> Boundaries; 311 312 for (const auto &Item : Ranges) { 313 assert(Item.first.first <= Item.first.second && 314 "Invalid instruction range"); 315 auto &BeginPoint = Boundaries[Item.first.first]; 316 auto &EndPoint = Boundaries[Item.first.second]; 317 uint64_t Count = Item.second; 318 319 BeginPoint.addBeginCount(Count); 320 EndPoint.addEndCount(Count); 321 if (Count == 0) { 322 BeginPoint.IsZeroRangeBegin = true; 323 EndPoint.IsZeroRangeEnd = true; 324 } 325 } 326 327 // Use UINT64_MAX to indicate there is no existing range between BeginAddress 328 // and the next valid address 329 uint64_t BeginAddress = UINT64_MAX; 330 int ZeroRangeDepth = 0; 331 uint64_t Count = 0; 332 for (const auto &Item : Boundaries) { 333 uint64_t Address = Item.first; 334 const BoundaryPoint &Point = Item.second; 335 if (Point.BeginCount != UINT64_MAX) { 336 if (BeginAddress != UINT64_MAX) 337 DisjointRanges[{BeginAddress, Address - 1}] = Count; 338 Count += Point.BeginCount; 339 BeginAddress = Address; 340 ZeroRangeDepth += Point.IsZeroRangeBegin; 341 } 342 if (Point.EndCount != UINT64_MAX) { 343 assert((BeginAddress != UINT64_MAX) && 344 "First boundary point cannot be 'end' point"); 345 DisjointRanges[{BeginAddress, Address}] = Count; 346 assert(Count >= Point.EndCount && "Mismatched live ranges"); 347 Count -= Point.EndCount; 348 BeginAddress = Address + 1; 349 ZeroRangeDepth -= Point.IsZeroRangeEnd; 350 // If the remaining count is zero and it's no longer in a zero range, this 351 // means we consume all the ranges before, thus mark BeginAddress as 352 // UINT64_MAX. e.g. supposing we have two non-overlapping ranges: 353 // [<---- 10 ---->] 354 // [<---- 20 ---->] 355 // A B C D 356 // The BeginAddress(B+1) will reset to invalid(UINT64_MAX), so we won't 357 // have the [B+1, C-1] zero range. 358 if (Count == 0 && ZeroRangeDepth == 0) 359 BeginAddress = UINT64_MAX; 360 } 361 } 362 } 363 364 void ProfileGeneratorBase::updateBodySamplesforFunctionProfile( 365 FunctionSamples &FunctionProfile, const SampleContextFrame &LeafLoc, 366 uint64_t Count) { 367 // Use the maximum count of samples with same line location 368 uint32_t Discriminator = getBaseDiscriminator(LeafLoc.Location.Discriminator); 369 370 // Use duplication factor to compensated for loop unroll/vectorization. 371 // Note that this is only needed when we're taking MAX of the counts at 372 // the location instead of SUM. 373 Count *= getDuplicationFactor(LeafLoc.Location.Discriminator); 374 375 ErrorOr<uint64_t> R = 376 FunctionProfile.findSamplesAt(LeafLoc.Location.LineOffset, Discriminator); 377 378 uint64_t PreviousCount = R ? R.get() : 0; 379 if (PreviousCount <= Count) { 380 FunctionProfile.addBodySamples(LeafLoc.Location.LineOffset, Discriminator, 381 Count - PreviousCount); 382 } 383 } 384 385 void ProfileGeneratorBase::updateTotalSamples() { 386 for (auto &Item : ProfileMap) { 387 FunctionSamples &FunctionProfile = Item.second; 388 FunctionProfile.updateTotalSamples(); 389 } 390 } 391 392 void ProfileGeneratorBase::updateCallsiteSamples() { 393 for (auto &Item : ProfileMap) { 394 FunctionSamples &FunctionProfile = Item.second; 395 FunctionProfile.updateCallsiteSamples(); 396 } 397 } 398 399 void ProfileGeneratorBase::updateFunctionSamples() { 400 updateCallsiteSamples(); 401 402 if (UpdateTotalSamples) 403 updateTotalSamples(); 404 } 405 406 void ProfileGeneratorBase::collectProfiledFunctions() { 407 std::unordered_set<const BinaryFunction *> ProfiledFunctions; 408 if (collectFunctionsFromRawProfile(ProfiledFunctions)) 409 Binary->setProfiledFunctions(ProfiledFunctions); 410 else if (collectFunctionsFromLLVMProfile(ProfiledFunctions)) 411 Binary->setProfiledFunctions(ProfiledFunctions); 412 else 413 llvm_unreachable("Unsupported input profile"); 414 } 415 416 bool ProfileGeneratorBase::collectFunctionsFromRawProfile( 417 std::unordered_set<const BinaryFunction *> &ProfiledFunctions) { 418 if (!SampleCounters) 419 return false; 420 // Go through all the stacks, ranges and branches in sample counters, use 421 // the start of the range to look up the function it belongs and record the 422 // function. 423 for (const auto &CI : *SampleCounters) { 424 if (const auto *CtxKey = dyn_cast<AddrBasedCtxKey>(CI.first.getPtr())) { 425 for (auto StackAddr : CtxKey->Context) { 426 if (FuncRange *FRange = Binary->findFuncRange(StackAddr)) 427 ProfiledFunctions.insert(FRange->Func); 428 } 429 } 430 431 for (auto Item : CI.second.RangeCounter) { 432 uint64_t StartAddress = Item.first.first; 433 if (FuncRange *FRange = Binary->findFuncRange(StartAddress)) 434 ProfiledFunctions.insert(FRange->Func); 435 } 436 437 for (auto Item : CI.second.BranchCounter) { 438 uint64_t SourceAddress = Item.first.first; 439 uint64_t TargetAddress = Item.first.second; 440 if (FuncRange *FRange = Binary->findFuncRange(SourceAddress)) 441 ProfiledFunctions.insert(FRange->Func); 442 if (FuncRange *FRange = Binary->findFuncRange(TargetAddress)) 443 ProfiledFunctions.insert(FRange->Func); 444 } 445 } 446 return true; 447 } 448 449 bool ProfileGenerator::collectFunctionsFromLLVMProfile( 450 std::unordered_set<const BinaryFunction *> &ProfiledFunctions) { 451 for (const auto &FS : ProfileMap) { 452 if (auto *Func = Binary->getBinaryFunction(FS.first.getName())) 453 ProfiledFunctions.insert(Func); 454 } 455 return true; 456 } 457 458 bool CSProfileGenerator::collectFunctionsFromLLVMProfile( 459 std::unordered_set<const BinaryFunction *> &ProfiledFunctions) { 460 for (auto *Node : ContextTracker) { 461 if (!Node->getFuncName().empty()) 462 if (auto *Func = Binary->getBinaryFunction(Node->getFuncName())) 463 ProfiledFunctions.insert(Func); 464 } 465 return true; 466 } 467 468 FunctionSamples & 469 ProfileGenerator::getTopLevelFunctionProfile(StringRef FuncName) { 470 SampleContext Context(FuncName); 471 auto Ret = ProfileMap.emplace(Context, FunctionSamples()); 472 if (Ret.second) { 473 FunctionSamples &FProfile = Ret.first->second; 474 FProfile.setContext(Context); 475 } 476 return Ret.first->second; 477 } 478 479 void ProfileGenerator::generateProfile() { 480 collectProfiledFunctions(); 481 482 if (Binary->usePseudoProbes()) 483 Binary->decodePseudoProbe(); 484 485 if (SampleCounters) { 486 if (Binary->usePseudoProbes()) { 487 generateProbeBasedProfile(); 488 } else { 489 generateLineNumBasedProfile(); 490 } 491 } 492 493 postProcessProfiles(); 494 } 495 496 void ProfileGenerator::postProcessProfiles() { 497 computeSummaryAndThreshold(ProfileMap); 498 trimColdProfiles(ProfileMap, ColdCountThreshold); 499 calculateAndShowDensity(ProfileMap); 500 } 501 502 void ProfileGenerator::trimColdProfiles(const SampleProfileMap &Profiles, 503 uint64_t ColdCntThreshold) { 504 if (!TrimColdProfile) 505 return; 506 507 // Move cold profiles into a tmp container. 508 std::vector<SampleContext> ColdProfiles; 509 for (const auto &I : ProfileMap) { 510 if (I.second.getTotalSamples() < ColdCntThreshold) 511 ColdProfiles.emplace_back(I.first); 512 } 513 514 // Remove the cold profile from ProfileMap. 515 for (const auto &I : ColdProfiles) 516 ProfileMap.erase(I); 517 } 518 519 void ProfileGenerator::generateLineNumBasedProfile() { 520 assert(SampleCounters->size() == 1 && 521 "Must have one entry for profile generation."); 522 const SampleCounter &SC = SampleCounters->begin()->second; 523 // Fill in function body samples 524 populateBodySamplesForAllFunctions(SC.RangeCounter); 525 // Fill in boundary sample counts as well as call site samples for calls 526 populateBoundarySamplesForAllFunctions(SC.BranchCounter); 527 528 updateFunctionSamples(); 529 } 530 531 void ProfileGenerator::generateProbeBasedProfile() { 532 assert(SampleCounters->size() == 1 && 533 "Must have one entry for profile generation."); 534 // Enable pseudo probe functionalities in SampleProf 535 FunctionSamples::ProfileIsProbeBased = true; 536 const SampleCounter &SC = SampleCounters->begin()->second; 537 // Fill in function body samples 538 populateBodySamplesWithProbesForAllFunctions(SC.RangeCounter); 539 // Fill in boundary sample counts as well as call site samples for calls 540 populateBoundarySamplesWithProbesForAllFunctions(SC.BranchCounter); 541 542 updateFunctionSamples(); 543 } 544 545 void ProfileGenerator::populateBodySamplesWithProbesForAllFunctions( 546 const RangeSample &RangeCounter) { 547 ProbeCounterMap ProbeCounter; 548 // preprocessRangeCounter returns disjoint ranges, so no longer to redo it 549 // inside extractProbesFromRange. 550 extractProbesFromRange(preprocessRangeCounter(RangeCounter), ProbeCounter, 551 false); 552 553 for (const auto &PI : ProbeCounter) { 554 const MCDecodedPseudoProbe *Probe = PI.first; 555 uint64_t Count = PI.second; 556 SampleContextFrameVector FrameVec; 557 Binary->getInlineContextForProbe(Probe, FrameVec, true); 558 FunctionSamples &FunctionProfile = 559 getLeafProfileAndAddTotalSamples(FrameVec, Count); 560 FunctionProfile.addBodySamples(Probe->getIndex(), Probe->getDiscriminator(), 561 Count); 562 if (Probe->isEntry()) 563 FunctionProfile.addHeadSamples(Count); 564 } 565 } 566 567 void ProfileGenerator::populateBoundarySamplesWithProbesForAllFunctions( 568 const BranchSample &BranchCounters) { 569 for (const auto &Entry : BranchCounters) { 570 uint64_t SourceAddress = Entry.first.first; 571 uint64_t TargetAddress = Entry.first.second; 572 uint64_t Count = Entry.second; 573 assert(Count != 0 && "Unexpected zero weight branch"); 574 575 StringRef CalleeName = getCalleeNameForAddress(TargetAddress); 576 if (CalleeName.size() == 0) 577 continue; 578 579 const MCDecodedPseudoProbe *CallProbe = 580 Binary->getCallProbeForAddr(SourceAddress); 581 if (CallProbe == nullptr) 582 continue; 583 584 // Record called target sample and its count. 585 SampleContextFrameVector FrameVec; 586 Binary->getInlineContextForProbe(CallProbe, FrameVec, true); 587 588 if (!FrameVec.empty()) { 589 FunctionSamples &FunctionProfile = 590 getLeafProfileAndAddTotalSamples(FrameVec, 0); 591 FunctionProfile.addCalledTargetSamples( 592 FrameVec.back().Location.LineOffset, 593 FrameVec.back().Location.Discriminator, 594 CalleeName, Count); 595 } 596 } 597 } 598 599 FunctionSamples &ProfileGenerator::getLeafProfileAndAddTotalSamples( 600 const SampleContextFrameVector &FrameVec, uint64_t Count) { 601 // Get top level profile 602 FunctionSamples *FunctionProfile = 603 &getTopLevelFunctionProfile(FrameVec[0].FuncName); 604 FunctionProfile->addTotalSamples(Count); 605 if (Binary->usePseudoProbes()) { 606 const auto *FuncDesc = Binary->getFuncDescForGUID( 607 Function::getGUID(FunctionProfile->getName())); 608 FunctionProfile->setFunctionHash(FuncDesc->FuncHash); 609 } 610 611 for (size_t I = 1; I < FrameVec.size(); I++) { 612 LineLocation Callsite( 613 FrameVec[I - 1].Location.LineOffset, 614 getBaseDiscriminator(FrameVec[I - 1].Location.Discriminator)); 615 FunctionSamplesMap &SamplesMap = 616 FunctionProfile->functionSamplesAt(Callsite); 617 auto Ret = 618 SamplesMap.emplace(FrameVec[I].FuncName.str(), FunctionSamples()); 619 if (Ret.second) { 620 SampleContext Context(FrameVec[I].FuncName); 621 Ret.first->second.setContext(Context); 622 } 623 FunctionProfile = &Ret.first->second; 624 FunctionProfile->addTotalSamples(Count); 625 if (Binary->usePseudoProbes()) { 626 const auto *FuncDesc = Binary->getFuncDescForGUID( 627 Function::getGUID(FunctionProfile->getName())); 628 FunctionProfile->setFunctionHash(FuncDesc->FuncHash); 629 } 630 } 631 632 return *FunctionProfile; 633 } 634 635 RangeSample 636 ProfileGenerator::preprocessRangeCounter(const RangeSample &RangeCounter) { 637 RangeSample Ranges(RangeCounter.begin(), RangeCounter.end()); 638 if (FillZeroForAllFuncs) { 639 for (auto &FuncI : Binary->getAllBinaryFunctions()) { 640 for (auto &R : FuncI.second.Ranges) { 641 Ranges[{R.first, R.second - 1}] += 0; 642 } 643 } 644 } else { 645 // For each range, we search for all ranges of the function it belongs to 646 // and initialize it with zero count, so it remains zero if doesn't hit any 647 // samples. This is to be consistent with compiler that interpret zero count 648 // as unexecuted(cold). 649 for (const auto &I : RangeCounter) { 650 uint64_t StartAddress = I.first.first; 651 for (const auto &Range : Binary->getRanges(StartAddress)) 652 Ranges[{Range.first, Range.second - 1}] += 0; 653 } 654 } 655 RangeSample DisjointRanges; 656 findDisjointRanges(DisjointRanges, Ranges); 657 return DisjointRanges; 658 } 659 660 void ProfileGenerator::populateBodySamplesForAllFunctions( 661 const RangeSample &RangeCounter) { 662 for (const auto &Range : preprocessRangeCounter(RangeCounter)) { 663 uint64_t RangeBegin = Range.first.first; 664 uint64_t RangeEnd = Range.first.second; 665 uint64_t Count = Range.second; 666 667 InstructionPointer IP(Binary, RangeBegin, true); 668 // Disjoint ranges may have range in the middle of two instr, 669 // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range 670 // can be Addr1+1 to Addr2-1. We should ignore such range. 671 if (IP.Address > RangeEnd) 672 continue; 673 674 do { 675 const SampleContextFrameVector FrameVec = 676 Binary->getFrameLocationStack(IP.Address); 677 if (!FrameVec.empty()) { 678 // FIXME: As accumulating total count per instruction caused some 679 // regression, we changed to accumulate total count per byte as a 680 // workaround. Tuning hotness threshold on the compiler side might be 681 // necessary in the future. 682 FunctionSamples &FunctionProfile = getLeafProfileAndAddTotalSamples( 683 FrameVec, Count * Binary->getInstSize(IP.Address)); 684 updateBodySamplesforFunctionProfile(FunctionProfile, FrameVec.back(), 685 Count); 686 } 687 } while (IP.advance() && IP.Address <= RangeEnd); 688 } 689 } 690 691 StringRef 692 ProfileGeneratorBase::getCalleeNameForAddress(uint64_t TargetAddress) { 693 // Get the function range by branch target if it's a call branch. 694 auto *FRange = Binary->findFuncRangeForStartAddr(TargetAddress); 695 696 // We won't accumulate sample count for a range whose start is not the real 697 // function entry such as outlined function or inner labels. 698 if (!FRange || !FRange->IsFuncEntry) 699 return StringRef(); 700 701 return FunctionSamples::getCanonicalFnName(FRange->getFuncName()); 702 } 703 704 void ProfileGenerator::populateBoundarySamplesForAllFunctions( 705 const BranchSample &BranchCounters) { 706 for (const auto &Entry : BranchCounters) { 707 uint64_t SourceAddress = Entry.first.first; 708 uint64_t TargetAddress = Entry.first.second; 709 uint64_t Count = Entry.second; 710 assert(Count != 0 && "Unexpected zero weight branch"); 711 712 StringRef CalleeName = getCalleeNameForAddress(TargetAddress); 713 if (CalleeName.size() == 0) 714 continue; 715 // Record called target sample and its count. 716 const SampleContextFrameVector &FrameVec = 717 Binary->getCachedFrameLocationStack(SourceAddress); 718 if (!FrameVec.empty()) { 719 FunctionSamples &FunctionProfile = 720 getLeafProfileAndAddTotalSamples(FrameVec, 0); 721 FunctionProfile.addCalledTargetSamples( 722 FrameVec.back().Location.LineOffset, 723 getBaseDiscriminator(FrameVec.back().Location.Discriminator), 724 CalleeName, Count); 725 } 726 // Add head samples for callee. 727 FunctionSamples &CalleeProfile = getTopLevelFunctionProfile(CalleeName); 728 CalleeProfile.addHeadSamples(Count); 729 } 730 } 731 732 void ProfileGeneratorBase::calculateAndShowDensity( 733 const SampleProfileMap &Profiles) { 734 double Density = calculateDensity(Profiles, HotCountThreshold); 735 showDensitySuggestion(Density); 736 } 737 738 FunctionSamples * 739 CSProfileGenerator::getOrCreateFunctionSamples(ContextTrieNode *ContextNode, 740 bool WasLeafInlined) { 741 FunctionSamples *FProfile = ContextNode->getFunctionSamples(); 742 if (!FProfile) { 743 FSamplesList.emplace_back(); 744 FProfile = &FSamplesList.back(); 745 FProfile->setName(ContextNode->getFuncName()); 746 ContextNode->setFunctionSamples(FProfile); 747 } 748 // Update ContextWasInlined attribute for existing contexts. 749 // The current function can be called in two ways: 750 // - when processing a probe of the current frame 751 // - when processing the entry probe of an inlinee's frame, which 752 // is then used to update the callsite count of the current frame. 753 // The two can happen in any order, hence here we are making sure 754 // `ContextWasInlined` is always set as expected. 755 // TODO: Note that the former does not always happen if no probes of the 756 // current frame has samples, and if the latter happens, we could lose the 757 // attribute. This should be fixed. 758 if (WasLeafInlined) 759 FProfile->getContext().setAttribute(ContextWasInlined); 760 return FProfile; 761 } 762 763 ContextTrieNode * 764 CSProfileGenerator::getOrCreateContextNode(const SampleContextFrames Context, 765 bool WasLeafInlined) { 766 ContextTrieNode *ContextNode = 767 ContextTracker.getOrCreateContextPath(Context, true); 768 getOrCreateFunctionSamples(ContextNode, WasLeafInlined); 769 return ContextNode; 770 } 771 772 void CSProfileGenerator::generateProfile() { 773 FunctionSamples::ProfileIsCS = true; 774 775 collectProfiledFunctions(); 776 777 if (Binary->usePseudoProbes()) { 778 Binary->decodePseudoProbe(); 779 if (InferMissingFrames) 780 initializeMissingFrameInferrer(); 781 } 782 783 if (SampleCounters) { 784 if (Binary->usePseudoProbes()) { 785 generateProbeBasedProfile(); 786 } else { 787 generateLineNumBasedProfile(); 788 } 789 } 790 791 if (Binary->getTrackFuncContextSize()) 792 computeSizeForProfiledFunctions(); 793 794 postProcessProfiles(); 795 } 796 797 void CSProfileGenerator::initializeMissingFrameInferrer() { 798 Binary->getMissingContextInferrer()->initialize(SampleCounters); 799 } 800 801 void CSProfileGenerator::inferMissingFrames( 802 const SmallVectorImpl<uint64_t> &Context, 803 SmallVectorImpl<uint64_t> &NewContext) { 804 Binary->inferMissingFrames(Context, NewContext); 805 } 806 807 void CSProfileGenerator::computeSizeForProfiledFunctions() { 808 for (auto *Func : Binary->getProfiledFunctions()) 809 Binary->computeInlinedContextSizeForFunc(Func); 810 811 // Flush the symbolizer to save memory. 812 Binary->flushSymbolizer(); 813 } 814 815 void CSProfileGenerator::updateFunctionSamples() { 816 for (auto *Node : ContextTracker) { 817 FunctionSamples *FSamples = Node->getFunctionSamples(); 818 if (FSamples) { 819 if (UpdateTotalSamples) 820 FSamples->updateTotalSamples(); 821 FSamples->updateCallsiteSamples(); 822 } 823 } 824 } 825 826 void CSProfileGenerator::generateLineNumBasedProfile() { 827 for (const auto &CI : *SampleCounters) { 828 const auto *CtxKey = cast<StringBasedCtxKey>(CI.first.getPtr()); 829 830 ContextTrieNode *ContextNode = &getRootContext(); 831 // Sample context will be empty if the jump is an external-to-internal call 832 // pattern, the head samples should be added for the internal function. 833 if (!CtxKey->Context.empty()) { 834 // Get or create function profile for the range 835 ContextNode = 836 getOrCreateContextNode(CtxKey->Context, CtxKey->WasLeafInlined); 837 // Fill in function body samples 838 populateBodySamplesForFunction(*ContextNode->getFunctionSamples(), 839 CI.second.RangeCounter); 840 } 841 // Fill in boundary sample counts as well as call site samples for calls 842 populateBoundarySamplesForFunction(ContextNode, CI.second.BranchCounter); 843 } 844 // Fill in call site value sample for inlined calls and also use context to 845 // infer missing samples. Since we don't have call count for inlined 846 // functions, we estimate it from inlinee's profile using the entry of the 847 // body sample. 848 populateInferredFunctionSamples(getRootContext()); 849 850 updateFunctionSamples(); 851 } 852 853 void CSProfileGenerator::populateBodySamplesForFunction( 854 FunctionSamples &FunctionProfile, const RangeSample &RangeCounter) { 855 // Compute disjoint ranges first, so we can use MAX 856 // for calculating count for each location. 857 RangeSample Ranges; 858 findDisjointRanges(Ranges, RangeCounter); 859 for (const auto &Range : Ranges) { 860 uint64_t RangeBegin = Range.first.first; 861 uint64_t RangeEnd = Range.first.second; 862 uint64_t Count = Range.second; 863 // Disjoint ranges have introduce zero-filled gap that 864 // doesn't belong to current context, filter them out. 865 if (Count == 0) 866 continue; 867 868 InstructionPointer IP(Binary, RangeBegin, true); 869 // Disjoint ranges may have range in the middle of two instr, 870 // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range 871 // can be Addr1+1 to Addr2-1. We should ignore such range. 872 if (IP.Address > RangeEnd) 873 continue; 874 875 do { 876 auto LeafLoc = Binary->getInlineLeafFrameLoc(IP.Address); 877 if (LeafLoc) { 878 // Recording body sample for this specific context 879 updateBodySamplesforFunctionProfile(FunctionProfile, *LeafLoc, Count); 880 FunctionProfile.addTotalSamples(Count); 881 } 882 } while (IP.advance() && IP.Address <= RangeEnd); 883 } 884 } 885 886 void CSProfileGenerator::populateBoundarySamplesForFunction( 887 ContextTrieNode *Node, const BranchSample &BranchCounters) { 888 889 for (const auto &Entry : BranchCounters) { 890 uint64_t SourceAddress = Entry.first.first; 891 uint64_t TargetAddress = Entry.first.second; 892 uint64_t Count = Entry.second; 893 assert(Count != 0 && "Unexpected zero weight branch"); 894 895 StringRef CalleeName = getCalleeNameForAddress(TargetAddress); 896 if (CalleeName.size() == 0) 897 continue; 898 899 ContextTrieNode *CallerNode = Node; 900 LineLocation CalleeCallSite(0, 0); 901 if (CallerNode != &getRootContext()) { 902 // Record called target sample and its count 903 auto LeafLoc = Binary->getInlineLeafFrameLoc(SourceAddress); 904 if (LeafLoc) { 905 CallerNode->getFunctionSamples()->addCalledTargetSamples( 906 LeafLoc->Location.LineOffset, 907 getBaseDiscriminator(LeafLoc->Location.Discriminator), CalleeName, 908 Count); 909 // Record head sample for called target(callee) 910 CalleeCallSite = LeafLoc->Location; 911 } 912 } 913 914 ContextTrieNode *CalleeNode = 915 CallerNode->getOrCreateChildContext(CalleeCallSite, CalleeName); 916 FunctionSamples *CalleeProfile = getOrCreateFunctionSamples(CalleeNode); 917 CalleeProfile->addHeadSamples(Count); 918 } 919 } 920 921 void CSProfileGenerator::populateInferredFunctionSamples( 922 ContextTrieNode &Node) { 923 // There is no call jmp sample between the inliner and inlinee, we need to use 924 // the inlinee's context to infer inliner's context, i.e. parent(inliner)'s 925 // sample depends on child(inlinee)'s sample, so traverse the tree in 926 // post-order. 927 for (auto &It : Node.getAllChildContext()) 928 populateInferredFunctionSamples(It.second); 929 930 FunctionSamples *CalleeProfile = Node.getFunctionSamples(); 931 if (!CalleeProfile) 932 return; 933 // If we already have head sample counts, we must have value profile 934 // for call sites added already. Skip to avoid double counting. 935 if (CalleeProfile->getHeadSamples()) 936 return; 937 ContextTrieNode *CallerNode = Node.getParentContext(); 938 // If we don't have context, nothing to do for caller's call site. 939 // This could happen for entry point function. 940 if (CallerNode == &getRootContext()) 941 return; 942 943 LineLocation CallerLeafFrameLoc = Node.getCallSiteLoc(); 944 FunctionSamples &CallerProfile = *getOrCreateFunctionSamples(CallerNode); 945 // Since we don't have call count for inlined functions, we 946 // estimate it from inlinee's profile using entry body sample. 947 uint64_t EstimatedCallCount = CalleeProfile->getHeadSamplesEstimate(); 948 // If we don't have samples with location, use 1 to indicate live. 949 if (!EstimatedCallCount && !CalleeProfile->getBodySamples().size()) 950 EstimatedCallCount = 1; 951 CallerProfile.addCalledTargetSamples(CallerLeafFrameLoc.LineOffset, 952 CallerLeafFrameLoc.Discriminator, 953 Node.getFuncName(), EstimatedCallCount); 954 CallerProfile.addBodySamples(CallerLeafFrameLoc.LineOffset, 955 CallerLeafFrameLoc.Discriminator, 956 EstimatedCallCount); 957 CallerProfile.addTotalSamples(EstimatedCallCount); 958 } 959 960 void CSProfileGenerator::convertToProfileMap( 961 ContextTrieNode &Node, SampleContextFrameVector &Context) { 962 FunctionSamples *FProfile = Node.getFunctionSamples(); 963 if (FProfile) { 964 Context.emplace_back(Node.getFuncName(), LineLocation(0, 0)); 965 // Save the new context for future references. 966 SampleContextFrames NewContext = *Contexts.insert(Context).first; 967 auto Ret = ProfileMap.emplace(NewContext, std::move(*FProfile)); 968 FunctionSamples &NewProfile = Ret.first->second; 969 NewProfile.getContext().setContext(NewContext); 970 Context.pop_back(); 971 } 972 973 for (auto &It : Node.getAllChildContext()) { 974 ContextTrieNode &ChildNode = It.second; 975 Context.emplace_back(Node.getFuncName(), ChildNode.getCallSiteLoc()); 976 convertToProfileMap(ChildNode, Context); 977 Context.pop_back(); 978 } 979 } 980 981 void CSProfileGenerator::convertToProfileMap() { 982 assert(ProfileMap.empty() && 983 "ProfileMap should be empty before converting from the trie"); 984 assert(IsProfileValidOnTrie && 985 "Do not convert the trie twice, it's already destroyed"); 986 987 SampleContextFrameVector Context; 988 for (auto &It : getRootContext().getAllChildContext()) 989 convertToProfileMap(It.second, Context); 990 991 IsProfileValidOnTrie = false; 992 } 993 994 void CSProfileGenerator::postProcessProfiles() { 995 // Compute hot/cold threshold based on profile. This will be used for cold 996 // context profile merging/trimming. 997 computeSummaryAndThreshold(); 998 999 // Run global pre-inliner to adjust/merge context profile based on estimated 1000 // inline decisions. 1001 if (EnableCSPreInliner) { 1002 ContextTracker.populateFuncToCtxtMap(); 1003 CSPreInliner(ContextTracker, *Binary, Summary.get()).run(); 1004 // Turn off the profile merger by default unless it is explicitly enabled. 1005 if (!CSProfMergeColdContext.getNumOccurrences()) 1006 CSProfMergeColdContext = false; 1007 } 1008 1009 convertToProfileMap(); 1010 1011 // Trim and merge cold context profile using cold threshold above. 1012 if (TrimColdProfile || CSProfMergeColdContext) { 1013 SampleContextTrimmer(ProfileMap) 1014 .trimAndMergeColdContextProfiles( 1015 HotCountThreshold, TrimColdProfile, CSProfMergeColdContext, 1016 CSProfMaxColdContextDepth, EnableCSPreInliner); 1017 } 1018 1019 // Merge function samples of CS profile to calculate profile density. 1020 sampleprof::SampleProfileMap ContextLessProfiles; 1021 for (const auto &I : ProfileMap) { 1022 ContextLessProfiles[I.second.getName()].merge(I.second); 1023 } 1024 1025 calculateAndShowDensity(ContextLessProfiles); 1026 if (GenCSNestedProfile) { 1027 ProfileConverter CSConverter(ProfileMap); 1028 CSConverter.convertCSProfiles(); 1029 FunctionSamples::ProfileIsCS = false; 1030 } 1031 } 1032 1033 void ProfileGeneratorBase::computeSummaryAndThreshold( 1034 SampleProfileMap &Profiles) { 1035 SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs); 1036 Summary = Builder.computeSummaryForProfiles(Profiles); 1037 HotCountThreshold = ProfileSummaryBuilder::getHotCountThreshold( 1038 (Summary->getDetailedSummary())); 1039 ColdCountThreshold = ProfileSummaryBuilder::getColdCountThreshold( 1040 (Summary->getDetailedSummary())); 1041 } 1042 1043 void CSProfileGenerator::computeSummaryAndThreshold() { 1044 // Always merge and use context-less profile map to compute summary. 1045 SampleProfileMap ContextLessProfiles; 1046 ContextTracker.createContextLessProfileMap(ContextLessProfiles); 1047 1048 // Set the flag below to avoid merging the profile again in 1049 // computeSummaryAndThreshold 1050 FunctionSamples::ProfileIsCS = false; 1051 assert( 1052 (!UseContextLessSummary.getNumOccurrences() || UseContextLessSummary) && 1053 "Don't set --profile-summary-contextless to false for profile " 1054 "generation"); 1055 ProfileGeneratorBase::computeSummaryAndThreshold(ContextLessProfiles); 1056 // Recover the old value. 1057 FunctionSamples::ProfileIsCS = true; 1058 } 1059 1060 void ProfileGeneratorBase::extractProbesFromRange( 1061 const RangeSample &RangeCounter, ProbeCounterMap &ProbeCounter, 1062 bool FindDisjointRanges) { 1063 const RangeSample *PRanges = &RangeCounter; 1064 RangeSample Ranges; 1065 if (FindDisjointRanges) { 1066 findDisjointRanges(Ranges, RangeCounter); 1067 PRanges = &Ranges; 1068 } 1069 1070 for (const auto &Range : *PRanges) { 1071 uint64_t RangeBegin = Range.first.first; 1072 uint64_t RangeEnd = Range.first.second; 1073 uint64_t Count = Range.second; 1074 1075 InstructionPointer IP(Binary, RangeBegin, true); 1076 // Disjoint ranges may have range in the middle of two instr, 1077 // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range 1078 // can be Addr1+1 to Addr2-1. We should ignore such range. 1079 if (IP.Address > RangeEnd) 1080 continue; 1081 1082 do { 1083 const AddressProbesMap &Address2ProbesMap = 1084 Binary->getAddress2ProbesMap(); 1085 auto It = Address2ProbesMap.find(IP.Address); 1086 if (It != Address2ProbesMap.end()) { 1087 for (const auto &Probe : It->second) { 1088 ProbeCounter[&Probe] += Count; 1089 } 1090 } 1091 } while (IP.advance() && IP.Address <= RangeEnd); 1092 } 1093 } 1094 1095 static void extractPrefixContextStack(SampleContextFrameVector &ContextStack, 1096 const SmallVectorImpl<uint64_t> &AddrVec, 1097 ProfiledBinary *Binary) { 1098 SmallVector<const MCDecodedPseudoProbe *, 16> Probes; 1099 for (auto Address : reverse(AddrVec)) { 1100 const MCDecodedPseudoProbe *CallProbe = 1101 Binary->getCallProbeForAddr(Address); 1102 // These could be the cases when a probe is not found at a calliste. Cutting 1103 // off the context from here since the inliner will not know how to consume 1104 // a context with unknown callsites. 1105 // 1. for functions that are not sampled when 1106 // --decode-probe-for-profiled-functions-only is on. 1107 // 2. for a merged callsite. Callsite merging may cause the loss of original 1108 // probe IDs. 1109 // 3. for an external callsite. 1110 if (!CallProbe) 1111 break; 1112 Probes.push_back(CallProbe); 1113 } 1114 1115 std::reverse(Probes.begin(), Probes.end()); 1116 1117 // Extract context stack for reusing, leaf context stack will be added 1118 // compressed while looking up function profile. 1119 for (const auto *P : Probes) { 1120 Binary->getInlineContextForProbe(P, ContextStack, true); 1121 } 1122 } 1123 1124 void CSProfileGenerator::generateProbeBasedProfile() { 1125 // Enable pseudo probe functionalities in SampleProf 1126 FunctionSamples::ProfileIsProbeBased = true; 1127 for (const auto &CI : *SampleCounters) { 1128 const AddrBasedCtxKey *CtxKey = 1129 dyn_cast<AddrBasedCtxKey>(CI.first.getPtr()); 1130 // Fill in function body samples from probes, also infer caller's samples 1131 // from callee's probe 1132 populateBodySamplesWithProbes(CI.second.RangeCounter, CtxKey); 1133 // Fill in boundary samples for a call probe 1134 populateBoundarySamplesWithProbes(CI.second.BranchCounter, CtxKey); 1135 } 1136 } 1137 1138 void CSProfileGenerator::populateBodySamplesWithProbes( 1139 const RangeSample &RangeCounter, const AddrBasedCtxKey *CtxKey) { 1140 ProbeCounterMap ProbeCounter; 1141 // Extract the top frame probes by looking up each address among the range in 1142 // the Address2ProbeMap 1143 extractProbesFromRange(RangeCounter, ProbeCounter); 1144 std::unordered_map<MCDecodedPseudoProbeInlineTree *, 1145 std::unordered_set<FunctionSamples *>> 1146 FrameSamples; 1147 for (const auto &PI : ProbeCounter) { 1148 const MCDecodedPseudoProbe *Probe = PI.first; 1149 uint64_t Count = PI.second; 1150 // Disjoint ranges have introduce zero-filled gap that 1151 // doesn't belong to current context, filter them out. 1152 if (!Probe->isBlock() || Count == 0) 1153 continue; 1154 1155 ContextTrieNode *ContextNode = getContextNodeForLeafProbe(CtxKey, Probe); 1156 FunctionSamples &FunctionProfile = *ContextNode->getFunctionSamples(); 1157 // Record the current frame and FunctionProfile whenever samples are 1158 // collected for non-danglie probes. This is for reporting all of the 1159 // zero count probes of the frame later. 1160 FrameSamples[Probe->getInlineTreeNode()].insert(&FunctionProfile); 1161 FunctionProfile.addBodySamples(Probe->getIndex(), Probe->getDiscriminator(), 1162 Count); 1163 FunctionProfile.addTotalSamples(Count); 1164 if (Probe->isEntry()) { 1165 FunctionProfile.addHeadSamples(Count); 1166 // Look up for the caller's function profile 1167 const auto *InlinerDesc = Binary->getInlinerDescForProbe(Probe); 1168 ContextTrieNode *CallerNode = ContextNode->getParentContext(); 1169 if (InlinerDesc != nullptr && CallerNode != &getRootContext()) { 1170 // Since the context id will be compressed, we have to use callee's 1171 // context id to infer caller's context id to ensure they share the 1172 // same context prefix. 1173 uint64_t CallerIndex = ContextNode->getCallSiteLoc().LineOffset; 1174 uint64_t CallerDiscriminator = ContextNode->getCallSiteLoc().Discriminator; 1175 assert(CallerIndex && 1176 "Inferred caller's location index shouldn't be zero!"); 1177 assert(!CallerDiscriminator && 1178 "Callsite probe should not have a discriminator!"); 1179 FunctionSamples &CallerProfile = 1180 *getOrCreateFunctionSamples(CallerNode); 1181 CallerProfile.setFunctionHash(InlinerDesc->FuncHash); 1182 CallerProfile.addBodySamples(CallerIndex, CallerDiscriminator, Count); 1183 CallerProfile.addTotalSamples(Count); 1184 CallerProfile.addCalledTargetSamples(CallerIndex, CallerDiscriminator, 1185 ContextNode->getFuncName(), Count); 1186 } 1187 } 1188 } 1189 1190 // Assign zero count for remaining probes without sample hits to 1191 // differentiate from probes optimized away, of which the counts are unknown 1192 // and will be inferred by the compiler. 1193 for (auto &I : FrameSamples) { 1194 for (auto *FunctionProfile : I.second) { 1195 for (auto *Probe : I.first->getProbes()) { 1196 FunctionProfile->addBodySamples(Probe->getIndex(), 1197 Probe->getDiscriminator(), 0); 1198 } 1199 } 1200 } 1201 } 1202 1203 void CSProfileGenerator::populateBoundarySamplesWithProbes( 1204 const BranchSample &BranchCounter, const AddrBasedCtxKey *CtxKey) { 1205 for (const auto &BI : BranchCounter) { 1206 uint64_t SourceAddress = BI.first.first; 1207 uint64_t TargetAddress = BI.first.second; 1208 uint64_t Count = BI.second; 1209 const MCDecodedPseudoProbe *CallProbe = 1210 Binary->getCallProbeForAddr(SourceAddress); 1211 if (CallProbe == nullptr) 1212 continue; 1213 FunctionSamples &FunctionProfile = 1214 getFunctionProfileForLeafProbe(CtxKey, CallProbe); 1215 FunctionProfile.addBodySamples(CallProbe->getIndex(), 0, Count); 1216 FunctionProfile.addTotalSamples(Count); 1217 StringRef CalleeName = getCalleeNameForAddress(TargetAddress); 1218 if (CalleeName.size() == 0) 1219 continue; 1220 FunctionProfile.addCalledTargetSamples(CallProbe->getIndex(), 1221 CallProbe->getDiscriminator(), 1222 CalleeName, Count); 1223 } 1224 } 1225 1226 ContextTrieNode *CSProfileGenerator::getContextNodeForLeafProbe( 1227 const AddrBasedCtxKey *CtxKey, const MCDecodedPseudoProbe *LeafProbe) { 1228 1229 const SmallVectorImpl<uint64_t> *PContext = &CtxKey->Context; 1230 SmallVector<uint64_t, 16> NewContext; 1231 1232 if (InferMissingFrames) { 1233 SmallVector<uint64_t, 16> Context = CtxKey->Context; 1234 // Append leaf frame for a complete inference. 1235 Context.push_back(LeafProbe->getAddress()); 1236 inferMissingFrames(Context, NewContext); 1237 // Pop out the leaf probe that was pushed in above. 1238 NewContext.pop_back(); 1239 PContext = &NewContext; 1240 } 1241 1242 SampleContextFrameVector ContextStack; 1243 extractPrefixContextStack(ContextStack, *PContext, Binary); 1244 1245 // Explicitly copy the context for appending the leaf context 1246 SampleContextFrameVector NewContextStack(ContextStack.begin(), 1247 ContextStack.end()); 1248 Binary->getInlineContextForProbe(LeafProbe, NewContextStack, true); 1249 // For leaf inlined context with the top frame, we should strip off the top 1250 // frame's probe id, like: 1251 // Inlined stack: [foo:1, bar:2], the ContextId will be "foo:1 @ bar" 1252 auto LeafFrame = NewContextStack.back(); 1253 LeafFrame.Location = LineLocation(0, 0); 1254 NewContextStack.pop_back(); 1255 // Compress the context string except for the leaf frame 1256 CSProfileGenerator::compressRecursionContext(NewContextStack); 1257 CSProfileGenerator::trimContext(NewContextStack); 1258 NewContextStack.push_back(LeafFrame); 1259 1260 const auto *FuncDesc = Binary->getFuncDescForGUID(LeafProbe->getGuid()); 1261 bool WasLeafInlined = LeafProbe->getInlineTreeNode()->hasInlineSite(); 1262 ContextTrieNode *ContextNode = 1263 getOrCreateContextNode(NewContextStack, WasLeafInlined); 1264 ContextNode->getFunctionSamples()->setFunctionHash(FuncDesc->FuncHash); 1265 return ContextNode; 1266 } 1267 1268 FunctionSamples &CSProfileGenerator::getFunctionProfileForLeafProbe( 1269 const AddrBasedCtxKey *CtxKey, const MCDecodedPseudoProbe *LeafProbe) { 1270 return *getContextNodeForLeafProbe(CtxKey, LeafProbe)->getFunctionSamples(); 1271 } 1272 1273 } // end namespace sampleprof 1274 } // end namespace llvm 1275