1 //===-- ProfileGenerator.cpp - Profile Generator ---------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 #include "ProfileGenerator.h" 9 #include "ErrorHandling.h" 10 #include "PerfReader.h" 11 #include "ProfiledBinary.h" 12 #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h" 13 #include "llvm/ProfileData/ProfileCommon.h" 14 #include <algorithm> 15 #include <float.h> 16 #include <unordered_set> 17 #include <utility> 18 19 cl::opt<std::string> OutputFilename("output", cl::value_desc("output"), 20 cl::Required, 21 cl::desc("Output profile file")); 22 static cl::alias OutputA("o", cl::desc("Alias for --output"), 23 cl::aliasopt(OutputFilename)); 24 25 static cl::opt<SampleProfileFormat> OutputFormat( 26 "format", cl::desc("Format of output profile"), cl::init(SPF_Ext_Binary), 27 cl::values( 28 clEnumValN(SPF_Binary, "binary", "Binary encoding (default)"), 29 clEnumValN(SPF_Compact_Binary, "compbinary", "Compact binary encoding"), 30 clEnumValN(SPF_Ext_Binary, "extbinary", "Extensible binary encoding"), 31 clEnumValN(SPF_Text, "text", "Text encoding"), 32 clEnumValN(SPF_GCC, "gcc", 33 "GCC encoding (only meaningful for -sample)"))); 34 35 cl::opt<bool> UseMD5( 36 "use-md5", cl::init(false), cl::Hidden, 37 cl::desc("Use md5 to represent function names in the output profile (only " 38 "meaningful for -extbinary)")); 39 40 static cl::opt<bool> PopulateProfileSymbolList( 41 "populate-profile-symbol-list", cl::init(false), cl::Hidden, 42 cl::desc("Populate profile symbol list (only meaningful for -extbinary)")); 43 44 static cl::opt<bool> FillZeroForAllFuncs( 45 "fill-zero-for-all-funcs", cl::init(false), cl::Hidden, 46 cl::desc("Attribute all functions' range with zero count " 47 "even it's not hit by any samples.")); 48 49 static cl::opt<int32_t, true> RecursionCompression( 50 "compress-recursion", 51 cl::desc("Compressing recursion by deduplicating adjacent frame " 52 "sequences up to the specified size. -1 means no size limit."), 53 cl::Hidden, 54 cl::location(llvm::sampleprof::CSProfileGenerator::MaxCompressionSize)); 55 56 static cl::opt<bool> 57 TrimColdProfile("trim-cold-profile", 58 cl::desc("If the total count of the profile is smaller " 59 "than threshold, it will be trimmed.")); 60 61 static cl::opt<bool> CSProfMergeColdContext( 62 "csprof-merge-cold-context", cl::init(true), 63 cl::desc("If the total count of context profile is smaller than " 64 "the threshold, it will be merged into context-less base " 65 "profile.")); 66 67 static cl::opt<uint32_t> CSProfMaxColdContextDepth( 68 "csprof-max-cold-context-depth", cl::init(1), 69 cl::desc("Keep the last K contexts while merging cold profile. 1 means the " 70 "context-less base profile")); 71 72 static cl::opt<int, true> CSProfMaxContextDepth( 73 "csprof-max-context-depth", 74 cl::desc("Keep the last K contexts while merging profile. -1 means no " 75 "depth limit."), 76 cl::location(llvm::sampleprof::CSProfileGenerator::MaxContextDepth)); 77 78 static cl::opt<double> HotFunctionDensityThreshold( 79 "hot-function-density-threshold", llvm::cl::init(1000), 80 llvm::cl::desc( 81 "specify density threshold for hot functions (default: 1000)"), 82 llvm::cl::Optional); 83 static cl::opt<bool> ShowDensity("show-density", llvm::cl::init(false), 84 llvm::cl::desc("show profile density details"), 85 llvm::cl::Optional); 86 87 static cl::opt<bool> UpdateTotalSamples( 88 "update-total-samples", llvm::cl::init(false), 89 llvm::cl::desc( 90 "Update total samples by accumulating all its body samples."), 91 llvm::cl::Optional); 92 93 extern cl::opt<int> ProfileSummaryCutoffHot; 94 extern cl::opt<bool> UseContextLessSummary; 95 96 static cl::opt<bool> GenCSNestedProfile( 97 "gen-cs-nested-profile", cl::Hidden, cl::init(true), 98 cl::desc("Generate nested function profiles for CSSPGO")); 99 100 using namespace llvm; 101 using namespace sampleprof; 102 103 namespace llvm { 104 namespace sampleprof { 105 106 // Initialize the MaxCompressionSize to -1 which means no size limit 107 int32_t CSProfileGenerator::MaxCompressionSize = -1; 108 109 int CSProfileGenerator::MaxContextDepth = -1; 110 111 bool ProfileGeneratorBase::UseFSDiscriminator = false; 112 113 std::unique_ptr<ProfileGeneratorBase> 114 ProfileGeneratorBase::create(ProfiledBinary *Binary, 115 const ContextSampleCounterMap *SampleCounters, 116 bool ProfileIsCS) { 117 std::unique_ptr<ProfileGeneratorBase> Generator; 118 if (ProfileIsCS) { 119 if (Binary->useFSDiscriminator()) 120 exitWithError("FS discriminator is not supported in CS profile."); 121 Generator.reset(new CSProfileGenerator(Binary, SampleCounters)); 122 } else { 123 Generator.reset(new ProfileGenerator(Binary, SampleCounters)); 124 } 125 ProfileGeneratorBase::UseFSDiscriminator = Binary->useFSDiscriminator(); 126 FunctionSamples::ProfileIsFS = Binary->useFSDiscriminator(); 127 128 return Generator; 129 } 130 131 std::unique_ptr<ProfileGeneratorBase> 132 ProfileGeneratorBase::create(ProfiledBinary *Binary, SampleProfileMap &Profiles, 133 bool ProfileIsCS) { 134 std::unique_ptr<ProfileGeneratorBase> Generator; 135 if (ProfileIsCS) { 136 if (Binary->useFSDiscriminator()) 137 exitWithError("FS discriminator is not supported in CS profile."); 138 Generator.reset(new CSProfileGenerator(Binary, Profiles)); 139 } else { 140 Generator.reset(new ProfileGenerator(Binary, std::move(Profiles))); 141 } 142 ProfileGeneratorBase::UseFSDiscriminator = Binary->useFSDiscriminator(); 143 FunctionSamples::ProfileIsFS = Binary->useFSDiscriminator(); 144 145 return Generator; 146 } 147 148 void ProfileGeneratorBase::write(std::unique_ptr<SampleProfileWriter> Writer, 149 SampleProfileMap &ProfileMap) { 150 // Populate profile symbol list if extended binary format is used. 151 ProfileSymbolList SymbolList; 152 153 if (PopulateProfileSymbolList && OutputFormat == SPF_Ext_Binary) { 154 Binary->populateSymbolListFromDWARF(SymbolList); 155 Writer->setProfileSymbolList(&SymbolList); 156 } 157 158 if (std::error_code EC = Writer->write(ProfileMap)) 159 exitWithError(std::move(EC)); 160 } 161 162 void ProfileGeneratorBase::write() { 163 auto WriterOrErr = SampleProfileWriter::create(OutputFilename, OutputFormat); 164 if (std::error_code EC = WriterOrErr.getError()) 165 exitWithError(EC, OutputFilename); 166 167 if (UseMD5) { 168 if (OutputFormat != SPF_Ext_Binary) 169 WithColor::warning() << "-use-md5 is ignored. Specify " 170 "--format=extbinary to enable it\n"; 171 else 172 WriterOrErr.get()->setUseMD5(); 173 } 174 175 write(std::move(WriterOrErr.get()), ProfileMap); 176 } 177 178 void ProfileGeneratorBase::showDensitySuggestion(double Density) { 179 if (Density == 0.0) 180 WithColor::warning() << "The --profile-summary-cutoff-hot option may be " 181 "set too low. Please check your command.\n"; 182 else if (Density < HotFunctionDensityThreshold) 183 WithColor::warning() 184 << "AutoFDO is estimated to optimize better with " 185 << format("%.1f", HotFunctionDensityThreshold / Density) 186 << "x more samples. Please consider increasing sampling rate or " 187 "profiling for longer duration to get more samples.\n"; 188 189 if (ShowDensity) 190 outs() << "Minimum profile density for hot functions with top " 191 << format("%.2f", 192 static_cast<double>(ProfileSummaryCutoffHot.getValue()) / 193 10000) 194 << "% total samples: " << format("%.1f", Density) << "\n"; 195 } 196 197 double ProfileGeneratorBase::calculateDensity(const SampleProfileMap &Profiles, 198 uint64_t HotCntThreshold) { 199 double Density = DBL_MAX; 200 std::vector<const FunctionSamples *> HotFuncs; 201 for (auto &I : Profiles) { 202 auto &FuncSamples = I.second; 203 if (FuncSamples.getTotalSamples() < HotCntThreshold) 204 continue; 205 HotFuncs.emplace_back(&FuncSamples); 206 } 207 208 for (auto *FuncSamples : HotFuncs) { 209 auto *Func = Binary->getBinaryFunction(FuncSamples->getName()); 210 if (!Func) 211 continue; 212 uint64_t FuncSize = Func->getFuncSize(); 213 if (FuncSize == 0) 214 continue; 215 Density = 216 std::min(Density, static_cast<double>(FuncSamples->getTotalSamples()) / 217 FuncSize); 218 } 219 220 return Density == DBL_MAX ? 0.0 : Density; 221 } 222 223 void ProfileGeneratorBase::findDisjointRanges(RangeSample &DisjointRanges, 224 const RangeSample &Ranges) { 225 226 /* 227 Regions may overlap with each other. Using the boundary info, find all 228 disjoint ranges and their sample count. BoundaryPoint contains the count 229 multiple samples begin/end at this points. 230 231 |<--100-->| Sample1 232 |<------200------>| Sample2 233 A B C 234 235 In the example above, 236 Sample1 begins at A, ends at B, its value is 100. 237 Sample2 beings at A, ends at C, its value is 200. 238 For A, BeginCount is the sum of sample begins at A, which is 300 and no 239 samples ends at A, so EndCount is 0. 240 Then boundary points A, B, and C with begin/end counts are: 241 A: (300, 0) 242 B: (0, 100) 243 C: (0, 200) 244 */ 245 struct BoundaryPoint { 246 // Sum of sample counts beginning at this point 247 uint64_t BeginCount = UINT64_MAX; 248 // Sum of sample counts ending at this point 249 uint64_t EndCount = UINT64_MAX; 250 // Is the begin point of a zero range. 251 bool IsZeroRangeBegin = false; 252 // Is the end point of a zero range. 253 bool IsZeroRangeEnd = false; 254 255 void addBeginCount(uint64_t Count) { 256 if (BeginCount == UINT64_MAX) 257 BeginCount = 0; 258 BeginCount += Count; 259 } 260 261 void addEndCount(uint64_t Count) { 262 if (EndCount == UINT64_MAX) 263 EndCount = 0; 264 EndCount += Count; 265 } 266 }; 267 268 /* 269 For the above example. With boundary points, follwing logic finds two 270 disjoint region of 271 272 [A,B]: 300 273 [B+1,C]: 200 274 275 If there is a boundary point that both begin and end, the point itself 276 becomes a separate disjoint region. For example, if we have original 277 ranges of 278 279 |<--- 100 --->| 280 |<--- 200 --->| 281 A B C 282 283 there are three boundary points with their begin/end counts of 284 285 A: (100, 0) 286 B: (200, 100) 287 C: (0, 200) 288 289 the disjoint ranges would be 290 291 [A, B-1]: 100 292 [B, B]: 300 293 [B+1, C]: 200. 294 295 Example for zero value range: 296 297 |<--- 100 --->| 298 |<--- 200 --->| 299 |<--------------- 0 ----------------->| 300 A B C D E F 301 302 [A, B-1] : 0 303 [B, C] : 100 304 [C+1, D-1]: 0 305 [D, E] : 200 306 [E+1, F] : 0 307 */ 308 std::map<uint64_t, BoundaryPoint> Boundaries; 309 310 for (const auto &Item : Ranges) { 311 assert(Item.first.first <= Item.first.second && 312 "Invalid instruction range"); 313 auto &BeginPoint = Boundaries[Item.first.first]; 314 auto &EndPoint = Boundaries[Item.first.second]; 315 uint64_t Count = Item.second; 316 317 BeginPoint.addBeginCount(Count); 318 EndPoint.addEndCount(Count); 319 if (Count == 0) { 320 BeginPoint.IsZeroRangeBegin = true; 321 EndPoint.IsZeroRangeEnd = true; 322 } 323 } 324 325 // Use UINT64_MAX to indicate there is no existing range between BeginAddress 326 // and the next valid address 327 uint64_t BeginAddress = UINT64_MAX; 328 int ZeroRangeDepth = 0; 329 uint64_t Count = 0; 330 for (const auto &Item : Boundaries) { 331 uint64_t Address = Item.first; 332 const BoundaryPoint &Point = Item.second; 333 if (Point.BeginCount != UINT64_MAX) { 334 if (BeginAddress != UINT64_MAX) 335 DisjointRanges[{BeginAddress, Address - 1}] = Count; 336 Count += Point.BeginCount; 337 BeginAddress = Address; 338 ZeroRangeDepth += Point.IsZeroRangeBegin; 339 } 340 if (Point.EndCount != UINT64_MAX) { 341 assert((BeginAddress != UINT64_MAX) && 342 "First boundary point cannot be 'end' point"); 343 DisjointRanges[{BeginAddress, Address}] = Count; 344 assert(Count >= Point.EndCount && "Mismatched live ranges"); 345 Count -= Point.EndCount; 346 BeginAddress = Address + 1; 347 ZeroRangeDepth -= Point.IsZeroRangeEnd; 348 // If the remaining count is zero and it's no longer in a zero range, this 349 // means we consume all the ranges before, thus mark BeginAddress as 350 // UINT64_MAX. e.g. supposing we have two non-overlapping ranges: 351 // [<---- 10 ---->] 352 // [<---- 20 ---->] 353 // A B C D 354 // The BeginAddress(B+1) will reset to invalid(UINT64_MAX), so we won't 355 // have the [B+1, C-1] zero range. 356 if (Count == 0 && ZeroRangeDepth == 0) 357 BeginAddress = UINT64_MAX; 358 } 359 } 360 } 361 362 void ProfileGeneratorBase::updateBodySamplesforFunctionProfile( 363 FunctionSamples &FunctionProfile, const SampleContextFrame &LeafLoc, 364 uint64_t Count) { 365 // Use the maximum count of samples with same line location 366 uint32_t Discriminator = getBaseDiscriminator(LeafLoc.Location.Discriminator); 367 368 // Use duplication factor to compensated for loop unroll/vectorization. 369 // Note that this is only needed when we're taking MAX of the counts at 370 // the location instead of SUM. 371 Count *= getDuplicationFactor(LeafLoc.Location.Discriminator); 372 373 ErrorOr<uint64_t> R = 374 FunctionProfile.findSamplesAt(LeafLoc.Location.LineOffset, Discriminator); 375 376 uint64_t PreviousCount = R ? R.get() : 0; 377 if (PreviousCount <= Count) { 378 FunctionProfile.addBodySamples(LeafLoc.Location.LineOffset, Discriminator, 379 Count - PreviousCount); 380 } 381 } 382 383 void ProfileGeneratorBase::updateTotalSamples() { 384 for (auto &Item : ProfileMap) { 385 FunctionSamples &FunctionProfile = Item.second; 386 FunctionProfile.updateTotalSamples(); 387 } 388 } 389 390 void ProfileGeneratorBase::updateCallsiteSamples() { 391 for (auto &Item : ProfileMap) { 392 FunctionSamples &FunctionProfile = Item.second; 393 FunctionProfile.updateCallsiteSamples(); 394 } 395 } 396 397 void ProfileGeneratorBase::updateFunctionSamples() { 398 updateCallsiteSamples(); 399 400 if (UpdateTotalSamples) 401 updateTotalSamples(); 402 } 403 404 void ProfileGeneratorBase::collectProfiledFunctions() { 405 std::unordered_set<const BinaryFunction *> ProfiledFunctions; 406 if (collectFunctionsFromRawProfile(ProfiledFunctions)) 407 Binary->setProfiledFunctions(ProfiledFunctions); 408 else if (collectFunctionsFromLLVMProfile(ProfiledFunctions)) 409 Binary->setProfiledFunctions(ProfiledFunctions); 410 else 411 llvm_unreachable("Unsupported input profile"); 412 } 413 414 bool ProfileGeneratorBase::collectFunctionsFromRawProfile( 415 std::unordered_set<const BinaryFunction *> &ProfiledFunctions) { 416 if (!SampleCounters) 417 return false; 418 // Go through all the stacks, ranges and branches in sample counters, use 419 // the start of the range to look up the function it belongs and record the 420 // function. 421 for (const auto &CI : *SampleCounters) { 422 if (const auto *CtxKey = dyn_cast<AddrBasedCtxKey>(CI.first.getPtr())) { 423 for (auto StackAddr : CtxKey->Context) { 424 if (FuncRange *FRange = Binary->findFuncRange(StackAddr)) 425 ProfiledFunctions.insert(FRange->Func); 426 } 427 } 428 429 for (auto Item : CI.second.RangeCounter) { 430 uint64_t StartAddress = Item.first.first; 431 if (FuncRange *FRange = Binary->findFuncRange(StartAddress)) 432 ProfiledFunctions.insert(FRange->Func); 433 } 434 435 for (auto Item : CI.second.BranchCounter) { 436 uint64_t SourceAddress = Item.first.first; 437 uint64_t TargetAddress = Item.first.first; 438 if (FuncRange *FRange = Binary->findFuncRange(SourceAddress)) 439 ProfiledFunctions.insert(FRange->Func); 440 if (FuncRange *FRange = Binary->findFuncRange(TargetAddress)) 441 ProfiledFunctions.insert(FRange->Func); 442 } 443 } 444 return true; 445 } 446 447 bool ProfileGenerator::collectFunctionsFromLLVMProfile( 448 std::unordered_set<const BinaryFunction *> &ProfiledFunctions) { 449 for (const auto &FS : ProfileMap) { 450 if (auto *Func = Binary->getBinaryFunction(FS.first.getName())) 451 ProfiledFunctions.insert(Func); 452 } 453 return true; 454 } 455 456 bool CSProfileGenerator::collectFunctionsFromLLVMProfile( 457 std::unordered_set<const BinaryFunction *> &ProfiledFunctions) { 458 for (auto *Node : ContextTracker) { 459 if (!Node->getFuncName().empty()) 460 if (auto *Func = Binary->getBinaryFunction(Node->getFuncName())) 461 ProfiledFunctions.insert(Func); 462 } 463 return true; 464 } 465 466 FunctionSamples & 467 ProfileGenerator::getTopLevelFunctionProfile(StringRef FuncName) { 468 SampleContext Context(FuncName); 469 auto Ret = ProfileMap.emplace(Context, FunctionSamples()); 470 if (Ret.second) { 471 FunctionSamples &FProfile = Ret.first->second; 472 FProfile.setContext(Context); 473 } 474 return Ret.first->second; 475 } 476 477 void ProfileGenerator::generateProfile() { 478 collectProfiledFunctions(); 479 480 if (Binary->usePseudoProbes()) 481 Binary->decodePseudoProbe(); 482 483 if (SampleCounters) { 484 if (Binary->usePseudoProbes()) { 485 generateProbeBasedProfile(); 486 } else { 487 generateLineNumBasedProfile(); 488 } 489 } 490 491 postProcessProfiles(); 492 } 493 494 void ProfileGenerator::postProcessProfiles() { 495 computeSummaryAndThreshold(ProfileMap); 496 trimColdProfiles(ProfileMap, ColdCountThreshold); 497 calculateAndShowDensity(ProfileMap); 498 } 499 500 void ProfileGenerator::trimColdProfiles(const SampleProfileMap &Profiles, 501 uint64_t ColdCntThreshold) { 502 if (!TrimColdProfile) 503 return; 504 505 // Move cold profiles into a tmp container. 506 std::vector<SampleContext> ColdProfiles; 507 for (const auto &I : ProfileMap) { 508 if (I.second.getTotalSamples() < ColdCntThreshold) 509 ColdProfiles.emplace_back(I.first); 510 } 511 512 // Remove the cold profile from ProfileMap. 513 for (const auto &I : ColdProfiles) 514 ProfileMap.erase(I); 515 } 516 517 void ProfileGenerator::generateLineNumBasedProfile() { 518 assert(SampleCounters->size() == 1 && 519 "Must have one entry for profile generation."); 520 const SampleCounter &SC = SampleCounters->begin()->second; 521 // Fill in function body samples 522 populateBodySamplesForAllFunctions(SC.RangeCounter); 523 // Fill in boundary sample counts as well as call site samples for calls 524 populateBoundarySamplesForAllFunctions(SC.BranchCounter); 525 526 updateFunctionSamples(); 527 } 528 529 void ProfileGenerator::generateProbeBasedProfile() { 530 assert(SampleCounters->size() == 1 && 531 "Must have one entry for profile generation."); 532 // Enable pseudo probe functionalities in SampleProf 533 FunctionSamples::ProfileIsProbeBased = true; 534 const SampleCounter &SC = SampleCounters->begin()->second; 535 // Fill in function body samples 536 populateBodySamplesWithProbesForAllFunctions(SC.RangeCounter); 537 // Fill in boundary sample counts as well as call site samples for calls 538 populateBoundarySamplesWithProbesForAllFunctions(SC.BranchCounter); 539 540 updateFunctionSamples(); 541 } 542 543 void ProfileGenerator::populateBodySamplesWithProbesForAllFunctions( 544 const RangeSample &RangeCounter) { 545 ProbeCounterMap ProbeCounter; 546 // preprocessRangeCounter returns disjoint ranges, so no longer to redo it 547 // inside extractProbesFromRange. 548 extractProbesFromRange(preprocessRangeCounter(RangeCounter), ProbeCounter, 549 false); 550 551 for (const auto &PI : ProbeCounter) { 552 const MCDecodedPseudoProbe *Probe = PI.first; 553 uint64_t Count = PI.second; 554 SampleContextFrameVector FrameVec; 555 Binary->getInlineContextForProbe(Probe, FrameVec, true); 556 FunctionSamples &FunctionProfile = 557 getLeafProfileAndAddTotalSamples(FrameVec, Count); 558 FunctionProfile.addBodySamplesForProbe(Probe->getIndex(), Count); 559 if (Probe->isEntry()) 560 FunctionProfile.addHeadSamples(Count); 561 } 562 } 563 564 void ProfileGenerator::populateBoundarySamplesWithProbesForAllFunctions( 565 const BranchSample &BranchCounters) { 566 for (const auto &Entry : BranchCounters) { 567 uint64_t SourceAddress = Entry.first.first; 568 uint64_t TargetAddress = Entry.first.second; 569 uint64_t Count = Entry.second; 570 assert(Count != 0 && "Unexpected zero weight branch"); 571 572 StringRef CalleeName = getCalleeNameForAddress(TargetAddress); 573 if (CalleeName.size() == 0) 574 continue; 575 576 const MCDecodedPseudoProbe *CallProbe = 577 Binary->getCallProbeForAddr(SourceAddress); 578 if (CallProbe == nullptr) 579 continue; 580 581 // Record called target sample and its count. 582 SampleContextFrameVector FrameVec; 583 Binary->getInlineContextForProbe(CallProbe, FrameVec, true); 584 585 if (!FrameVec.empty()) { 586 FunctionSamples &FunctionProfile = 587 getLeafProfileAndAddTotalSamples(FrameVec, 0); 588 FunctionProfile.addCalledTargetSamples( 589 FrameVec.back().Location.LineOffset, 0, CalleeName, Count); 590 } 591 } 592 } 593 594 FunctionSamples &ProfileGenerator::getLeafProfileAndAddTotalSamples( 595 const SampleContextFrameVector &FrameVec, uint64_t Count) { 596 // Get top level profile 597 FunctionSamples *FunctionProfile = 598 &getTopLevelFunctionProfile(FrameVec[0].FuncName); 599 FunctionProfile->addTotalSamples(Count); 600 if (Binary->usePseudoProbes()) { 601 const auto *FuncDesc = Binary->getFuncDescForGUID( 602 Function::getGUID(FunctionProfile->getName())); 603 FunctionProfile->setFunctionHash(FuncDesc->FuncHash); 604 } 605 606 for (size_t I = 1; I < FrameVec.size(); I++) { 607 LineLocation Callsite( 608 FrameVec[I - 1].Location.LineOffset, 609 getBaseDiscriminator(FrameVec[I - 1].Location.Discriminator)); 610 FunctionSamplesMap &SamplesMap = 611 FunctionProfile->functionSamplesAt(Callsite); 612 auto Ret = 613 SamplesMap.emplace(FrameVec[I].FuncName.str(), FunctionSamples()); 614 if (Ret.second) { 615 SampleContext Context(FrameVec[I].FuncName); 616 Ret.first->second.setContext(Context); 617 } 618 FunctionProfile = &Ret.first->second; 619 FunctionProfile->addTotalSamples(Count); 620 if (Binary->usePseudoProbes()) { 621 const auto *FuncDesc = Binary->getFuncDescForGUID( 622 Function::getGUID(FunctionProfile->getName())); 623 FunctionProfile->setFunctionHash(FuncDesc->FuncHash); 624 } 625 } 626 627 return *FunctionProfile; 628 } 629 630 RangeSample 631 ProfileGenerator::preprocessRangeCounter(const RangeSample &RangeCounter) { 632 RangeSample Ranges(RangeCounter.begin(), RangeCounter.end()); 633 if (FillZeroForAllFuncs) { 634 for (auto &FuncI : Binary->getAllBinaryFunctions()) { 635 for (auto &R : FuncI.second.Ranges) { 636 Ranges[{R.first, R.second - 1}] += 0; 637 } 638 } 639 } else { 640 // For each range, we search for all ranges of the function it belongs to 641 // and initialize it with zero count, so it remains zero if doesn't hit any 642 // samples. This is to be consistent with compiler that interpret zero count 643 // as unexecuted(cold). 644 for (const auto &I : RangeCounter) { 645 uint64_t StartAddress = I.first.first; 646 for (const auto &Range : Binary->getRanges(StartAddress)) 647 Ranges[{Range.first, Range.second - 1}] += 0; 648 } 649 } 650 RangeSample DisjointRanges; 651 findDisjointRanges(DisjointRanges, Ranges); 652 return DisjointRanges; 653 } 654 655 void ProfileGenerator::populateBodySamplesForAllFunctions( 656 const RangeSample &RangeCounter) { 657 for (const auto &Range : preprocessRangeCounter(RangeCounter)) { 658 uint64_t RangeBegin = Range.first.first; 659 uint64_t RangeEnd = Range.first.second; 660 uint64_t Count = Range.second; 661 662 InstructionPointer IP(Binary, RangeBegin, true); 663 // Disjoint ranges may have range in the middle of two instr, 664 // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range 665 // can be Addr1+1 to Addr2-1. We should ignore such range. 666 if (IP.Address > RangeEnd) 667 continue; 668 669 do { 670 const SampleContextFrameVector FrameVec = 671 Binary->getFrameLocationStack(IP.Address); 672 if (!FrameVec.empty()) { 673 // FIXME: As accumulating total count per instruction caused some 674 // regression, we changed to accumulate total count per byte as a 675 // workaround. Tuning hotness threshold on the compiler side might be 676 // necessary in the future. 677 FunctionSamples &FunctionProfile = getLeafProfileAndAddTotalSamples( 678 FrameVec, Count * Binary->getInstSize(IP.Address)); 679 updateBodySamplesforFunctionProfile(FunctionProfile, FrameVec.back(), 680 Count); 681 } 682 } while (IP.advance() && IP.Address <= RangeEnd); 683 } 684 } 685 686 StringRef 687 ProfileGeneratorBase::getCalleeNameForAddress(uint64_t TargetAddress) { 688 // Get the function range by branch target if it's a call branch. 689 auto *FRange = Binary->findFuncRangeForStartAddr(TargetAddress); 690 691 // We won't accumulate sample count for a range whose start is not the real 692 // function entry such as outlined function or inner labels. 693 if (!FRange || !FRange->IsFuncEntry) 694 return StringRef(); 695 696 return FunctionSamples::getCanonicalFnName(FRange->getFuncName()); 697 } 698 699 void ProfileGenerator::populateBoundarySamplesForAllFunctions( 700 const BranchSample &BranchCounters) { 701 for (const auto &Entry : BranchCounters) { 702 uint64_t SourceAddress = Entry.first.first; 703 uint64_t TargetAddress = Entry.first.second; 704 uint64_t Count = Entry.second; 705 assert(Count != 0 && "Unexpected zero weight branch"); 706 707 StringRef CalleeName = getCalleeNameForAddress(TargetAddress); 708 if (CalleeName.size() == 0) 709 continue; 710 // Record called target sample and its count. 711 const SampleContextFrameVector &FrameVec = 712 Binary->getCachedFrameLocationStack(SourceAddress); 713 if (!FrameVec.empty()) { 714 FunctionSamples &FunctionProfile = 715 getLeafProfileAndAddTotalSamples(FrameVec, 0); 716 FunctionProfile.addCalledTargetSamples( 717 FrameVec.back().Location.LineOffset, 718 getBaseDiscriminator(FrameVec.back().Location.Discriminator), 719 CalleeName, Count); 720 } 721 // Add head samples for callee. 722 FunctionSamples &CalleeProfile = getTopLevelFunctionProfile(CalleeName); 723 CalleeProfile.addHeadSamples(Count); 724 } 725 } 726 727 void ProfileGeneratorBase::calculateAndShowDensity( 728 const SampleProfileMap &Profiles) { 729 double Density = calculateDensity(Profiles, HotCountThreshold); 730 showDensitySuggestion(Density); 731 } 732 733 FunctionSamples * 734 CSProfileGenerator::getOrCreateFunctionSamples(ContextTrieNode *ContextNode, 735 bool WasLeafInlined) { 736 FunctionSamples *FProfile = ContextNode->getFunctionSamples(); 737 if (!FProfile) { 738 FSamplesList.emplace_back(); 739 FProfile = &FSamplesList.back(); 740 FProfile->setName(ContextNode->getFuncName()); 741 ContextNode->setFunctionSamples(FProfile); 742 } 743 // Update ContextWasInlined attribute for existing contexts. 744 // The current function can be called in two ways: 745 // - when processing a probe of the current frame 746 // - when processing the entry probe of an inlinee's frame, which 747 // is then used to update the callsite count of the current frame. 748 // The two can happen in any order, hence here we are making sure 749 // `ContextWasInlined` is always set as expected. 750 // TODO: Note that the former does not always happen if no probes of the 751 // current frame has samples, and if the latter happens, we could lose the 752 // attribute. This should be fixed. 753 if (WasLeafInlined) 754 FProfile->getContext().setAttribute(ContextWasInlined); 755 return FProfile; 756 } 757 758 ContextTrieNode * 759 CSProfileGenerator::getOrCreateContextNode(const SampleContextFrames Context, 760 bool WasLeafInlined) { 761 ContextTrieNode *ContextNode = 762 ContextTracker.getOrCreateContextPath(Context, true); 763 getOrCreateFunctionSamples(ContextNode, WasLeafInlined); 764 return ContextNode; 765 } 766 767 void CSProfileGenerator::generateProfile() { 768 FunctionSamples::ProfileIsCS = true; 769 770 collectProfiledFunctions(); 771 772 if (Binary->usePseudoProbes()) 773 Binary->decodePseudoProbe(); 774 775 if (SampleCounters) { 776 if (Binary->usePseudoProbes()) { 777 generateProbeBasedProfile(); 778 } else { 779 generateLineNumBasedProfile(); 780 } 781 } 782 783 if (Binary->getTrackFuncContextSize()) 784 computeSizeForProfiledFunctions(); 785 786 postProcessProfiles(); 787 } 788 789 void CSProfileGenerator::computeSizeForProfiledFunctions() { 790 for (auto *Func : Binary->getProfiledFunctions()) 791 Binary->computeInlinedContextSizeForFunc(Func); 792 793 // Flush the symbolizer to save memory. 794 Binary->flushSymbolizer(); 795 } 796 797 void CSProfileGenerator::updateFunctionSamples() { 798 for (auto *Node : ContextTracker) { 799 FunctionSamples *FSamples = Node->getFunctionSamples(); 800 if (FSamples) { 801 if (UpdateTotalSamples) 802 FSamples->updateTotalSamples(); 803 FSamples->updateCallsiteSamples(); 804 } 805 } 806 } 807 808 void CSProfileGenerator::generateLineNumBasedProfile() { 809 for (const auto &CI : *SampleCounters) { 810 const auto *CtxKey = cast<StringBasedCtxKey>(CI.first.getPtr()); 811 812 ContextTrieNode *ContextNode = &getRootContext(); 813 // Sample context will be empty if the jump is an external-to-internal call 814 // pattern, the head samples should be added for the internal function. 815 if (!CtxKey->Context.empty()) { 816 // Get or create function profile for the range 817 ContextNode = 818 getOrCreateContextNode(CtxKey->Context, CtxKey->WasLeafInlined); 819 // Fill in function body samples 820 populateBodySamplesForFunction(*ContextNode->getFunctionSamples(), 821 CI.second.RangeCounter); 822 } 823 // Fill in boundary sample counts as well as call site samples for calls 824 populateBoundarySamplesForFunction(ContextNode, CI.second.BranchCounter); 825 } 826 // Fill in call site value sample for inlined calls and also use context to 827 // infer missing samples. Since we don't have call count for inlined 828 // functions, we estimate it from inlinee's profile using the entry of the 829 // body sample. 830 populateInferredFunctionSamples(getRootContext()); 831 832 updateFunctionSamples(); 833 } 834 835 void CSProfileGenerator::populateBodySamplesForFunction( 836 FunctionSamples &FunctionProfile, const RangeSample &RangeCounter) { 837 // Compute disjoint ranges first, so we can use MAX 838 // for calculating count for each location. 839 RangeSample Ranges; 840 findDisjointRanges(Ranges, RangeCounter); 841 for (const auto &Range : Ranges) { 842 uint64_t RangeBegin = Range.first.first; 843 uint64_t RangeEnd = Range.first.second; 844 uint64_t Count = Range.second; 845 // Disjoint ranges have introduce zero-filled gap that 846 // doesn't belong to current context, filter them out. 847 if (Count == 0) 848 continue; 849 850 InstructionPointer IP(Binary, RangeBegin, true); 851 // Disjoint ranges may have range in the middle of two instr, 852 // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range 853 // can be Addr1+1 to Addr2-1. We should ignore such range. 854 if (IP.Address > RangeEnd) 855 continue; 856 857 do { 858 auto LeafLoc = Binary->getInlineLeafFrameLoc(IP.Address); 859 if (LeafLoc) { 860 // Recording body sample for this specific context 861 updateBodySamplesforFunctionProfile(FunctionProfile, *LeafLoc, Count); 862 FunctionProfile.addTotalSamples(Count); 863 } 864 } while (IP.advance() && IP.Address <= RangeEnd); 865 } 866 } 867 868 void CSProfileGenerator::populateBoundarySamplesForFunction( 869 ContextTrieNode *Node, const BranchSample &BranchCounters) { 870 871 for (const auto &Entry : BranchCounters) { 872 uint64_t SourceAddress = Entry.first.first; 873 uint64_t TargetAddress = Entry.first.second; 874 uint64_t Count = Entry.second; 875 assert(Count != 0 && "Unexpected zero weight branch"); 876 877 StringRef CalleeName = getCalleeNameForAddress(TargetAddress); 878 if (CalleeName.size() == 0) 879 continue; 880 881 ContextTrieNode *CallerNode = Node; 882 LineLocation CalleeCallSite(0, 0); 883 if (CallerNode != &getRootContext()) { 884 // Record called target sample and its count 885 auto LeafLoc = Binary->getInlineLeafFrameLoc(SourceAddress); 886 if (LeafLoc) { 887 CallerNode->getFunctionSamples()->addCalledTargetSamples( 888 LeafLoc->Location.LineOffset, 889 getBaseDiscriminator(LeafLoc->Location.Discriminator), CalleeName, 890 Count); 891 // Record head sample for called target(callee) 892 CalleeCallSite = LeafLoc->Location; 893 } 894 } 895 896 ContextTrieNode *CalleeNode = 897 CallerNode->getOrCreateChildContext(CalleeCallSite, CalleeName); 898 FunctionSamples *CalleeProfile = getOrCreateFunctionSamples(CalleeNode); 899 CalleeProfile->addHeadSamples(Count); 900 } 901 } 902 903 void CSProfileGenerator::populateInferredFunctionSamples( 904 ContextTrieNode &Node) { 905 // There is no call jmp sample between the inliner and inlinee, we need to use 906 // the inlinee's context to infer inliner's context, i.e. parent(inliner)'s 907 // sample depends on child(inlinee)'s sample, so traverse the tree in 908 // post-order. 909 for (auto &It : Node.getAllChildContext()) 910 populateInferredFunctionSamples(It.second); 911 912 FunctionSamples *CalleeProfile = Node.getFunctionSamples(); 913 if (!CalleeProfile) 914 return; 915 // If we already have head sample counts, we must have value profile 916 // for call sites added already. Skip to avoid double counting. 917 if (CalleeProfile->getHeadSamples()) 918 return; 919 ContextTrieNode *CallerNode = Node.getParentContext(); 920 // If we don't have context, nothing to do for caller's call site. 921 // This could happen for entry point function. 922 if (CallerNode == &getRootContext()) 923 return; 924 925 LineLocation CallerLeafFrameLoc = Node.getCallSiteLoc(); 926 FunctionSamples &CallerProfile = *getOrCreateFunctionSamples(CallerNode); 927 // Since we don't have call count for inlined functions, we 928 // estimate it from inlinee's profile using entry body sample. 929 uint64_t EstimatedCallCount = CalleeProfile->getHeadSamplesEstimate(); 930 // If we don't have samples with location, use 1 to indicate live. 931 if (!EstimatedCallCount && !CalleeProfile->getBodySamples().size()) 932 EstimatedCallCount = 1; 933 CallerProfile.addCalledTargetSamples(CallerLeafFrameLoc.LineOffset, 934 CallerLeafFrameLoc.Discriminator, 935 Node.getFuncName(), EstimatedCallCount); 936 CallerProfile.addBodySamples(CallerLeafFrameLoc.LineOffset, 937 CallerLeafFrameLoc.Discriminator, 938 EstimatedCallCount); 939 CallerProfile.addTotalSamples(EstimatedCallCount); 940 } 941 942 void CSProfileGenerator::convertToProfileMap( 943 ContextTrieNode &Node, SampleContextFrameVector &Context) { 944 FunctionSamples *FProfile = Node.getFunctionSamples(); 945 if (FProfile) { 946 Context.emplace_back(Node.getFuncName(), LineLocation(0, 0)); 947 // Save the new context for future references. 948 SampleContextFrames NewContext = *Contexts.insert(Context).first; 949 auto Ret = ProfileMap.emplace(NewContext, std::move(*FProfile)); 950 FunctionSamples &NewProfile = Ret.first->second; 951 NewProfile.getContext().setContext(NewContext); 952 Context.pop_back(); 953 } 954 955 for (auto &It : Node.getAllChildContext()) { 956 ContextTrieNode &ChildNode = It.second; 957 Context.emplace_back(Node.getFuncName(), ChildNode.getCallSiteLoc()); 958 convertToProfileMap(ChildNode, Context); 959 Context.pop_back(); 960 } 961 } 962 963 void CSProfileGenerator::convertToProfileMap() { 964 assert(ProfileMap.empty() && 965 "ProfileMap should be empty before converting from the trie"); 966 assert(IsProfileValidOnTrie && 967 "Do not convert the trie twice, it's already destroyed"); 968 969 SampleContextFrameVector Context; 970 for (auto &It : getRootContext().getAllChildContext()) 971 convertToProfileMap(It.second, Context); 972 973 IsProfileValidOnTrie = false; 974 } 975 976 void CSProfileGenerator::postProcessProfiles() { 977 // Compute hot/cold threshold based on profile. This will be used for cold 978 // context profile merging/trimming. 979 computeSummaryAndThreshold(); 980 981 // Run global pre-inliner to adjust/merge context profile based on estimated 982 // inline decisions. 983 if (EnableCSPreInliner) { 984 ContextTracker.populateFuncToCtxtMap(); 985 CSPreInliner(ContextTracker, *Binary, Summary.get()).run(); 986 // Turn off the profile merger by default unless it is explicitly enabled. 987 if (!CSProfMergeColdContext.getNumOccurrences()) 988 CSProfMergeColdContext = false; 989 } 990 991 convertToProfileMap(); 992 993 // Trim and merge cold context profile using cold threshold above. 994 if (TrimColdProfile || CSProfMergeColdContext) { 995 SampleContextTrimmer(ProfileMap) 996 .trimAndMergeColdContextProfiles( 997 HotCountThreshold, TrimColdProfile, CSProfMergeColdContext, 998 CSProfMaxColdContextDepth, EnableCSPreInliner); 999 } 1000 1001 // Merge function samples of CS profile to calculate profile density. 1002 sampleprof::SampleProfileMap ContextLessProfiles; 1003 for (const auto &I : ProfileMap) { 1004 ContextLessProfiles[I.second.getName()].merge(I.second); 1005 } 1006 1007 calculateAndShowDensity(ContextLessProfiles); 1008 if (GenCSNestedProfile) { 1009 CSProfileConverter CSConverter(ProfileMap); 1010 CSConverter.convertProfiles(); 1011 FunctionSamples::ProfileIsCS = false; 1012 } 1013 } 1014 1015 void ProfileGeneratorBase::computeSummaryAndThreshold( 1016 SampleProfileMap &Profiles) { 1017 SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs); 1018 Summary = Builder.computeSummaryForProfiles(Profiles); 1019 HotCountThreshold = ProfileSummaryBuilder::getHotCountThreshold( 1020 (Summary->getDetailedSummary())); 1021 ColdCountThreshold = ProfileSummaryBuilder::getColdCountThreshold( 1022 (Summary->getDetailedSummary())); 1023 } 1024 1025 void CSProfileGenerator::computeSummaryAndThreshold() { 1026 // Always merge and use context-less profile map to compute summary. 1027 SampleProfileMap ContextLessProfiles; 1028 ContextTracker.createContextLessProfileMap(ContextLessProfiles); 1029 1030 // Set the flag below to avoid merging the profile again in 1031 // computeSummaryAndThreshold 1032 FunctionSamples::ProfileIsCS = false; 1033 assert( 1034 (!UseContextLessSummary.getNumOccurrences() || UseContextLessSummary) && 1035 "Don't set --profile-summary-contextless to false for profile " 1036 "generation"); 1037 ProfileGeneratorBase::computeSummaryAndThreshold(ContextLessProfiles); 1038 // Recover the old value. 1039 FunctionSamples::ProfileIsCS = true; 1040 } 1041 1042 void ProfileGeneratorBase::extractProbesFromRange( 1043 const RangeSample &RangeCounter, ProbeCounterMap &ProbeCounter, 1044 bool FindDisjointRanges) { 1045 const RangeSample *PRanges = &RangeCounter; 1046 RangeSample Ranges; 1047 if (FindDisjointRanges) { 1048 findDisjointRanges(Ranges, RangeCounter); 1049 PRanges = &Ranges; 1050 } 1051 1052 for (const auto &Range : *PRanges) { 1053 uint64_t RangeBegin = Range.first.first; 1054 uint64_t RangeEnd = Range.first.second; 1055 uint64_t Count = Range.second; 1056 1057 InstructionPointer IP(Binary, RangeBegin, true); 1058 // Disjoint ranges may have range in the middle of two instr, 1059 // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range 1060 // can be Addr1+1 to Addr2-1. We should ignore such range. 1061 if (IP.Address > RangeEnd) 1062 continue; 1063 1064 do { 1065 const AddressProbesMap &Address2ProbesMap = 1066 Binary->getAddress2ProbesMap(); 1067 auto It = Address2ProbesMap.find(IP.Address); 1068 if (It != Address2ProbesMap.end()) { 1069 for (const auto &Probe : It->second) { 1070 ProbeCounter[&Probe] += Count; 1071 } 1072 } 1073 } while (IP.advance() && IP.Address <= RangeEnd); 1074 } 1075 } 1076 1077 static void extractPrefixContextStack(SampleContextFrameVector &ContextStack, 1078 const SmallVectorImpl<uint64_t> &AddrVec, 1079 ProfiledBinary *Binary) { 1080 SmallVector<const MCDecodedPseudoProbe *, 16> Probes; 1081 for (auto Address : reverse(AddrVec)) { 1082 const MCDecodedPseudoProbe *CallProbe = 1083 Binary->getCallProbeForAddr(Address); 1084 // These could be the cases when a probe is not found at a calliste. Cutting 1085 // off the context from here since the inliner will not know how to consume 1086 // a context with unknown callsites. 1087 // 1. for functions that are not sampled when 1088 // --decode-probe-for-profiled-functions-only is on. 1089 // 2. for a merged callsite. Callsite merging may cause the loss of original 1090 // probe IDs. 1091 // 3. for an external callsite. 1092 if (!CallProbe) 1093 break; 1094 Probes.push_back(CallProbe); 1095 } 1096 1097 std::reverse(Probes.begin(), Probes.end()); 1098 1099 // Extract context stack for reusing, leaf context stack will be added 1100 // compressed while looking up function profile. 1101 for (const auto *P : Probes) { 1102 Binary->getInlineContextForProbe(P, ContextStack, true); 1103 } 1104 } 1105 1106 void CSProfileGenerator::generateProbeBasedProfile() { 1107 // Enable pseudo probe functionalities in SampleProf 1108 FunctionSamples::ProfileIsProbeBased = true; 1109 for (const auto &CI : *SampleCounters) { 1110 const AddrBasedCtxKey *CtxKey = 1111 dyn_cast<AddrBasedCtxKey>(CI.first.getPtr()); 1112 SampleContextFrameVector ContextStack; 1113 extractPrefixContextStack(ContextStack, CtxKey->Context, Binary); 1114 // Fill in function body samples from probes, also infer caller's samples 1115 // from callee's probe 1116 populateBodySamplesWithProbes(CI.second.RangeCounter, ContextStack); 1117 // Fill in boundary samples for a call probe 1118 populateBoundarySamplesWithProbes(CI.second.BranchCounter, ContextStack); 1119 } 1120 } 1121 1122 void CSProfileGenerator::populateBodySamplesWithProbes( 1123 const RangeSample &RangeCounter, SampleContextFrames ContextStack) { 1124 ProbeCounterMap ProbeCounter; 1125 // Extract the top frame probes by looking up each address among the range in 1126 // the Address2ProbeMap 1127 extractProbesFromRange(RangeCounter, ProbeCounter); 1128 std::unordered_map<MCDecodedPseudoProbeInlineTree *, 1129 std::unordered_set<FunctionSamples *>> 1130 FrameSamples; 1131 for (const auto &PI : ProbeCounter) { 1132 const MCDecodedPseudoProbe *Probe = PI.first; 1133 uint64_t Count = PI.second; 1134 // Disjoint ranges have introduce zero-filled gap that 1135 // doesn't belong to current context, filter them out. 1136 if (!Probe->isBlock() || Count == 0) 1137 continue; 1138 1139 ContextTrieNode *ContextNode = 1140 getContextNodeForLeafProbe(ContextStack, Probe); 1141 FunctionSamples &FunctionProfile = *ContextNode->getFunctionSamples(); 1142 // Record the current frame and FunctionProfile whenever samples are 1143 // collected for non-danglie probes. This is for reporting all of the 1144 // zero count probes of the frame later. 1145 FrameSamples[Probe->getInlineTreeNode()].insert(&FunctionProfile); 1146 FunctionProfile.addBodySamplesForProbe(Probe->getIndex(), Count); 1147 FunctionProfile.addTotalSamples(Count); 1148 if (Probe->isEntry()) { 1149 FunctionProfile.addHeadSamples(Count); 1150 // Look up for the caller's function profile 1151 const auto *InlinerDesc = Binary->getInlinerDescForProbe(Probe); 1152 ContextTrieNode *CallerNode = ContextNode->getParentContext(); 1153 if (InlinerDesc != nullptr && CallerNode != &getRootContext()) { 1154 // Since the context id will be compressed, we have to use callee's 1155 // context id to infer caller's context id to ensure they share the 1156 // same context prefix. 1157 uint64_t CallerIndex = ContextNode->getCallSiteLoc().LineOffset; 1158 assert(CallerIndex && 1159 "Inferred caller's location index shouldn't be zero!"); 1160 FunctionSamples &CallerProfile = 1161 *getOrCreateFunctionSamples(CallerNode); 1162 CallerProfile.setFunctionHash(InlinerDesc->FuncHash); 1163 CallerProfile.addBodySamples(CallerIndex, 0, Count); 1164 CallerProfile.addTotalSamples(Count); 1165 CallerProfile.addCalledTargetSamples(CallerIndex, 0, 1166 ContextNode->getFuncName(), Count); 1167 } 1168 } 1169 } 1170 1171 // Assign zero count for remaining probes without sample hits to 1172 // differentiate from probes optimized away, of which the counts are unknown 1173 // and will be inferred by the compiler. 1174 for (auto &I : FrameSamples) { 1175 for (auto *FunctionProfile : I.second) { 1176 for (auto *Probe : I.first->getProbes()) { 1177 FunctionProfile->addBodySamplesForProbe(Probe->getIndex(), 0); 1178 } 1179 } 1180 } 1181 } 1182 1183 void CSProfileGenerator::populateBoundarySamplesWithProbes( 1184 const BranchSample &BranchCounter, SampleContextFrames ContextStack) { 1185 for (const auto &BI : BranchCounter) { 1186 uint64_t SourceAddress = BI.first.first; 1187 uint64_t TargetAddress = BI.first.second; 1188 uint64_t Count = BI.second; 1189 const MCDecodedPseudoProbe *CallProbe = 1190 Binary->getCallProbeForAddr(SourceAddress); 1191 if (CallProbe == nullptr) 1192 continue; 1193 FunctionSamples &FunctionProfile = 1194 getFunctionProfileForLeafProbe(ContextStack, CallProbe); 1195 FunctionProfile.addBodySamples(CallProbe->getIndex(), 0, Count); 1196 FunctionProfile.addTotalSamples(Count); 1197 StringRef CalleeName = getCalleeNameForAddress(TargetAddress); 1198 if (CalleeName.size() == 0) 1199 continue; 1200 FunctionProfile.addCalledTargetSamples(CallProbe->getIndex(), 0, CalleeName, 1201 Count); 1202 } 1203 } 1204 1205 ContextTrieNode *CSProfileGenerator::getContextNodeForLeafProbe( 1206 SampleContextFrames ContextStack, const MCDecodedPseudoProbe *LeafProbe) { 1207 1208 // Explicitly copy the context for appending the leaf context 1209 SampleContextFrameVector NewContextStack(ContextStack.begin(), 1210 ContextStack.end()); 1211 Binary->getInlineContextForProbe(LeafProbe, NewContextStack, true); 1212 // For leaf inlined context with the top frame, we should strip off the top 1213 // frame's probe id, like: 1214 // Inlined stack: [foo:1, bar:2], the ContextId will be "foo:1 @ bar" 1215 auto LeafFrame = NewContextStack.back(); 1216 LeafFrame.Location = LineLocation(0, 0); 1217 NewContextStack.pop_back(); 1218 // Compress the context string except for the leaf frame 1219 CSProfileGenerator::compressRecursionContext(NewContextStack); 1220 CSProfileGenerator::trimContext(NewContextStack); 1221 NewContextStack.push_back(LeafFrame); 1222 1223 const auto *FuncDesc = Binary->getFuncDescForGUID(LeafProbe->getGuid()); 1224 bool WasLeafInlined = LeafProbe->getInlineTreeNode()->hasInlineSite(); 1225 ContextTrieNode *ContextNode = 1226 getOrCreateContextNode(NewContextStack, WasLeafInlined); 1227 ContextNode->getFunctionSamples()->setFunctionHash(FuncDesc->FuncHash); 1228 return ContextNode; 1229 } 1230 1231 FunctionSamples &CSProfileGenerator::getFunctionProfileForLeafProbe( 1232 SampleContextFrames ContextStack, const MCDecodedPseudoProbe *LeafProbe) { 1233 return *getContextNodeForLeafProbe(ContextStack, LeafProbe) 1234 ->getFunctionSamples(); 1235 } 1236 1237 } // end namespace sampleprof 1238 } // end namespace llvm 1239