//===- bolt/Profile/StaleProfileMatching.cpp - Profile data matching ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// BOLT often has to deal with profiles collected on binaries built from several
// revisions behind release. As a result, a certain percentage of functions is
// considered stale and not optimized. This file implements the ability to match
// a profile to functions that are not 100% binary identical, thus increasing
// optimization coverage and boosting application performance.
//
// The algorithm consists of two phases: matching and inference:
// - At the matching phase, we try to "guess" as many block and jump counts from
//   the stale profile as possible. To this end, the content of each basic block
//   is hashed and stored in the (yaml) profile. When BOLT optimizes a binary,
//   it computes block hashes and identifies the corresponding entries in the
//   stale profile. It yields a partial profile for every CFG in the binary.
// - At the inference phase, we employ a network flow-based algorithm (profi) to
//   reconstruct "realistic" block and jump counts from the partial profile
//   generated at the first stage. In practice, we don't always produce proper
//   profile data but the majority (e.g., >90%) of CFGs get the correct counts.
//
//===----------------------------------------------------------------------===//

#include "bolt/Core/HashUtilities.h"
#include "bolt/Profile/YAMLProfileReader.h"
#include "llvm/ADT/Bitfields.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/MC/MCPseudoProbe.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/xxhash.h"
#include "llvm/Transforms/Utils/SampleProfileInference.h"

#include <queue>

using namespace llvm;

#undef DEBUG_TYPE
#define DEBUG_TYPE "bolt-prof"

namespace opts {

extern cl::opt<bool> TimeRewrite;
extern cl::OptionCategory BoltOptCategory;

cl::opt<bool>
    InferStaleProfile("infer-stale-profile",
                      cl::desc("Infer counts from stale profile data."),
                      cl::init(false), cl::Hidden, cl::cat(BoltOptCategory));

cl::opt<unsigned> StaleMatchingMinMatchedBlock(
    "stale-matching-min-matched-block",
    cl::desc("Percentage threshold of matched basic blocks at which stale "
             "profile inference is executed."),
    cl::init(0), cl::Hidden, cl::cat(BoltOptCategory));

cl::opt<unsigned> StaleMatchingMaxFuncSize(
    "stale-matching-max-func-size",
    cl::desc("The maximum size of a function to consider for inference."),
    cl::init(10000), cl::Hidden, cl::cat(BoltOptCategory));

// Parameters of the profile inference algorithm. The default values are tuned
// on several benchmarks.
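// These options map directly onto the ProfiParams fields set in
// applyInference() below.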
cl::opt<bool> StaleMatchingEvenFlowDistribution(
    "stale-matching-even-flow-distribution",
    cl::desc("Try to evenly distribute flow when there are multiple equally "
             "likely options."),
    cl::init(true), cl::ReallyHidden, cl::cat(BoltOptCategory));

cl::opt<bool> StaleMatchingRebalanceUnknown(
    "stale-matching-rebalance-unknown",
    cl::desc("Evenly re-distribute flow among unknown subgraphs."),
    cl::init(false), cl::ReallyHidden, cl::cat(BoltOptCategory));

cl::opt<bool> StaleMatchingJoinIslands(
    "stale-matching-join-islands",
    cl::desc("Join isolated components having positive flow."), cl::init(true),
    cl::ReallyHidden, cl::cat(BoltOptCategory));

cl::opt<unsigned> StaleMatchingCostBlockInc(
    "stale-matching-cost-block-inc",
    cl::desc("The cost of increasing a block count by one."), cl::init(150),
    cl::ReallyHidden, cl::cat(BoltOptCategory));

cl::opt<unsigned> StaleMatchingCostBlockDec(
    "stale-matching-cost-block-dec",
    cl::desc("The cost of decreasing a block count by one."), cl::init(150),
    cl::ReallyHidden, cl::cat(BoltOptCategory));

cl::opt<unsigned> StaleMatchingCostJumpInc(
    "stale-matching-cost-jump-inc",
    cl::desc("The cost of increasing a jump count by one."), cl::init(150),
    cl::ReallyHidden, cl::cat(BoltOptCategory));

cl::opt<unsigned> StaleMatchingCostJumpDec(
    "stale-matching-cost-jump-dec",
    cl::desc("The cost of decreasing a jump count by one."), cl::init(150),
    cl::ReallyHidden, cl::cat(BoltOptCategory));

cl::opt<unsigned> StaleMatchingCostBlockUnknownInc(
    "stale-matching-cost-block-unknown-inc",
    cl::desc("The cost of increasing an unknown block count by one."),
    cl::init(1), cl::ReallyHidden, cl::cat(BoltOptCategory));

cl::opt<unsigned> StaleMatchingCostJumpUnknownInc(
    "stale-matching-cost-jump-unknown-inc",
    cl::desc("The cost of increasing an unknown jump count by one."),
    cl::init(140), cl::ReallyHidden, cl::cat(BoltOptCategory));

cl::opt<unsigned> StaleMatchingCostJumpUnknownFTInc(
    "stale-matching-cost-jump-unknown-ft-inc",
    cl::desc(
        "The cost of increasing an unknown fall-through jump count by one."),
    cl::init(3), cl::ReallyHidden, cl::cat(BoltOptCategory));

cl::opt<bool> StaleMatchingWithPseudoProbes(
    "stale-matching-with-pseudo-probes",
    cl::desc("Turns on stale matching with block pseudo probes."),
    cl::init(false), cl::ReallyHidden, cl::cat(BoltOptCategory));

} // namespace opts

namespace llvm {
namespace bolt {

/// An object wrapping several components of a basic block hash. The combined
/// (blended) hash is represented and stored as one uint64_t, while individual
/// components are of smaller size (e.g., uint16_t or uint8_t).
struct BlendedBlockHash {
private:
  using ValueOffset = Bitfield::Element<uint16_t, 0, 16>;
  using ValueOpcode = Bitfield::Element<uint16_t, 16, 16>;
  using ValueInstr = Bitfield::Element<uint16_t, 32, 16>;
  using ValuePred = Bitfield::Element<uint8_t, 48, 8>;
  using ValueSucc = Bitfield::Element<uint8_t, 56, 8>;

public:
  explicit BlendedBlockHash() {}

  explicit BlendedBlockHash(uint64_t Hash) {
    Offset = Bitfield::get<ValueOffset>(Hash);
    OpcodeHash = Bitfield::get<ValueOpcode>(Hash);
    InstrHash = Bitfield::get<ValueInstr>(Hash);
    PredHash = Bitfield::get<ValuePred>(Hash);
    SuccHash = Bitfield::get<ValueSucc>(Hash);
  }

  /// Combine the blended hash into uint64_t.
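  /// The layout mirrors the Bitfield elements above: Offset in bits 0-15,
  /// OpcodeHash in 16-31, InstrHash in 32-47, PredHash in 48-55, and SuccHash
  /// in 56-63.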
  uint64_t combine() const {
    uint64_t Hash = 0;
    Bitfield::set<ValueOffset>(Hash, Offset);
    Bitfield::set<ValueOpcode>(Hash, OpcodeHash);
    Bitfield::set<ValueInstr>(Hash, InstrHash);
    Bitfield::set<ValuePred>(Hash, PredHash);
    Bitfield::set<ValueSucc>(Hash, SuccHash);
    return Hash;
  }

  /// Compute a distance between two given blended hashes. The smaller the
  /// distance, the more similar two blocks are. For identical basic blocks,
  /// the distance is zero.
  uint64_t distance(const BlendedBlockHash &BBH) const {
    assert(OpcodeHash == BBH.OpcodeHash &&
           "incorrect blended hash distance computation");
    uint64_t Dist = 0;
    // Account for NeighborHash
    Dist += SuccHash == BBH.SuccHash ? 0 : 1;
    Dist += PredHash == BBH.PredHash ? 0 : 1;
    Dist <<= 16;
    // Account for InstrHash
    Dist += InstrHash == BBH.InstrHash ? 0 : 1;
    Dist <<= 16;
    // Account for Offset
    Dist += (Offset >= BBH.Offset ? Offset - BBH.Offset : BBH.Offset - Offset);
    return Dist;
  }

  /// The offset of the basic block from the function start.
  uint16_t Offset{0};
  /// (Loose) Hash of the basic block instructions, excluding operands.
  uint16_t OpcodeHash{0};
  /// (Strong) Hash of the basic block instructions, including opcodes and
  /// operands.
  uint16_t InstrHash{0};
  /// (Loose) Hashes of the predecessors of the basic block.
  uint8_t PredHash{0};
  /// (Loose) Hashes of the successors of the basic block.
  uint8_t SuccHash{0};
};

/// The object is used to identify and match basic blocks in a BinaryFunction
/// given their hashes computed on a binary built from several revisions behind
/// release.
class StaleMatcher {
public:
  /// Initialize stale matcher.
  void init(const std::vector<FlowBlock *> &Blocks,
            const std::vector<BlendedBlockHash> &Hashes,
            const std::vector<uint64_t> &CallHashes) {
    assert(Blocks.size() == Hashes.size() &&
           Hashes.size() == CallHashes.size() &&
           "incorrect matcher initialization");
    for (size_t I = 0; I < Blocks.size(); I++) {
      FlowBlock *Block = Blocks[I];
      uint16_t OpHash = Hashes[I].OpcodeHash;
      OpHashToBlocks[OpHash].push_back(std::make_pair(Hashes[I], Block));
      if (CallHashes[I])
        CallHashToBlocks[CallHashes[I]].push_back(
            std::make_pair(Hashes[I], Block));
    }
  }

  /// Creates a mapping from a pseudo probe to a flow block.
  void mapProbeToBB(const MCDecodedPseudoProbe *Probe, FlowBlock *Block) {
    BBPseudoProbeToBlock[Probe] = Block;
  }

  enum MatchMethod : char {
    MATCH_EXACT = 0,
    MATCH_PROBE_EXACT,
    MATCH_PROBE_LOOSE,
    MATCH_OPCODE,
    MATCH_CALL,
    NO_MATCH
  };

  /// Find the most similar flow block for a profile block given blended hash.
  std::pair<const FlowBlock *, MatchMethod>
  matchBlockStrict(BlendedBlockHash BlendedHash) {
    const auto &[Block, ExactHash] = matchWithOpcodes(BlendedHash);
    if (Block && ExactHash)
      return {Block, MATCH_EXACT};
    return {nullptr, NO_MATCH};
  }

  /// Find the most similar flow block for a profile block given pseudo probes.
  std::pair<const FlowBlock *, MatchMethod> matchBlockProbe(
      const ArrayRef<yaml::bolt::PseudoProbeInfo> PseudoProbes,
      const YAMLProfileReader::InlineTreeNodeMapTy &InlineTreeNodeMap) {
    const auto &[ProbeBlock, ExactProbe] =
        matchWithPseudoProbes(PseudoProbes, InlineTreeNodeMap);
    if (ProbeBlock)
      return {ProbeBlock, ExactProbe ? MATCH_PROBE_EXACT : MATCH_PROBE_LOOSE};
    return {nullptr, NO_MATCH};
  }

  /// Find the most similar flow block for a profile block given its hashes.
  std::pair<const FlowBlock *, MatchMethod>
  matchBlockLoose(BlendedBlockHash BlendedHash, uint64_t CallHash) {
    if (const FlowBlock *CallBlock = matchWithCalls(BlendedHash, CallHash))
      return {CallBlock, MATCH_CALL};
    if (const FlowBlock *OpcodeBlock = matchWithOpcodes(BlendedHash).first)
      return {OpcodeBlock, MATCH_OPCODE};
    return {nullptr, NO_MATCH};
  }

  /// Returns true if the two basic blocks (in the binary and in the profile)
  /// corresponding to the given hashes are matched to each other with a high
  /// confidence.
  static bool isHighConfidenceMatch(BlendedBlockHash Hash1,
                                    BlendedBlockHash Hash2) {
    return Hash1.InstrHash == Hash2.InstrHash;
  }

private:
  using HashBlockPairType = std::pair<BlendedBlockHash, FlowBlock *>;
  std::unordered_map<uint16_t, std::vector<HashBlockPairType>> OpHashToBlocks;
  std::unordered_map<uint64_t, std::vector<HashBlockPairType>> CallHashToBlocks;
  DenseMap<const MCDecodedPseudoProbe *, FlowBlock *> BBPseudoProbeToBlock;

  // Uses OpcodeHash to find the most similar block for a given hash.
  std::pair<const FlowBlock *, bool>
  matchWithOpcodes(BlendedBlockHash BlendedHash) const {
    auto BlockIt = OpHashToBlocks.find(BlendedHash.OpcodeHash);
    if (BlockIt == OpHashToBlocks.end())
      return {nullptr, false};
    FlowBlock *BestBlock = nullptr;
    uint64_t BestDist = std::numeric_limits<uint64_t>::max();
    BlendedBlockHash BestHash;
    for (const auto &[Hash, Block] : BlockIt->second) {
      uint64_t Dist = Hash.distance(BlendedHash);
      if (BestBlock == nullptr || Dist < BestDist) {
        BestDist = Dist;
        BestBlock = Block;
        BestHash = Hash;
      }
    }
    return {BestBlock, isHighConfidenceMatch(BestHash, BlendedHash)};
  }

  // Uses CallHash to find the most similar block for a given hash.
  const FlowBlock *matchWithCalls(BlendedBlockHash BlendedHash,
                                  uint64_t CallHash) const {
    if (!CallHash)
      return nullptr;
    auto BlockIt = CallHashToBlocks.find(CallHash);
    if (BlockIt == CallHashToBlocks.end())
      return nullptr;
    FlowBlock *BestBlock = nullptr;
    uint64_t BestDist = std::numeric_limits<uint64_t>::max();
    for (const auto &[Hash, Block] : BlockIt->second) {
      uint64_t Dist = Hash.OpcodeHash > BlendedHash.OpcodeHash
                          ? Hash.OpcodeHash - BlendedHash.OpcodeHash
                          : BlendedHash.OpcodeHash - Hash.OpcodeHash;
      if (BestBlock == nullptr || Dist < BestDist) {
        BestDist = Dist;
        BestBlock = Block;
      }
    }
    return BestBlock;
  }

  /// Matches a profile block with a binary block based on pseudo probes.
  /// Returns the best matching block (or nullptr) and whether the match is
  /// unambiguous.
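  /// Each probe of the profile block votes for the binary block it maps to;
  /// the block with the most votes is returned, and the match is reported as
  /// unambiguous only when all votes agree.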
  std::pair<const FlowBlock *, bool> matchWithPseudoProbes(
      const ArrayRef<yaml::bolt::PseudoProbeInfo> BlockPseudoProbes,
      const YAMLProfileReader::InlineTreeNodeMapTy &InlineTreeNodeMap) const {

    if (!opts::StaleMatchingWithPseudoProbes)
      return {nullptr, false};

    DenseMap<const FlowBlock *, uint32_t> FlowBlockMatchCount;

    auto matchProfileProbeToBlock = [&](uint32_t NodeId,
                                        uint64_t ProbeId) -> const FlowBlock * {
      const MCDecodedPseudoProbeInlineTree *BinaryNode =
          InlineTreeNodeMap.getInlineTreeNode(NodeId);
      if (!BinaryNode)
        return nullptr;
      const MCDecodedPseudoProbe Dummy(0, ProbeId, PseudoProbeType::Block, 0, 0,
                                       nullptr);
      ArrayRef<MCDecodedPseudoProbe> BinaryProbes = BinaryNode->getProbes();
      auto BinaryProbeIt = llvm::lower_bound(
          BinaryProbes, Dummy, [](const auto &LHS, const auto &RHS) {
            return LHS.getIndex() < RHS.getIndex();
          });
      if (BinaryProbeIt == BinaryNode->getProbes().end() ||
          BinaryProbeIt->getIndex() != ProbeId)
        return nullptr;
      auto It = BBPseudoProbeToBlock.find(&*BinaryProbeIt);
      if (It == BBPseudoProbeToBlock.end())
        return nullptr;
      return It->second;
    };

    auto matchPseudoProbeInfo = [&](const yaml::bolt::PseudoProbeInfo
                                        &ProfileProbe,
                                    uint32_t NodeId) {
      for (uint64_t Index = 0; Index < 64; ++Index)
        if (ProfileProbe.BlockMask & 1ull << Index)
          ++FlowBlockMatchCount[matchProfileProbeToBlock(NodeId, Index + 1)];
      for (const auto &ProfileProbes :
           {ProfileProbe.BlockProbes, ProfileProbe.IndCallProbes,
            ProfileProbe.CallProbes})
        for (uint64_t ProfileProbe : ProfileProbes)
          ++FlowBlockMatchCount[matchProfileProbeToBlock(NodeId, ProfileProbe)];
    };

    for (const yaml::bolt::PseudoProbeInfo &ProfileProbe : BlockPseudoProbes) {
      if (!ProfileProbe.InlineTreeNodes.empty())
        for (uint32_t ProfileInlineTreeNode : ProfileProbe.InlineTreeNodes)
          matchPseudoProbeInfo(ProfileProbe, ProfileInlineTreeNode);
      else
        matchPseudoProbeInfo(ProfileProbe, ProfileProbe.InlineTreeIndex);
    }
    uint32_t BestMatchCount = 0;
    uint32_t TotalMatchCount = 0;
    const FlowBlock *BestMatchBlock = nullptr;
    for (const auto &[FlowBlock, Count] : FlowBlockMatchCount) {
      TotalMatchCount += Count;
      if (Count < BestMatchCount || (Count == BestMatchCount && BestMatchBlock))
        continue;
      BestMatchBlock = FlowBlock;
      BestMatchCount = Count;
    }
    return {BestMatchBlock, BestMatchCount == TotalMatchCount};
  }
};

void BinaryFunction::computeBlockHashes(HashFunction HashFunction) const {
  if (size() == 0)
    return;

  assert(hasCFG() && "the function is expected to have CFG");

  std::vector<BlendedBlockHash> BlendedHashes(BasicBlocks.size());
  std::vector<uint64_t> OpcodeHashes(BasicBlocks.size());
  // Initialize hash components.
  for (size_t I = 0; I < BasicBlocks.size(); I++) {
    const BinaryBasicBlock *BB = BasicBlocks[I];
    assert(BB->getIndex() == I && "incorrect block index");
    BlendedHashes[I].Offset = BB->getOffset();
    // Hashing complete instructions.
    std::string InstrHashStr = hashBlock(
        BC, *BB, [&](const MCOperand &Op) { return hashInstOperand(BC, Op); });
    if (HashFunction == HashFunction::StdHash) {
      uint64_t InstrHash = std::hash<std::string>{}(InstrHashStr);
      BlendedHashes[I].InstrHash = (uint16_t)hash_value(InstrHash);
    } else if (HashFunction == HashFunction::XXH3) {
      uint64_t InstrHash = llvm::xxh3_64bits(InstrHashStr);
      BlendedHashes[I].InstrHash = (uint16_t)InstrHash;
    } else {
      llvm_unreachable("Unhandled HashFunction");
    }
    // Hashing opcodes.
    std::string OpcodeHashStr = hashBlockLoose(BC, *BB);
    if (HashFunction == HashFunction::StdHash) {
      OpcodeHashes[I] = std::hash<std::string>{}(OpcodeHashStr);
      BlendedHashes[I].OpcodeHash = (uint16_t)hash_value(OpcodeHashes[I]);
    } else if (HashFunction == HashFunction::XXH3) {
      OpcodeHashes[I] = llvm::xxh3_64bits(OpcodeHashStr);
      BlendedHashes[I].OpcodeHash = (uint16_t)OpcodeHashes[I];
    } else {
      llvm_unreachable("Unhandled HashFunction");
    }
  }

  // Initialize neighbor hash.
  for (size_t I = 0; I < BasicBlocks.size(); I++) {
    const BinaryBasicBlock *BB = BasicBlocks[I];
    // Append hashes of successors.
    uint64_t Hash = 0;
    for (BinaryBasicBlock *SuccBB : BB->successors()) {
      uint64_t SuccHash = OpcodeHashes[SuccBB->getIndex()];
      Hash = hashing::detail::hash_16_bytes(Hash, SuccHash);
    }
    if (HashFunction == HashFunction::StdHash) {
      // Compatibility with old behavior.
      BlendedHashes[I].SuccHash = (uint8_t)hash_value(Hash);
    } else {
      BlendedHashes[I].SuccHash = (uint8_t)Hash;
    }

    // Append hashes of predecessors.
    Hash = 0;
    for (BinaryBasicBlock *PredBB : BB->predecessors()) {
      uint64_t PredHash = OpcodeHashes[PredBB->getIndex()];
      Hash = hashing::detail::hash_16_bytes(Hash, PredHash);
    }
    if (HashFunction == HashFunction::StdHash) {
      // Compatibility with old behavior.
      BlendedHashes[I].PredHash = (uint8_t)hash_value(Hash);
    } else {
      BlendedHashes[I].PredHash = (uint8_t)Hash;
    }
  }

  // Assign hashes.
  for (size_t I = 0; I < BasicBlocks.size(); I++) {
    const BinaryBasicBlock *BB = BasicBlocks[I];
    BB->setHash(BlendedHashes[I].combine());
  }
}
// TODO: mediate the difference between flow function construction here in BOLT
// and in the compiler by splitting blocks with exception throwing calls at the
// call and adding the landing pad as the successor.
/// Create a wrapper flow function to use with the profile inference algorithm,
/// and initialize its jumps and metadata.
FlowFunction
createFlowFunction(const BinaryFunction::BasicBlockOrderType &BlockOrder) {
  FlowFunction Func;

  // Add a special "dummy" source so that there is always a unique entry point.
  FlowBlock EntryBlock;
  EntryBlock.Index = 0;
  Func.Blocks.push_back(EntryBlock);

  // Create FlowBlock for every basic block in the binary function.
  for (const BinaryBasicBlock *BB : BlockOrder) {
    Func.Blocks.emplace_back();
    FlowBlock &Block = Func.Blocks.back();
    Block.Index = Func.Blocks.size() - 1;
    (void)BB;
    assert(Block.Index == BB->getIndex() + 1 &&
           "incorrectly assigned basic block index");
  }

  // Add a special "dummy" sink block so there is always a unique sink.
  FlowBlock SinkBlock;
  SinkBlock.Index = Func.Blocks.size();
  Func.Blocks.push_back(SinkBlock);

  // Create FlowJump for each jump between basic blocks in the binary function.
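  // Also track the in-degree of each block so that dummy edges to extra
  // entry points can be added below.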
  std::vector<uint64_t> InDegree(Func.Blocks.size(), 0);
  for (const BinaryBasicBlock *SrcBB : BlockOrder) {
    std::unordered_set<const BinaryBasicBlock *> UniqueSuccs;
    // Collect regular jumps
    for (const BinaryBasicBlock *DstBB : SrcBB->successors()) {
      // Ignoring parallel edges
      if (UniqueSuccs.find(DstBB) != UniqueSuccs.end())
        continue;

      Func.Jumps.emplace_back();
      FlowJump &Jump = Func.Jumps.back();
      Jump.Source = SrcBB->getIndex() + 1;
      Jump.Target = DstBB->getIndex() + 1;
      InDegree[Jump.Target]++;
      UniqueSuccs.insert(DstBB);
    }
    // TODO: set jump from exit block to landing pad to Unlikely.
    // If the block is an exit, add a dummy edge from it to the sink block.
    if (UniqueSuccs.empty()) {
      Func.Jumps.emplace_back();
      FlowJump &Jump = Func.Jumps.back();
      Jump.Source = SrcBB->getIndex() + 1;
      Jump.Target = Func.Blocks.size() - 1;
      InDegree[Jump.Target]++;
    }

    // Collect jumps to landing pads
    for (const BinaryBasicBlock *DstBB : SrcBB->landing_pads()) {
      // Ignoring parallel edges
      if (UniqueSuccs.find(DstBB) != UniqueSuccs.end())
        continue;

      Func.Jumps.emplace_back();
      FlowJump &Jump = Func.Jumps.back();
      Jump.Source = SrcBB->getIndex() + 1;
      Jump.Target = DstBB->getIndex() + 1;
      InDegree[Jump.Target]++;
      UniqueSuccs.insert(DstBB);
    }
  }

  // Add dummy edges to the extra sources. If there are multiple entry blocks,
  // add an unlikely edge from 0 to the subsequent ones. Skip the sink block.
  assert(InDegree[0] == 0 && "dummy entry blocks shouldn't have predecessors");
  for (uint64_t I = 1; I < Func.Blocks.size() - 1; I++) {
    const BinaryBasicBlock *BB = BlockOrder[I - 1];
    if (BB->isEntryPoint() || InDegree[I] == 0) {
      Func.Jumps.emplace_back();
      FlowJump &Jump = Func.Jumps.back();
      Jump.Source = 0;
      Jump.Target = I;
      if (!BB->isEntryPoint())
        Jump.IsUnlikely = true;
    }
  }

  // Create necessary metadata for the flow function
  for (FlowJump &Jump : Func.Jumps) {
    assert(Jump.Source < Func.Blocks.size());
    Func.Blocks[Jump.Source].SuccJumps.push_back(&Jump);
    assert(Jump.Target < Func.Blocks.size());
    Func.Blocks[Jump.Target].PredJumps.push_back(&Jump);
  }
  return Func;
}

/// Assign initial block/jump weights based on the stale profile data. The goal
/// is to extract as much information from the stale profile as possible. Here
/// we assume that each basic block is specified via a hash value computed from
/// its content and that the hashes of unchanged basic blocks stay the same
/// across different revisions of the binary. Blocks may also have pseudo probe
/// information in the profile and the binary which is used for matching.
/// Whenever there is a count in the profile with the hash corresponding to one
/// of the basic blocks in the binary, the count is "matched" to the block.
/// Similarly, if both the source and the target of a count in the profile are
/// matched to a jump in the binary, the count is recorded in the CFG.
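/// Returns the number of blocks matched for the function's own profile
/// (YamlBF).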
size_t matchWeights(
    BinaryContext &BC, const BinaryFunction::BasicBlockOrderType &BlockOrder,
    const yaml::bolt::BinaryFunctionProfile &YamlBF, FlowFunction &Func,
    HashFunction HashFunction, YAMLProfileReader::ProfileLookupMap &IdToYamlBF,
    const BinaryFunction &BF,
    const ArrayRef<YAMLProfileReader::ProbeMatchSpec> ProbeMatchSpecs) {

  assert(Func.Blocks.size() == BlockOrder.size() + 2);

  std::vector<uint64_t> CallHashes;
  std::vector<FlowBlock *> Blocks;
  std::vector<BlendedBlockHash> BlendedHashes;
  for (uint64_t I = 0; I < BlockOrder.size(); I++) {
    const BinaryBasicBlock *BB = BlockOrder[I];
    assert(BB->getHash() != 0 && "empty hash of BinaryBasicBlock");

    std::string CallHashStr = hashBlockCalls(BC, *BB);
    if (CallHashStr.empty()) {
      CallHashes.push_back(0);
    } else {
      if (HashFunction == HashFunction::StdHash)
        CallHashes.push_back(std::hash<std::string>{}(CallHashStr));
      else if (HashFunction == HashFunction::XXH3)
        CallHashes.push_back(llvm::xxh3_64bits(CallHashStr));
      else
        llvm_unreachable("Unhandled HashFunction");
    }

    Blocks.push_back(&Func.Blocks[I + 1]);
    BlendedBlockHash BlendedHash(BB->getHash());
    BlendedHashes.push_back(BlendedHash);
    LLVM_DEBUG(dbgs() << "BB with index " << I << " has hash = "
                      << Twine::utohexstr(BB->getHash()) << "\n");
  }
  StaleMatcher Matcher;
  // Collects function pseudo probes for use in the StaleMatcher.
  if (opts::StaleMatchingWithPseudoProbes) {
    const MCPseudoProbeDecoder *Decoder = BC.getPseudoProbeDecoder();
    assert(Decoder &&
           "If pseudo probes are in use, pseudo probe decoder should exist");
    const AddressProbesMap &ProbeMap = Decoder->getAddress2ProbesMap();
    const uint64_t FuncAddr = BF.getAddress();
    for (const MCDecodedPseudoProbe &Probe :
         ProbeMap.find(FuncAddr, FuncAddr + BF.getSize()))
      if (const BinaryBasicBlock *BB =
              BF.getBasicBlockContainingOffset(Probe.getAddress() - FuncAddr))
        Matcher.mapProbeToBB(&Probe, Blocks[BB->getIndex()]);
  }
  Matcher.init(Blocks, BlendedHashes, CallHashes);

  using FlowBlockTy =
      std::pair<const FlowBlock *, const yaml::bolt::BinaryBasicBlockProfile *>;
  using ProfileBlockMatchMap = DenseMap<uint32_t, FlowBlockTy>;
  // Binary profile => block index => matched block + its block profile
  DenseMap<const yaml::bolt::BinaryFunctionProfile *, ProfileBlockMatchMap>
      MatchedBlocks;

  // Map of FlowBlock and matching method.
  DenseMap<const FlowBlock *, StaleMatcher::MatchMethod> MatchedFlowBlocks;

  auto addMatchedBlock =
      [&](std::pair<const FlowBlock *, StaleMatcher::MatchMethod> BlockMethod,
          const yaml::bolt::BinaryFunctionProfile &YamlBP,
          const yaml::bolt::BinaryBasicBlockProfile &YamlBB) {
        const auto &[MatchedBlock, Method] = BlockMethod;
        if (!MatchedBlock)
          return;
        // Don't override earlier matches
        if (MatchedFlowBlocks.contains(MatchedBlock))
          return;
        MatchedFlowBlocks.try_emplace(MatchedBlock, Method);
        MatchedBlocks[&YamlBP][YamlBB.Index] = {MatchedBlock, &YamlBB};
      };

  // Match blocks from the profile to the blocks in CFG by strict hash.
  for (const yaml::bolt::BinaryBasicBlockProfile &YamlBB : YamlBF.Blocks) {
    // Update matching stats.
    ++BC.Stats.NumStaleBlocks;
    BC.Stats.StaleSampleCount += YamlBB.ExecCount;

    assert(YamlBB.Hash != 0 && "empty hash of BinaryBasicBlockProfile");
    BlendedBlockHash YamlHash(YamlBB.Hash);
    addMatchedBlock(Matcher.matchBlockStrict(YamlHash), YamlBF, YamlBB);
  }
  // Match blocks from the profile to the blocks in CFG by pseudo probes.
  for (const auto &[InlineNodeMap, YamlBP] : ProbeMatchSpecs) {
    for (const yaml::bolt::BinaryBasicBlockProfile &BB : YamlBP.get().Blocks)
      if (!BB.PseudoProbes.empty())
        addMatchedBlock(Matcher.matchBlockProbe(BB.PseudoProbes, InlineNodeMap),
                        YamlBP, BB);
  }
  // Match blocks from the profile to the blocks in CFG with loose methods.
  for (const yaml::bolt::BinaryBasicBlockProfile &YamlBB : YamlBF.Blocks) {
    assert(YamlBB.Hash != 0 && "empty hash of BinaryBasicBlockProfile");
    BlendedBlockHash YamlHash(YamlBB.Hash);

    std::string CallHashStr = hashBlockCalls(IdToYamlBF, YamlBB);
    uint64_t CallHash = 0;
    if (!CallHashStr.empty()) {
      if (HashFunction == HashFunction::StdHash)
        CallHash = std::hash<std::string>{}(CallHashStr);
      else if (HashFunction == HashFunction::XXH3)
        CallHash = llvm::xxh3_64bits(CallHashStr);
      else
        llvm_unreachable("Unhandled HashFunction");
    }
    auto [MatchedBlock, Method] = Matcher.matchBlockLoose(YamlHash, CallHash);
    if (MatchedBlock == nullptr && YamlBB.Index == 0) {
      MatchedBlock = Blocks[0];
      // Report as loose match
      Method = StaleMatcher::MATCH_OPCODE;
    }
    if (!MatchedBlock) {
      LLVM_DEBUG(dbgs() << "Couldn't match yaml block (bid = " << YamlBB.Index
                        << ")" << " with hash " << Twine::utohexstr(YamlBB.Hash)
                        << "\n");
      continue;
    }
    addMatchedBlock({MatchedBlock, Method}, YamlBF, YamlBB);
  }

  // Match jumps from the profile to the jumps from CFG
  std::vector<uint64_t> OutWeight(Func.Blocks.size(), 0);
  std::vector<uint64_t> InWeight(Func.Blocks.size(), 0);

  for (const auto &[YamlBF, MatchMap] : MatchedBlocks) {
    for (const auto &[YamlBBIdx, FlowBlockProfile] : MatchMap) {
      const auto &[MatchedBlock, YamlBB] = FlowBlockProfile;
      StaleMatcher::MatchMethod Method = MatchedFlowBlocks.lookup(MatchedBlock);
      BlendedBlockHash BinHash = BlendedHashes[MatchedBlock->Index - 1];
      LLVM_DEBUG(dbgs() << "Matched yaml block (bid = " << YamlBBIdx << ")"
                        << " with hash " << Twine::utohexstr(YamlBB->Hash)
                        << " to BB (index = " << MatchedBlock->Index - 1 << ")"
                        << " with hash " << Twine::utohexstr(BinHash.combine())
                        << "\n");
      (void)BinHash;
      uint64_t ExecCount = YamlBB->ExecCount;
      // Update matching stats accounting for the matched block.
      switch (Method) {
      case StaleMatcher::MATCH_EXACT:
        ++BC.Stats.NumExactMatchedBlocks;
        BC.Stats.ExactMatchedSampleCount += ExecCount;
        LLVM_DEBUG(dbgs() << " exact match\n");
        break;
      case StaleMatcher::MATCH_PROBE_EXACT:
        ++BC.Stats.NumPseudoProbeExactMatchedBlocks;
        BC.Stats.PseudoProbeExactMatchedSampleCount += ExecCount;
        LLVM_DEBUG(dbgs() << " exact pseudo probe match\n");
        break;
      case StaleMatcher::MATCH_PROBE_LOOSE:
        ++BC.Stats.NumPseudoProbeLooseMatchedBlocks;
        BC.Stats.PseudoProbeLooseMatchedSampleCount += ExecCount;
        LLVM_DEBUG(dbgs() << " loose pseudo probe match\n");
        break;
      case StaleMatcher::MATCH_CALL:
        ++BC.Stats.NumCallMatchedBlocks;
        BC.Stats.CallMatchedSampleCount += ExecCount;
        LLVM_DEBUG(dbgs() << " call match\n");
        break;
      case StaleMatcher::MATCH_OPCODE:
        ++BC.Stats.NumLooseMatchedBlocks;
        BC.Stats.LooseMatchedSampleCount += ExecCount;
        LLVM_DEBUG(dbgs() << " loose match\n");
        break;
      case StaleMatcher::NO_MATCH:
        LLVM_DEBUG(dbgs() << " no match\n");
      }
    }

    for (const yaml::bolt::BinaryBasicBlockProfile &YamlBB : YamlBF->Blocks) {
      for (const yaml::bolt::SuccessorInfo &YamlSI : YamlBB.Successors) {
        if (YamlSI.Count == 0)
          continue;

        // Try to find the jump for a given (src, dst) pair from the profile and
        // assign the jump weight based on the profile count
        const uint64_t SrcIndex = YamlBB.Index;
        const uint64_t DstIndex = YamlSI.Index;

        const FlowBlock *MatchedSrcBlock = MatchMap.lookup(SrcIndex).first;
        const FlowBlock *MatchedDstBlock = MatchMap.lookup(DstIndex).first;

        if (MatchedSrcBlock != nullptr && MatchedDstBlock != nullptr) {
          // Find a jump between the two blocks
          FlowJump *Jump = nullptr;
          for (FlowJump *SuccJump : MatchedSrcBlock->SuccJumps) {
            if (SuccJump->Target == MatchedDstBlock->Index) {
              Jump = SuccJump;
              break;
            }
          }
          // Assign the weight, if the corresponding jump is found
          if (Jump != nullptr) {
            Jump->Weight = YamlSI.Count;
            Jump->HasUnknownWeight = false;
          }
        }
        // Assign the weight for the src block, if it is found
        if (MatchedSrcBlock != nullptr)
          OutWeight[MatchedSrcBlock->Index] += YamlSI.Count;
        // Assign the weight for the dst block, if it is found
        if (MatchedDstBlock != nullptr)
          InWeight[MatchedDstBlock->Index] += YamlSI.Count;
      }
    }
  }

  // Assign block counts based on in-/out- jumps
  for (FlowBlock &Block : Func.Blocks) {
    if (OutWeight[Block.Index] == 0 && InWeight[Block.Index] == 0) {
      assert(Block.HasUnknownWeight && "unmatched block with a positive count");
      continue;
    }
    Block.HasUnknownWeight = false;
    Block.Weight = std::max(OutWeight[Block.Index], InWeight[Block.Index]);
  }

  return MatchedBlocks[&YamlBF].size();
}

/// The function finds all blocks that are (i) reachable from the Entry block
/// and (ii) do not have a path to an exit, and marks all such blocks 'cold'
/// so that profi does not send any flow to such blocks.
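/// The block sets are computed with two BFS traversals: forward from the
/// entry block and backward from the exit blocks.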
void preprocessUnreachableBlocks(FlowFunction &Func) {
  const uint64_t NumBlocks = Func.Blocks.size();

  // Start bfs from the source
  std::queue<uint64_t> Queue;
  std::vector<bool> VisitedEntry(NumBlocks, false);
  for (uint64_t I = 0; I < NumBlocks; I++) {
    FlowBlock &Block = Func.Blocks[I];
    if (Block.isEntry()) {
      Queue.push(I);
      VisitedEntry[I] = true;
      break;
    }
  }
  while (!Queue.empty()) {
    const uint64_t Src = Queue.front();
    Queue.pop();
    for (FlowJump *Jump : Func.Blocks[Src].SuccJumps) {
      const uint64_t Dst = Jump->Target;
      if (!VisitedEntry[Dst]) {
        Queue.push(Dst);
        VisitedEntry[Dst] = true;
      }
    }
  }

  // Start bfs from all sinks
  std::vector<bool> VisitedExit(NumBlocks, false);
  for (uint64_t I = 0; I < NumBlocks; I++) {
    FlowBlock &Block = Func.Blocks[I];
    if (Block.isExit() && VisitedEntry[I]) {
      Queue.push(I);
      VisitedExit[I] = true;
    }
  }
  while (!Queue.empty()) {
    const uint64_t Src = Queue.front();
    Queue.pop();
    for (FlowJump *Jump : Func.Blocks[Src].PredJumps) {
      const uint64_t Dst = Jump->Source;
      if (!VisitedExit[Dst]) {
        Queue.push(Dst);
        VisitedExit[Dst] = true;
      }
    }
  }

  // Zero out the weight of blocks that are not both reachable from the entry
  // and able to reach an exit, so that no flow is sent to them
  for (uint64_t I = 0; I < NumBlocks; I++) {
    FlowBlock &Block = Func.Blocks[I];
    if (Block.Weight == 0)
      continue;
    if (!VisitedEntry[I] || !VisitedExit[I]) {
      Block.Weight = 0;
      Block.HasUnknownWeight = true;
      Block.IsUnlikely = true;
      for (FlowJump *Jump : Block.SuccJumps) {
        if (Jump->Source == Block.Index && Jump->Target == Block.Index) {
          Jump->Weight = 0;
          Jump->HasUnknownWeight = true;
          Jump->IsUnlikely = true;
        }
      }
    }
  }
}

/// Decide if stale profile matching can be applied for a given function.
/// Currently we skip inference for (very) large instances and for instances
/// having "unexpected" control flow (e.g., having no sink basic blocks).
bool canApplyInference(const FlowFunction &Func,
                       const yaml::bolt::BinaryFunctionProfile &YamlBF,
                       const uint64_t &MatchedBlocks) {
  if (Func.Blocks.size() > opts::StaleMatchingMaxFuncSize)
    return false;

  if (MatchedBlocks * 100 <
      opts::StaleMatchingMinMatchedBlock * YamlBF.Blocks.size())
    return false;

  // Return false if the artificial sink block has no predecessors, meaning
  // there are no exit blocks.
  if (Func.Blocks[Func.Blocks.size() - 1].isEntry())
    return false;

  return true;
}

/// Apply the profile inference algorithm for a given flow function.
void applyInference(FlowFunction &Func) {
  ProfiParams Params;
  // Set the params from the command-line flags.
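  // Costs for blocks/jumps with matched counts penalize deviation from those
  // counts, while the "unknown" costs apply to blocks and jumps that were not
  // matched.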
  Params.EvenFlowDistribution = opts::StaleMatchingEvenFlowDistribution;
  Params.RebalanceUnknown = opts::StaleMatchingRebalanceUnknown;
  Params.JoinIslands = opts::StaleMatchingJoinIslands;

  Params.CostBlockInc = opts::StaleMatchingCostBlockInc;
  Params.CostBlockEntryInc = opts::StaleMatchingCostBlockInc;
  Params.CostBlockDec = opts::StaleMatchingCostBlockDec;
  Params.CostBlockEntryDec = opts::StaleMatchingCostBlockDec;
  Params.CostBlockUnknownInc = opts::StaleMatchingCostBlockUnknownInc;

  Params.CostJumpInc = opts::StaleMatchingCostJumpInc;
  Params.CostJumpFTInc = opts::StaleMatchingCostJumpInc;
  Params.CostJumpDec = opts::StaleMatchingCostJumpDec;
  Params.CostJumpFTDec = opts::StaleMatchingCostJumpDec;
  Params.CostJumpUnknownInc = opts::StaleMatchingCostJumpUnknownInc;
  Params.CostJumpUnknownFTInc = opts::StaleMatchingCostJumpUnknownFTInc;

  applyFlowInference(Params, Func);
}

/// Collect inferred counts from the flow function and update annotations in
/// the binary function.
void assignProfile(BinaryFunction &BF,
                   const BinaryFunction::BasicBlockOrderType &BlockOrder,
                   FlowFunction &Func) {
  BinaryContext &BC = BF.getBinaryContext();

  assert(Func.Blocks.size() == BlockOrder.size() + 2);
  for (uint64_t I = 0; I < BlockOrder.size(); I++) {
    FlowBlock &Block = Func.Blocks[I + 1];
    BinaryBasicBlock *BB = BlockOrder[I];

    // Update block's count
    BB->setExecutionCount(Block.Flow);

    // Update jump counts: (i) clean existing counts and then (ii) set new ones
    auto BI = BB->branch_info_begin();
    for (const BinaryBasicBlock *DstBB : BB->successors()) {
      (void)DstBB;
      BI->Count = 0;
      BI->MispredictedCount = 0;
      ++BI;
    }
    for (FlowJump *Jump : Block.SuccJumps) {
      if (Jump->IsUnlikely)
        continue;
      if (Jump->Flow == 0)
        continue;

      // Skips the artificial sink block.
      if (Jump->Target == Func.Blocks.size() - 1)
        continue;
      BinaryBasicBlock &SuccBB = *BlockOrder[Jump->Target - 1];
      // Check if the edge corresponds to a regular jump or a landing pad
      if (BB->getSuccessor(SuccBB.getLabel())) {
        BinaryBasicBlock::BinaryBranchInfo &BI = BB->getBranchInfo(SuccBB);
        BI.Count += Jump->Flow;
      } else {
        BinaryBasicBlock *LP = BB->getLandingPad(SuccBB.getLabel());
        if (LP && LP->getKnownExecutionCount() < Jump->Flow)
          LP->setExecutionCount(Jump->Flow);
      }
    }

    // Update call-site annotations
    auto setOrUpdateAnnotation = [&](MCInst &Instr, StringRef Name,
                                     uint64_t Count) {
      if (BC.MIB->hasAnnotation(Instr, Name))
        BC.MIB->removeAnnotation(Instr, Name);
      // Do not add zero-count annotations
      if (Count == 0)
        return;
      BC.MIB->addAnnotation(Instr, Name, Count);
    };

    for (MCInst &Instr : *BB) {
      // Ignore pseudo instructions
      if (BC.MIB->isPseudo(Instr))
        continue;
      // Ignore jump tables
      const MCInst *LastInstr = BB->getLastNonPseudoInstr();
      if (BC.MIB->getJumpTable(*LastInstr) && LastInstr == &Instr)
        continue;

      if (BC.MIB->isIndirectCall(Instr) || BC.MIB->isIndirectBranch(Instr)) {
        auto &ICSP = BC.MIB->getOrCreateAnnotationAs<IndirectCallSiteProfile>(
            Instr, "CallProfile");
        if (!ICSP.empty()) {
          // Try to evenly distribute the counts among the call sites
          const uint64_t TotalCount = Block.Flow;
          const uint64_t NumSites = ICSP.size();
          for (uint64_t Idx = 0; Idx < ICSP.size(); Idx++) {
            IndirectCallProfile &CSP = ICSP[Idx];
            uint64_t CountPerSite = TotalCount / NumSites;
            // When counts cannot be exactly distributed, increase by 1 the
            // counts of the first (TotalCount % NumSites) call sites
            if (Idx < TotalCount % NumSites)
              CountPerSite++;
            CSP.Count = CountPerSite;
          }
        } else {
          ICSP.emplace_back(nullptr, Block.Flow, 0);
        }
      } else if (BC.MIB->getConditionalTailCall(Instr)) {
        // We don't know exactly how many times the conditional tail call is
        // executed; conservatively, set it to the count of the block.
        setOrUpdateAnnotation(Instr, "CTCTakenCount", Block.Flow);
        BC.MIB->removeAnnotation(Instr, "CTCMispredCount");
      } else if (BC.MIB->isCall(Instr)) {
        setOrUpdateAnnotation(Instr, "Count", Block.Flow);
      }
    }
  }

  // Update function's execution count and mark the function inferred.
  BF.setExecutionCount(Func.Blocks[0].Flow);
  BF.setHasInferredProfile(true);
}

bool YAMLProfileReader::inferStaleProfile(
    BinaryFunction &BF, const yaml::bolt::BinaryFunctionProfile &YamlBF,
    const ArrayRef<ProbeMatchSpec> ProbeMatchSpecs) {

  NamedRegionTimer T("inferStaleProfile", "stale profile inference", "rewrite",
                     "Rewrite passes", opts::TimeRewrite);

  if (!BF.hasCFG())
    return false;

  LLVM_DEBUG(dbgs() << "BOLT-INFO: applying profile inference for "
                    << "\"" << BF.getPrintName() << "\"\n");

  // Make sure that block hashes are up to date.
  BF.computeBlockHashes(YamlBP.Header.HashFunction);

  const BinaryFunction::BasicBlockOrderType BlockOrder(
      BF.getLayout().block_begin(), BF.getLayout().block_end());

  // Create a wrapper flow function to use with the profile inference algorithm.
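  // The wrapper adds an artificial source and sink, so Func.Blocks.size()
  // equals BlockOrder.size() + 2.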
  FlowFunction Func = createFlowFunction(BlockOrder);

  // Match as many block/jump counts from the stale profile as possible
  size_t MatchedBlocks =
      matchWeights(BF.getBinaryContext(), BlockOrder, YamlBF, Func,
                   YamlBP.Header.HashFunction, IdToYamLBF, BF, ProbeMatchSpecs);

  // Adjust the flow function by marking unreachable blocks Unlikely so that
  // they don't get any counts assigned.
  preprocessUnreachableBlocks(Func);

  // Check if profile inference can be applied for the instance.
  if (!canApplyInference(Func, YamlBF, MatchedBlocks))
    return false;

  // Apply the profile inference algorithm.
  applyInference(Func);

  // Collect inferred counts and update function annotations.
  assignProfile(BF, BlockOrder, Func);

  // As of now, we always mark the binary function as having a "correct"
  // profile. In the future, we may discard the results for instances with poor
  // inference metrics and keep such functions un-optimized.
  return true;
}

} // end namespace bolt
} // end namespace llvm