//===- bolt/Profile/StaleProfileMatching.cpp - Profile data matching ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// BOLT often has to deal with profiles collected on binaries built from several
// revisions behind release. As a result, a certain percentage of functions is
// considered stale and not optimized. This file implements an ability to match
// profile to functions that are not 100% binary identical, and thus increase
// the optimization coverage and boost the performance of applications.
//
// The algorithm consists of two phases: matching and inference:
// - At the matching phase, we try to "guess" as many block and jump counts from
//   the stale profile as possible. To this end, the content of each basic block
//   is hashed and stored in the (yaml) profile. When BOLT optimizes a binary,
//   it computes block hashes and identifies the corresponding entries in the
//   stale profile. It yields a partial profile for every CFG in the binary.
// - At the inference phase, we employ a network flow-based algorithm (profi) to
//   reconstruct "realistic" block and jump counts from the partial profile
//   generated at the first stage. In practice, we don't always produce proper
//   profile data but the majority (e.g., >90%) of CFGs get the correct counts.
2544268271Sspupyrev // 2644268271Sspupyrev //===----------------------------------------------------------------------===// 2744268271Sspupyrev 2844268271Sspupyrev #include "bolt/Core/HashUtilities.h" 2944268271Sspupyrev #include "bolt/Profile/YAMLProfileReader.h" 306d1502c6Sspupyrev #include "llvm/ADT/Bitfields.h" 3144268271Sspupyrev #include "llvm/ADT/Hashing.h" 329a9af0a2SShaw Young #include "llvm/MC/MCPseudoProbe.h" 3344268271Sspupyrev #include "llvm/Support/CommandLine.h" 3496378b3dSshaw young #include "llvm/Support/Timer.h" 35e7dd596cSspupyrev #include "llvm/Support/xxhash.h" 3644268271Sspupyrev #include "llvm/Transforms/Utils/SampleProfileInference.h" 3744268271Sspupyrev 3844268271Sspupyrev #include <queue> 3944268271Sspupyrev 406d1502c6Sspupyrev using namespace llvm; 416d1502c6Sspupyrev 422316a10fSspupyrev #undef DEBUG_TYPE 432316a10fSspupyrev #define DEBUG_TYPE "bolt-prof" 442316a10fSspupyrev 4544268271Sspupyrev namespace opts { 4644268271Sspupyrev 4796378b3dSshaw young extern cl::opt<bool> TimeRewrite; 4844268271Sspupyrev extern cl::OptionCategory BoltOptCategory; 4944268271Sspupyrev 5044268271Sspupyrev cl::opt<bool> 5144268271Sspupyrev InferStaleProfile("infer-stale-profile", 5244268271Sspupyrev cl::desc("Infer counts from stale profile data."), 5344268271Sspupyrev cl::init(false), cl::Hidden, cl::cat(BoltOptCategory)); 5444268271Sspupyrev 5568fc8dffSshaw young cl::opt<unsigned> StaleMatchingMinMatchedBlock( 5668fc8dffSshaw young "stale-matching-min-matched-block", 5768fc8dffSshaw young cl::desc("Percentage threshold of matched basic blocks at which stale " 5868fc8dffSshaw young "profile inference is executed."), 5968fc8dffSshaw young cl::init(0), cl::Hidden, cl::cat(BoltOptCategory)); 6068fc8dffSshaw young 6144268271Sspupyrev cl::opt<unsigned> StaleMatchingMaxFuncSize( 6244268271Sspupyrev "stale-matching-max-func-size", 6344268271Sspupyrev cl::desc("The maximum size of a function to consider for inference."), 6444268271Sspupyrev cl::init(10000), 
cl::Hidden, cl::cat(BoltOptCategory)); 6544268271Sspupyrev 6644268271Sspupyrev // Parameters of the profile inference algorithm. The default values are tuned 6744268271Sspupyrev // on several benchmarks. 6844268271Sspupyrev cl::opt<bool> StaleMatchingEvenFlowDistribution( 6944268271Sspupyrev "stale-matching-even-flow-distribution", 7044268271Sspupyrev cl::desc("Try to evenly distribute flow when there are multiple equally " 7144268271Sspupyrev "likely options."), 7244268271Sspupyrev cl::init(true), cl::ReallyHidden, cl::cat(BoltOptCategory)); 7344268271Sspupyrev 7444268271Sspupyrev cl::opt<bool> StaleMatchingRebalanceUnknown( 7544268271Sspupyrev "stale-matching-rebalance-unknown", 7644268271Sspupyrev cl::desc("Evenly re-distribute flow among unknown subgraphs."), 7744268271Sspupyrev cl::init(false), cl::ReallyHidden, cl::cat(BoltOptCategory)); 7844268271Sspupyrev 7944268271Sspupyrev cl::opt<bool> StaleMatchingJoinIslands( 8044268271Sspupyrev "stale-matching-join-islands", 8144268271Sspupyrev cl::desc("Join isolated components having positive flow."), cl::init(true), 8244268271Sspupyrev cl::ReallyHidden, cl::cat(BoltOptCategory)); 8344268271Sspupyrev 8444268271Sspupyrev cl::opt<unsigned> StaleMatchingCostBlockInc( 8544268271Sspupyrev "stale-matching-cost-block-inc", 861256ef27Sspupyrev cl::desc("The cost of increasing a block count by one."), cl::init(150), 8744268271Sspupyrev cl::ReallyHidden, cl::cat(BoltOptCategory)); 8844268271Sspupyrev 8944268271Sspupyrev cl::opt<unsigned> StaleMatchingCostBlockDec( 9044268271Sspupyrev "stale-matching-cost-block-dec", 911256ef27Sspupyrev cl::desc("The cost of decreasing a block count by one."), cl::init(150), 9244268271Sspupyrev cl::ReallyHidden, cl::cat(BoltOptCategory)); 9344268271Sspupyrev 9444268271Sspupyrev cl::opt<unsigned> StaleMatchingCostJumpInc( 9544268271Sspupyrev "stale-matching-cost-jump-inc", 961256ef27Sspupyrev cl::desc("The cost of increasing a jump count by one."), cl::init(150), 9744268271Sspupyrev 
cl::ReallyHidden, cl::cat(BoltOptCategory)); 9844268271Sspupyrev 9944268271Sspupyrev cl::opt<unsigned> StaleMatchingCostJumpDec( 10044268271Sspupyrev "stale-matching-cost-jump-dec", 1011256ef27Sspupyrev cl::desc("The cost of decreasing a jump count by one."), cl::init(150), 10244268271Sspupyrev cl::ReallyHidden, cl::cat(BoltOptCategory)); 10344268271Sspupyrev 1041256ef27Sspupyrev cl::opt<unsigned> StaleMatchingCostBlockUnknownInc( 1051256ef27Sspupyrev "stale-matching-cost-block-unknown-inc", 1061256ef27Sspupyrev cl::desc("The cost of increasing an unknown block count by one."), 1071256ef27Sspupyrev cl::init(1), cl::ReallyHidden, cl::cat(BoltOptCategory)); 10844268271Sspupyrev 10944268271Sspupyrev cl::opt<unsigned> StaleMatchingCostJumpUnknownInc( 11044268271Sspupyrev "stale-matching-cost-jump-unknown-inc", 1111256ef27Sspupyrev cl::desc("The cost of increasing an unknown jump count by one."), 1121256ef27Sspupyrev cl::init(140), cl::ReallyHidden, cl::cat(BoltOptCategory)); 11344268271Sspupyrev 11444268271Sspupyrev cl::opt<unsigned> StaleMatchingCostJumpUnknownFTInc( 11544268271Sspupyrev "stale-matching-cost-jump-unknown-ft-inc", 11644268271Sspupyrev cl::desc( 1171256ef27Sspupyrev "The cost of increasing an unknown fall-through jump count by one."), 1181256ef27Sspupyrev cl::init(3), cl::ReallyHidden, cl::cat(BoltOptCategory)); 11944268271Sspupyrev 1209a9af0a2SShaw Young cl::opt<bool> StaleMatchingWithPseudoProbes( 1219a9af0a2SShaw Young "stale-matching-with-pseudo-probes", 1229a9af0a2SShaw Young cl::desc("Turns on stale matching with block pseudo probes."), 1239a9af0a2SShaw Young cl::init(false), cl::ReallyHidden, cl::cat(BoltOptCategory)); 1249a9af0a2SShaw Young 12544268271Sspupyrev } // namespace opts 12644268271Sspupyrev 12744268271Sspupyrev namespace llvm { 12844268271Sspupyrev namespace bolt { 12944268271Sspupyrev 1302316a10fSspupyrev /// An object wrapping several components of a basic block hash. 
The combined 1312316a10fSspupyrev /// (blended) hash is represented and stored as one uint64_t, while individual 1322316a10fSspupyrev /// components are of smaller size (e.g., uint16_t or uint8_t). 1332316a10fSspupyrev struct BlendedBlockHash { 1342316a10fSspupyrev private: 1356d1502c6Sspupyrev using ValueOffset = Bitfield::Element<uint16_t, 0, 16>; 1366d1502c6Sspupyrev using ValueOpcode = Bitfield::Element<uint16_t, 16, 16>; 1376d1502c6Sspupyrev using ValueInstr = Bitfield::Element<uint16_t, 32, 16>; 1381256ef27Sspupyrev using ValuePred = Bitfield::Element<uint8_t, 48, 8>; 1391256ef27Sspupyrev using ValueSucc = Bitfield::Element<uint8_t, 56, 8>; 1402316a10fSspupyrev 1412316a10fSspupyrev public: 1422316a10fSspupyrev explicit BlendedBlockHash() {} 1432316a10fSspupyrev 1446d1502c6Sspupyrev explicit BlendedBlockHash(uint64_t Hash) { 1456d1502c6Sspupyrev Offset = Bitfield::get<ValueOffset>(Hash); 1466d1502c6Sspupyrev OpcodeHash = Bitfield::get<ValueOpcode>(Hash); 1476d1502c6Sspupyrev InstrHash = Bitfield::get<ValueInstr>(Hash); 1481256ef27Sspupyrev PredHash = Bitfield::get<ValuePred>(Hash); 1491256ef27Sspupyrev SuccHash = Bitfield::get<ValueSucc>(Hash); 1502316a10fSspupyrev } 1512316a10fSspupyrev 1522316a10fSspupyrev /// Combine the blended hash into uint64_t. 1532316a10fSspupyrev uint64_t combine() const { 1546d1502c6Sspupyrev uint64_t Hash = 0; 1556d1502c6Sspupyrev Bitfield::set<ValueOffset>(Hash, Offset); 1566d1502c6Sspupyrev Bitfield::set<ValueOpcode>(Hash, OpcodeHash); 1576d1502c6Sspupyrev Bitfield::set<ValueInstr>(Hash, InstrHash); 1581256ef27Sspupyrev Bitfield::set<ValuePred>(Hash, PredHash); 1591256ef27Sspupyrev Bitfield::set<ValueSucc>(Hash, SuccHash); 1606d1502c6Sspupyrev return Hash; 1612316a10fSspupyrev } 1622316a10fSspupyrev 1632316a10fSspupyrev /// Compute a distance between two given blended hashes. The smaller the 1642316a10fSspupyrev /// distance, the more similar two blocks are. 
For identical basic blocks, 1652316a10fSspupyrev /// the distance is zero. 1662316a10fSspupyrev uint64_t distance(const BlendedBlockHash &BBH) const { 1672316a10fSspupyrev assert(OpcodeHash == BBH.OpcodeHash && 1682316a10fSspupyrev "incorrect blended hash distance computation"); 1692316a10fSspupyrev uint64_t Dist = 0; 1702316a10fSspupyrev // Account for NeighborHash 1711256ef27Sspupyrev Dist += SuccHash == BBH.SuccHash ? 0 : 1; 1721256ef27Sspupyrev Dist += PredHash == BBH.PredHash ? 0 : 1; 1732316a10fSspupyrev Dist <<= 16; 1742316a10fSspupyrev // Account for InstrHash 1752316a10fSspupyrev Dist += InstrHash == BBH.InstrHash ? 0 : 1; 1762316a10fSspupyrev Dist <<= 16; 1772316a10fSspupyrev // Account for Offset 1782316a10fSspupyrev Dist += (Offset >= BBH.Offset ? Offset - BBH.Offset : BBH.Offset - Offset); 1792316a10fSspupyrev return Dist; 1802316a10fSspupyrev } 1812316a10fSspupyrev 1822316a10fSspupyrev /// The offset of the basic block from the function start. 1832316a10fSspupyrev uint16_t Offset{0}; 1842316a10fSspupyrev /// (Loose) Hash of the basic block instructions, excluding operands. 1852316a10fSspupyrev uint16_t OpcodeHash{0}; 1862316a10fSspupyrev /// (Strong) Hash of the basic block instructions, including opcodes and 1872316a10fSspupyrev /// operands. 1882316a10fSspupyrev uint16_t InstrHash{0}; 1891256ef27Sspupyrev /// (Loose) Hashes of the predecessors of the basic block. 1901256ef27Sspupyrev uint8_t PredHash{0}; 1911256ef27Sspupyrev /// (Loose) Hashes of the successors of the basic block. 1921256ef27Sspupyrev uint8_t SuccHash{0}; 1932316a10fSspupyrev }; 1942316a10fSspupyrev 1952316a10fSspupyrev /// The object is used to identify and match basic blocks in a BinaryFunction 1962316a10fSspupyrev /// given their hashes computed on a binary built from several revisions behind 1972316a10fSspupyrev /// release. 1982316a10fSspupyrev class StaleMatcher { 1992316a10fSspupyrev public: 2002316a10fSspupyrev /// Initialize stale matcher. 
2012316a10fSspupyrev void init(const std::vector<FlowBlock *> &Blocks, 202131eb305SShaw Young const std::vector<BlendedBlockHash> &Hashes, 203131eb305SShaw Young const std::vector<uint64_t> &CallHashes) { 2042316a10fSspupyrev assert(Blocks.size() == Hashes.size() && 205131eb305SShaw Young Hashes.size() == CallHashes.size() && 2062316a10fSspupyrev "incorrect matcher initialization"); 2072316a10fSspupyrev for (size_t I = 0; I < Blocks.size(); I++) { 2082316a10fSspupyrev FlowBlock *Block = Blocks[I]; 2092316a10fSspupyrev uint16_t OpHash = Hashes[I].OpcodeHash; 2102316a10fSspupyrev OpHashToBlocks[OpHash].push_back(std::make_pair(Hashes[I], Block)); 211131eb305SShaw Young if (CallHashes[I]) 212131eb305SShaw Young CallHashToBlocks[CallHashes[I]].push_back( 213131eb305SShaw Young std::make_pair(Hashes[I], Block)); 2142316a10fSspupyrev } 2152316a10fSspupyrev } 2162316a10fSspupyrev 2179a9af0a2SShaw Young /// Creates a mapping from a pseudo probe to a flow block. 2189a9af0a2SShaw Young void mapProbeToBB(const MCDecodedPseudoProbe *Probe, FlowBlock *Block) { 2199a9af0a2SShaw Young BBPseudoProbeToBlock[Probe] = Block; 2209a9af0a2SShaw Young } 2219a9af0a2SShaw Young 2229a9af0a2SShaw Young enum MatchMethod : char { 2239a9af0a2SShaw Young MATCH_EXACT = 0, 2249a9af0a2SShaw Young MATCH_PROBE_EXACT, 2259a9af0a2SShaw Young MATCH_PROBE_LOOSE, 2269a9af0a2SShaw Young MATCH_OPCODE, 2279a9af0a2SShaw Young MATCH_CALL, 2289a9af0a2SShaw Young NO_MATCH 2299a9af0a2SShaw Young }; 2309a9af0a2SShaw Young 2319a9af0a2SShaw Young /// Find the most similar flow block for a profile block given blended hash. 
2329a9af0a2SShaw Young std::pair<const FlowBlock *, MatchMethod> 2339a9af0a2SShaw Young matchBlockStrict(BlendedBlockHash BlendedHash) { 2349a9af0a2SShaw Young const auto &[Block, ExactHash] = matchWithOpcodes(BlendedHash); 2359a9af0a2SShaw Young if (Block && ExactHash) 2369a9af0a2SShaw Young return {Block, MATCH_EXACT}; 2379a9af0a2SShaw Young return {nullptr, NO_MATCH}; 2389a9af0a2SShaw Young } 2399a9af0a2SShaw Young 2409a9af0a2SShaw Young /// Find the most similar flow block for a profile block given pseudo probes. 2419a9af0a2SShaw Young std::pair<const FlowBlock *, MatchMethod> matchBlockProbe( 2429a9af0a2SShaw Young const ArrayRef<yaml::bolt::PseudoProbeInfo> PseudoProbes, 2439a9af0a2SShaw Young const YAMLProfileReader::InlineTreeNodeMapTy &InlineTreeNodeMap) { 2449a9af0a2SShaw Young const auto &[ProbeBlock, ExactProbe] = 2459a9af0a2SShaw Young matchWithPseudoProbes(PseudoProbes, InlineTreeNodeMap); 2469a9af0a2SShaw Young if (ProbeBlock) 2479a9af0a2SShaw Young return {ProbeBlock, ExactProbe ? MATCH_PROBE_EXACT : MATCH_PROBE_LOOSE}; 2489a9af0a2SShaw Young return {nullptr, NO_MATCH}; 2499a9af0a2SShaw Young } 2509a9af0a2SShaw Young 2519a9af0a2SShaw Young /// Find the most similar flow block for a profile block given its hashes. 
2529a9af0a2SShaw Young std::pair<const FlowBlock *, MatchMethod> 2539a9af0a2SShaw Young matchBlockLoose(BlendedBlockHash BlendedHash, uint64_t CallHash) { 2549a9af0a2SShaw Young if (const FlowBlock *CallBlock = matchWithCalls(BlendedHash, CallHash)) 2559a9af0a2SShaw Young return {CallBlock, MATCH_CALL}; 2569a9af0a2SShaw Young if (const FlowBlock *OpcodeBlock = matchWithOpcodes(BlendedHash).first) 2579a9af0a2SShaw Young return {OpcodeBlock, MATCH_OPCODE}; 2589a9af0a2SShaw Young return {nullptr, NO_MATCH}; 259131eb305SShaw Young } 260131eb305SShaw Young 261131eb305SShaw Young /// Returns true if the two basic blocks (in the binary and in the profile) 262131eb305SShaw Young /// corresponding to the given hashes are matched to each other with a high 263131eb305SShaw Young /// confidence. 264131eb305SShaw Young static bool isHighConfidenceMatch(BlendedBlockHash Hash1, 265131eb305SShaw Young BlendedBlockHash Hash2) { 266131eb305SShaw Young return Hash1.InstrHash == Hash2.InstrHash; 267131eb305SShaw Young } 268131eb305SShaw Young 269131eb305SShaw Young private: 270131eb305SShaw Young using HashBlockPairType = std::pair<BlendedBlockHash, FlowBlock *>; 271131eb305SShaw Young std::unordered_map<uint16_t, std::vector<HashBlockPairType>> OpHashToBlocks; 272131eb305SShaw Young std::unordered_map<uint64_t, std::vector<HashBlockPairType>> CallHashToBlocks; 2739a9af0a2SShaw Young DenseMap<const MCDecodedPseudoProbe *, FlowBlock *> BBPseudoProbeToBlock; 274131eb305SShaw Young 275131eb305SShaw Young // Uses OpcodeHash to find the most similar block for a given hash. 
2769a9af0a2SShaw Young std::pair<const FlowBlock *, bool> 2779a9af0a2SShaw Young matchWithOpcodes(BlendedBlockHash BlendedHash) const { 2782316a10fSspupyrev auto BlockIt = OpHashToBlocks.find(BlendedHash.OpcodeHash); 27931e8a9f4Sspupyrev if (BlockIt == OpHashToBlocks.end()) 2809a9af0a2SShaw Young return {nullptr, false}; 2812316a10fSspupyrev FlowBlock *BestBlock = nullptr; 2822316a10fSspupyrev uint64_t BestDist = std::numeric_limits<uint64_t>::max(); 2839a9af0a2SShaw Young BlendedBlockHash BestHash; 28431e8a9f4Sspupyrev for (const auto &[Hash, Block] : BlockIt->second) { 2852316a10fSspupyrev uint64_t Dist = Hash.distance(BlendedHash); 2862316a10fSspupyrev if (BestBlock == nullptr || Dist < BestDist) { 2872316a10fSspupyrev BestDist = Dist; 2882316a10fSspupyrev BestBlock = Block; 2899a9af0a2SShaw Young BestHash = Hash; 2902316a10fSspupyrev } 2912316a10fSspupyrev } 2929a9af0a2SShaw Young return {BestBlock, isHighConfidenceMatch(BestHash, BlendedHash)}; 2932316a10fSspupyrev } 2942316a10fSspupyrev 295131eb305SShaw Young // Uses CallHash to find the most similar block for a given hash. 296131eb305SShaw Young const FlowBlock *matchWithCalls(BlendedBlockHash BlendedHash, 297131eb305SShaw Young uint64_t CallHash) const { 298131eb305SShaw Young if (!CallHash) 299131eb305SShaw Young return nullptr; 300131eb305SShaw Young auto BlockIt = CallHashToBlocks.find(CallHash); 301131eb305SShaw Young if (BlockIt == CallHashToBlocks.end()) 302131eb305SShaw Young return nullptr; 303131eb305SShaw Young FlowBlock *BestBlock = nullptr; 304131eb305SShaw Young uint64_t BestDist = std::numeric_limits<uint64_t>::max(); 305131eb305SShaw Young for (const auto &[Hash, Block] : BlockIt->second) { 306131eb305SShaw Young uint64_t Dist = Hash.OpcodeHash > BlendedHash.OpcodeHash 307131eb305SShaw Young ? 
Hash.OpcodeHash - BlendedHash.OpcodeHash 308131eb305SShaw Young : BlendedHash.OpcodeHash - Hash.OpcodeHash; 309131eb305SShaw Young if (BestBlock == nullptr || Dist < BestDist) { 310131eb305SShaw Young BestDist = Dist; 311131eb305SShaw Young BestBlock = Block; 31231e8a9f4Sspupyrev } 313131eb305SShaw Young } 314131eb305SShaw Young return BestBlock; 315131eb305SShaw Young } 3169a9af0a2SShaw Young 3179a9af0a2SShaw Young /// Matches a profile block with a binary block based on pseudo probes. 3189a9af0a2SShaw Young /// Returns the best matching block (or nullptr) and whether the match is 3199a9af0a2SShaw Young /// unambiguous. 3209a9af0a2SShaw Young std::pair<const FlowBlock *, bool> matchWithPseudoProbes( 3219a9af0a2SShaw Young const ArrayRef<yaml::bolt::PseudoProbeInfo> BlockPseudoProbes, 3229a9af0a2SShaw Young const YAMLProfileReader::InlineTreeNodeMapTy &InlineTreeNodeMap) const { 3239a9af0a2SShaw Young 3249a9af0a2SShaw Young if (!opts::StaleMatchingWithPseudoProbes) 3259a9af0a2SShaw Young return {nullptr, false}; 3269a9af0a2SShaw Young 3279a9af0a2SShaw Young DenseMap<const FlowBlock *, uint32_t> FlowBlockMatchCount; 3289a9af0a2SShaw Young 3299a9af0a2SShaw Young auto matchProfileProbeToBlock = [&](uint32_t NodeId, 3309a9af0a2SShaw Young uint64_t ProbeId) -> const FlowBlock * { 3319a9af0a2SShaw Young const MCDecodedPseudoProbeInlineTree *BinaryNode = 3329a9af0a2SShaw Young InlineTreeNodeMap.getInlineTreeNode(NodeId); 3339a9af0a2SShaw Young if (!BinaryNode) 3349a9af0a2SShaw Young return nullptr; 3359a9af0a2SShaw Young const MCDecodedPseudoProbe Dummy(0, ProbeId, PseudoProbeType::Block, 0, 0, 3369a9af0a2SShaw Young nullptr); 3379a9af0a2SShaw Young ArrayRef<MCDecodedPseudoProbe> BinaryProbes = BinaryNode->getProbes(); 3389a9af0a2SShaw Young auto BinaryProbeIt = llvm::lower_bound( 3399a9af0a2SShaw Young BinaryProbes, Dummy, [](const auto &LHS, const auto &RHS) { 3409a9af0a2SShaw Young return LHS.getIndex() < RHS.getIndex(); 3419a9af0a2SShaw Young }); 3429a9af0a2SShaw 
Young if (BinaryProbeIt == BinaryNode->getProbes().end() || 3439a9af0a2SShaw Young BinaryProbeIt->getIndex() != ProbeId) 3449a9af0a2SShaw Young return nullptr; 3459a9af0a2SShaw Young auto It = BBPseudoProbeToBlock.find(&*BinaryProbeIt); 3469a9af0a2SShaw Young if (It == BBPseudoProbeToBlock.end()) 3479a9af0a2SShaw Young return nullptr; 3489a9af0a2SShaw Young return It->second; 3499a9af0a2SShaw Young }; 3509a9af0a2SShaw Young 3519a9af0a2SShaw Young auto matchPseudoProbeInfo = [&](const yaml::bolt::PseudoProbeInfo 3529a9af0a2SShaw Young &ProfileProbe, 3539a9af0a2SShaw Young uint32_t NodeId) { 3549a9af0a2SShaw Young for (uint64_t Index = 0; Index < 64; ++Index) 3559a9af0a2SShaw Young if (ProfileProbe.BlockMask & 1ull << Index) 3569a9af0a2SShaw Young ++FlowBlockMatchCount[matchProfileProbeToBlock(NodeId, Index + 1)]; 3579a9af0a2SShaw Young for (const auto &ProfileProbes : 3589a9af0a2SShaw Young {ProfileProbe.BlockProbes, ProfileProbe.IndCallProbes, 3599a9af0a2SShaw Young ProfileProbe.CallProbes}) 3609a9af0a2SShaw Young for (uint64_t ProfileProbe : ProfileProbes) 3619a9af0a2SShaw Young ++FlowBlockMatchCount[matchProfileProbeToBlock(NodeId, ProfileProbe)]; 3629a9af0a2SShaw Young }; 3639a9af0a2SShaw Young 3649a9af0a2SShaw Young for (const yaml::bolt::PseudoProbeInfo &ProfileProbe : BlockPseudoProbes) { 3659a9af0a2SShaw Young if (!ProfileProbe.InlineTreeNodes.empty()) 3669a9af0a2SShaw Young for (uint32_t ProfileInlineTreeNode : ProfileProbe.InlineTreeNodes) 3679a9af0a2SShaw Young matchPseudoProbeInfo(ProfileProbe, ProfileInlineTreeNode); 3689a9af0a2SShaw Young else 3699a9af0a2SShaw Young matchPseudoProbeInfo(ProfileProbe, ProfileProbe.InlineTreeIndex); 3709a9af0a2SShaw Young } 3719a9af0a2SShaw Young uint32_t BestMatchCount = 0; 3729a9af0a2SShaw Young uint32_t TotalMatchCount = 0; 3739a9af0a2SShaw Young const FlowBlock *BestMatchBlock = nullptr; 3749a9af0a2SShaw Young for (const auto &[FlowBlock, Count] : FlowBlockMatchCount) { 3759a9af0a2SShaw Young TotalMatchCount += 
Count; 3769a9af0a2SShaw Young if (Count < BestMatchCount || (Count == BestMatchCount && BestMatchBlock)) 3779a9af0a2SShaw Young continue; 3789a9af0a2SShaw Young BestMatchBlock = FlowBlock; 3799a9af0a2SShaw Young BestMatchCount = Count; 3809a9af0a2SShaw Young } 3819a9af0a2SShaw Young return {BestMatchBlock, BestMatchCount == TotalMatchCount}; 3829a9af0a2SShaw Young } 3832316a10fSspupyrev }; 3842316a10fSspupyrev 385b039ccc6SAmir Ayupov void BinaryFunction::computeBlockHashes(HashFunction HashFunction) const { 3862316a10fSspupyrev if (size() == 0) 3872316a10fSspupyrev return; 3882316a10fSspupyrev 3892316a10fSspupyrev assert(hasCFG() && "the function is expected to have CFG"); 3902316a10fSspupyrev 3912316a10fSspupyrev std::vector<BlendedBlockHash> BlendedHashes(BasicBlocks.size()); 3922316a10fSspupyrev std::vector<uint64_t> OpcodeHashes(BasicBlocks.size()); 3931256ef27Sspupyrev // Initialize hash components. 3942316a10fSspupyrev for (size_t I = 0; I < BasicBlocks.size(); I++) { 3952316a10fSspupyrev const BinaryBasicBlock *BB = BasicBlocks[I]; 3962316a10fSspupyrev assert(BB->getIndex() == I && "incorrect block index"); 3972316a10fSspupyrev BlendedHashes[I].Offset = BB->getOffset(); 3981256ef27Sspupyrev // Hashing complete instructions. 
3992316a10fSspupyrev std::string InstrHashStr = hashBlock( 4002316a10fSspupyrev BC, *BB, [&](const MCOperand &Op) { return hashInstOperand(BC, Op); }); 401b039ccc6SAmir Ayupov if (HashFunction == HashFunction::StdHash) { 402b039ccc6SAmir Ayupov uint64_t InstrHash = std::hash<std::string>{}(InstrHashStr); 403b039ccc6SAmir Ayupov BlendedHashes[I].InstrHash = (uint16_t)hash_value(InstrHash); 404b039ccc6SAmir Ayupov } else if (HashFunction == HashFunction::XXH3) { 405e7dd596cSspupyrev uint64_t InstrHash = llvm::xxh3_64bits(InstrHashStr); 406e7dd596cSspupyrev BlendedHashes[I].InstrHash = (uint16_t)InstrHash; 407b039ccc6SAmir Ayupov } else { 408b039ccc6SAmir Ayupov llvm_unreachable("Unhandled HashFunction"); 409b039ccc6SAmir Ayupov } 4101256ef27Sspupyrev // Hashing opcodes. 4111256ef27Sspupyrev std::string OpcodeHashStr = hashBlockLoose(BC, *BB); 412b039ccc6SAmir Ayupov if (HashFunction == HashFunction::StdHash) { 413b039ccc6SAmir Ayupov OpcodeHashes[I] = std::hash<std::string>{}(OpcodeHashStr); 414b039ccc6SAmir Ayupov BlendedHashes[I].OpcodeHash = (uint16_t)hash_value(OpcodeHashes[I]); 415b039ccc6SAmir Ayupov } else if (HashFunction == HashFunction::XXH3) { 416e7dd596cSspupyrev OpcodeHashes[I] = llvm::xxh3_64bits(OpcodeHashStr); 417e7dd596cSspupyrev BlendedHashes[I].OpcodeHash = (uint16_t)OpcodeHashes[I]; 418b039ccc6SAmir Ayupov } else { 419b039ccc6SAmir Ayupov llvm_unreachable("Unhandled HashFunction"); 420b039ccc6SAmir Ayupov } 4212316a10fSspupyrev } 4222316a10fSspupyrev 4231256ef27Sspupyrev // Initialize neighbor hash. 4242316a10fSspupyrev for (size_t I = 0; I < BasicBlocks.size(); I++) { 4252316a10fSspupyrev const BinaryBasicBlock *BB = BasicBlocks[I]; 4261256ef27Sspupyrev // Append hashes of successors. 
4271256ef27Sspupyrev uint64_t Hash = 0; 4282316a10fSspupyrev for (BinaryBasicBlock *SuccBB : BB->successors()) { 4292316a10fSspupyrev uint64_t SuccHash = OpcodeHashes[SuccBB->getIndex()]; 4302316a10fSspupyrev Hash = hashing::detail::hash_16_bytes(Hash, SuccHash); 4312316a10fSspupyrev } 432b039ccc6SAmir Ayupov if (HashFunction == HashFunction::StdHash) { 433b039ccc6SAmir Ayupov // Compatibility with old behavior. 434b039ccc6SAmir Ayupov BlendedHashes[I].SuccHash = (uint8_t)hash_value(Hash); 435b039ccc6SAmir Ayupov } else { 436e7dd596cSspupyrev BlendedHashes[I].SuccHash = (uint8_t)Hash; 437b039ccc6SAmir Ayupov } 4381256ef27Sspupyrev 4391256ef27Sspupyrev // Append hashes of predecessors. 4401256ef27Sspupyrev Hash = 0; 4412316a10fSspupyrev for (BinaryBasicBlock *PredBB : BB->predecessors()) { 4422316a10fSspupyrev uint64_t PredHash = OpcodeHashes[PredBB->getIndex()]; 4432316a10fSspupyrev Hash = hashing::detail::hash_16_bytes(Hash, PredHash); 4442316a10fSspupyrev } 445b039ccc6SAmir Ayupov if (HashFunction == HashFunction::StdHash) { 446b039ccc6SAmir Ayupov // Compatibility with old behavior. 447b039ccc6SAmir Ayupov BlendedHashes[I].PredHash = (uint8_t)hash_value(Hash); 448b039ccc6SAmir Ayupov } else { 449e7dd596cSspupyrev BlendedHashes[I].PredHash = (uint8_t)Hash; 4502316a10fSspupyrev } 451b039ccc6SAmir Ayupov } 4522316a10fSspupyrev 4531256ef27Sspupyrev // Assign hashes. 4542316a10fSspupyrev for (size_t I = 0; I < BasicBlocks.size(); I++) { 4552316a10fSspupyrev const BinaryBasicBlock *BB = BasicBlocks[I]; 4562316a10fSspupyrev BB->setHash(BlendedHashes[I].combine()); 4572316a10fSspupyrev } 4582316a10fSspupyrev } 459753498eeSshaw young // TODO: mediate the difference between flow function construction here in BOLT 460753498eeSshaw young // and in the compiler by splitting blocks with exception throwing calls at the 461753498eeSshaw young // call and adding the landing pad as the successor. 
46244268271Sspupyrev /// Create a wrapper flow function to use with the profile inference algorithm, 46344268271Sspupyrev /// and initialize its jumps and metadata. 46444268271Sspupyrev FlowFunction 46544268271Sspupyrev createFlowFunction(const BinaryFunction::BasicBlockOrderType &BlockOrder) { 46644268271Sspupyrev FlowFunction Func; 46744268271Sspupyrev 46844268271Sspupyrev // Add a special "dummy" source so that there is always a unique entry point. 46944268271Sspupyrev FlowBlock EntryBlock; 47044268271Sspupyrev EntryBlock.Index = 0; 47144268271Sspupyrev Func.Blocks.push_back(EntryBlock); 47244268271Sspupyrev 473753498eeSshaw young // Create FlowBlock for every basic block in the binary function. 47444268271Sspupyrev for (const BinaryBasicBlock *BB : BlockOrder) { 47544268271Sspupyrev Func.Blocks.emplace_back(); 47644268271Sspupyrev FlowBlock &Block = Func.Blocks.back(); 47744268271Sspupyrev Block.Index = Func.Blocks.size() - 1; 47844268271Sspupyrev (void)BB; 4796d1502c6Sspupyrev assert(Block.Index == BB->getIndex() + 1 && 48044268271Sspupyrev "incorrectly assigned basic block index"); 48144268271Sspupyrev } 48244268271Sspupyrev 483753498eeSshaw young // Add a special "dummy" sink block so there is always a unique sink. 484753498eeSshaw young FlowBlock SinkBlock; 485753498eeSshaw young SinkBlock.Index = Func.Blocks.size(); 486753498eeSshaw young Func.Blocks.push_back(SinkBlock); 487753498eeSshaw young 488753498eeSshaw young // Create FlowJump for each jump between basic blocks in the binary function. 
48944268271Sspupyrev std::vector<uint64_t> InDegree(Func.Blocks.size(), 0); 49044268271Sspupyrev for (const BinaryBasicBlock *SrcBB : BlockOrder) { 49144268271Sspupyrev std::unordered_set<const BinaryBasicBlock *> UniqueSuccs; 49244268271Sspupyrev // Collect regular jumps 49344268271Sspupyrev for (const BinaryBasicBlock *DstBB : SrcBB->successors()) { 49444268271Sspupyrev // Ignoring parallel edges 49544268271Sspupyrev if (UniqueSuccs.find(DstBB) != UniqueSuccs.end()) 49644268271Sspupyrev continue; 49744268271Sspupyrev 49844268271Sspupyrev Func.Jumps.emplace_back(); 49944268271Sspupyrev FlowJump &Jump = Func.Jumps.back(); 5006d1502c6Sspupyrev Jump.Source = SrcBB->getIndex() + 1; 5016d1502c6Sspupyrev Jump.Target = DstBB->getIndex() + 1; 50244268271Sspupyrev InDegree[Jump.Target]++; 50344268271Sspupyrev UniqueSuccs.insert(DstBB); 50444268271Sspupyrev } 505753498eeSshaw young // TODO: set jump from exit block to landing pad to Unlikely. 506753498eeSshaw young // If the block is an exit, add a dummy edge from it to the sink block. 
507753498eeSshaw young if (UniqueSuccs.empty()) { 508753498eeSshaw young Func.Jumps.emplace_back(); 509753498eeSshaw young FlowJump &Jump = Func.Jumps.back(); 510753498eeSshaw young Jump.Source = SrcBB->getIndex() + 1; 511753498eeSshaw young Jump.Target = Func.Blocks.size() - 1; 512753498eeSshaw young InDegree[Jump.Target]++; 513753498eeSshaw young } 514753498eeSshaw young 51544268271Sspupyrev // Collect jumps to landing pads 51644268271Sspupyrev for (const BinaryBasicBlock *DstBB : SrcBB->landing_pads()) { 51744268271Sspupyrev // Ignoring parallel edges 51844268271Sspupyrev if (UniqueSuccs.find(DstBB) != UniqueSuccs.end()) 51944268271Sspupyrev continue; 52044268271Sspupyrev 52144268271Sspupyrev Func.Jumps.emplace_back(); 52244268271Sspupyrev FlowJump &Jump = Func.Jumps.back(); 5236d1502c6Sspupyrev Jump.Source = SrcBB->getIndex() + 1; 5246d1502c6Sspupyrev Jump.Target = DstBB->getIndex() + 1; 52544268271Sspupyrev InDegree[Jump.Target]++; 52644268271Sspupyrev UniqueSuccs.insert(DstBB); 52744268271Sspupyrev } 52844268271Sspupyrev } 52944268271Sspupyrev 53044268271Sspupyrev // Add dummy edges to the extra sources. If there are multiple entry blocks, 531753498eeSshaw young // add an unlikely edge from 0 to the subsequent ones. Skips the sink block. 
53244268271Sspupyrev assert(InDegree[0] == 0 && "dummy entry blocks shouldn't have predecessors"); 533753498eeSshaw young for (uint64_t I = 1; I < Func.Blocks.size() - 1; I++) { 53444268271Sspupyrev const BinaryBasicBlock *BB = BlockOrder[I - 1]; 53544268271Sspupyrev if (BB->isEntryPoint() || InDegree[I] == 0) { 53644268271Sspupyrev Func.Jumps.emplace_back(); 53744268271Sspupyrev FlowJump &Jump = Func.Jumps.back(); 53844268271Sspupyrev Jump.Source = 0; 53944268271Sspupyrev Jump.Target = I; 54044268271Sspupyrev if (!BB->isEntryPoint()) 54144268271Sspupyrev Jump.IsUnlikely = true; 54244268271Sspupyrev } 54344268271Sspupyrev } 54444268271Sspupyrev 54544268271Sspupyrev // Create necessary metadata for the flow function 54644268271Sspupyrev for (FlowJump &Jump : Func.Jumps) { 547c8fc234eSshaw young assert(Jump.Source < Func.Blocks.size()); 548c8fc234eSshaw young Func.Blocks[Jump.Source].SuccJumps.push_back(&Jump); 549c8fc234eSshaw young assert(Jump.Target < Func.Blocks.size()); 550c8fc234eSshaw young Func.Blocks[Jump.Target].PredJumps.push_back(&Jump); 55144268271Sspupyrev } 55244268271Sspupyrev return Func; 55344268271Sspupyrev } 55444268271Sspupyrev 55544268271Sspupyrev /// Assign initial block/jump weights based on the stale profile data. The goal 55644268271Sspupyrev /// is to extract as much information from the stale profile as possible. Here 55744268271Sspupyrev /// we assume that each basic block is specified via a hash value computed from 55844268271Sspupyrev /// its content and the hashes of the unchanged basic blocks stay the same 5599a9af0a2SShaw Young /// across different revisions of the binary. Blocks may also have pseudo probe 5609a9af0a2SShaw Young /// information in the profile and the binary which is used for matching. 56144268271Sspupyrev /// Whenever there is a count in the profile with the hash corresponding to one 56244268271Sspupyrev /// of the basic blocks in the binary, the count is "matched" to the block. 
/// Similarly, if both the source and the target of a count in the profile are
/// matched to a jump in the binary, the count is recorded in CFG.
///
/// Returns the number of profile blocks from \p YamlBF that were matched to
/// flow blocks (matches contributed on behalf of other profiles via
/// \p ProbeMatchSpecs are recorded but not counted in the return value).
size_t matchWeights(
    BinaryContext &BC, const BinaryFunction::BasicBlockOrderType &BlockOrder,
    const yaml::bolt::BinaryFunctionProfile &YamlBF, FlowFunction &Func,
    HashFunction HashFunction, YAMLProfileReader::ProfileLookupMap &IdToYamlBF,
    const BinaryFunction &BF,
    const ArrayRef<YAMLProfileReader::ProbeMatchSpec> ProbeMatchSpecs) {

  // Func wraps BlockOrder plus two artificial blocks (entry source and sink),
  // so binary block I corresponds to Func.Blocks[I + 1].
  assert(Func.Blocks.size() == BlockOrder.size() + 2);

  // Per binary basic block: hash of its calls (0 when the block has none, see
  // hashBlockCalls), the corresponding flow block, and its blended block hash.
  std::vector<uint64_t> CallHashes;
  std::vector<FlowBlock *> Blocks;
  std::vector<BlendedBlockHash> BlendedHashes;
  for (uint64_t I = 0; I < BlockOrder.size(); I++) {
    const BinaryBasicBlock *BB = BlockOrder[I];
    assert(BB->getHash() != 0 && "empty hash of BinaryBasicBlock");

    std::string CallHashStr = hashBlockCalls(BC, *BB);
    if (CallHashStr.empty()) {
      CallHashes.push_back(0);
    } else {
      // Use the same hash function that produced the hashes in the profile so
      // that binary-side and profile-side call hashes are comparable.
      if (HashFunction == HashFunction::StdHash)
        CallHashes.push_back(std::hash<std::string>{}(CallHashStr));
      else if (HashFunction == HashFunction::XXH3)
        CallHashes.push_back(llvm::xxh3_64bits(CallHashStr));
      else
        llvm_unreachable("Unhandled HashFunction");
    }

    Blocks.push_back(&Func.Blocks[I + 1]);
    BlendedBlockHash BlendedHash(BB->getHash());
    BlendedHashes.push_back(BlendedHash);
    LLVM_DEBUG(dbgs() << "BB with index " << I << " has hash = "
                      << Twine::utohexstr(BB->getHash()) << "\n");
  }
  StaleMatcher Matcher;
  // Collects function pseudo probes for use in the StaleMatcher.
  if (opts::StaleMatchingWithPseudoProbes) {
    const MCPseudoProbeDecoder *Decoder = BC.getPseudoProbeDecoder();
    assert(Decoder &&
           "If pseudo probes are in use, pseudo probe decoder should exist");
    const AddressProbesMap &ProbeMap = Decoder->getAddress2ProbesMap();
    const uint64_t FuncAddr = BF.getAddress();
    // Map each probe in the function's address range to the flow block of the
    // binary basic block containing the probe's offset.
    for (const MCDecodedPseudoProbe &Probe :
         ProbeMap.find(FuncAddr, FuncAddr + BF.getSize()))
      if (const BinaryBasicBlock *BB =
              BF.getBasicBlockContainingOffset(Probe.getAddress() - FuncAddr))
        Matcher.mapProbeToBB(&Probe, Blocks[BB->getIndex()]);
  }
  Matcher.init(Blocks, BlendedHashes, CallHashes);

  using FlowBlockTy =
      std::pair<const FlowBlock *, const yaml::bolt::BinaryBasicBlockProfile *>;
  using ProfileBlockMatchMap = DenseMap<uint32_t, FlowBlockTy>;
  // Binary profile => block index => matched block + its block profile
  DenseMap<const yaml::bolt::BinaryFunctionProfile *, ProfileBlockMatchMap>
      MatchedBlocks;

  // Map of FlowBlock and matching method.
  DenseMap<const FlowBlock *, StaleMatcher::MatchMethod> MatchedFlowBlocks;

  // Record a (flow block, profile block) match. The first match of a flow
  // block wins; later (lower-priority) matching passes cannot override it.
  auto addMatchedBlock =
      [&](std::pair<const FlowBlock *, StaleMatcher::MatchMethod> BlockMethod,
          const yaml::bolt::BinaryFunctionProfile &YamlBP,
          const yaml::bolt::BinaryBasicBlockProfile &YamlBB) {
        const auto &[MatchedBlock, Method] = BlockMethod;
        if (!MatchedBlock)
          return;
        // Don't override earlier matches
        if (MatchedFlowBlocks.contains(MatchedBlock))
          return;
        MatchedFlowBlocks.try_emplace(MatchedBlock, Method);
        MatchedBlocks[&YamlBP][YamlBB.Index] = {MatchedBlock, &YamlBB};
      };

  // Matching passes run in decreasing order of confidence:
  // strict hash, then pseudo probes, then loose methods.

  // Match blocks from the profile to the blocks in CFG by strict hash.
  for (const yaml::bolt::BinaryBasicBlockProfile &YamlBB : YamlBF.Blocks) {
    // Update matching stats.
    ++BC.Stats.NumStaleBlocks;
    BC.Stats.StaleSampleCount += YamlBB.ExecCount;

    assert(YamlBB.Hash != 0 && "empty hash of BinaryBasicBlockProfile");
    BlendedBlockHash YamlHash(YamlBB.Hash);
    addMatchedBlock(Matcher.matchBlockStrict(YamlHash), YamlBF, YamlBB);
  }
  // Match blocks from the profile to the blocks in CFG by pseudo probes.
  for (const auto &[InlineNodeMap, YamlBP] : ProbeMatchSpecs) {
    for (const yaml::bolt::BinaryBasicBlockProfile &BB : YamlBP.get().Blocks)
      if (!BB.PseudoProbes.empty())
        addMatchedBlock(Matcher.matchBlockProbe(BB.PseudoProbes, InlineNodeMap),
                        YamlBP, BB);
  }
  // Match blocks from the profile to the blocks in CFG with loose methods.
  for (const yaml::bolt::BinaryBasicBlockProfile &YamlBB : YamlBF.Blocks) {
    assert(YamlBB.Hash != 0 && "empty hash of BinaryBasicBlockProfile");
    BlendedBlockHash YamlHash(YamlBB.Hash);

    std::string CallHashStr = hashBlockCalls(IdToYamlBF, YamlBB);
    uint64_t CallHash = 0;
    if (!CallHashStr.empty()) {
      if (HashFunction == HashFunction::StdHash)
        CallHash = std::hash<std::string>{}(CallHashStr);
      else if (HashFunction == HashFunction::XXH3)
        CallHash = llvm::xxh3_64bits(CallHashStr);
      else
        llvm_unreachable("Unhandled HashFunction");
    }
    auto [MatchedBlock, Method] = Matcher.matchBlockLoose(YamlHash, CallHash);
    // The profile's entry block (index 0) always matches the binary's first
    // block, even when no hash-based match was found.
    if (MatchedBlock == nullptr && YamlBB.Index == 0) {
      MatchedBlock = Blocks[0];
      // Report as loose match
      Method = StaleMatcher::MATCH_OPCODE;
    }
    if (!MatchedBlock) {
      LLVM_DEBUG(dbgs() << "Couldn't match yaml block (bid = " << YamlBB.Index
                        << ")" << " with hash " << Twine::utohexstr(YamlBB.Hash)
                        << "\n");
      continue;
    }
    addMatchedBlock({MatchedBlock, Method}, YamlBF, YamlBB);
  }

  // Match jumps from the profile to the jumps from CFG
  std::vector<uint64_t> OutWeight(Func.Blocks.size(), 0);
  std::vector<uint64_t> InWeight(Func.Blocks.size(), 0);

  // NOTE: the structured binding below intentionally shadows the YamlBF
  // parameter: each iteration processes one matched profile (the function's
  // own profile and/or probe-spec profiles).
  for (const auto &[YamlBF, MatchMap] : MatchedBlocks) {
    for (const auto &[YamlBBIdx, FlowBlockProfile] : MatchMap) {
      const auto &[MatchedBlock, YamlBB] = FlowBlockProfile;
      StaleMatcher::MatchMethod Method = MatchedFlowBlocks.lookup(MatchedBlock);
      // Index - 1 maps a flow block back to its binary block (skipping the
      // artificial entry block at index 0).
      BlendedBlockHash BinHash = BlendedHashes[MatchedBlock->Index - 1];
      LLVM_DEBUG(dbgs() << "Matched yaml block (bid = " << YamlBBIdx << ")"
                        << " with hash " << Twine::utohexstr(YamlBB->Hash)
                        << " to BB (index = " << MatchedBlock->Index - 1 << ")"
                        << " with hash " << Twine::utohexstr(BinHash.combine())
                        << "\n");
      // BinHash is only used inside LLVM_DEBUG; silence the unused-variable
      // warning in non-assert builds.
      (void)BinHash;
      uint64_t ExecCount = YamlBB->ExecCount;
      // Update matching stats accounting for the matched block.
      switch (Method) {
      case StaleMatcher::MATCH_EXACT:
        ++BC.Stats.NumExactMatchedBlocks;
        BC.Stats.ExactMatchedSampleCount += ExecCount;
        LLVM_DEBUG(dbgs() << "  exact match\n");
        break;
      case StaleMatcher::MATCH_PROBE_EXACT:
        ++BC.Stats.NumPseudoProbeExactMatchedBlocks;
        BC.Stats.PseudoProbeExactMatchedSampleCount += ExecCount;
        LLVM_DEBUG(dbgs() << "  exact pseudo probe match\n");
        break;
      case StaleMatcher::MATCH_PROBE_LOOSE:
        ++BC.Stats.NumPseudoProbeLooseMatchedBlocks;
        BC.Stats.PseudoProbeLooseMatchedSampleCount += ExecCount;
        LLVM_DEBUG(dbgs() << "  loose pseudo probe match\n");
        break;
      case StaleMatcher::MATCH_CALL:
        ++BC.Stats.NumCallMatchedBlocks;
        BC.Stats.CallMatchedSampleCount += ExecCount;
        LLVM_DEBUG(dbgs() << "  call match\n");
        break;
      case StaleMatcher::MATCH_OPCODE:
        ++BC.Stats.NumLooseMatchedBlocks;
        BC.Stats.LooseMatchedSampleCount += ExecCount;
        LLVM_DEBUG(dbgs() << "  loose match\n");
        break;
      case StaleMatcher::NO_MATCH:
        LLVM_DEBUG(dbgs() << "  no match\n");
      }
    }

    for (const yaml::bolt::BinaryBasicBlockProfile &YamlBB : YamlBF->Blocks) {
      for (const yaml::bolt::SuccessorInfo &YamlSI : YamlBB.Successors) {
        if (YamlSI.Count == 0)
          continue;

        // Try to find the jump for a given (src, dst) pair from the profile and
        // assign the jump weight based on the profile count
        const uint64_t SrcIndex = YamlBB.Index;
        const uint64_t DstIndex = YamlSI.Index;

        const FlowBlock *MatchedSrcBlock = MatchMap.lookup(SrcIndex).first;
        const FlowBlock *MatchedDstBlock = MatchMap.lookup(DstIndex).first;

        if (MatchedSrcBlock != nullptr && MatchedDstBlock != nullptr) {
          // Find a jump between the two blocks
          FlowJump *Jump = nullptr;
          for (FlowJump *SuccJump : MatchedSrcBlock->SuccJumps) {
            if (SuccJump->Target == MatchedDstBlock->Index) {
              Jump = SuccJump;
              break;
            }
          }
          // Assign the weight, if the corresponding jump is found
          if (Jump != nullptr) {
            Jump->Weight = YamlSI.Count;
            Jump->HasUnknownWeight = false;
          }
        }
        // Assign the weight for the src block, if it is found
        if (MatchedSrcBlock != nullptr)
          OutWeight[MatchedSrcBlock->Index] += YamlSI.Count;
        // Assign the weight for the dst block, if it is found
        if (MatchedDstBlock != nullptr)
          InWeight[MatchedDstBlock->Index] += YamlSI.Count;
      }
    }
  }

  // Assign block counts based on in-/out- jumps
  for (FlowBlock &Block : Func.Blocks) {
    if (OutWeight[Block.Index] == 0 && InWeight[Block.Index] == 0) {
      assert(Block.HasUnknownWeight && "unmatched block with a positive count");
      continue;
    }
    Block.HasUnknownWeight = false;
    // Use the larger of the two aggregates: either side may undercount when
    // some of the block's jumps were not matched.
    Block.Weight = std::max(OutWeight[Block.Index], InWeight[Block.Index]);
  }

  // YamlBF here is the function parameter again (the loop binding above has
  // gone out of scope): report matches for the function's own profile only.
  return MatchedBlocks[&YamlBF].size();
}

/// The function finds all blocks that are (i) reachable from the Entry block
/// and (ii) do not have a path to an exit, and marks all such blocks 'cold'
/// so that profi does not send any flow to such blocks.
void preprocessUnreachableBlocks(FlowFunction &Func) {
  const uint64_t NumBlocks = Func.Blocks.size();

  // Start bfs from the source
  // (the flow function has a single artificial entry block, so seeding the
  // search with the first entry found is sufficient — hence the break).
  std::queue<uint64_t> Queue;
  std::vector<bool> VisitedEntry(NumBlocks, false);
  for (uint64_t I = 0; I < NumBlocks; I++) {
    FlowBlock &Block = Func.Blocks[I];
    if (Block.isEntry()) {
      Queue.push(I);
      VisitedEntry[I] = true;
      break;
    }
  }
  while (!Queue.empty()) {
    const uint64_t Src = Queue.front();
    Queue.pop();
    for (FlowJump *Jump : Func.Blocks[Src].SuccJumps) {
      const uint64_t Dst = Jump->Target;
      if (!VisitedEntry[Dst]) {
        Queue.push(Dst);
        VisitedEntry[Dst] = true;
      }
    }
  }

  // Start bfs from all sinks
  // (backward traversal over predecessor jumps; only exits already reachable
  // from the entry are used as seeds).
  std::vector<bool> VisitedExit(NumBlocks, false);
  for (uint64_t I = 0; I < NumBlocks; I++) {
    FlowBlock &Block = Func.Blocks[I];
    if (Block.isExit() && VisitedEntry[I]) {
      Queue.push(I);
      VisitedExit[I] = true;
    }
  }
  while (!Queue.empty()) {
    const uint64_t Src = Queue.front();
    Queue.pop();
    for (FlowJump *Jump : Func.Blocks[Src].PredJumps) {
      const uint64_t Dst = Jump->Source;
      if (!VisitedExit[Dst]) {
        Queue.push(Dst);
        VisitedExit[Dst] = true;
      }
    }
  }

  // Make all blocks of zero weight so that flow is not sent
  for (uint64_t I = 0; I < NumBlocks; I++) {
    FlowBlock &Block = Func.Blocks[I];
    if (Block.Weight == 0)
      continue;
    if (!VisitedEntry[I] || !VisitedExit[I]) {
      Block.Weight = 0;
      Block.HasUnknownWeight = true;
      Block.IsUnlikely = true;
      // NOTE(review): this condition only zeroes self-loop jumps of the block
      // (Source == Target == Block.Index); other outgoing jumps keep their
      // weights — presumably handled by marking the block itself Unlikely.
      for (FlowJump *Jump : Block.SuccJumps) {
        if (Jump->Source == Block.Index && Jump->Target == Block.Index) {
          Jump->Weight = 0;
          Jump->HasUnknownWeight = true;
          Jump->IsUnlikely = true;
        }
      }
    }
  }
}

/// Decide if stale profile matching can be applied for a given function.
/// Currently we skip inference for (very) large instances and for instances
/// having "unexpected" control flow (e.g., having no sink basic blocks).
85868fc8dffSshaw young bool canApplyInference(const FlowFunction &Func, 85968fc8dffSshaw young const yaml::bolt::BinaryFunctionProfile &YamlBF, 86068fc8dffSshaw young const uint64_t &MatchedBlocks) { 86144268271Sspupyrev if (Func.Blocks.size() > opts::StaleMatchingMaxFuncSize) 86244268271Sspupyrev return false; 86344268271Sspupyrev 86468fc8dffSshaw young if (MatchedBlocks * 100 < 86568fc8dffSshaw young opts::StaleMatchingMinMatchedBlock * YamlBF.Blocks.size()) 86668fc8dffSshaw young return false; 86768fc8dffSshaw young 868753498eeSshaw young // Returns false if the artificial sink block has no predecessors meaning 869753498eeSshaw young // there are no exit blocks. 870753498eeSshaw young if (Func.Blocks[Func.Blocks.size() - 1].isEntry()) 87144268271Sspupyrev return false; 87244268271Sspupyrev 87344268271Sspupyrev return true; 87444268271Sspupyrev } 87544268271Sspupyrev 87644268271Sspupyrev /// Apply the profile inference algorithm for a given flow function. 87744268271Sspupyrev void applyInference(FlowFunction &Func) { 87844268271Sspupyrev ProfiParams Params; 87944268271Sspupyrev // Set the params from the command-line flags. 
88044268271Sspupyrev Params.EvenFlowDistribution = opts::StaleMatchingEvenFlowDistribution; 88144268271Sspupyrev Params.RebalanceUnknown = opts::StaleMatchingRebalanceUnknown; 88244268271Sspupyrev Params.JoinIslands = opts::StaleMatchingJoinIslands; 88344268271Sspupyrev 88444268271Sspupyrev Params.CostBlockInc = opts::StaleMatchingCostBlockInc; 8851256ef27Sspupyrev Params.CostBlockEntryInc = opts::StaleMatchingCostBlockInc; 88644268271Sspupyrev Params.CostBlockDec = opts::StaleMatchingCostBlockDec; 8871256ef27Sspupyrev Params.CostBlockEntryDec = opts::StaleMatchingCostBlockDec; 88844268271Sspupyrev Params.CostBlockUnknownInc = opts::StaleMatchingCostBlockUnknownInc; 88944268271Sspupyrev 89044268271Sspupyrev Params.CostJumpInc = opts::StaleMatchingCostJumpInc; 8911256ef27Sspupyrev Params.CostJumpFTInc = opts::StaleMatchingCostJumpInc; 89244268271Sspupyrev Params.CostJumpDec = opts::StaleMatchingCostJumpDec; 8931256ef27Sspupyrev Params.CostJumpFTDec = opts::StaleMatchingCostJumpDec; 89444268271Sspupyrev Params.CostJumpUnknownInc = opts::StaleMatchingCostJumpUnknownInc; 89544268271Sspupyrev Params.CostJumpUnknownFTInc = opts::StaleMatchingCostJumpUnknownFTInc; 89644268271Sspupyrev 89744268271Sspupyrev applyFlowInference(Params, Func); 89844268271Sspupyrev } 89944268271Sspupyrev 90044268271Sspupyrev /// Collect inferred counts from the flow function and update annotations in 90144268271Sspupyrev /// the binary function. 
void assignProfile(BinaryFunction &BF,
                   const BinaryFunction::BasicBlockOrderType &BlockOrder,
                   FlowFunction &Func) {
  BinaryContext &BC = BF.getBinaryContext();

  // Flow block I + 1 corresponds to binary block I; blocks 0 and size()-1 are
  // the artificial entry and sink.
  assert(Func.Blocks.size() == BlockOrder.size() + 2);
  for (uint64_t I = 0; I < BlockOrder.size(); I++) {
    FlowBlock &Block = Func.Blocks[I + 1];
    BinaryBasicBlock *BB = BlockOrder[I];

    // Update block's count
    BB->setExecutionCount(Block.Flow);

    // Update jump counts: (i) clean existing counts and then (ii) set new ones
    // (branch_info entries are iterated in lockstep with successors).
    auto BI = BB->branch_info_begin();
    for (const BinaryBasicBlock *DstBB : BB->successors()) {
      (void)DstBB;
      BI->Count = 0;
      BI->MispredictedCount = 0;
      ++BI;
    }
    for (FlowJump *Jump : Block.SuccJumps) {
      if (Jump->IsUnlikely)
        continue;
      if (Jump->Flow == 0)
        continue;

      // Skips the artificial sink block.
      if (Jump->Target == Func.Blocks.size() - 1)
        continue;
      // Target - 1 converts a flow-block index back to a binary-block index.
      BinaryBasicBlock &SuccBB = *BlockOrder[Jump->Target - 1];
      // Check if the edge corresponds to a regular jump or a landing pad
      if (BB->getSuccessor(SuccBB.getLabel())) {
        BinaryBasicBlock::BinaryBranchInfo &BI = BB->getBranchInfo(SuccBB);
        BI.Count += Jump->Flow;
      } else {
        // For landing pads, only raise the execution count; never lower it.
        BinaryBasicBlock *LP = BB->getLandingPad(SuccBB.getLabel());
        if (LP && LP->getKnownExecutionCount() < Jump->Flow)
          LP->setExecutionCount(Jump->Flow);
      }
    }

    // Update call-site annotations
    // (remove-then-add so an existing annotation is replaced, and zero counts
    // leave no annotation behind).
    auto setOrUpdateAnnotation = [&](MCInst &Instr, StringRef Name,
                                     uint64_t Count) {
      if (BC.MIB->hasAnnotation(Instr, Name))
        BC.MIB->removeAnnotation(Instr, Name);
      // Do not add zero-count annotations
      if (Count == 0)
        return;
      BC.MIB->addAnnotation(Instr, Name, Count);
    };

    for (MCInst &Instr : *BB) {
      // Ignore pseudo instructions
      if (BC.MIB->isPseudo(Instr))
        continue;
      // Ignore jump tables
      // (the terminating instruction of a jump-table block is not a call).
      const MCInst *LastInstr = BB->getLastNonPseudoInstr();
      if (BC.MIB->getJumpTable(*LastInstr) && LastInstr == &Instr)
        continue;

      if (BC.MIB->isIndirectCall(Instr) || BC.MIB->isIndirectBranch(Instr)) {
        auto &ICSP = BC.MIB->getOrCreateAnnotationAs<IndirectCallSiteProfile>(
            Instr, "CallProfile");
        if (!ICSP.empty()) {
          // Try to evenly distribute the counts among the call sites
          const uint64_t TotalCount = Block.Flow;
          const uint64_t NumSites = ICSP.size();
          for (uint64_t Idx = 0; Idx < ICSP.size(); Idx++) {
            IndirectCallProfile &CSP = ICSP[Idx];
            uint64_t CountPerSite = TotalCount / NumSites;
            // When counts cannot be exactly distributed, increase by 1 the
            // counts of the first (TotalCount % NumSites) call sites
            if (Idx < TotalCount % NumSites)
              CountPerSite++;
            CSP.Count = CountPerSite;
          }
        } else {
          // No known targets: record an unresolved (nullptr) call site
          // carrying the whole block count.
          ICSP.emplace_back(nullptr, Block.Flow, 0);
        }
      } else if (BC.MIB->getConditionalTailCall(Instr)) {
        // We don't know exactly the number of times the conditional tail call
        // is executed; conservatively, setting it to the count of the block
        setOrUpdateAnnotation(Instr, "CTCTakenCount", Block.Flow);
        BC.MIB->removeAnnotation(Instr, "CTCMispredCount");
      } else if (BC.MIB->isCall(Instr)) {
        setOrUpdateAnnotation(Instr, "Count", Block.Flow);
      }
    }
  }

  // Update function's execution count and mark the function inferred.
  // (Block 0 is the artificial entry; its flow equals the function count.)
  BF.setExecutionCount(Func.Blocks[0].Flow);
  BF.setHasInferredProfile(true);
}

/// Infer a profile for \p BF from the stale profile \p YamlBF (plus optional
/// probe-based match specs). Returns true if a profile was assigned to BF.
/// YamlBP and IdToYamLBF are members of the enclosing YAMLProfileReader.
bool YAMLProfileReader::inferStaleProfile(
    BinaryFunction &BF, const yaml::bolt::BinaryFunctionProfile &YamlBF,
    const ArrayRef<ProbeMatchSpec> ProbeMatchSpecs) {

  NamedRegionTimer T("inferStaleProfile", "stale profile inference", "rewrite",
                     "Rewrite passes", opts::TimeRewrite);

  // Inference operates on the CFG; bail out when it is unavailable.
  if (!BF.hasCFG())
    return false;

  LLVM_DEBUG(dbgs() << "BOLT-INFO: applying profile inference for "
                    << "\"" << BF.getPrintName() << "\"\n");

  // Make sure that block hashes are up to date.
  BF.computeBlockHashes(YamlBP.Header.HashFunction);

  const BinaryFunction::BasicBlockOrderType BlockOrder(
      BF.getLayout().block_begin(), BF.getLayout().block_end());

  // Create a wrapper flow function to use with the profile inference algorithm.
  FlowFunction Func = createFlowFunction(BlockOrder);

  // Match as many block/jump counts from the stale profile as possible.
  // MatchedBlocks tracks the number of matched blocks.
  size_t MatchedBlocks =
      matchWeights(BF.getBinaryContext(), BlockOrder, YamlBF, Func,
                   YamlBP.Header.HashFunction, IdToYamLBF, BF, ProbeMatchSpecs);

  // Adjust the flow function by marking unreachable blocks Unlikely so that
  // they don't get any counts assigned.
  preprocessUnreachableBlocks(Func);

  // Check if profile inference can be applied for the instance.
  if (!canApplyInference(Func, YamlBF, MatchedBlocks))
    return false;

  // Apply the profile inference algorithm.
  applyInference(Func);

  // Collect inferred counts and update function annotations.
  assignProfile(BF, BlockOrder, Func);

  // As of now, we always mark the binary function having "correct" profile.
  // In the future, we may discard the results for instances with poor inference
  // metrics and keep such functions un-optimized.
  return true;
}

} // end namespace bolt
} // end namespace llvm