xref: /llvm-project/bolt/lib/Profile/StaleProfileMatching.cpp (revision 06e08696248ac01754c87c22cc8a4b797ef46430)
144268271Sspupyrev //===- bolt/Profile/StaleProfileMatching.cpp - Profile data matching   ----===//
244268271Sspupyrev //
344268271Sspupyrev // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
444268271Sspupyrev // See https://llvm.org/LICENSE.txt for license information.
544268271Sspupyrev // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
644268271Sspupyrev //
744268271Sspupyrev //===----------------------------------------------------------------------===//
844268271Sspupyrev //
944268271Sspupyrev // BOLT often has to deal with profiles collected on binaries built from several
1044268271Sspupyrev // revisions behind release. As a result, a certain percentage of functions is
1144268271Sspupyrev // considered stale and not optimized. This file implements the ability to match
1244268271Sspupyrev // the profile to functions that are not 100% binary identical, thus increasing
1344268271Sspupyrev // the optimization coverage and boosting the performance of applications.
1444268271Sspupyrev //
1544268271Sspupyrev // The algorithm consists of two phases, matching and inference:
1644268271Sspupyrev // - At the matching phase, we try to "guess" as many block and jump counts from
1744268271Sspupyrev //   the stale profile as possible. To this end, the content of each basic block
1844268271Sspupyrev //   is hashed and stored in the (yaml) profile. When BOLT optimizes a binary,
1944268271Sspupyrev //   it computes block hashes and identifies the corresponding entries in the
2044268271Sspupyrev //   stale profile. It yields a partial profile for every CFG in the binary.
2144268271Sspupyrev // - At the inference phase, we employ a network flow-based algorithm (profi) to
2244268271Sspupyrev //   reconstruct "realistic" block and jump counts from the partial profile
2344268271Sspupyrev //   generated at the first stage. In practice, we don't always produce proper
2444268271Sspupyrev //   profile data but the majority (e.g., >90%) of CFGs get the correct counts.
2544268271Sspupyrev //
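// At a high level, the helpers implemented in this file fit together as
// follows when stale profile inference is applied to a function (a rough
// sketch; see the individual functions below for details):
//
//   computeBlockHashes(...)         - hash every basic block of the binary CFG
//   createFlowFunction(BlockOrder)  - wrap the CFG into a FlowFunction
//   matchWeights(...)               - match stale profile blocks/jumps to it
//   preprocessUnreachableBlocks(F)  - zero out blocks that cannot carry flow
//   canApplyInference(F, ...)       - skip overly large or ill-formed instances
//   applyInference(F)               - run profi to reconstruct missing counts
//   assignProfile(BF, ...)          - copy the inferred counts back to the CFG
//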
2644268271Sspupyrev //===----------------------------------------------------------------------===//
2744268271Sspupyrev 
2844268271Sspupyrev #include "bolt/Core/HashUtilities.h"
2944268271Sspupyrev #include "bolt/Profile/YAMLProfileReader.h"
306d1502c6Sspupyrev #include "llvm/ADT/Bitfields.h"
3144268271Sspupyrev #include "llvm/ADT/Hashing.h"
329a9af0a2SShaw Young #include "llvm/MC/MCPseudoProbe.h"
3344268271Sspupyrev #include "llvm/Support/CommandLine.h"
3496378b3dSshaw young #include "llvm/Support/Timer.h"
35e7dd596cSspupyrev #include "llvm/Support/xxhash.h"
3644268271Sspupyrev #include "llvm/Transforms/Utils/SampleProfileInference.h"
3744268271Sspupyrev 
3844268271Sspupyrev #include <queue>
3944268271Sspupyrev 
406d1502c6Sspupyrev using namespace llvm;
416d1502c6Sspupyrev 
422316a10fSspupyrev #undef DEBUG_TYPE
432316a10fSspupyrev #define DEBUG_TYPE "bolt-prof"
442316a10fSspupyrev 
4544268271Sspupyrev namespace opts {
4644268271Sspupyrev 
4796378b3dSshaw young extern cl::opt<bool> TimeRewrite;
4844268271Sspupyrev extern cl::OptionCategory BoltOptCategory;
4944268271Sspupyrev 
5044268271Sspupyrev cl::opt<bool>
5144268271Sspupyrev     InferStaleProfile("infer-stale-profile",
5244268271Sspupyrev                       cl::desc("Infer counts from stale profile data."),
5344268271Sspupyrev                       cl::init(false), cl::Hidden, cl::cat(BoltOptCategory));
5444268271Sspupyrev 
5568fc8dffSshaw young cl::opt<unsigned> StaleMatchingMinMatchedBlock(
5668fc8dffSshaw young     "stale-matching-min-matched-block",
5768fc8dffSshaw young     cl::desc("Percentage threshold of matched basic blocks at which stale "
5868fc8dffSshaw young              "profile inference is executed."),
5968fc8dffSshaw young     cl::init(0), cl::Hidden, cl::cat(BoltOptCategory));
6068fc8dffSshaw young 
6144268271Sspupyrev cl::opt<unsigned> StaleMatchingMaxFuncSize(
6244268271Sspupyrev     "stale-matching-max-func-size",
6344268271Sspupyrev     cl::desc("The maximum size of a function to consider for inference."),
6444268271Sspupyrev     cl::init(10000), cl::Hidden, cl::cat(BoltOptCategory));
6544268271Sspupyrev 
6644268271Sspupyrev // Parameters of the profile inference algorithm. The default values are tuned
6744268271Sspupyrev // on several benchmarks.
6844268271Sspupyrev cl::opt<bool> StaleMatchingEvenFlowDistribution(
6944268271Sspupyrev     "stale-matching-even-flow-distribution",
7044268271Sspupyrev     cl::desc("Try to evenly distribute flow when there are multiple equally "
7144268271Sspupyrev              "likely options."),
7244268271Sspupyrev     cl::init(true), cl::ReallyHidden, cl::cat(BoltOptCategory));
7344268271Sspupyrev 
7444268271Sspupyrev cl::opt<bool> StaleMatchingRebalanceUnknown(
7544268271Sspupyrev     "stale-matching-rebalance-unknown",
7644268271Sspupyrev     cl::desc("Evenly re-distribute flow among unknown subgraphs."),
7744268271Sspupyrev     cl::init(false), cl::ReallyHidden, cl::cat(BoltOptCategory));
7844268271Sspupyrev 
7944268271Sspupyrev cl::opt<bool> StaleMatchingJoinIslands(
8044268271Sspupyrev     "stale-matching-join-islands",
8144268271Sspupyrev     cl::desc("Join isolated components having positive flow."), cl::init(true),
8244268271Sspupyrev     cl::ReallyHidden, cl::cat(BoltOptCategory));
8344268271Sspupyrev 
8444268271Sspupyrev cl::opt<unsigned> StaleMatchingCostBlockInc(
8544268271Sspupyrev     "stale-matching-cost-block-inc",
861256ef27Sspupyrev     cl::desc("The cost of increasing a block count by one."), cl::init(150),
8744268271Sspupyrev     cl::ReallyHidden, cl::cat(BoltOptCategory));
8844268271Sspupyrev 
8944268271Sspupyrev cl::opt<unsigned> StaleMatchingCostBlockDec(
9044268271Sspupyrev     "stale-matching-cost-block-dec",
911256ef27Sspupyrev     cl::desc("The cost of decreasing a block count by one."), cl::init(150),
9244268271Sspupyrev     cl::ReallyHidden, cl::cat(BoltOptCategory));
9344268271Sspupyrev 
9444268271Sspupyrev cl::opt<unsigned> StaleMatchingCostJumpInc(
9544268271Sspupyrev     "stale-matching-cost-jump-inc",
961256ef27Sspupyrev     cl::desc("The cost of increasing a jump count by one."), cl::init(150),
9744268271Sspupyrev     cl::ReallyHidden, cl::cat(BoltOptCategory));
9844268271Sspupyrev 
9944268271Sspupyrev cl::opt<unsigned> StaleMatchingCostJumpDec(
10044268271Sspupyrev     "stale-matching-cost-jump-dec",
1011256ef27Sspupyrev     cl::desc("The cost of decreasing a jump count by one."), cl::init(150),
10244268271Sspupyrev     cl::ReallyHidden, cl::cat(BoltOptCategory));
10344268271Sspupyrev 
1041256ef27Sspupyrev cl::opt<unsigned> StaleMatchingCostBlockUnknownInc(
1051256ef27Sspupyrev     "stale-matching-cost-block-unknown-inc",
1061256ef27Sspupyrev     cl::desc("The cost of increasing an unknown block count by one."),
1071256ef27Sspupyrev     cl::init(1), cl::ReallyHidden, cl::cat(BoltOptCategory));
10844268271Sspupyrev 
10944268271Sspupyrev cl::opt<unsigned> StaleMatchingCostJumpUnknownInc(
11044268271Sspupyrev     "stale-matching-cost-jump-unknown-inc",
1111256ef27Sspupyrev     cl::desc("The cost of increasing an unknown jump count by one."),
1121256ef27Sspupyrev     cl::init(140), cl::ReallyHidden, cl::cat(BoltOptCategory));
11344268271Sspupyrev 
11444268271Sspupyrev cl::opt<unsigned> StaleMatchingCostJumpUnknownFTInc(
11544268271Sspupyrev     "stale-matching-cost-jump-unknown-ft-inc",
11644268271Sspupyrev     cl::desc(
1171256ef27Sspupyrev         "The cost of increasing an unknown fall-through jump count by one."),
1181256ef27Sspupyrev     cl::init(3), cl::ReallyHidden, cl::cat(BoltOptCategory));
11944268271Sspupyrev 
1209a9af0a2SShaw Young cl::opt<bool> StaleMatchingWithPseudoProbes(
1219a9af0a2SShaw Young     "stale-matching-with-pseudo-probes",
1229a9af0a2SShaw Young     cl::desc("Turns on stale matching with block pseudo probes."),
1239a9af0a2SShaw Young     cl::init(false), cl::ReallyHidden, cl::cat(BoltOptCategory));
1249a9af0a2SShaw Young 
12544268271Sspupyrev } // namespace opts
12644268271Sspupyrev 
12744268271Sspupyrev namespace llvm {
12844268271Sspupyrev namespace bolt {
12944268271Sspupyrev 
1302316a10fSspupyrev /// An object wrapping several components of a basic block hash. The combined
1312316a10fSspupyrev /// (blended) hash is represented and stored as one uint64_t, while individual
1322316a10fSspupyrev /// components are of smaller size (e.g., uint16_t or uint8_t).
1332316a10fSspupyrev struct BlendedBlockHash {
1342316a10fSspupyrev private:
1356d1502c6Sspupyrev   using ValueOffset = Bitfield::Element<uint16_t, 0, 16>;
1366d1502c6Sspupyrev   using ValueOpcode = Bitfield::Element<uint16_t, 16, 16>;
1376d1502c6Sspupyrev   using ValueInstr = Bitfield::Element<uint16_t, 32, 16>;
1381256ef27Sspupyrev   using ValuePred = Bitfield::Element<uint8_t, 48, 8>;
1391256ef27Sspupyrev   using ValueSucc = Bitfield::Element<uint8_t, 56, 8>;
1402316a10fSspupyrev 
1412316a10fSspupyrev public:
1422316a10fSspupyrev   explicit BlendedBlockHash() {}
1432316a10fSspupyrev 
1446d1502c6Sspupyrev   explicit BlendedBlockHash(uint64_t Hash) {
1456d1502c6Sspupyrev     Offset = Bitfield::get<ValueOffset>(Hash);
1466d1502c6Sspupyrev     OpcodeHash = Bitfield::get<ValueOpcode>(Hash);
1476d1502c6Sspupyrev     InstrHash = Bitfield::get<ValueInstr>(Hash);
1481256ef27Sspupyrev     PredHash = Bitfield::get<ValuePred>(Hash);
1491256ef27Sspupyrev     SuccHash = Bitfield::get<ValueSucc>(Hash);
1502316a10fSspupyrev   }
1512316a10fSspupyrev 
1522316a10fSspupyrev   /// Combine the blended hash into uint64_t.
1532316a10fSspupyrev   uint64_t combine() const {
1546d1502c6Sspupyrev     uint64_t Hash = 0;
1556d1502c6Sspupyrev     Bitfield::set<ValueOffset>(Hash, Offset);
1566d1502c6Sspupyrev     Bitfield::set<ValueOpcode>(Hash, OpcodeHash);
1576d1502c6Sspupyrev     Bitfield::set<ValueInstr>(Hash, InstrHash);
1581256ef27Sspupyrev     Bitfield::set<ValuePred>(Hash, PredHash);
1591256ef27Sspupyrev     Bitfield::set<ValueSucc>(Hash, SuccHash);
1606d1502c6Sspupyrev     return Hash;
1612316a10fSspupyrev   }
1622316a10fSspupyrev 
1632316a10fSspupyrev   /// Compute a distance between two given blended hashes. The smaller the
1642316a10fSspupyrev   /// distance, the more similar two blocks are. For identical basic blocks,
1652316a10fSspupyrev   /// the distance is zero.
1662316a10fSspupyrev   uint64_t distance(const BlendedBlockHash &BBH) const {
1672316a10fSspupyrev     assert(OpcodeHash == BBH.OpcodeHash &&
1682316a10fSspupyrev            "incorrect blended hash distance computation");
1692316a10fSspupyrev     uint64_t Dist = 0;
1702316a10fSspupyrev     // Account for SuccHash and PredHash
1711256ef27Sspupyrev     Dist += SuccHash == BBH.SuccHash ? 0 : 1;
1721256ef27Sspupyrev     Dist += PredHash == BBH.PredHash ? 0 : 1;
1732316a10fSspupyrev     Dist <<= 16;
1742316a10fSspupyrev     // Account for InstrHash
1752316a10fSspupyrev     Dist += InstrHash == BBH.InstrHash ? 0 : 1;
1762316a10fSspupyrev     Dist <<= 16;
1772316a10fSspupyrev     // Account for Offset
1782316a10fSspupyrev     Dist += (Offset >= BBH.Offset ? Offset - BBH.Offset : BBH.Offset - Offset);
1792316a10fSspupyrev     return Dist;
1802316a10fSspupyrev   }
1812316a10fSspupyrev 
1822316a10fSspupyrev   /// The offset of the basic block from the function start.
1832316a10fSspupyrev   uint16_t Offset{0};
1842316a10fSspupyrev   /// (Loose) Hash of the basic block instructions, excluding operands.
1852316a10fSspupyrev   uint16_t OpcodeHash{0};
1862316a10fSspupyrev   /// (Strong) Hash of the basic block instructions, including opcodes and
1872316a10fSspupyrev   /// operands.
1882316a10fSspupyrev   uint16_t InstrHash{0};
1891256ef27Sspupyrev   /// (Loose) Hash of the predecessors of the basic block.
1901256ef27Sspupyrev   uint8_t PredHash{0};
1911256ef27Sspupyrev   /// (Loose) Hash of the successors of the basic block.
1921256ef27Sspupyrev   uint8_t SuccHash{0};
1932316a10fSspupyrev };
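
// Illustrative packing example (values are arbitrary): with the Bitfield
// layout above, the components occupy
//   [SuccHash:8 | PredHash:8 | InstrHash:16 | OpcodeHash:16 | Offset:16]
// from the most to the least significant bits. For instance, Offset = 0x0010,
// OpcodeHash = 0xabcd, InstrHash = 0x1234, PredHash = 0x07 and SuccHash = 0x09
// combine() into 0x09071234abcd0010, and BlendedBlockHash(0x09071234abcd0010)
// recovers the same five components.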
1942316a10fSspupyrev 
1952316a10fSspupyrev /// The object is used to identify and match basic blocks in a BinaryFunction
1962316a10fSspupyrev /// given their hashes computed on a binary built from several revisions behind
1972316a10fSspupyrev /// release.
1982316a10fSspupyrev class StaleMatcher {
1992316a10fSspupyrev public:
2002316a10fSspupyrev   /// Initialize stale matcher.
2012316a10fSspupyrev   void init(const std::vector<FlowBlock *> &Blocks,
202131eb305SShaw Young             const std::vector<BlendedBlockHash> &Hashes,
203131eb305SShaw Young             const std::vector<uint64_t> &CallHashes) {
2042316a10fSspupyrev     assert(Blocks.size() == Hashes.size() &&
205131eb305SShaw Young            Hashes.size() == CallHashes.size() &&
2062316a10fSspupyrev            "incorrect matcher initialization");
2072316a10fSspupyrev     for (size_t I = 0; I < Blocks.size(); I++) {
2082316a10fSspupyrev       FlowBlock *Block = Blocks[I];
2092316a10fSspupyrev       uint16_t OpHash = Hashes[I].OpcodeHash;
2102316a10fSspupyrev       OpHashToBlocks[OpHash].push_back(std::make_pair(Hashes[I], Block));
211131eb305SShaw Young       if (CallHashes[I])
212131eb305SShaw Young         CallHashToBlocks[CallHashes[I]].push_back(
213131eb305SShaw Young             std::make_pair(Hashes[I], Block));
2142316a10fSspupyrev     }
2152316a10fSspupyrev   }
2162316a10fSspupyrev 
2179a9af0a2SShaw Young   /// Creates a mapping from a pseudo probe to a flow block.
2189a9af0a2SShaw Young   void mapProbeToBB(const MCDecodedPseudoProbe *Probe, FlowBlock *Block) {
2199a9af0a2SShaw Young     BBPseudoProbeToBlock[Probe] = Block;
2209a9af0a2SShaw Young   }
2219a9af0a2SShaw Young 
2229a9af0a2SShaw Young   enum MatchMethod : char {
2239a9af0a2SShaw Young     MATCH_EXACT = 0,
2249a9af0a2SShaw Young     MATCH_PROBE_EXACT,
2259a9af0a2SShaw Young     MATCH_PROBE_LOOSE,
2269a9af0a2SShaw Young     MATCH_OPCODE,
2279a9af0a2SShaw Young     MATCH_CALL,
2289a9af0a2SShaw Young     NO_MATCH
2299a9af0a2SShaw Young   };
2309a9af0a2SShaw Young 
2319a9af0a2SShaw Young   /// Find the most similar flow block for a profile block given blended hash.
2329a9af0a2SShaw Young   std::pair<const FlowBlock *, MatchMethod>
2339a9af0a2SShaw Young   matchBlockStrict(BlendedBlockHash BlendedHash) {
2349a9af0a2SShaw Young     const auto &[Block, ExactHash] = matchWithOpcodes(BlendedHash);
2359a9af0a2SShaw Young     if (Block && ExactHash)
2369a9af0a2SShaw Young       return {Block, MATCH_EXACT};
2379a9af0a2SShaw Young     return {nullptr, NO_MATCH};
2389a9af0a2SShaw Young   }
2399a9af0a2SShaw Young 
2409a9af0a2SShaw Young   /// Find the most similar flow block for a profile block given pseudo probes.
2419a9af0a2SShaw Young   std::pair<const FlowBlock *, MatchMethod> matchBlockProbe(
2429a9af0a2SShaw Young       const ArrayRef<yaml::bolt::PseudoProbeInfo> PseudoProbes,
2439a9af0a2SShaw Young       const YAMLProfileReader::InlineTreeNodeMapTy &InlineTreeNodeMap) {
2449a9af0a2SShaw Young     const auto &[ProbeBlock, ExactProbe] =
2459a9af0a2SShaw Young         matchWithPseudoProbes(PseudoProbes, InlineTreeNodeMap);
2469a9af0a2SShaw Young     if (ProbeBlock)
2479a9af0a2SShaw Young       return {ProbeBlock, ExactProbe ? MATCH_PROBE_EXACT : MATCH_PROBE_LOOSE};
2489a9af0a2SShaw Young     return {nullptr, NO_MATCH};
2499a9af0a2SShaw Young   }
2509a9af0a2SShaw Young 
2519a9af0a2SShaw Young   /// Find the most similar flow block for a profile block given its hashes.
2529a9af0a2SShaw Young   std::pair<const FlowBlock *, MatchMethod>
2539a9af0a2SShaw Young   matchBlockLoose(BlendedBlockHash BlendedHash, uint64_t CallHash) {
2549a9af0a2SShaw Young     if (const FlowBlock *CallBlock = matchWithCalls(BlendedHash, CallHash))
2559a9af0a2SShaw Young       return {CallBlock, MATCH_CALL};
2569a9af0a2SShaw Young     if (const FlowBlock *OpcodeBlock = matchWithOpcodes(BlendedHash).first)
2579a9af0a2SShaw Young       return {OpcodeBlock, MATCH_OPCODE};
2589a9af0a2SShaw Young     return {nullptr, NO_MATCH};
259131eb305SShaw Young   }
260131eb305SShaw Young 
261131eb305SShaw Young   /// Returns true if the two basic blocks (in the binary and in the profile)
262131eb305SShaw Young   /// corresponding to the given hashes are matched to each other with a high
263131eb305SShaw Young   /// confidence.
264131eb305SShaw Young   static bool isHighConfidenceMatch(BlendedBlockHash Hash1,
265131eb305SShaw Young                                     BlendedBlockHash Hash2) {
266131eb305SShaw Young     return Hash1.InstrHash == Hash2.InstrHash;
267131eb305SShaw Young   }
268131eb305SShaw Young 
269131eb305SShaw Young private:
270131eb305SShaw Young   using HashBlockPairType = std::pair<BlendedBlockHash, FlowBlock *>;
271131eb305SShaw Young   std::unordered_map<uint16_t, std::vector<HashBlockPairType>> OpHashToBlocks;
272131eb305SShaw Young   std::unordered_map<uint64_t, std::vector<HashBlockPairType>> CallHashToBlocks;
2739a9af0a2SShaw Young   DenseMap<const MCDecodedPseudoProbe *, FlowBlock *> BBPseudoProbeToBlock;
274131eb305SShaw Young 
275131eb305SShaw Young   // Uses OpcodeHash to find the most similar block for a given hash.
2769a9af0a2SShaw Young   std::pair<const FlowBlock *, bool>
2779a9af0a2SShaw Young   matchWithOpcodes(BlendedBlockHash BlendedHash) const {
2782316a10fSspupyrev     auto BlockIt = OpHashToBlocks.find(BlendedHash.OpcodeHash);
27931e8a9f4Sspupyrev     if (BlockIt == OpHashToBlocks.end())
2809a9af0a2SShaw Young       return {nullptr, false};
2812316a10fSspupyrev     FlowBlock *BestBlock = nullptr;
2822316a10fSspupyrev     uint64_t BestDist = std::numeric_limits<uint64_t>::max();
2839a9af0a2SShaw Young     BlendedBlockHash BestHash;
28431e8a9f4Sspupyrev     for (const auto &[Hash, Block] : BlockIt->second) {
2852316a10fSspupyrev       uint64_t Dist = Hash.distance(BlendedHash);
2862316a10fSspupyrev       if (BestBlock == nullptr || Dist < BestDist) {
2872316a10fSspupyrev         BestDist = Dist;
2882316a10fSspupyrev         BestBlock = Block;
2899a9af0a2SShaw Young         BestHash = Hash;
2902316a10fSspupyrev       }
2912316a10fSspupyrev     }
2929a9af0a2SShaw Young     return {BestBlock, isHighConfidenceMatch(BestHash, BlendedHash)};
2932316a10fSspupyrev   }
2942316a10fSspupyrev 
295131eb305SShaw Young   // Uses CallHash to find the most similar block for a given hash.
296131eb305SShaw Young   const FlowBlock *matchWithCalls(BlendedBlockHash BlendedHash,
297131eb305SShaw Young                                   uint64_t CallHash) const {
298131eb305SShaw Young     if (!CallHash)
299131eb305SShaw Young       return nullptr;
300131eb305SShaw Young     auto BlockIt = CallHashToBlocks.find(CallHash);
301131eb305SShaw Young     if (BlockIt == CallHashToBlocks.end())
302131eb305SShaw Young       return nullptr;
303131eb305SShaw Young     FlowBlock *BestBlock = nullptr;
304131eb305SShaw Young     uint64_t BestDist = std::numeric_limits<uint64_t>::max();
305131eb305SShaw Young     for (const auto &[Hash, Block] : BlockIt->second) {
306131eb305SShaw Young       uint64_t Dist = Hash.OpcodeHash > BlendedHash.OpcodeHash
307131eb305SShaw Young                           ? Hash.OpcodeHash - BlendedHash.OpcodeHash
308131eb305SShaw Young                           : BlendedHash.OpcodeHash - Hash.OpcodeHash;
309131eb305SShaw Young       if (BestBlock == nullptr || Dist < BestDist) {
310131eb305SShaw Young         BestDist = Dist;
311131eb305SShaw Young         BestBlock = Block;
31231e8a9f4Sspupyrev       }
313131eb305SShaw Young     }
314131eb305SShaw Young     return BestBlock;
315131eb305SShaw Young   }
3169a9af0a2SShaw Young 
3179a9af0a2SShaw Young   /// Matches a profile block with a binary block based on pseudo probes.
3189a9af0a2SShaw Young   /// Returns the best matching block (or nullptr) and whether the match is
3199a9af0a2SShaw Young   /// unambiguous.
3209a9af0a2SShaw Young   std::pair<const FlowBlock *, bool> matchWithPseudoProbes(
3219a9af0a2SShaw Young       const ArrayRef<yaml::bolt::PseudoProbeInfo> BlockPseudoProbes,
3229a9af0a2SShaw Young       const YAMLProfileReader::InlineTreeNodeMapTy &InlineTreeNodeMap) const {
3239a9af0a2SShaw Young 
3249a9af0a2SShaw Young     if (!opts::StaleMatchingWithPseudoProbes)
3259a9af0a2SShaw Young       return {nullptr, false};
3269a9af0a2SShaw Young 
3279a9af0a2SShaw Young     DenseMap<const FlowBlock *, uint32_t> FlowBlockMatchCount;
3289a9af0a2SShaw Young 
3299a9af0a2SShaw Young     auto matchProfileProbeToBlock = [&](uint32_t NodeId,
3309a9af0a2SShaw Young                                         uint64_t ProbeId) -> const FlowBlock * {
3319a9af0a2SShaw Young       const MCDecodedPseudoProbeInlineTree *BinaryNode =
3329a9af0a2SShaw Young           InlineTreeNodeMap.getInlineTreeNode(NodeId);
3339a9af0a2SShaw Young       if (!BinaryNode)
3349a9af0a2SShaw Young         return nullptr;
3359a9af0a2SShaw Young       const MCDecodedPseudoProbe Dummy(0, ProbeId, PseudoProbeType::Block, 0, 0,
3369a9af0a2SShaw Young                                        nullptr);
3379a9af0a2SShaw Young       ArrayRef<MCDecodedPseudoProbe> BinaryProbes = BinaryNode->getProbes();
3389a9af0a2SShaw Young       auto BinaryProbeIt = llvm::lower_bound(
3399a9af0a2SShaw Young           BinaryProbes, Dummy, [](const auto &LHS, const auto &RHS) {
3409a9af0a2SShaw Young             return LHS.getIndex() < RHS.getIndex();
3419a9af0a2SShaw Young           });
3429a9af0a2SShaw Young       if (BinaryProbeIt == BinaryNode->getProbes().end() ||
3439a9af0a2SShaw Young           BinaryProbeIt->getIndex() != ProbeId)
3449a9af0a2SShaw Young         return nullptr;
3459a9af0a2SShaw Young       auto It = BBPseudoProbeToBlock.find(&*BinaryProbeIt);
3469a9af0a2SShaw Young       if (It == BBPseudoProbeToBlock.end())
3479a9af0a2SShaw Young         return nullptr;
3489a9af0a2SShaw Young       return It->second;
3499a9af0a2SShaw Young     };
3509a9af0a2SShaw Young 
3519a9af0a2SShaw Young     auto matchPseudoProbeInfo = [&](const yaml::bolt::PseudoProbeInfo
3529a9af0a2SShaw Young                                         &ProfileProbe,
3539a9af0a2SShaw Young                                     uint32_t NodeId) {
3549a9af0a2SShaw Young       for (uint64_t Index = 0; Index < 64; ++Index)
3559a9af0a2SShaw Young         if (ProfileProbe.BlockMask & 1ull << Index)
3569a9af0a2SShaw Young           ++FlowBlockMatchCount[matchProfileProbeToBlock(NodeId, Index + 1)];
3579a9af0a2SShaw Young       for (const auto &ProfileProbes :
3589a9af0a2SShaw Young            {ProfileProbe.BlockProbes, ProfileProbe.IndCallProbes,
3599a9af0a2SShaw Young             ProfileProbe.CallProbes})
3609a9af0a2SShaw Young         for (uint64_t ProfileProbe : ProfileProbes)
3619a9af0a2SShaw Young           ++FlowBlockMatchCount[matchProfileProbeToBlock(NodeId, ProfileProbe)];
3629a9af0a2SShaw Young     };
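    // For reference: bit I of ProfileProbe.BlockMask stands for the block
    // probe with index I + 1 (e.g., an illustrative mask of 0b0101 denotes
    // probes 1 and 3), while probes listed in BlockProbes, IndCallProbes and
    // CallProbes are matched individually.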
3639a9af0a2SShaw Young 
3649a9af0a2SShaw Young     for (const yaml::bolt::PseudoProbeInfo &ProfileProbe : BlockPseudoProbes) {
3659a9af0a2SShaw Young       if (!ProfileProbe.InlineTreeNodes.empty())
3669a9af0a2SShaw Young         for (uint32_t ProfileInlineTreeNode : ProfileProbe.InlineTreeNodes)
3679a9af0a2SShaw Young           matchPseudoProbeInfo(ProfileProbe, ProfileInlineTreeNode);
3689a9af0a2SShaw Young       else
3699a9af0a2SShaw Young         matchPseudoProbeInfo(ProfileProbe, ProfileProbe.InlineTreeIndex);
3709a9af0a2SShaw Young     }
3719a9af0a2SShaw Young     uint32_t BestMatchCount = 0;
3729a9af0a2SShaw Young     uint32_t TotalMatchCount = 0;
3739a9af0a2SShaw Young     const FlowBlock *BestMatchBlock = nullptr;
3749a9af0a2SShaw Young     for (const auto &[FlowBlock, Count] : FlowBlockMatchCount) {
3759a9af0a2SShaw Young       TotalMatchCount += Count;
3769a9af0a2SShaw Young       if (Count < BestMatchCount || (Count == BestMatchCount && BestMatchBlock))
3779a9af0a2SShaw Young         continue;
3789a9af0a2SShaw Young       BestMatchBlock = FlowBlock;
3799a9af0a2SShaw Young       BestMatchCount = Count;
3809a9af0a2SShaw Young     }
3819a9af0a2SShaw Young     return {BestMatchBlock, BestMatchCount == TotalMatchCount};
3829a9af0a2SShaw Young   }
3832316a10fSspupyrev };
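
// Usage sketch (identifiers are illustrative; mirrors the order used by
// matchWeights() below, where strict hash matching is tried first, then pseudo
// probes, and call/opcode matching is the loose fallback):
//
//   StaleMatcher Matcher;
//   Matcher.init(Blocks, BlendedHashes, CallHashes);
//   auto [Block, Method] = Matcher.matchBlockStrict(YamlHash);
//   if (!Block)
//     std::tie(Block, Method) = Matcher.matchBlockProbe(Probes, NodeMap);
//   if (!Block)
//     std::tie(Block, Method) = Matcher.matchBlockLoose(YamlHash, CallHash);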
3842316a10fSspupyrev 
385b039ccc6SAmir Ayupov void BinaryFunction::computeBlockHashes(HashFunction HashFunction) const {
3862316a10fSspupyrev   if (size() == 0)
3872316a10fSspupyrev     return;
3882316a10fSspupyrev 
3892316a10fSspupyrev   assert(hasCFG() && "the function is expected to have CFG");
3902316a10fSspupyrev 
3912316a10fSspupyrev   std::vector<BlendedBlockHash> BlendedHashes(BasicBlocks.size());
3922316a10fSspupyrev   std::vector<uint64_t> OpcodeHashes(BasicBlocks.size());
3931256ef27Sspupyrev   // Initialize hash components.
3942316a10fSspupyrev   for (size_t I = 0; I < BasicBlocks.size(); I++) {
3952316a10fSspupyrev     const BinaryBasicBlock *BB = BasicBlocks[I];
3962316a10fSspupyrev     assert(BB->getIndex() == I && "incorrect block index");
3972316a10fSspupyrev     BlendedHashes[I].Offset = BB->getOffset();
3981256ef27Sspupyrev     // Hashing complete instructions.
3992316a10fSspupyrev     std::string InstrHashStr = hashBlock(
4002316a10fSspupyrev         BC, *BB, [&](const MCOperand &Op) { return hashInstOperand(BC, Op); });
401b039ccc6SAmir Ayupov     if (HashFunction == HashFunction::StdHash) {
402b039ccc6SAmir Ayupov       uint64_t InstrHash = std::hash<std::string>{}(InstrHashStr);
403b039ccc6SAmir Ayupov       BlendedHashes[I].InstrHash = (uint16_t)hash_value(InstrHash);
404b039ccc6SAmir Ayupov     } else if (HashFunction == HashFunction::XXH3) {
405e7dd596cSspupyrev       uint64_t InstrHash = llvm::xxh3_64bits(InstrHashStr);
406e7dd596cSspupyrev       BlendedHashes[I].InstrHash = (uint16_t)InstrHash;
407b039ccc6SAmir Ayupov     } else {
408b039ccc6SAmir Ayupov       llvm_unreachable("Unhandled HashFunction");
409b039ccc6SAmir Ayupov     }
4101256ef27Sspupyrev     // Hashing opcodes.
4111256ef27Sspupyrev     std::string OpcodeHashStr = hashBlockLoose(BC, *BB);
412b039ccc6SAmir Ayupov     if (HashFunction == HashFunction::StdHash) {
413b039ccc6SAmir Ayupov       OpcodeHashes[I] = std::hash<std::string>{}(OpcodeHashStr);
414b039ccc6SAmir Ayupov       BlendedHashes[I].OpcodeHash = (uint16_t)hash_value(OpcodeHashes[I]);
415b039ccc6SAmir Ayupov     } else if (HashFunction == HashFunction::XXH3) {
416e7dd596cSspupyrev       OpcodeHashes[I] = llvm::xxh3_64bits(OpcodeHashStr);
417e7dd596cSspupyrev       BlendedHashes[I].OpcodeHash = (uint16_t)OpcodeHashes[I];
418b039ccc6SAmir Ayupov     } else {
419b039ccc6SAmir Ayupov       llvm_unreachable("Unhandled HashFunction");
420b039ccc6SAmir Ayupov     }
4212316a10fSspupyrev   }
4222316a10fSspupyrev 
4231256ef27Sspupyrev   // Initialize neighbor hash.
4242316a10fSspupyrev   for (size_t I = 0; I < BasicBlocks.size(); I++) {
4252316a10fSspupyrev     const BinaryBasicBlock *BB = BasicBlocks[I];
4261256ef27Sspupyrev     // Append hashes of successors.
4271256ef27Sspupyrev     uint64_t Hash = 0;
4282316a10fSspupyrev     for (BinaryBasicBlock *SuccBB : BB->successors()) {
4292316a10fSspupyrev       uint64_t SuccHash = OpcodeHashes[SuccBB->getIndex()];
4302316a10fSspupyrev       Hash = hashing::detail::hash_16_bytes(Hash, SuccHash);
4312316a10fSspupyrev     }
432b039ccc6SAmir Ayupov     if (HashFunction == HashFunction::StdHash) {
433b039ccc6SAmir Ayupov       // Compatibility with old behavior.
434b039ccc6SAmir Ayupov       BlendedHashes[I].SuccHash = (uint8_t)hash_value(Hash);
435b039ccc6SAmir Ayupov     } else {
436e7dd596cSspupyrev       BlendedHashes[I].SuccHash = (uint8_t)Hash;
437b039ccc6SAmir Ayupov     }
4381256ef27Sspupyrev 
4391256ef27Sspupyrev     // Append hashes of predecessors.
4401256ef27Sspupyrev     Hash = 0;
4412316a10fSspupyrev     for (BinaryBasicBlock *PredBB : BB->predecessors()) {
4422316a10fSspupyrev       uint64_t PredHash = OpcodeHashes[PredBB->getIndex()];
4432316a10fSspupyrev       Hash = hashing::detail::hash_16_bytes(Hash, PredHash);
4442316a10fSspupyrev     }
445b039ccc6SAmir Ayupov     if (HashFunction == HashFunction::StdHash) {
446b039ccc6SAmir Ayupov       // Compatibility with old behavior.
447b039ccc6SAmir Ayupov       BlendedHashes[I].PredHash = (uint8_t)hash_value(Hash);
448b039ccc6SAmir Ayupov     } else {
449e7dd596cSspupyrev       BlendedHashes[I].PredHash = (uint8_t)Hash;
4502316a10fSspupyrev     }
451b039ccc6SAmir Ayupov   }
4522316a10fSspupyrev 
4531256ef27Sspupyrev   //  Assign hashes.
4542316a10fSspupyrev   for (size_t I = 0; I < BasicBlocks.size(); I++) {
4552316a10fSspupyrev     const BinaryBasicBlock *BB = BasicBlocks[I];
4562316a10fSspupyrev     BB->setHash(BlendedHashes[I].combine());
4572316a10fSspupyrev   }
4582316a10fSspupyrev }
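
// For intuition: two revisions of a block that differ only in an operand,
// e.g. an immediate changing from 0x10 to 0x20 (illustrative), keep the same
// OpcodeHash (operands are excluded from the loose hash) but get different
// InstrHash values, so StaleMatcher can still pair them, albeit at a larger
// distance().
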
459753498eeSshaw young // TODO: reconcile the difference between flow function construction here in
460753498eeSshaw young // BOLT and in the compiler by splitting blocks with exception-throwing calls
461753498eeSshaw young // at the call and adding the landing pad as the successor.
46244268271Sspupyrev /// Create a wrapper flow function to use with the profile inference algorithm,
46344268271Sspupyrev /// and initialize its jumps and metadata.
46444268271Sspupyrev FlowFunction
46544268271Sspupyrev createFlowFunction(const BinaryFunction::BasicBlockOrderType &BlockOrder) {
46644268271Sspupyrev   FlowFunction Func;
46744268271Sspupyrev 
46844268271Sspupyrev   // Add a special "dummy" source so that there is always a unique entry point.
46944268271Sspupyrev   FlowBlock EntryBlock;
47044268271Sspupyrev   EntryBlock.Index = 0;
47144268271Sspupyrev   Func.Blocks.push_back(EntryBlock);
47244268271Sspupyrev 
473753498eeSshaw young   // Create FlowBlock for every basic block in the binary function.
47444268271Sspupyrev   for (const BinaryBasicBlock *BB : BlockOrder) {
47544268271Sspupyrev     Func.Blocks.emplace_back();
47644268271Sspupyrev     FlowBlock &Block = Func.Blocks.back();
47744268271Sspupyrev     Block.Index = Func.Blocks.size() - 1;
47844268271Sspupyrev     (void)BB;
4796d1502c6Sspupyrev     assert(Block.Index == BB->getIndex() + 1 &&
48044268271Sspupyrev            "incorrectly assigned basic block index");
48144268271Sspupyrev   }
48244268271Sspupyrev 
483753498eeSshaw young   // Add a special "dummy" sink block so there is always a unique sink.
484753498eeSshaw young   FlowBlock SinkBlock;
485753498eeSshaw young   SinkBlock.Index = Func.Blocks.size();
486753498eeSshaw young   Func.Blocks.push_back(SinkBlock);
487753498eeSshaw young 
488753498eeSshaw young   // Create FlowJump for each jump between basic blocks in the binary function.
48944268271Sspupyrev   std::vector<uint64_t> InDegree(Func.Blocks.size(), 0);
49044268271Sspupyrev   for (const BinaryBasicBlock *SrcBB : BlockOrder) {
49144268271Sspupyrev     std::unordered_set<const BinaryBasicBlock *> UniqueSuccs;
49244268271Sspupyrev     // Collect regular jumps
49344268271Sspupyrev     for (const BinaryBasicBlock *DstBB : SrcBB->successors()) {
49444268271Sspupyrev       // Ignoring parallel edges
49544268271Sspupyrev       if (UniqueSuccs.find(DstBB) != UniqueSuccs.end())
49644268271Sspupyrev         continue;
49744268271Sspupyrev 
49844268271Sspupyrev       Func.Jumps.emplace_back();
49944268271Sspupyrev       FlowJump &Jump = Func.Jumps.back();
5006d1502c6Sspupyrev       Jump.Source = SrcBB->getIndex() + 1;
5016d1502c6Sspupyrev       Jump.Target = DstBB->getIndex() + 1;
50244268271Sspupyrev       InDegree[Jump.Target]++;
50344268271Sspupyrev       UniqueSuccs.insert(DstBB);
50444268271Sspupyrev     }
505753498eeSshaw young     // TODO: set jump from exit block to landing pad to Unlikely.
506753498eeSshaw young     // If the block is an exit, add a dummy edge from it to the sink block.
507753498eeSshaw young     if (UniqueSuccs.empty()) {
508753498eeSshaw young       Func.Jumps.emplace_back();
509753498eeSshaw young       FlowJump &Jump = Func.Jumps.back();
510753498eeSshaw young       Jump.Source = SrcBB->getIndex() + 1;
511753498eeSshaw young       Jump.Target = Func.Blocks.size() - 1;
512753498eeSshaw young       InDegree[Jump.Target]++;
513753498eeSshaw young     }
514753498eeSshaw young 
51544268271Sspupyrev     // Collect jumps to landing pads
51644268271Sspupyrev     for (const BinaryBasicBlock *DstBB : SrcBB->landing_pads()) {
51744268271Sspupyrev       // Ignoring parallel edges
51844268271Sspupyrev       if (UniqueSuccs.find(DstBB) != UniqueSuccs.end())
51944268271Sspupyrev         continue;
52044268271Sspupyrev 
52144268271Sspupyrev       Func.Jumps.emplace_back();
52244268271Sspupyrev       FlowJump &Jump = Func.Jumps.back();
5236d1502c6Sspupyrev       Jump.Source = SrcBB->getIndex() + 1;
5246d1502c6Sspupyrev       Jump.Target = DstBB->getIndex() + 1;
52544268271Sspupyrev       InDegree[Jump.Target]++;
52644268271Sspupyrev       UniqueSuccs.insert(DstBB);
52744268271Sspupyrev     }
52844268271Sspupyrev   }
52944268271Sspupyrev 
53044268271Sspupyrev   // Add dummy edges to the extra sources. If there are multiple entry blocks,
531753498eeSshaw young   // add an unlikely edge from 0 to the subsequent ones, skipping the sink block.
53244268271Sspupyrev   assert(InDegree[0] == 0 && "dummy entry blocks shouldn't have predecessors");
533753498eeSshaw young   for (uint64_t I = 1; I < Func.Blocks.size() - 1; I++) {
53444268271Sspupyrev     const BinaryBasicBlock *BB = BlockOrder[I - 1];
53544268271Sspupyrev     if (BB->isEntryPoint() || InDegree[I] == 0) {
53644268271Sspupyrev       Func.Jumps.emplace_back();
53744268271Sspupyrev       FlowJump &Jump = Func.Jumps.back();
53844268271Sspupyrev       Jump.Source = 0;
53944268271Sspupyrev       Jump.Target = I;
54044268271Sspupyrev       if (!BB->isEntryPoint())
54144268271Sspupyrev         Jump.IsUnlikely = true;
54244268271Sspupyrev     }
54344268271Sspupyrev   }
54444268271Sspupyrev 
54544268271Sspupyrev   // Create necessary metadata for the flow function
54644268271Sspupyrev   for (FlowJump &Jump : Func.Jumps) {
547c8fc234eSshaw young     assert(Jump.Source < Func.Blocks.size());
548c8fc234eSshaw young     Func.Blocks[Jump.Source].SuccJumps.push_back(&Jump);
549c8fc234eSshaw young     assert(Jump.Target < Func.Blocks.size());
550c8fc234eSshaw young     Func.Blocks[Jump.Target].PredJumps.push_back(&Jump);
55144268271Sspupyrev   }
55244268271Sspupyrev   return Func;
55344268271Sspupyrev }
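
// Thus, for a function with blocks B0, B1, ..., Bn-1 in BlockOrder, the
// resulting FlowFunction has n + 2 blocks: index 0 is the dummy source, index
// I + 1 corresponds to block BI, and index n + 1 is the dummy sink.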
55444268271Sspupyrev 
55544268271Sspupyrev /// Assign initial block/jump weights based on the stale profile data. The goal
55644268271Sspupyrev /// is to extract as much information from the stale profile as possible. Here
55744268271Sspupyrev /// we assume that each basic block is identified by a hash value computed from
55844268271Sspupyrev /// its content, and that the hashes of unchanged basic blocks stay the same
5599a9af0a2SShaw Young /// across different revisions of the binary. Blocks may also carry pseudo probe
5609a9af0a2SShaw Young /// information in both the profile and the binary, which is used for matching.
56144268271Sspupyrev /// Whenever there is a count in the profile with the hash corresponding to one
56244268271Sspupyrev /// of the basic blocks in the binary, the count is "matched" to the block.
56344268271Sspupyrev /// Similarly, if both the source and the target of a count in the profile are
56344268271Sspupyrev /// matched to a jump in the binary, the count is recorded in the CFG.
5659a9af0a2SShaw Young size_t matchWeights(
5669a9af0a2SShaw Young     BinaryContext &BC, const BinaryFunction::BasicBlockOrderType &BlockOrder,
5679a9af0a2SShaw Young     const yaml::bolt::BinaryFunctionProfile &YamlBF, FlowFunction &Func,
5689a9af0a2SShaw Young     HashFunction HashFunction, YAMLProfileReader::ProfileLookupMap &IdToYamlBF,
5699a9af0a2SShaw Young     const BinaryFunction &BF,
5709a9af0a2SShaw Young     const ArrayRef<YAMLProfileReader::ProbeMatchSpec> ProbeMatchSpecs) {
571131eb305SShaw Young 
572753498eeSshaw young   assert(Func.Blocks.size() == BlockOrder.size() + 2);
5732316a10fSspupyrev 
574131eb305SShaw Young   std::vector<uint64_t> CallHashes;
5752316a10fSspupyrev   std::vector<FlowBlock *> Blocks;
5762316a10fSspupyrev   std::vector<BlendedBlockHash> BlendedHashes;
57744268271Sspupyrev   for (uint64_t I = 0; I < BlockOrder.size(); I++) {
57844268271Sspupyrev     const BinaryBasicBlock *BB = BlockOrder[I];
57944268271Sspupyrev     assert(BB->getHash() != 0 && "empty hash of BinaryBasicBlock");
580131eb305SShaw Young 
581131eb305SShaw Young     std::string CallHashStr = hashBlockCalls(BC, *BB);
582131eb305SShaw Young     if (CallHashStr.empty()) {
583131eb305SShaw Young       CallHashes.push_back(0);
584131eb305SShaw Young     } else {
585131eb305SShaw Young       if (HashFunction == HashFunction::StdHash)
586131eb305SShaw Young         CallHashes.push_back(std::hash<std::string>{}(CallHashStr));
587131eb305SShaw Young       else if (HashFunction == HashFunction::XXH3)
588131eb305SShaw Young         CallHashes.push_back(llvm::xxh3_64bits(CallHashStr));
589131eb305SShaw Young       else
590131eb305SShaw Young         llvm_unreachable("Unhandled HashFunction");
591131eb305SShaw Young     }
592131eb305SShaw Young 
5932316a10fSspupyrev     Blocks.push_back(&Func.Blocks[I + 1]);
5942316a10fSspupyrev     BlendedBlockHash BlendedHash(BB->getHash());
5952316a10fSspupyrev     BlendedHashes.push_back(BlendedHash);
5962316a10fSspupyrev     LLVM_DEBUG(dbgs() << "BB with index " << I << " has hash = "
5972316a10fSspupyrev                       << Twine::utohexstr(BB->getHash()) << "\n");
59844268271Sspupyrev   }
5992316a10fSspupyrev   StaleMatcher Matcher;
6009a9af0a2SShaw Young   // Collects function pseudo probes for use in the StaleMatcher.
6019a9af0a2SShaw Young   if (opts::StaleMatchingWithPseudoProbes) {
6029a9af0a2SShaw Young     const MCPseudoProbeDecoder *Decoder = BC.getPseudoProbeDecoder();
6039a9af0a2SShaw Young     assert(Decoder &&
6049a9af0a2SShaw Young            "If pseudo probes are in use, pseudo probe decoder should exist");
6059a9af0a2SShaw Young     const AddressProbesMap &ProbeMap = Decoder->getAddress2ProbesMap();
6069a9af0a2SShaw Young     const uint64_t FuncAddr = BF.getAddress();
6079a9af0a2SShaw Young     for (const MCDecodedPseudoProbe &Probe :
6089a9af0a2SShaw Young          ProbeMap.find(FuncAddr, FuncAddr + BF.getSize()))
6099a9af0a2SShaw Young       if (const BinaryBasicBlock *BB =
6109a9af0a2SShaw Young               BF.getBasicBlockContainingOffset(Probe.getAddress() - FuncAddr))
6119a9af0a2SShaw Young         Matcher.mapProbeToBB(&Probe, Blocks[BB->getIndex()]);
6129a9af0a2SShaw Young   }
613131eb305SShaw Young   Matcher.init(Blocks, BlendedHashes, CallHashes);
61444268271Sspupyrev 
6159a9af0a2SShaw Young   using FlowBlockTy =
6169a9af0a2SShaw Young       std::pair<const FlowBlock *, const yaml::bolt::BinaryBasicBlockProfile *>;
6179a9af0a2SShaw Young   using ProfileBlockMatchMap = DenseMap<uint32_t, FlowBlockTy>;
6189a9af0a2SShaw Young   // Binary profile => block index => matched block + its block profile
6199a9af0a2SShaw Young   DenseMap<const yaml::bolt::BinaryFunctionProfile *, ProfileBlockMatchMap>
6209a9af0a2SShaw Young       MatchedBlocks;
6219a9af0a2SShaw Young 
6229a9af0a2SShaw Young   // Map of FlowBlock and matching method.
6239a9af0a2SShaw Young   DenseMap<const FlowBlock *, StaleMatcher::MatchMethod> MatchedFlowBlocks;
6249a9af0a2SShaw Young 
6259a9af0a2SShaw Young   auto addMatchedBlock =
6269a9af0a2SShaw Young       [&](std::pair<const FlowBlock *, StaleMatcher::MatchMethod> BlockMethod,
6279a9af0a2SShaw Young           const yaml::bolt::BinaryFunctionProfile &YamlBP,
6289a9af0a2SShaw Young           const yaml::bolt::BinaryBasicBlockProfile &YamlBB) {
6299a9af0a2SShaw Young         const auto &[MatchedBlock, Method] = BlockMethod;
6309a9af0a2SShaw Young         if (!MatchedBlock)
6319a9af0a2SShaw Young           return;
6329a9af0a2SShaw Young         // Don't override earlier matches
6339a9af0a2SShaw Young         if (MatchedFlowBlocks.contains(MatchedBlock))
6349a9af0a2SShaw Young           return;
6359a9af0a2SShaw Young         MatchedFlowBlocks.try_emplace(MatchedBlock, Method);
6369a9af0a2SShaw Young         MatchedBlocks[&YamlBP][YamlBB.Index] = {MatchedBlock, &YamlBB};
6379a9af0a2SShaw Young       };
6389a9af0a2SShaw Young 
6399a9af0a2SShaw Young   // Match blocks from the profile to the blocks in CFG by strict hash.
6409a9af0a2SShaw Young   for (const yaml::bolt::BinaryBasicBlockProfile &YamlBB : YamlBF.Blocks) {
6419a9af0a2SShaw Young     // Update matching stats.
6429a9af0a2SShaw Young     ++BC.Stats.NumStaleBlocks;
6439a9af0a2SShaw Young     BC.Stats.StaleSampleCount += YamlBB.ExecCount;
6449a9af0a2SShaw Young 
6459a9af0a2SShaw Young     assert(YamlBB.Hash != 0 && "empty hash of BinaryBasicBlockProfile");
6469a9af0a2SShaw Young     BlendedBlockHash YamlHash(YamlBB.Hash);
6479a9af0a2SShaw Young     addMatchedBlock(Matcher.matchBlockStrict(YamlHash), YamlBF, YamlBB);
6489a9af0a2SShaw Young   }
6499a9af0a2SShaw Young   // Match blocks from the profile to the blocks in CFG by pseudo probes.
6509a9af0a2SShaw Young   for (const auto &[InlineNodeMap, YamlBP] : ProbeMatchSpecs) {
6519a9af0a2SShaw Young     for (const yaml::bolt::BinaryBasicBlockProfile &BB : YamlBP.get().Blocks)
6529a9af0a2SShaw Young       if (!BB.PseudoProbes.empty())
6539a9af0a2SShaw Young         addMatchedBlock(Matcher.matchBlockProbe(BB.PseudoProbes, InlineNodeMap),
6549a9af0a2SShaw Young                         YamlBP, BB);
6559a9af0a2SShaw Young   }
6569a9af0a2SShaw Young   // Match blocks from the profile to the blocks in CFG with loose methods.
65744268271Sspupyrev   for (const yaml::bolt::BinaryBasicBlockProfile &YamlBB : YamlBF.Blocks) {
65844268271Sspupyrev     assert(YamlBB.Hash != 0 && "empty hash of BinaryBasicBlockProfile");
65931e8a9f4Sspupyrev     BlendedBlockHash YamlHash(YamlBB.Hash);
660131eb305SShaw Young 
661131eb305SShaw Young     std::string CallHashStr = hashBlockCalls(IdToYamlBF, YamlBB);
662131eb305SShaw Young     uint64_t CallHash = 0;
663131eb305SShaw Young     if (!CallHashStr.empty()) {
664131eb305SShaw Young       if (HashFunction == HashFunction::StdHash)
665131eb305SShaw Young         CallHash = std::hash<std::string>{}(CallHashStr);
666131eb305SShaw Young       else if (HashFunction == HashFunction::XXH3)
667131eb305SShaw Young         CallHash = llvm::xxh3_64bits(CallHashStr);
668131eb305SShaw Young       else
669131eb305SShaw Young         llvm_unreachable("Unhandled HashFunction");
670131eb305SShaw Young     }
6719a9af0a2SShaw Young     auto [MatchedBlock, Method] = Matcher.matchBlockLoose(YamlHash, CallHash);
6729a9af0a2SShaw Young     if (MatchedBlock == nullptr && YamlBB.Index == 0) {
67342da84fdSspupyrev       MatchedBlock = Blocks[0];
6749a9af0a2SShaw Young       // Report as loose match
6759a9af0a2SShaw Young       Method = StaleMatcher::MATCH_OPCODE;
6769a9af0a2SShaw Young     }
6779a9af0a2SShaw Young     if (!MatchedBlock) {
6789a9af0a2SShaw Young       LLVM_DEBUG(dbgs() << "Couldn't match yaml block (bid = " << YamlBB.Index
6799a9af0a2SShaw Young                         << ")" << " with hash " << Twine::utohexstr(YamlBB.Hash)
6802316a10fSspupyrev                         << "\n");
6819a9af0a2SShaw Young       continue;
68231e8a9f4Sspupyrev     }
6839a9af0a2SShaw Young     addMatchedBlock({MatchedBlock, Method}, YamlBF, YamlBB);
68444268271Sspupyrev   }
68544268271Sspupyrev 
68644268271Sspupyrev   // Match jumps from the profile to the jumps from CFG
68744268271Sspupyrev   std::vector<uint64_t> OutWeight(Func.Blocks.size(), 0);
68844268271Sspupyrev   std::vector<uint64_t> InWeight(Func.Blocks.size(), 0);
6899a9af0a2SShaw Young 
6909a9af0a2SShaw Young   for (const auto &[YamlBF, MatchMap] : MatchedBlocks) {
6919a9af0a2SShaw Young     for (const auto &[YamlBBIdx, FlowBlockProfile] : MatchMap) {
6929a9af0a2SShaw Young       const auto &[MatchedBlock, YamlBB] = FlowBlockProfile;
6939a9af0a2SShaw Young       StaleMatcher::MatchMethod Method = MatchedFlowBlocks.lookup(MatchedBlock);
6949a9af0a2SShaw Young       BlendedBlockHash BinHash = BlendedHashes[MatchedBlock->Index - 1];
6959a9af0a2SShaw Young       LLVM_DEBUG(dbgs() << "Matched yaml block (bid = " << YamlBBIdx << ")"
6969a9af0a2SShaw Young                         << " with hash " << Twine::utohexstr(YamlBB->Hash)
6979a9af0a2SShaw Young                         << " to BB (index = " << MatchedBlock->Index - 1 << ")"
6989a9af0a2SShaw Young                         << " with hash " << Twine::utohexstr(BinHash.combine())
6999a9af0a2SShaw Young                         << "\n");
700*06e08696SKazu Hirata       (void)BinHash;
7019a9af0a2SShaw Young       uint64_t ExecCount = YamlBB->ExecCount;
7029a9af0a2SShaw Young       // Update matching stats accounting for the matched block.
7039a9af0a2SShaw Young       switch (Method) {
7049a9af0a2SShaw Young       case StaleMatcher::MATCH_EXACT:
7059a9af0a2SShaw Young         ++BC.Stats.NumExactMatchedBlocks;
7069a9af0a2SShaw Young         BC.Stats.ExactMatchedSampleCount += ExecCount;
7079a9af0a2SShaw Young         LLVM_DEBUG(dbgs() << "  exact match\n");
7089a9af0a2SShaw Young         break;
7099a9af0a2SShaw Young       case StaleMatcher::MATCH_PROBE_EXACT:
7109a9af0a2SShaw Young         ++BC.Stats.NumPseudoProbeExactMatchedBlocks;
7119a9af0a2SShaw Young         BC.Stats.PseudoProbeExactMatchedSampleCount += ExecCount;
7129a9af0a2SShaw Young         LLVM_DEBUG(dbgs() << "  exact pseudo probe match\n");
7139a9af0a2SShaw Young         break;
7149a9af0a2SShaw Young       case StaleMatcher::MATCH_PROBE_LOOSE:
7159a9af0a2SShaw Young         ++BC.Stats.NumPseudoProbeLooseMatchedBlocks;
7169a9af0a2SShaw Young         BC.Stats.PseudoProbeLooseMatchedSampleCount += ExecCount;
7179a9af0a2SShaw Young         LLVM_DEBUG(dbgs() << "  loose pseudo probe match\n");
7189a9af0a2SShaw Young         break;
7199a9af0a2SShaw Young       case StaleMatcher::MATCH_CALL:
7209a9af0a2SShaw Young         ++BC.Stats.NumCallMatchedBlocks;
7219a9af0a2SShaw Young         BC.Stats.CallMatchedSampleCount += ExecCount;
7229a9af0a2SShaw Young         LLVM_DEBUG(dbgs() << "  call match\n");
7239a9af0a2SShaw Young         break;
7249a9af0a2SShaw Young       case StaleMatcher::MATCH_OPCODE:
7259a9af0a2SShaw Young         ++BC.Stats.NumLooseMatchedBlocks;
7269a9af0a2SShaw Young         BC.Stats.LooseMatchedSampleCount += ExecCount;
7279a9af0a2SShaw Young         LLVM_DEBUG(dbgs() << "  loose match\n");
7289a9af0a2SShaw Young         break;
7299a9af0a2SShaw Young       case StaleMatcher::NO_MATCH:
7309a9af0a2SShaw Young         LLVM_DEBUG(dbgs() << "  no match\n");
7319a9af0a2SShaw Young       }
7329a9af0a2SShaw Young     }
7339a9af0a2SShaw Young 
7349a9af0a2SShaw Young     for (const yaml::bolt::BinaryBasicBlockProfile &YamlBB : YamlBF->Blocks) {
73544268271Sspupyrev       for (const yaml::bolt::SuccessorInfo &YamlSI : YamlBB.Successors) {
73644268271Sspupyrev         if (YamlSI.Count == 0)
73744268271Sspupyrev           continue;
73844268271Sspupyrev 
73944268271Sspupyrev         // Try to find the jump for a given (src, dst) pair from the profile and
74044268271Sspupyrev         // assign the jump weight based on the profile count
74144268271Sspupyrev         const uint64_t SrcIndex = YamlBB.Index;
74244268271Sspupyrev         const uint64_t DstIndex = YamlSI.Index;
74344268271Sspupyrev 
7449a9af0a2SShaw Young         const FlowBlock *MatchedSrcBlock = MatchMap.lookup(SrcIndex).first;
7459a9af0a2SShaw Young         const FlowBlock *MatchedDstBlock = MatchMap.lookup(DstIndex).first;
74644268271Sspupyrev 
74744268271Sspupyrev         if (MatchedSrcBlock != nullptr && MatchedDstBlock != nullptr) {
74844268271Sspupyrev           // Find a jump between the two blocks
74944268271Sspupyrev           FlowJump *Jump = nullptr;
75044268271Sspupyrev           for (FlowJump *SuccJump : MatchedSrcBlock->SuccJumps) {
75144268271Sspupyrev             if (SuccJump->Target == MatchedDstBlock->Index) {
75244268271Sspupyrev               Jump = SuccJump;
75344268271Sspupyrev               break;
75444268271Sspupyrev             }
75544268271Sspupyrev           }
75644268271Sspupyrev           // Assign the weight, if the corresponding jump is found
75744268271Sspupyrev           if (Jump != nullptr) {
75844268271Sspupyrev             Jump->Weight = YamlSI.Count;
75944268271Sspupyrev             Jump->HasUnknownWeight = false;
76044268271Sspupyrev           }
76144268271Sspupyrev         }
76244268271Sspupyrev         // Assign the weight for the src block, if it is found
76344268271Sspupyrev         if (MatchedSrcBlock != nullptr)
76444268271Sspupyrev           OutWeight[MatchedSrcBlock->Index] += YamlSI.Count;
76544268271Sspupyrev         // Assign the weight for the dst block, if it is found
76644268271Sspupyrev         if (MatchedDstBlock != nullptr)
76744268271Sspupyrev           InWeight[MatchedDstBlock->Index] += YamlSI.Count;
76844268271Sspupyrev       }
76944268271Sspupyrev     }
7709a9af0a2SShaw Young   }
77144268271Sspupyrev 
77244268271Sspupyrev   // Assign block counts based on in-/out- jumps
77344268271Sspupyrev   for (FlowBlock &Block : Func.Blocks) {
77444268271Sspupyrev     if (OutWeight[Block.Index] == 0 && InWeight[Block.Index] == 0) {
77531e8a9f4Sspupyrev       assert(Block.HasUnknownWeight && "unmatched block with a positive count");
77644268271Sspupyrev       continue;
77744268271Sspupyrev     }
77844268271Sspupyrev     Block.HasUnknownWeight = false;
77944268271Sspupyrev     Block.Weight = std::max(OutWeight[Block.Index], InWeight[Block.Index]);
78044268271Sspupyrev   }
78168fc8dffSshaw young 
7829a9af0a2SShaw Young   return MatchedBlocks[&YamlBF].size();
78344268271Sspupyrev }
78444268271Sspupyrev 
78544268271Sspupyrev /// The function finds all blocks that are (i) reachable from the Entry block
78644268271Sspupyrev /// and (ii) do not have a path to an exit, and marks all such blocks 'cold'
78744268271Sspupyrev /// so that profi does not send any flow to such blocks.
78844268271Sspupyrev void preprocessUnreachableBlocks(FlowFunction &Func) {
78944268271Sspupyrev   const uint64_t NumBlocks = Func.Blocks.size();
79044268271Sspupyrev 
79144268271Sspupyrev   // Start bfs from the source
79244268271Sspupyrev   std::queue<uint64_t> Queue;
79344268271Sspupyrev   std::vector<bool> VisitedEntry(NumBlocks, false);
79444268271Sspupyrev   for (uint64_t I = 0; I < NumBlocks; I++) {
79544268271Sspupyrev     FlowBlock &Block = Func.Blocks[I];
79644268271Sspupyrev     if (Block.isEntry()) {
79744268271Sspupyrev       Queue.push(I);
79844268271Sspupyrev       VisitedEntry[I] = true;
79944268271Sspupyrev       break;
80044268271Sspupyrev     }
80144268271Sspupyrev   }
80244268271Sspupyrev   while (!Queue.empty()) {
80344268271Sspupyrev     const uint64_t Src = Queue.front();
80444268271Sspupyrev     Queue.pop();
80544268271Sspupyrev     for (FlowJump *Jump : Func.Blocks[Src].SuccJumps) {
80644268271Sspupyrev       const uint64_t Dst = Jump->Target;
80744268271Sspupyrev       if (!VisitedEntry[Dst]) {
80844268271Sspupyrev         Queue.push(Dst);
80944268271Sspupyrev         VisitedEntry[Dst] = true;
81044268271Sspupyrev       }
81144268271Sspupyrev     }
81244268271Sspupyrev   }
81344268271Sspupyrev 
81444268271Sspupyrev   // Start bfs from all sinks
81544268271Sspupyrev   std::vector<bool> VisitedExit(NumBlocks, false);
81644268271Sspupyrev   for (uint64_t I = 0; I < NumBlocks; I++) {
81744268271Sspupyrev     FlowBlock &Block = Func.Blocks[I];
81844268271Sspupyrev     if (Block.isExit() && VisitedEntry[I]) {
81944268271Sspupyrev       Queue.push(I);
82044268271Sspupyrev       VisitedExit[I] = true;
82144268271Sspupyrev     }
82244268271Sspupyrev   }
82344268271Sspupyrev   while (!Queue.empty()) {
82444268271Sspupyrev     const uint64_t Src = Queue.front();
82544268271Sspupyrev     Queue.pop();
82644268271Sspupyrev     for (FlowJump *Jump : Func.Blocks[Src].PredJumps) {
82744268271Sspupyrev       const uint64_t Dst = Jump->Source;
82844268271Sspupyrev       if (!VisitedExit[Dst]) {
82944268271Sspupyrev         Queue.push(Dst);
83044268271Sspupyrev         VisitedExit[Dst] = true;
83144268271Sspupyrev       }
83244268271Sspupyrev     }
83344268271Sspupyrev   }
83444268271Sspupyrev 
83544268271Sspupyrev   // Make all blocks of zero weight so that flow is not sent
83644268271Sspupyrev   for (uint64_t I = 0; I < NumBlocks; I++) {
83744268271Sspupyrev     FlowBlock &Block = Func.Blocks[I];
83844268271Sspupyrev     if (Block.Weight == 0)
83944268271Sspupyrev       continue;
84044268271Sspupyrev     if (!VisitedEntry[I] || !VisitedExit[I]) {
84144268271Sspupyrev       Block.Weight = 0;
84244268271Sspupyrev       Block.HasUnknownWeight = true;
84344268271Sspupyrev       Block.IsUnlikely = true;
84444268271Sspupyrev       for (FlowJump *Jump : Block.SuccJumps) {
84544268271Sspupyrev         if (Jump->Source == Block.Index && Jump->Target == Block.Index) {
84644268271Sspupyrev           Jump->Weight = 0;
84744268271Sspupyrev           Jump->HasUnknownWeight = true;
84844268271Sspupyrev           Jump->IsUnlikely = true;
84944268271Sspupyrev         }
85044268271Sspupyrev       }
85144268271Sspupyrev     }
85244268271Sspupyrev   }
85344268271Sspupyrev }
85444268271Sspupyrev 
85544268271Sspupyrev /// Decide if stale profile matching can be applied for a given function.
85644268271Sspupyrev /// Currently we skip inference for (very) large instances and for instances
85744268271Sspupyrev /// having "unexpected" control flow (e.g., having no sink basic blocks).
85868fc8dffSshaw young bool canApplyInference(const FlowFunction &Func,
85968fc8dffSshaw young                        const yaml::bolt::BinaryFunctionProfile &YamlBF,
86068fc8dffSshaw young                        const uint64_t &MatchedBlocks) {
86144268271Sspupyrev   if (Func.Blocks.size() > opts::StaleMatchingMaxFuncSize)
86244268271Sspupyrev     return false;
86344268271Sspupyrev 
86468fc8dffSshaw young   if (MatchedBlocks * 100 <
86568fc8dffSshaw young       opts::StaleMatchingMinMatchedBlock * YamlBF.Blocks.size())
86668fc8dffSshaw young     return false;
86768fc8dffSshaw young 
868753498eeSshaw young   // Returns false if the artificial sink block has no predecessors, meaning
869753498eeSshaw young   // there are no exit blocks.
870753498eeSshaw young   if (Func.Blocks[Func.Blocks.size() - 1].isEntry())
87144268271Sspupyrev     return false;
87244268271Sspupyrev 
87344268271Sspupyrev   return true;
87444268271Sspupyrev }
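
// For example (illustrative numbers): with -stale-matching-min-matched-block=50
// and a profile containing 10 blocks, at least 5 of them must be matched for
// inference to run, since the check above requires
// MatchedBlocks * 100 >= 50 * 10.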
87544268271Sspupyrev 
87644268271Sspupyrev /// Apply the profile inference algorithm for a given flow function.
87744268271Sspupyrev void applyInference(FlowFunction &Func) {
87844268271Sspupyrev   ProfiParams Params;
87944268271Sspupyrev   // Set the params from the command-line flags.
88044268271Sspupyrev   Params.EvenFlowDistribution = opts::StaleMatchingEvenFlowDistribution;
88144268271Sspupyrev   Params.RebalanceUnknown = opts::StaleMatchingRebalanceUnknown;
88244268271Sspupyrev   Params.JoinIslands = opts::StaleMatchingJoinIslands;
88344268271Sspupyrev 
88444268271Sspupyrev   Params.CostBlockInc = opts::StaleMatchingCostBlockInc;
8851256ef27Sspupyrev   Params.CostBlockEntryInc = opts::StaleMatchingCostBlockInc;
88644268271Sspupyrev   Params.CostBlockDec = opts::StaleMatchingCostBlockDec;
8871256ef27Sspupyrev   Params.CostBlockEntryDec = opts::StaleMatchingCostBlockDec;
88844268271Sspupyrev   Params.CostBlockUnknownInc = opts::StaleMatchingCostBlockUnknownInc;
88944268271Sspupyrev 
89044268271Sspupyrev   Params.CostJumpInc = opts::StaleMatchingCostJumpInc;
8911256ef27Sspupyrev   Params.CostJumpFTInc = opts::StaleMatchingCostJumpInc;
89244268271Sspupyrev   Params.CostJumpDec = opts::StaleMatchingCostJumpDec;
8931256ef27Sspupyrev   Params.CostJumpFTDec = opts::StaleMatchingCostJumpDec;
89444268271Sspupyrev   Params.CostJumpUnknownInc = opts::StaleMatchingCostJumpUnknownInc;
89544268271Sspupyrev   Params.CostJumpUnknownFTInc = opts::StaleMatchingCostJumpUnknownFTInc;
89644268271Sspupyrev 
89744268271Sspupyrev   applyFlowInference(Params, Func);
89844268271Sspupyrev }
89944268271Sspupyrev 
90044268271Sspupyrev /// Collect inferred counts from the flow function and update annotations in
90144268271Sspupyrev /// the binary function.
90244268271Sspupyrev void assignProfile(BinaryFunction &BF,
90344268271Sspupyrev                    const BinaryFunction::BasicBlockOrderType &BlockOrder,
90444268271Sspupyrev                    FlowFunction &Func) {
90544268271Sspupyrev   BinaryContext &BC = BF.getBinaryContext();
90644268271Sspupyrev 
907753498eeSshaw young   assert(Func.Blocks.size() == BlockOrder.size() + 2);
90844268271Sspupyrev   for (uint64_t I = 0; I < BlockOrder.size(); I++) {
90944268271Sspupyrev     FlowBlock &Block = Func.Blocks[I + 1];
91044268271Sspupyrev     BinaryBasicBlock *BB = BlockOrder[I];
91144268271Sspupyrev 
91244268271Sspupyrev     // Update block's count
91344268271Sspupyrev     BB->setExecutionCount(Block.Flow);
91444268271Sspupyrev 
91544268271Sspupyrev     // Update jump counts: (i) clean existing counts and then (ii) set new ones
91644268271Sspupyrev     auto BI = BB->branch_info_begin();
91744268271Sspupyrev     for (const BinaryBasicBlock *DstBB : BB->successors()) {
91844268271Sspupyrev       (void)DstBB;
91944268271Sspupyrev       BI->Count = 0;
92044268271Sspupyrev       BI->MispredictedCount = 0;
92144268271Sspupyrev       ++BI;
92244268271Sspupyrev     }
92344268271Sspupyrev     for (FlowJump *Jump : Block.SuccJumps) {
92444268271Sspupyrev       if (Jump->IsUnlikely)
92544268271Sspupyrev         continue;
92644268271Sspupyrev       if (Jump->Flow == 0)
92744268271Sspupyrev         continue;
92844268271Sspupyrev 
929753498eeSshaw young       // Skips the artificial sink block.
930753498eeSshaw young       if (Jump->Target == Func.Blocks.size() - 1)
931753498eeSshaw young         continue;
93244268271Sspupyrev       BinaryBasicBlock &SuccBB = *BlockOrder[Jump->Target - 1];
93344268271Sspupyrev       // Check if the edge corresponds to a regular jump or a landing pad
93444268271Sspupyrev       if (BB->getSuccessor(SuccBB.getLabel())) {
93544268271Sspupyrev         BinaryBasicBlock::BinaryBranchInfo &BI = BB->getBranchInfo(SuccBB);
93644268271Sspupyrev         BI.Count += Jump->Flow;
93744268271Sspupyrev       } else {
93844268271Sspupyrev         BinaryBasicBlock *LP = BB->getLandingPad(SuccBB.getLabel());
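        // Note that a landing pad may be reachable from multiple throwing
        // blocks; its count is raised to the largest inferred flow on any
        // incoming edge rather than accumulated.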
93944268271Sspupyrev         if (LP && LP->getKnownExecutionCount() < Jump->Flow)
94044268271Sspupyrev           LP->setExecutionCount(Jump->Flow);
94144268271Sspupyrev       }
94244268271Sspupyrev     }
94344268271Sspupyrev 
94444268271Sspupyrev     // Update call-site annotations
94544268271Sspupyrev     auto setOrUpdateAnnotation = [&](MCInst &Instr, StringRef Name,
94644268271Sspupyrev                                      uint64_t Count) {
94744268271Sspupyrev       if (BC.MIB->hasAnnotation(Instr, Name))
94844268271Sspupyrev         BC.MIB->removeAnnotation(Instr, Name);
94944268271Sspupyrev       // Do not add zero-count annotations
95044268271Sspupyrev       if (Count == 0)
95144268271Sspupyrev         return;
95244268271Sspupyrev       BC.MIB->addAnnotation(Instr, Name, Count);
95344268271Sspupyrev     };
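    // For instance, setOrUpdateAnnotation(Instr, "Count", 42) replaces any
    // existing "Count" annotation with 42, while a zero count merely drops the
    // stale annotation without adding a new one.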
95444268271Sspupyrev 
95544268271Sspupyrev     for (MCInst &Instr : *BB) {
95644268271Sspupyrev       // Ignore pseudo instructions
95744268271Sspupyrev       if (BC.MIB->isPseudo(Instr))
95844268271Sspupyrev         continue;
95944268271Sspupyrev       // Ignore jump tables
96044268271Sspupyrev       const MCInst *LastInstr = BB->getLastNonPseudoInstr();
96144268271Sspupyrev       if (BC.MIB->getJumpTable(*LastInstr) && LastInstr == &Instr)
96244268271Sspupyrev         continue;
96344268271Sspupyrev 
96444268271Sspupyrev       if (BC.MIB->isIndirectCall(Instr) || BC.MIB->isIndirectBranch(Instr)) {
96544268271Sspupyrev         auto &ICSP = BC.MIB->getOrCreateAnnotationAs<IndirectCallSiteProfile>(
96644268271Sspupyrev             Instr, "CallProfile");
96744268271Sspupyrev         if (!ICSP.empty()) {
96844268271Sspupyrev           // Try to evenly distribute the counts among the call sites
96944268271Sspupyrev           const uint64_t TotalCount = Block.Flow;
97044268271Sspupyrev           const uint64_t NumSites = ICSP.size();
97144268271Sspupyrev           for (uint64_t Idx = 0; Idx < ICSP.size(); Idx++) {
97244268271Sspupyrev             IndirectCallProfile &CSP = ICSP[Idx];
97344268271Sspupyrev             uint64_t CountPerSite = TotalCount / NumSites;
97444268271Sspupyrev             // When counts cannot be exactly distributed, increase by 1 the
97544268271Sspupyrev             // counts of the first (TotalCount % NumSites) call sites
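            // For example, TotalCount = 10 and NumSites = 3 yield per-site
            // counts of 4, 3, and 3.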
97644268271Sspupyrev             if (Idx < TotalCount % NumSites)
97744268271Sspupyrev               CountPerSite++;
97844268271Sspupyrev             CSP.Count = CountPerSite;
97944268271Sspupyrev           }
98044268271Sspupyrev         } else {
98144268271Sspupyrev           ICSP.emplace_back(nullptr, Block.Flow, 0);
98244268271Sspupyrev         }
98344268271Sspupyrev       } else if (BC.MIB->getConditionalTailCall(Instr)) {
98444268271Sspupyrev         // We don't know exactly how many times the conditional tail call is
98544268271Sspupyrev         // executed; conservatively, set it to the execution count of the block.
98644268271Sspupyrev         setOrUpdateAnnotation(Instr, "CTCTakenCount", Block.Flow);
98744268271Sspupyrev         BC.MIB->removeAnnotation(Instr, "CTCMispredCount");
98844268271Sspupyrev       } else if (BC.MIB->isCall(Instr)) {
98944268271Sspupyrev         setOrUpdateAnnotation(Instr, "Count", Block.Flow);
99044268271Sspupyrev       }
99144268271Sspupyrev     }
99244268271Sspupyrev   }
99344268271Sspupyrev 
99444268271Sspupyrev   // Update function's execution count and mark the function inferred.
99544268271Sspupyrev   BF.setExecutionCount(Func.Blocks[0].Flow);
99644268271Sspupyrev   BF.setHasInferredProfile(true);
99744268271Sspupyrev }
99844268271Sspupyrev 
99944268271Sspupyrev bool YAMLProfileReader::inferStaleProfile(
10009a9af0a2SShaw Young     BinaryFunction &BF, const yaml::bolt::BinaryFunctionProfile &YamlBF,
10019a9af0a2SShaw Young     const ArrayRef<ProbeMatchSpec> ProbeMatchSpecs) {
100296378b3dSshaw young 
100396378b3dSshaw young   NamedRegionTimer T("inferStaleProfile", "stale profile inference", "rewrite",
100496378b3dSshaw young                      "Rewrite passes", opts::TimeRewrite);
100596378b3dSshaw young 
1006b431546dSAmir Ayupov   if (!BF.hasCFG())
1007b431546dSAmir Ayupov     return false;
1008b431546dSAmir Ayupov 
10096d1502c6Sspupyrev   LLVM_DEBUG(dbgs() << "BOLT-INFO: applying profile inference for "
10106d1502c6Sspupyrev                     << "\"" << BF.getPrintName() << "\"\n");
10116d1502c6Sspupyrev 
10126d1502c6Sspupyrev   // Make sure that block hashes are up to date.
1013b039ccc6SAmir Ayupov   BF.computeBlockHashes(YamlBP.Header.HashFunction);
101444268271Sspupyrev 
101544268271Sspupyrev   const BinaryFunction::BasicBlockOrderType BlockOrder(
101644268271Sspupyrev       BF.getLayout().block_begin(), BF.getLayout().block_end());
101744268271Sspupyrev 
10206d1502c6Sspupyrev   // Create a wrapper flow function to use with the profile inference algorithm.
102144268271Sspupyrev   FlowFunction Func = createFlowFunction(BlockOrder);
102244268271Sspupyrev 
102344268271Sspupyrev   // Match as many block/jump counts from the stale profile as possible;
102344268271Sspupyrev   // MatchedBlocks records how many blocks were matched.
102468fc8dffSshaw young   size_t MatchedBlocks =
10259a9af0a2SShaw Young       matchWeights(BF.getBinaryContext(), BlockOrder, YamlBF, Func,
10269a9af0a2SShaw Young                    YamlBP.Header.HashFunction, IdToYamLBF, BF, ProbeMatchSpecs);
102744268271Sspupyrev 
102844268271Sspupyrev   // Adjust the flow function by marking unreachable blocks Unlikely so that
10296d1502c6Sspupyrev   // they don't get any counts assigned.
103044268271Sspupyrev   preprocessUnreachableBlocks(Func);
103144268271Sspupyrev 
10326d1502c6Sspupyrev   // Check if profile inference can be applied for the instance.
103368fc8dffSshaw young   if (!canApplyInference(Func, YamlBF, MatchedBlocks))
103444268271Sspupyrev     return false;
103544268271Sspupyrev 
10366d1502c6Sspupyrev   // Apply the profile inference algorithm.
103744268271Sspupyrev   applyInference(Func);
103844268271Sspupyrev 
10396d1502c6Sspupyrev   // Collect inferred counts and update function annotations.
104044268271Sspupyrev   assignProfile(BF, BlockOrder, Func);
104144268271Sspupyrev 
104244268271Sspupyrev   // As of now, we always mark the binary function as having a "correct"
104344268271Sspupyrev   // profile. In the future, we may discard the results for instances with poor
104444268271Sspupyrev   // inference metrics and keep such functions unoptimized.
104544268271Sspupyrev   return true;
104644268271Sspupyrev }
104744268271Sspupyrev 
104844268271Sspupyrev } // end namespace bolt
104944268271Sspupyrev } // end namespace llvm
1050