//===- bolt/Profile/StaleProfileMatching.cpp - Profile data matching   ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// BOLT often has to deal with profiles collected from binaries built several
// revisions behind the release. As a result, a certain percentage of functions
// is considered stale and is not optimized. This file implements the ability
// to match a profile to functions that are not 100% binary identical, thus
// increasing optimization coverage and boosting application performance.
//
// The algorithm consists of two phases, matching and inference:
// - In the matching phase, we try to "guess" as many block and jump counts
//   from the stale profile as possible. To this end, the content of each basic
//   block is hashed and stored in the (yaml) profile. When BOLT optimizes a
//   binary, it computes block hashes and identifies the corresponding entries
//   in the stale profile. This yields a partial profile for every CFG in the
//   binary.
// - In the inference phase, we employ a network flow-based algorithm (profi)
//   to reconstruct "realistic" block and jump counts from the partial profile
//   generated in the first stage. In practice, we don't always produce a
//   proper profile, but the majority (e.g., >90%) of CFGs get the correct
//   counts.
//
//===----------------------------------------------------------------------===//

#include "bolt/Core/HashUtilities.h"
#include "bolt/Profile/YAMLProfileReader.h"
#include "llvm/ADT/Bitfields.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/MC/MCPseudoProbe.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/xxhash.h"
#include "llvm/Transforms/Utils/SampleProfileInference.h"

#include <queue>

using namespace llvm;

#undef DEBUG_TYPE
#define DEBUG_TYPE "bolt-prof"

namespace opts {

extern cl::opt<bool> TimeRewrite;
extern cl::OptionCategory BoltOptCategory;

cl::opt<bool>
    InferStaleProfile("infer-stale-profile",
                      cl::desc("Infer counts from stale profile data."),
                      cl::init(false), cl::Hidden, cl::cat(BoltOptCategory));

cl::opt<unsigned> StaleMatchingMinMatchedBlock(
    "stale-matching-min-matched-block",
    cl::desc("Percentage threshold of matched basic blocks at which stale "
             "profile inference is executed."),
    cl::init(0), cl::Hidden, cl::cat(BoltOptCategory));

cl::opt<unsigned> StaleMatchingMaxFuncSize(
    "stale-matching-max-func-size",
    cl::desc("The maximum size of a function to consider for inference."),
    cl::init(10000), cl::Hidden, cl::cat(BoltOptCategory));

// Parameters of the profile inference algorithm. The default values are tuned
// on several benchmarks.
cl::opt<bool> StaleMatchingEvenFlowDistribution(
    "stale-matching-even-flow-distribution",
    cl::desc("Try to evenly distribute flow when there are multiple equally "
             "likely options."),
    cl::init(true), cl::ReallyHidden, cl::cat(BoltOptCategory));

cl::opt<bool> StaleMatchingRebalanceUnknown(
    "stale-matching-rebalance-unknown",
    cl::desc("Evenly re-distribute flow among unknown subgraphs."),
    cl::init(false), cl::ReallyHidden, cl::cat(BoltOptCategory));

cl::opt<bool> StaleMatchingJoinIslands(
    "stale-matching-join-islands",
    cl::desc("Join isolated components having positive flow."), cl::init(true),
    cl::ReallyHidden, cl::cat(BoltOptCategory));

cl::opt<unsigned> StaleMatchingCostBlockInc(
    "stale-matching-cost-block-inc",
    cl::desc("The cost of increasing a block count by one."), cl::init(150),
    cl::ReallyHidden, cl::cat(BoltOptCategory));

cl::opt<unsigned> StaleMatchingCostBlockDec(
    "stale-matching-cost-block-dec",
    cl::desc("The cost of decreasing a block count by one."), cl::init(150),
    cl::ReallyHidden, cl::cat(BoltOptCategory));

cl::opt<unsigned> StaleMatchingCostJumpInc(
    "stale-matching-cost-jump-inc",
    cl::desc("The cost of increasing a jump count by one."), cl::init(150),
    cl::ReallyHidden, cl::cat(BoltOptCategory));

cl::opt<unsigned> StaleMatchingCostJumpDec(
    "stale-matching-cost-jump-dec",
    cl::desc("The cost of decreasing a jump count by one."), cl::init(150),
    cl::ReallyHidden, cl::cat(BoltOptCategory));

cl::opt<unsigned> StaleMatchingCostBlockUnknownInc(
    "stale-matching-cost-block-unknown-inc",
    cl::desc("The cost of increasing an unknown block count by one."),
    cl::init(1), cl::ReallyHidden, cl::cat(BoltOptCategory));

cl::opt<unsigned> StaleMatchingCostJumpUnknownInc(
    "stale-matching-cost-jump-unknown-inc",
    cl::desc("The cost of increasing an unknown jump count by one."),
    cl::init(140), cl::ReallyHidden, cl::cat(BoltOptCategory));

cl::opt<unsigned> StaleMatchingCostJumpUnknownFTInc(
    "stale-matching-cost-jump-unknown-ft-inc",
    cl::desc(
        "The cost of increasing an unknown fall-through jump count by one."),
    cl::init(3), cl::ReallyHidden, cl::cat(BoltOptCategory));

cl::opt<bool> StaleMatchingWithPseudoProbes(
    "stale-matching-with-pseudo-probes",
    cl::desc("Turns on stale matching with block pseudo probes."),
    cl::init(false), cl::ReallyHidden, cl::cat(BoltOptCategory));

} // namespace opts
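
// Illustrative usage (a sketch, not normative documentation): stale matching
// is disabled by default and is enabled on the llvm-bolt command line, e.g.,
//
//   llvm-bolt app -o app.bolt --data=profile.yaml --infer-stale-profile
//
// The remaining options above are hidden tuning knobs whose defaults were
// chosen empirically on several benchmarks.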

namespace llvm {
namespace bolt {

/// An object wrapping several components of a basic block hash. The combined
/// (blended) hash is represented and stored as one uint64_t, while individual
/// components are of smaller size (e.g., uint16_t or uint8_t).
struct BlendedBlockHash {
private:
  using ValueOffset = Bitfield::Element<uint16_t, 0, 16>;
  using ValueOpcode = Bitfield::Element<uint16_t, 16, 16>;
  using ValueInstr = Bitfield::Element<uint16_t, 32, 16>;
  using ValuePred = Bitfield::Element<uint8_t, 48, 8>;
  using ValueSucc = Bitfield::Element<uint8_t, 56, 8>;

public:
  explicit BlendedBlockHash() {}

  explicit BlendedBlockHash(uint64_t Hash) {
    Offset = Bitfield::get<ValueOffset>(Hash);
    OpcodeHash = Bitfield::get<ValueOpcode>(Hash);
    InstrHash = Bitfield::get<ValueInstr>(Hash);
    PredHash = Bitfield::get<ValuePred>(Hash);
    SuccHash = Bitfield::get<ValueSucc>(Hash);
  }

  /// Combine the blended hash into a single uint64_t.
  uint64_t combine() const {
    uint64_t Hash = 0;
    Bitfield::set<ValueOffset>(Hash, Offset);
    Bitfield::set<ValueOpcode>(Hash, OpcodeHash);
    Bitfield::set<ValueInstr>(Hash, InstrHash);
    Bitfield::set<ValuePred>(Hash, PredHash);
    Bitfield::set<ValueSucc>(Hash, SuccHash);
    return Hash;
  }

  /// Compute the distance between two given blended hashes. The smaller the
  /// distance, the more similar the two blocks are. For identical basic
  /// blocks, the distance is zero.
  uint64_t distance(const BlendedBlockHash &BBH) const {
    assert(OpcodeHash == BBH.OpcodeHash &&
           "incorrect blended hash distance computation");
    uint64_t Dist = 0;
    // Account for the neighbor (predecessor/successor) hashes
    Dist += SuccHash == BBH.SuccHash ? 0 : 1;
    Dist += PredHash == BBH.PredHash ? 0 : 1;
    Dist <<= 16;
    // Account for InstrHash
    Dist += InstrHash == BBH.InstrHash ? 0 : 1;
    Dist <<= 16;
    // Account for Offset
    Dist += (Offset >= BBH.Offset ? Offset - BBH.Offset : BBH.Offset - Offset);
    return Dist;
  }

  /// The offset of the basic block from the function start.
  uint16_t Offset{0};
  /// (Loose) Hash of the basic block instructions, excluding operands.
  uint16_t OpcodeHash{0};
  /// (Strong) Hash of the basic block instructions, including opcodes and
  /// operands.
  uint16_t InstrHash{0};
  /// (Loose) Hash of the predecessors of the basic block.
  uint8_t PredHash{0};
  /// (Loose) Hash of the successors of the basic block.
  uint8_t SuccHash{0};
};
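
// A worked example of the layout above, with illustrative values: for
// Offset = 0x0010, OpcodeHash = 0xABCD, InstrHash = 0x1234, PredHash = 0x56,
// and SuccHash = 0x78, combine() packs the fields into 0x78561234ABCD0010,
// and BlendedBlockHash(0x78561234ABCD0010) restores them. For two hashes with
// equal OpcodeHash, a differing SuccHash, a differing InstrHash, and offsets
// 0x40 and 0x30, distance() evaluates to
// ((((1 + 0) << 16) + 1) << 16) + 0x10 == 0x100010010: neighbor mismatches
// dominate, then instruction mismatches, then the offset delta.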

/// The object is used to identify and match basic blocks in a BinaryFunction
/// given their hashes computed on a binary built several revisions behind the
/// release.
class StaleMatcher {
public:
  /// Initialize the stale matcher.
  void init(const std::vector<FlowBlock *> &Blocks,
            const std::vector<BlendedBlockHash> &Hashes,
            const std::vector<uint64_t> &CallHashes) {
    assert(Blocks.size() == Hashes.size() &&
           Hashes.size() == CallHashes.size() &&
           "incorrect matcher initialization");
    for (size_t I = 0; I < Blocks.size(); I++) {
      FlowBlock *Block = Blocks[I];
      uint16_t OpHash = Hashes[I].OpcodeHash;
      OpHashToBlocks[OpHash].push_back(std::make_pair(Hashes[I], Block));
      if (CallHashes[I])
        CallHashToBlocks[CallHashes[I]].push_back(
            std::make_pair(Hashes[I], Block));
    }
  }

  /// Creates a mapping from a pseudo probe to a flow block.
  void mapProbeToBB(const MCDecodedPseudoProbe *Probe, FlowBlock *Block) {
    BBPseudoProbeToBlock[Probe] = Block;
  }

  enum MatchMethod : char {
    MATCH_EXACT = 0,
    MATCH_PROBE_EXACT,
    MATCH_PROBE_LOOSE,
    MATCH_OPCODE,
    MATCH_CALL,
    NO_MATCH
  };

  /// Find the most similar flow block for a profile block given its blended
  /// hash.
  std::pair<const FlowBlock *, MatchMethod>
  matchBlockStrict(BlendedBlockHash BlendedHash) {
    const auto &[Block, ExactHash] = matchWithOpcodes(BlendedHash);
    if (Block && ExactHash)
      return {Block, MATCH_EXACT};
    return {nullptr, NO_MATCH};
  }

  /// Find the most similar flow block for a profile block given pseudo probes.
  std::pair<const FlowBlock *, MatchMethod> matchBlockProbe(
      const ArrayRef<yaml::bolt::PseudoProbeInfo> PseudoProbes,
      const YAMLProfileReader::InlineTreeNodeMapTy &InlineTreeNodeMap) {
    const auto &[ProbeBlock, ExactProbe] =
        matchWithPseudoProbes(PseudoProbes, InlineTreeNodeMap);
    if (ProbeBlock)
      return {ProbeBlock, ExactProbe ? MATCH_PROBE_EXACT : MATCH_PROBE_LOOSE};
    return {nullptr, NO_MATCH};
  }

  /// Find the most similar flow block for a profile block given its hashes.
  std::pair<const FlowBlock *, MatchMethod>
  matchBlockLoose(BlendedBlockHash BlendedHash, uint64_t CallHash) {
    if (const FlowBlock *CallBlock = matchWithCalls(BlendedHash, CallHash))
      return {CallBlock, MATCH_CALL};
    if (const FlowBlock *OpcodeBlock = matchWithOpcodes(BlendedHash).first)
      return {OpcodeBlock, MATCH_OPCODE};
    return {nullptr, NO_MATCH};
  }

  /// Returns true if the two basic blocks (in the binary and in the profile)
  /// corresponding to the given hashes are matched to each other with high
  /// confidence.
  static bool isHighConfidenceMatch(BlendedBlockHash Hash1,
                                    BlendedBlockHash Hash2) {
    return Hash1.InstrHash == Hash2.InstrHash;
  }

private:
  using HashBlockPairType = std::pair<BlendedBlockHash, FlowBlock *>;
  std::unordered_map<uint16_t, std::vector<HashBlockPairType>> OpHashToBlocks;
  std::unordered_map<uint64_t, std::vector<HashBlockPairType>> CallHashToBlocks;
  DenseMap<const MCDecodedPseudoProbe *, FlowBlock *> BBPseudoProbeToBlock;

  // Uses OpcodeHash to find the most similar block for a given hash.
  std::pair<const FlowBlock *, bool>
  matchWithOpcodes(BlendedBlockHash BlendedHash) const {
    auto BlockIt = OpHashToBlocks.find(BlendedHash.OpcodeHash);
    if (BlockIt == OpHashToBlocks.end())
      return {nullptr, false};
    FlowBlock *BestBlock = nullptr;
    uint64_t BestDist = std::numeric_limits<uint64_t>::max();
    BlendedBlockHash BestHash;
    for (const auto &[Hash, Block] : BlockIt->second) {
      uint64_t Dist = Hash.distance(BlendedHash);
      if (BestBlock == nullptr || Dist < BestDist) {
        BestDist = Dist;
        BestBlock = Block;
        BestHash = Hash;
      }
    }
    return {BestBlock, isHighConfidenceMatch(BestHash, BlendedHash)};
  }

  // Uses CallHash to find the most similar block for a given hash.
  const FlowBlock *matchWithCalls(BlendedBlockHash BlendedHash,
                                  uint64_t CallHash) const {
    if (!CallHash)
      return nullptr;
    auto BlockIt = CallHashToBlocks.find(CallHash);
    if (BlockIt == CallHashToBlocks.end())
      return nullptr;
    FlowBlock *BestBlock = nullptr;
    uint64_t BestDist = std::numeric_limits<uint64_t>::max();
    for (const auto &[Hash, Block] : BlockIt->second) {
      uint64_t Dist = Hash.OpcodeHash > BlendedHash.OpcodeHash
                          ? Hash.OpcodeHash - BlendedHash.OpcodeHash
                          : BlendedHash.OpcodeHash - Hash.OpcodeHash;
      if (BestBlock == nullptr || Dist < BestDist) {
        BestDist = Dist;
        BestBlock = Block;
      }
    }
    return BestBlock;
  }

  /// Matches a profile block with a binary block based on pseudo probes.
  /// Returns the best matching block (or nullptr) and whether the match is
  /// unambiguous.
  std::pair<const FlowBlock *, bool> matchWithPseudoProbes(
      const ArrayRef<yaml::bolt::PseudoProbeInfo> BlockPseudoProbes,
      const YAMLProfileReader::InlineTreeNodeMapTy &InlineTreeNodeMap) const {

    if (!opts::StaleMatchingWithPseudoProbes)
      return {nullptr, false};

    DenseMap<const FlowBlock *, uint32_t> FlowBlockMatchCount;

    auto matchProfileProbeToBlock = [&](uint32_t NodeId,
                                        uint64_t ProbeId) -> const FlowBlock * {
      const MCDecodedPseudoProbeInlineTree *BinaryNode =
          InlineTreeNodeMap.getInlineTreeNode(NodeId);
      if (!BinaryNode)
        return nullptr;
      const MCDecodedPseudoProbe Dummy(0, ProbeId, PseudoProbeType::Block, 0, 0,
                                       nullptr);
      ArrayRef<MCDecodedPseudoProbe> BinaryProbes = BinaryNode->getProbes();
      auto BinaryProbeIt = llvm::lower_bound(
          BinaryProbes, Dummy, [](const auto &LHS, const auto &RHS) {
            return LHS.getIndex() < RHS.getIndex();
          });
      if (BinaryProbeIt == BinaryNode->getProbes().end() ||
          BinaryProbeIt->getIndex() != ProbeId)
        return nullptr;
      auto It = BBPseudoProbeToBlock.find(&*BinaryProbeIt);
      if (It == BBPseudoProbeToBlock.end())
        return nullptr;
      return It->second;
    };

    auto matchPseudoProbeInfo = [&](const yaml::bolt::PseudoProbeInfo
                                        &ProfileProbe,
                                    uint32_t NodeId) {
      for (uint64_t Index = 0; Index < 64; ++Index)
        if (ProfileProbe.BlockMask & 1ull << Index)
          ++FlowBlockMatchCount[matchProfileProbeToBlock(NodeId, Index + 1)];
      for (const auto &ProfileProbes :
           {ProfileProbe.BlockProbes, ProfileProbe.IndCallProbes,
            ProfileProbe.CallProbes})
        for (uint64_t ProfileProbe : ProfileProbes)
          ++FlowBlockMatchCount[matchProfileProbeToBlock(NodeId, ProfileProbe)];
    };

    for (const yaml::bolt::PseudoProbeInfo &ProfileProbe : BlockPseudoProbes) {
      if (!ProfileProbe.InlineTreeNodes.empty())
        for (uint32_t ProfileInlineTreeNode : ProfileProbe.InlineTreeNodes)
          matchPseudoProbeInfo(ProfileProbe, ProfileInlineTreeNode);
      else
        matchPseudoProbeInfo(ProfileProbe, ProfileProbe.InlineTreeIndex);
    }
    uint32_t BestMatchCount = 0;
    uint32_t TotalMatchCount = 0;
    const FlowBlock *BestMatchBlock = nullptr;
    for (const auto &[FlowBlock, Count] : FlowBlockMatchCount) {
      TotalMatchCount += Count;
      if (Count < BestMatchCount || (Count == BestMatchCount && BestMatchBlock))
        continue;
      BestMatchBlock = FlowBlock;
      BestMatchCount = Count;
    }
    return {BestMatchBlock, BestMatchCount == TotalMatchCount};
  }
};
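
// A minimal usage sketch of StaleMatcher (hypothetical values), mirroring the
// strict-then-probe-then-loose order that matchWeights() below follows:
//
//   StaleMatcher Matcher;
//   Matcher.init(Blocks, BlendedHashes, CallHashes);
//   auto [Block, Method] = Matcher.matchBlockStrict(YamlHash);
//   if (!Block)
//     std::tie(Block, Method) = Matcher.matchBlockLoose(YamlHash, CallHash);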

void BinaryFunction::computeBlockHashes(HashFunction HashFunction) const {
  if (size() == 0)
    return;

  assert(hasCFG() && "the function is expected to have CFG");

  std::vector<BlendedBlockHash> BlendedHashes(BasicBlocks.size());
  std::vector<uint64_t> OpcodeHashes(BasicBlocks.size());
  // Initialize hash components.
  for (size_t I = 0; I < BasicBlocks.size(); I++) {
    const BinaryBasicBlock *BB = BasicBlocks[I];
    assert(BB->getIndex() == I && "incorrect block index");
    BlendedHashes[I].Offset = BB->getOffset();
    // Hashing complete instructions.
    std::string InstrHashStr = hashBlock(
        BC, *BB, [&](const MCOperand &Op) { return hashInstOperand(BC, Op); });
    if (HashFunction == HashFunction::StdHash) {
      uint64_t InstrHash = std::hash<std::string>{}(InstrHashStr);
      BlendedHashes[I].InstrHash = (uint16_t)hash_value(InstrHash);
    } else if (HashFunction == HashFunction::XXH3) {
      uint64_t InstrHash = llvm::xxh3_64bits(InstrHashStr);
      BlendedHashes[I].InstrHash = (uint16_t)InstrHash;
    } else {
      llvm_unreachable("Unhandled HashFunction");
    }
    // Hashing opcodes.
    std::string OpcodeHashStr = hashBlockLoose(BC, *BB);
    if (HashFunction == HashFunction::StdHash) {
      OpcodeHashes[I] = std::hash<std::string>{}(OpcodeHashStr);
      BlendedHashes[I].OpcodeHash = (uint16_t)hash_value(OpcodeHashes[I]);
    } else if (HashFunction == HashFunction::XXH3) {
      OpcodeHashes[I] = llvm::xxh3_64bits(OpcodeHashStr);
      BlendedHashes[I].OpcodeHash = (uint16_t)OpcodeHashes[I];
    } else {
      llvm_unreachable("Unhandled HashFunction");
    }
  }

  // Initialize neighbor hash.
  for (size_t I = 0; I < BasicBlocks.size(); I++) {
    const BinaryBasicBlock *BB = BasicBlocks[I];
    // Append hashes of successors.
    uint64_t Hash = 0;
    for (BinaryBasicBlock *SuccBB : BB->successors()) {
      uint64_t SuccHash = OpcodeHashes[SuccBB->getIndex()];
      Hash = hashing::detail::hash_16_bytes(Hash, SuccHash);
    }
    if (HashFunction == HashFunction::StdHash) {
      // Compatibility with old behavior.
      BlendedHashes[I].SuccHash = (uint8_t)hash_value(Hash);
    } else {
      BlendedHashes[I].SuccHash = (uint8_t)Hash;
    }

    // Append hashes of predecessors.
    Hash = 0;
    for (BinaryBasicBlock *PredBB : BB->predecessors()) {
      uint64_t PredHash = OpcodeHashes[PredBB->getIndex()];
      Hash = hashing::detail::hash_16_bytes(Hash, PredHash);
    }
    if (HashFunction == HashFunction::StdHash) {
      // Compatibility with old behavior.
      BlendedHashes[I].PredHash = (uint8_t)hash_value(Hash);
    } else {
      BlendedHashes[I].PredHash = (uint8_t)Hash;
    }
  }

  // Assign hashes.
  for (size_t I = 0; I < BasicBlocks.size(); I++) {
    const BinaryBasicBlock *BB = BasicBlocks[I];
    BB->setHash(BlendedHashes[I].combine());
  }
}
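
// Note (informal): only the low 16 (or 8) bits of each component survive the
// truncations above, so unrelated blocks can collide on OpcodeHash; the
// StaleMatcher breaks such ties via BlendedBlockHash::distance() rather than
// relying on hash uniqueness.
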
// TODO: mediate the difference between flow function construction here in BOLT
// and in the compiler by splitting blocks with exception throwing calls at the
// call and adding the landing pad as the successor.
/// Create a wrapper flow function to use with the profile inference algorithm,
/// and initialize its jumps and metadata.
FlowFunction
createFlowFunction(const BinaryFunction::BasicBlockOrderType &BlockOrder) {
  FlowFunction Func;

  // Add a special "dummy" source so that there is always a unique entry point.
  FlowBlock EntryBlock;
  EntryBlock.Index = 0;
  Func.Blocks.push_back(EntryBlock);

  // Create a FlowBlock for every basic block in the binary function.
  for (const BinaryBasicBlock *BB : BlockOrder) {
    Func.Blocks.emplace_back();
    FlowBlock &Block = Func.Blocks.back();
    Block.Index = Func.Blocks.size() - 1;
    (void)BB;
    assert(Block.Index == BB->getIndex() + 1 &&
           "incorrectly assigned basic block index");
  }

  // Add a special "dummy" sink block so there is always a unique sink.
  FlowBlock SinkBlock;
  SinkBlock.Index = Func.Blocks.size();
  Func.Blocks.push_back(SinkBlock);

  // Create a FlowJump for each jump between basic blocks in the binary
  // function.
  std::vector<uint64_t> InDegree(Func.Blocks.size(), 0);
  for (const BinaryBasicBlock *SrcBB : BlockOrder) {
    std::unordered_set<const BinaryBasicBlock *> UniqueSuccs;
    // Collect regular jumps
    for (const BinaryBasicBlock *DstBB : SrcBB->successors()) {
      // Ignoring parallel edges
      if (UniqueSuccs.find(DstBB) != UniqueSuccs.end())
        continue;

      Func.Jumps.emplace_back();
      FlowJump &Jump = Func.Jumps.back();
      Jump.Source = SrcBB->getIndex() + 1;
      Jump.Target = DstBB->getIndex() + 1;
      InDegree[Jump.Target]++;
      UniqueSuccs.insert(DstBB);
    }
    // TODO: set jump from exit block to landing pad to Unlikely.
    // If the block is an exit, add a dummy edge from it to the sink block.
    if (UniqueSuccs.empty()) {
      Func.Jumps.emplace_back();
      FlowJump &Jump = Func.Jumps.back();
      Jump.Source = SrcBB->getIndex() + 1;
      Jump.Target = Func.Blocks.size() - 1;
      InDegree[Jump.Target]++;
    }

    // Collect jumps to landing pads
    for (const BinaryBasicBlock *DstBB : SrcBB->landing_pads()) {
      // Ignoring parallel edges
      if (UniqueSuccs.find(DstBB) != UniqueSuccs.end())
        continue;

      Func.Jumps.emplace_back();
      FlowJump &Jump = Func.Jumps.back();
      Jump.Source = SrcBB->getIndex() + 1;
      Jump.Target = DstBB->getIndex() + 1;
      InDegree[Jump.Target]++;
      UniqueSuccs.insert(DstBB);
    }
  }

  // Add dummy edges to the extra sources. If there are multiple entry blocks,
  // add an unlikely edge from 0 to the subsequent ones. Skips the sink block.
  assert(InDegree[0] == 0 && "dummy entry blocks shouldn't have predecessors");
  for (uint64_t I = 1; I < Func.Blocks.size() - 1; I++) {
    const BinaryBasicBlock *BB = BlockOrder[I - 1];
    if (BB->isEntryPoint() || InDegree[I] == 0) {
      Func.Jumps.emplace_back();
      FlowJump &Jump = Func.Jumps.back();
      Jump.Source = 0;
      Jump.Target = I;
      if (!BB->isEntryPoint())
        Jump.IsUnlikely = true;
    }
  }

  // Create the necessary metadata for the flow function
  for (FlowJump &Jump : Func.Jumps) {
    assert(Jump.Source < Func.Blocks.size());
    Func.Blocks[Jump.Source].SuccJumps.push_back(&Jump);
    assert(Jump.Target < Func.Blocks.size());
    Func.Blocks[Jump.Target].PredJumps.push_back(&Jump);
  }
  return Func;
}
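
// For instance (illustrative): a two-block function B0 -> B1, where B1
// returns, yields flow blocks 0 (dummy source), 1 (B0), 2 (B1), and
// 3 (dummy sink), with jumps 0->1 (entry), 1->2, and 2->3 (exit).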

/// Assign initial block/jump weights based on the stale profile data. The goal
/// is to extract as much information from the stale profile as possible. Here
/// we assume that each basic block is specified via a hash value computed from
/// its content, and that the hashes of unchanged basic blocks stay the same
/// across different revisions of the binary. Blocks may also carry pseudo
/// probe information, in both the profile and the binary, which is used for
/// matching. Whenever there is a count in the profile with a hash
/// corresponding to one of the basic blocks in the binary, the count is
/// "matched" to the block. Similarly, if both the source and the target of an
/// edge count in the profile are matched to blocks in the binary, the count is
/// recorded on the corresponding jump in the CFG.
size_t matchWeights(
    BinaryContext &BC, const BinaryFunction::BasicBlockOrderType &BlockOrder,
    const yaml::bolt::BinaryFunctionProfile &YamlBF, FlowFunction &Func,
    HashFunction HashFunction, YAMLProfileReader::ProfileLookupMap &IdToYamlBF,
    const BinaryFunction &BF,
    const ArrayRef<YAMLProfileReader::ProbeMatchSpec> ProbeMatchSpecs) {

  assert(Func.Blocks.size() == BlockOrder.size() + 2);

  std::vector<uint64_t> CallHashes;
  std::vector<FlowBlock *> Blocks;
  std::vector<BlendedBlockHash> BlendedHashes;
  for (uint64_t I = 0; I < BlockOrder.size(); I++) {
    const BinaryBasicBlock *BB = BlockOrder[I];
    assert(BB->getHash() != 0 && "empty hash of BinaryBasicBlock");

    std::string CallHashStr = hashBlockCalls(BC, *BB);
    if (CallHashStr.empty()) {
      CallHashes.push_back(0);
    } else {
      if (HashFunction == HashFunction::StdHash)
        CallHashes.push_back(std::hash<std::string>{}(CallHashStr));
      else if (HashFunction == HashFunction::XXH3)
        CallHashes.push_back(llvm::xxh3_64bits(CallHashStr));
      else
        llvm_unreachable("Unhandled HashFunction");
    }

    Blocks.push_back(&Func.Blocks[I + 1]);
    BlendedBlockHash BlendedHash(BB->getHash());
    BlendedHashes.push_back(BlendedHash);
    LLVM_DEBUG(dbgs() << "BB with index " << I << " has hash = "
                      << Twine::utohexstr(BB->getHash()) << "\n");
  }
  StaleMatcher Matcher;
  // Collects function pseudo probes for use in the StaleMatcher.
  if (opts::StaleMatchingWithPseudoProbes) {
    const MCPseudoProbeDecoder *Decoder = BC.getPseudoProbeDecoder();
    assert(Decoder &&
           "If pseudo probes are in use, pseudo probe decoder should exist");
    const AddressProbesMap &ProbeMap = Decoder->getAddress2ProbesMap();
    const uint64_t FuncAddr = BF.getAddress();
    for (const MCDecodedPseudoProbe &Probe :
         ProbeMap.find(FuncAddr, FuncAddr + BF.getSize()))
      if (const BinaryBasicBlock *BB =
              BF.getBasicBlockContainingOffset(Probe.getAddress() - FuncAddr))
        Matcher.mapProbeToBB(&Probe, Blocks[BB->getIndex()]);
  }
  Matcher.init(Blocks, BlendedHashes, CallHashes);

  using FlowBlockTy =
      std::pair<const FlowBlock *, const yaml::bolt::BinaryBasicBlockProfile *>;
  using ProfileBlockMatchMap = DenseMap<uint32_t, FlowBlockTy>;
  // Binary profile => block index => matched block + its block profile
  DenseMap<const yaml::bolt::BinaryFunctionProfile *, ProfileBlockMatchMap>
      MatchedBlocks;

  // Map of FlowBlock and matching method.
  DenseMap<const FlowBlock *, StaleMatcher::MatchMethod> MatchedFlowBlocks;

  auto addMatchedBlock =
      [&](std::pair<const FlowBlock *, StaleMatcher::MatchMethod> BlockMethod,
          const yaml::bolt::BinaryFunctionProfile &YamlBP,
          const yaml::bolt::BinaryBasicBlockProfile &YamlBB) {
        const auto &[MatchedBlock, Method] = BlockMethod;
        if (!MatchedBlock)
          return;
        // Don't override earlier matches
        if (MatchedFlowBlocks.contains(MatchedBlock))
          return;
        MatchedFlowBlocks.try_emplace(MatchedBlock, Method);
        MatchedBlocks[&YamlBP][YamlBB.Index] = {MatchedBlock, &YamlBB};
      };

  // Match blocks from the profile to the blocks in the CFG by strict hash.
  for (const yaml::bolt::BinaryBasicBlockProfile &YamlBB : YamlBF.Blocks) {
    // Update matching stats.
    ++BC.Stats.NumStaleBlocks;
    BC.Stats.StaleSampleCount += YamlBB.ExecCount;

    assert(YamlBB.Hash != 0 && "empty hash of BinaryBasicBlockProfile");
    BlendedBlockHash YamlHash(YamlBB.Hash);
    addMatchedBlock(Matcher.matchBlockStrict(YamlHash), YamlBF, YamlBB);
  }
  // Match blocks from the profile to the blocks in the CFG by pseudo probes.
  for (const auto &[InlineNodeMap, YamlBP] : ProbeMatchSpecs) {
    for (const yaml::bolt::BinaryBasicBlockProfile &BB : YamlBP.get().Blocks)
      if (!BB.PseudoProbes.empty())
        addMatchedBlock(Matcher.matchBlockProbe(BB.PseudoProbes, InlineNodeMap),
                        YamlBP, BB);
  }
  // Match blocks from the profile to the blocks in the CFG with loose methods.
  for (const yaml::bolt::BinaryBasicBlockProfile &YamlBB : YamlBF.Blocks) {
    assert(YamlBB.Hash != 0 && "empty hash of BinaryBasicBlockProfile");
    BlendedBlockHash YamlHash(YamlBB.Hash);

    std::string CallHashStr = hashBlockCalls(IdToYamlBF, YamlBB);
    uint64_t CallHash = 0;
    if (!CallHashStr.empty()) {
      if (HashFunction == HashFunction::StdHash)
        CallHash = std::hash<std::string>{}(CallHashStr);
      else if (HashFunction == HashFunction::XXH3)
        CallHash = llvm::xxh3_64bits(CallHashStr);
      else
        llvm_unreachable("Unhandled HashFunction");
    }
    auto [MatchedBlock, Method] = Matcher.matchBlockLoose(YamlHash, CallHash);
    if (MatchedBlock == nullptr && YamlBB.Index == 0) {
      MatchedBlock = Blocks[0];
      // Report as loose match
      Method = StaleMatcher::MATCH_OPCODE;
    }
    if (!MatchedBlock) {
      LLVM_DEBUG(dbgs() << "Couldn't match yaml block (bid = " << YamlBB.Index
                        << ")" << " with hash " << Twine::utohexstr(YamlBB.Hash)
                        << "\n");
      continue;
    }
    addMatchedBlock({MatchedBlock, Method}, YamlBF, YamlBB);
  }

  // Match jumps from the profile to the jumps in the CFG
  std::vector<uint64_t> OutWeight(Func.Blocks.size(), 0);
  std::vector<uint64_t> InWeight(Func.Blocks.size(), 0);

  for (const auto &[YamlBF, MatchMap] : MatchedBlocks) {
    for (const auto &[YamlBBIdx, FlowBlockProfile] : MatchMap) {
      const auto &[MatchedBlock, YamlBB] = FlowBlockProfile;
      StaleMatcher::MatchMethod Method = MatchedFlowBlocks.lookup(MatchedBlock);
      BlendedBlockHash BinHash = BlendedHashes[MatchedBlock->Index - 1];
      LLVM_DEBUG(dbgs() << "Matched yaml block (bid = " << YamlBBIdx << ")"
                        << " with hash " << Twine::utohexstr(YamlBB->Hash)
                        << " to BB (index = " << MatchedBlock->Index - 1 << ")"
                        << " with hash " << Twine::utohexstr(BinHash.combine())
                        << "\n");
      (void)BinHash;
      uint64_t ExecCount = YamlBB->ExecCount;
      // Update matching stats accounting for the matched block.
      switch (Method) {
      case StaleMatcher::MATCH_EXACT:
        ++BC.Stats.NumExactMatchedBlocks;
        BC.Stats.ExactMatchedSampleCount += ExecCount;
        LLVM_DEBUG(dbgs() << "  exact match\n");
        break;
      case StaleMatcher::MATCH_PROBE_EXACT:
        ++BC.Stats.NumPseudoProbeExactMatchedBlocks;
        BC.Stats.PseudoProbeExactMatchedSampleCount += ExecCount;
        LLVM_DEBUG(dbgs() << "  exact pseudo probe match\n");
        break;
      case StaleMatcher::MATCH_PROBE_LOOSE:
        ++BC.Stats.NumPseudoProbeLooseMatchedBlocks;
        BC.Stats.PseudoProbeLooseMatchedSampleCount += ExecCount;
        LLVM_DEBUG(dbgs() << "  loose pseudo probe match\n");
        break;
      case StaleMatcher::MATCH_CALL:
        ++BC.Stats.NumCallMatchedBlocks;
        BC.Stats.CallMatchedSampleCount += ExecCount;
        LLVM_DEBUG(dbgs() << "  call match\n");
        break;
      case StaleMatcher::MATCH_OPCODE:
        ++BC.Stats.NumLooseMatchedBlocks;
        BC.Stats.LooseMatchedSampleCount += ExecCount;
        LLVM_DEBUG(dbgs() << "  loose match\n");
        break;
      case StaleMatcher::NO_MATCH:
        LLVM_DEBUG(dbgs() << "  no match\n");
      }
    }

    for (const yaml::bolt::BinaryBasicBlockProfile &YamlBB : YamlBF->Blocks) {
      for (const yaml::bolt::SuccessorInfo &YamlSI : YamlBB.Successors) {
        if (YamlSI.Count == 0)
          continue;

        // Try to find the jump for a given (src, dst) pair from the profile
        // and assign the jump weight based on the profile count
        const uint64_t SrcIndex = YamlBB.Index;
        const uint64_t DstIndex = YamlSI.Index;

        const FlowBlock *MatchedSrcBlock = MatchMap.lookup(SrcIndex).first;
        const FlowBlock *MatchedDstBlock = MatchMap.lookup(DstIndex).first;

        if (MatchedSrcBlock != nullptr && MatchedDstBlock != nullptr) {
          // Find a jump between the two blocks
          FlowJump *Jump = nullptr;
          for (FlowJump *SuccJump : MatchedSrcBlock->SuccJumps) {
            if (SuccJump->Target == MatchedDstBlock->Index) {
              Jump = SuccJump;
              break;
            }
          }
          // Assign the weight if the corresponding jump is found
          if (Jump != nullptr) {
            Jump->Weight = YamlSI.Count;
            Jump->HasUnknownWeight = false;
          }
        }
        // Assign the weight for the src block, if it is found
        if (MatchedSrcBlock != nullptr)
          OutWeight[MatchedSrcBlock->Index] += YamlSI.Count;
        // Assign the weight for the dst block, if it is found
        if (MatchedDstBlock != nullptr)
          InWeight[MatchedDstBlock->Index] += YamlSI.Count;
      }
    }
  }

  // Assign block counts based on in-/out-jumps
  for (FlowBlock &Block : Func.Blocks) {
    if (OutWeight[Block.Index] == 0 && InWeight[Block.Index] == 0) {
      assert(Block.HasUnknownWeight && "unmatched block with a positive count");
      continue;
    }
    Block.HasUnknownWeight = false;
    Block.Weight = std::max(OutWeight[Block.Index], InWeight[Block.Index]);
  }

  return MatchedBlocks[&YamlBF].size();
}

/// The function finds all blocks that are (i) not reachable from the entry
/// block or (ii) do not have a path to an exit, and marks all such blocks
/// 'cold' so that profi does not send any flow to them.
void preprocessUnreachableBlocks(FlowFunction &Func) {
  const uint64_t NumBlocks = Func.Blocks.size();

  // Start BFS from the source
  std::queue<uint64_t> Queue;
  std::vector<bool> VisitedEntry(NumBlocks, false);
  for (uint64_t I = 0; I < NumBlocks; I++) {
    FlowBlock &Block = Func.Blocks[I];
    if (Block.isEntry()) {
      Queue.push(I);
      VisitedEntry[I] = true;
      break;
    }
  }
  while (!Queue.empty()) {
    const uint64_t Src = Queue.front();
    Queue.pop();
    for (FlowJump *Jump : Func.Blocks[Src].SuccJumps) {
      const uint64_t Dst = Jump->Target;
      if (!VisitedEntry[Dst]) {
        Queue.push(Dst);
        VisitedEntry[Dst] = true;
      }
    }
  }

  // Start BFS from all sinks
  std::vector<bool> VisitedExit(NumBlocks, false);
  for (uint64_t I = 0; I < NumBlocks; I++) {
    FlowBlock &Block = Func.Blocks[I];
    if (Block.isExit() && VisitedEntry[I]) {
      Queue.push(I);
      VisitedExit[I] = true;
    }
  }
  while (!Queue.empty()) {
    const uint64_t Src = Queue.front();
    Queue.pop();
    for (FlowJump *Jump : Func.Blocks[Src].PredJumps) {
      const uint64_t Dst = Jump->Source;
      if (!VisitedExit[Dst]) {
        Queue.push(Dst);
        VisitedExit[Dst] = true;
      }
    }
  }

  // Zero the weight of all blocks that fail either check so that no flow is
  // sent to them
  for (uint64_t I = 0; I < NumBlocks; I++) {
    FlowBlock &Block = Func.Blocks[I];
    if (Block.Weight == 0)
      continue;
    if (!VisitedEntry[I] || !VisitedExit[I]) {
      Block.Weight = 0;
      Block.HasUnknownWeight = true;
      Block.IsUnlikely = true;
      for (FlowJump *Jump : Block.SuccJumps) {
        if (Jump->Source == Block.Index && Jump->Target == Block.Index) {
          Jump->Weight = 0;
          Jump->HasUnknownWeight = true;
          Jump->IsUnlikely = true;
        }
      }
    }
  }
}
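
// As an example (illustrative), a block trapped in an infinite loop has no
// path to any exit: the forward BFS above visits it, but the backward BFS
// from the sinks does not, so its weight is zeroed and profi routes no flow
// through it.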

/// Decide if stale profile matching can be applied for a given function.
/// Currently we skip inference for (very) large instances and for instances
/// having "unexpected" control flow (e.g., having no sink basic blocks).
bool canApplyInference(const FlowFunction &Func,
                       const yaml::bolt::BinaryFunctionProfile &YamlBF,
                       const uint64_t MatchedBlocks) {
  if (Func.Blocks.size() > opts::StaleMatchingMaxFuncSize)
    return false;

  if (MatchedBlocks * 100 <
      opts::StaleMatchingMinMatchedBlock * YamlBF.Blocks.size())
    return false;

  // Returns false if the artificial sink block has no predecessors, meaning
  // there are no exit blocks.
  if (Func.Blocks[Func.Blocks.size() - 1].isEntry())
    return false;

  return true;
}
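
// E.g. (illustrative), with --stale-matching-min-matched-block=50, a profile
// with 10 blocks requires at least 5 matched blocks (5 * 100 >= 50 * 10);
// with the default threshold of 0, any number of matches is accepted.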

/// Apply the profile inference algorithm for a given flow function.
void applyInference(FlowFunction &Func) {
  ProfiParams Params;
  // Set the params from the command-line flags.
  Params.EvenFlowDistribution = opts::StaleMatchingEvenFlowDistribution;
  Params.RebalanceUnknown = opts::StaleMatchingRebalanceUnknown;
  Params.JoinIslands = opts::StaleMatchingJoinIslands;

  Params.CostBlockInc = opts::StaleMatchingCostBlockInc;
  Params.CostBlockEntryInc = opts::StaleMatchingCostBlockInc;
  Params.CostBlockDec = opts::StaleMatchingCostBlockDec;
  Params.CostBlockEntryDec = opts::StaleMatchingCostBlockDec;
  Params.CostBlockUnknownInc = opts::StaleMatchingCostBlockUnknownInc;

  Params.CostJumpInc = opts::StaleMatchingCostJumpInc;
  Params.CostJumpFTInc = opts::StaleMatchingCostJumpInc;
  Params.CostJumpDec = opts::StaleMatchingCostJumpDec;
  Params.CostJumpFTDec = opts::StaleMatchingCostJumpDec;
  Params.CostJumpUnknownInc = opts::StaleMatchingCostJumpUnknownInc;
  Params.CostJumpUnknownFTInc = opts::StaleMatchingCostJumpUnknownFTInc;

  applyFlowInference(Params, Func);
}

/// Collect inferred counts from the flow function and update annotations in
/// the binary function.
void assignProfile(BinaryFunction &BF,
                   const BinaryFunction::BasicBlockOrderType &BlockOrder,
                   FlowFunction &Func) {
  BinaryContext &BC = BF.getBinaryContext();

  assert(Func.Blocks.size() == BlockOrder.size() + 2);
  for (uint64_t I = 0; I < BlockOrder.size(); I++) {
    FlowBlock &Block = Func.Blocks[I + 1];
    BinaryBasicBlock *BB = BlockOrder[I];

    // Update block's count
    BB->setExecutionCount(Block.Flow);

    // Update jump counts: (i) clean existing counts and then (ii) set new ones
    auto BI = BB->branch_info_begin();
    for (const BinaryBasicBlock *DstBB : BB->successors()) {
      (void)DstBB;
      BI->Count = 0;
      BI->MispredictedCount = 0;
      ++BI;
    }
    for (FlowJump *Jump : Block.SuccJumps) {
      if (Jump->IsUnlikely)
        continue;
      if (Jump->Flow == 0)
        continue;

      // Skips the artificial sink block.
      if (Jump->Target == Func.Blocks.size() - 1)
        continue;
      BinaryBasicBlock &SuccBB = *BlockOrder[Jump->Target - 1];
      // Check if the edge corresponds to a regular jump or a landing pad
      if (BB->getSuccessor(SuccBB.getLabel())) {
        BinaryBasicBlock::BinaryBranchInfo &BI = BB->getBranchInfo(SuccBB);
        BI.Count += Jump->Flow;
      } else {
        BinaryBasicBlock *LP = BB->getLandingPad(SuccBB.getLabel());
        if (LP && LP->getKnownExecutionCount() < Jump->Flow)
          LP->setExecutionCount(Jump->Flow);
      }
    }

    // Update call-site annotations
    auto setOrUpdateAnnotation = [&](MCInst &Instr, StringRef Name,
                                     uint64_t Count) {
      if (BC.MIB->hasAnnotation(Instr, Name))
        BC.MIB->removeAnnotation(Instr, Name);
      // Do not add zero-count annotations
      if (Count == 0)
        return;
      BC.MIB->addAnnotation(Instr, Name, Count);
    };

    for (MCInst &Instr : *BB) {
      // Ignore pseudo instructions
      if (BC.MIB->isPseudo(Instr))
        continue;
      // Ignore jump tables
      const MCInst *LastInstr = BB->getLastNonPseudoInstr();
      if (BC.MIB->getJumpTable(*LastInstr) && LastInstr == &Instr)
        continue;

      if (BC.MIB->isIndirectCall(Instr) || BC.MIB->isIndirectBranch(Instr)) {
        auto &ICSP = BC.MIB->getOrCreateAnnotationAs<IndirectCallSiteProfile>(
            Instr, "CallProfile");
        if (!ICSP.empty()) {
          // Try to evenly distribute the counts among the call sites
          const uint64_t TotalCount = Block.Flow;
          const uint64_t NumSites = ICSP.size();
          for (uint64_t Idx = 0; Idx < ICSP.size(); Idx++) {
            IndirectCallProfile &CSP = ICSP[Idx];
            uint64_t CountPerSite = TotalCount / NumSites;
            // When counts cannot be exactly distributed, increase by 1 the
            // counts of the first (TotalCount % NumSites) call sites
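            // E.g., TotalCount = 10 over NumSites = 3 yields per-site counts
            // 4, 3, 3.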
            if (Idx < TotalCount % NumSites)
              CountPerSite++;
            CSP.Count = CountPerSite;
          }
        } else {
          ICSP.emplace_back(nullptr, Block.Flow, 0);
        }
      } else if (BC.MIB->getConditionalTailCall(Instr)) {
        // We don't know exactly how many times the conditional tail call is
        // executed; conservatively, set it to the count of the block
        setOrUpdateAnnotation(Instr, "CTCTakenCount", Block.Flow);
        BC.MIB->removeAnnotation(Instr, "CTCMispredCount");
      } else if (BC.MIB->isCall(Instr)) {
        setOrUpdateAnnotation(Instr, "Count", Block.Flow);
      }
    }
  }

  // Update the function's execution count and mark the function inferred.
  BF.setExecutionCount(Func.Blocks[0].Flow);
  BF.setHasInferredProfile(true);
}

bool YAMLProfileReader::inferStaleProfile(
    BinaryFunction &BF, const yaml::bolt::BinaryFunctionProfile &YamlBF,
    const ArrayRef<ProbeMatchSpec> ProbeMatchSpecs) {

  NamedRegionTimer T("inferStaleProfile", "stale profile inference", "rewrite",
                     "Rewrite passes", opts::TimeRewrite);

  if (!BF.hasCFG())
    return false;

  LLVM_DEBUG(dbgs() << "BOLT-INFO: applying profile inference for "
                    << "\"" << BF.getPrintName() << "\"\n");

  // Make sure that block hashes are up to date.
  BF.computeBlockHashes(YamlBP.Header.HashFunction);

  const BinaryFunction::BasicBlockOrderType BlockOrder(
      BF.getLayout().block_begin(), BF.getLayout().block_end());

  // Create a wrapper flow function to use with the profile inference
  // algorithm.
  FlowFunction Func = createFlowFunction(BlockOrder);

  // Match as many block/jump counts from the stale profile as possible.
  size_t MatchedBlocks =
      matchWeights(BF.getBinaryContext(), BlockOrder, YamlBF, Func,
                   YamlBP.Header.HashFunction, IdToYamLBF, BF, ProbeMatchSpecs);

  // Adjust the flow function by marking unreachable blocks Unlikely so that
  // they don't get any counts assigned.
  preprocessUnreachableBlocks(Func);

  // Check if profile inference can be applied for the instance.
  if (!canApplyInference(Func, YamlBF, MatchedBlocks))
    return false;

  // Apply the profile inference algorithm.
  applyInference(Func);

  // Collect inferred counts and update function annotations.
  assignProfile(BF, BlockOrder, Func);

  // As of now, we always mark the binary function as having a "correct"
  // profile. In the future, we may discard the results for instances with poor
  // inference metrics and keep such functions un-optimized.
  return true;
}

} // end namespace bolt
} // end namespace llvm