xref: /llvm-project/bolt/lib/Passes/MCF.cpp (revision f3dc732b3623c86164f9d95f7563f982cecc5558)
1 //===- bolt/Passes/MCF.cpp ------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements functions for solving minimum-cost flow problem.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "bolt/Passes/MCF.h"
14 #include "bolt/Core/BinaryFunction.h"
15 #include "bolt/Core/ParallelUtilities.h"
16 #include "bolt/Passes/DataflowInfoManager.h"
17 #include "bolt/Utils/CommandLineOpts.h"
18 #include "llvm/ADT/DenseMap.h"
19 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/Support/CommandLine.h"
21 #include <algorithm>
22 #include <vector>
23 
24 #undef  DEBUG_TYPE
25 #define DEBUG_TYPE "mcf"
26 
27 using namespace llvm;
28 using namespace bolt;
29 
30 namespace opts {
31 
32 extern cl::OptionCategory BoltOptCategory;
33 
34 static cl::opt<bool> IterativeGuess(
35     "iterative-guess",
36     cl::desc("in non-LBR mode, guess edge counts using iterative technique"),
37     cl::Hidden, cl::cat(BoltOptCategory));
38 } // namespace opts
39 
40 namespace llvm {
41 namespace bolt {
42 
43 namespace {
44 
45 // Edge Weight Inference Heuristic
46 //
47 // We start by maintaining the invariant used in LBR mode where the sum of
48 // pred edges count is equal to the block execution count. This loop will set
49 // pred edges count by balancing its own execution count in different pred
50 // edges. The weight of each edge is guessed by looking at how hot each pred
51 // block is (in terms of samples).
52 // There are two caveats in this approach. One is for critical edges and the
53 // other is for self-referencing blocks (loops of 1 BB). For critical edges,
54 // we can't infer the hotness of them based solely on pred BBs execution
55 // count. For each critical edge we look at the pred BB, then look at its
56 // succs to adjust its weight.
57 //
58 //    [ 60  ]       [ 25 ]
59 //       |      \     |
60 //    [ 10  ]       [ 75 ]
61 //
62 // The illustration above shows a critical edge \. We wish to adjust bb count
63 // 60 to 50 to properly determine the weight of the critical edge to be
64 // 50 / 75.
65 // For self-referencing edges, we attribute its weight by subtracting the
66 // current BB execution count by the sum of predecessors count if this result
67 // is non-negative.
68 using EdgeWeightMap =
69     DenseMap<std::pair<const BinaryBasicBlock *, const BinaryBasicBlock *>,
70              double>;
71 
72 template <class NodeT>
73 void updateEdgeWeight(EdgeWeightMap &EdgeWeights, const BinaryBasicBlock *A,
74                       const BinaryBasicBlock *B, double Weight);
75 
76 template <>
updateEdgeWeight(EdgeWeightMap & EdgeWeights,const BinaryBasicBlock * A,const BinaryBasicBlock * B,double Weight)77 void updateEdgeWeight<BinaryBasicBlock *>(EdgeWeightMap &EdgeWeights,
78                                           const BinaryBasicBlock *A,
79                                           const BinaryBasicBlock *B,
80                                           double Weight) {
81   EdgeWeights[std::make_pair(A, B)] = Weight;
82 }
83 
84 template <>
updateEdgeWeight(EdgeWeightMap & EdgeWeights,const BinaryBasicBlock * A,const BinaryBasicBlock * B,double Weight)85 void updateEdgeWeight<Inverse<BinaryBasicBlock *>>(EdgeWeightMap &EdgeWeights,
86                                                    const BinaryBasicBlock *A,
87                                                    const BinaryBasicBlock *B,
88                                                    double Weight) {
89   EdgeWeights[std::make_pair(B, A)] = Weight;
90 }
91 
92 template <class NodeT>
computeEdgeWeights(BinaryBasicBlock * BB,EdgeWeightMap & EdgeWeights)93 void computeEdgeWeights(BinaryBasicBlock *BB, EdgeWeightMap &EdgeWeights) {
94   typedef GraphTraits<NodeT> GraphT;
95   typedef GraphTraits<Inverse<NodeT>> InvTraits;
96 
97   double TotalChildrenCount = 0.0;
98   SmallVector<double, 4> ChildrenExecCount;
99   // First pass computes total children execution count that directly
100   // contribute to this BB.
101   for (typename GraphT::ChildIteratorType CI = GraphT::child_begin(BB),
102                                           E = GraphT::child_end(BB);
103        CI != E; ++CI) {
104     typename GraphT::NodeRef Child = *CI;
105     double ChildExecCount = Child->getExecutionCount();
106     // Is self-reference?
107     if (Child == BB) {
108       ChildExecCount = 0.0; // will fill this in second pass
109     } else if (GraphT::child_end(BB) - GraphT::child_begin(BB) > 1 &&
110                InvTraits::child_end(Child) - InvTraits::child_begin(Child) >
111                    1) {
112       // Handle critical edges. This will cause a skew towards crit edges, but
113       // it is a quick solution.
114       double CritWeight = 0.0;
115       uint64_t Denominator = 0;
116       for (typename InvTraits::ChildIteratorType
117                II = InvTraits::child_begin(Child),
118                IE = InvTraits::child_end(Child);
119            II != IE; ++II) {
120         typename GraphT::NodeRef N = *II;
121         Denominator += N->getExecutionCount();
122         if (N != BB)
123           continue;
124         CritWeight = N->getExecutionCount();
125       }
126       if (Denominator)
127         CritWeight /= static_cast<double>(Denominator);
128       ChildExecCount *= CritWeight;
129     }
130     ChildrenExecCount.push_back(ChildExecCount);
131     TotalChildrenCount += ChildExecCount;
132   }
133   // Second pass fixes the weight of a possible self-reference edge
134   uint32_t ChildIndex = 0;
135   for (typename GraphT::ChildIteratorType CI = GraphT::child_begin(BB),
136                                           E = GraphT::child_end(BB);
137        CI != E; ++CI) {
138     typename GraphT::NodeRef Child = *CI;
139     if (Child != BB) {
140       ++ChildIndex;
141       continue;
142     }
143     if (static_cast<double>(BB->getExecutionCount()) > TotalChildrenCount) {
144       ChildrenExecCount[ChildIndex] =
145           BB->getExecutionCount() - TotalChildrenCount;
146       TotalChildrenCount += ChildrenExecCount[ChildIndex];
147     }
148     break;
149   }
150   // Third pass finally assigns weights to edges
151   ChildIndex = 0;
152   for (typename GraphT::ChildIteratorType CI = GraphT::child_begin(BB),
153                                           E = GraphT::child_end(BB);
154        CI != E; ++CI) {
155     typename GraphT::NodeRef Child = *CI;
156     double Weight = 1 / (GraphT::child_end(BB) - GraphT::child_begin(BB));
157     if (TotalChildrenCount != 0.0)
158       Weight = ChildrenExecCount[ChildIndex] / TotalChildrenCount;
159     updateEdgeWeight<NodeT>(EdgeWeights, BB, Child, Weight);
160     ++ChildIndex;
161   }
162 }
163 
164 template <class NodeT>
computeEdgeWeights(BinaryFunction & BF,EdgeWeightMap & EdgeWeights)165 void computeEdgeWeights(BinaryFunction &BF, EdgeWeightMap &EdgeWeights) {
166   for (BinaryBasicBlock &BB : BF)
167     computeEdgeWeights<NodeT>(&BB, EdgeWeights);
168 }
169 
170 /// Make BB count match the sum of all incoming edges. If AllEdges is true,
171 /// make it match max(SumPredEdges, SumSuccEdges).
recalculateBBCounts(BinaryFunction & BF,bool AllEdges)172 void recalculateBBCounts(BinaryFunction &BF, bool AllEdges) {
173   for (BinaryBasicBlock &BB : BF) {
174     uint64_t TotalPredsEWeight = 0;
175     for (BinaryBasicBlock *Pred : BB.predecessors())
176       TotalPredsEWeight += Pred->getBranchInfo(BB).Count;
177 
178     if (TotalPredsEWeight > BB.getExecutionCount())
179       BB.setExecutionCount(TotalPredsEWeight);
180 
181     if (!AllEdges)
182       continue;
183 
184     uint64_t TotalSuccsEWeight = 0;
185     for (BinaryBasicBlock::BinaryBranchInfo &BI : BB.branch_info())
186       TotalSuccsEWeight += BI.Count;
187 
188     if (TotalSuccsEWeight > BB.getExecutionCount())
189       BB.setExecutionCount(TotalSuccsEWeight);
190   }
191 }
192 
193 // This is our main edge count guessing heuristic. Look at predecessors and
194 // assign a proportionally higher count to pred edges coming from blocks with
195 // a higher execution count in comparison with the other predecessor blocks,
196 // making SumPredEdges match the current BB count.
197 // If "UseSucc" is true, apply the same logic to successor edges as well. Since
198 // some successor edges may already have assigned a count, only update it if the
199 // new count is higher.
guessEdgeByRelHotness(BinaryFunction & BF,bool UseSucc,EdgeWeightMap & PredEdgeWeights,EdgeWeightMap & SuccEdgeWeights)200 void guessEdgeByRelHotness(BinaryFunction &BF, bool UseSucc,
201                            EdgeWeightMap &PredEdgeWeights,
202                            EdgeWeightMap &SuccEdgeWeights) {
203   for (BinaryBasicBlock &BB : BF) {
204     for (BinaryBasicBlock *Pred : BB.predecessors()) {
205       double RelativeExec = PredEdgeWeights[std::make_pair(Pred, &BB)];
206       RelativeExec *= BB.getExecutionCount();
207       BinaryBasicBlock::BinaryBranchInfo &BI = Pred->getBranchInfo(BB);
208       if (static_cast<uint64_t>(RelativeExec) > BI.Count)
209         BI.Count = static_cast<uint64_t>(RelativeExec);
210     }
211 
212     if (!UseSucc)
213       continue;
214 
215     auto BI = BB.branch_info_begin();
216     for (BinaryBasicBlock *Succ : BB.successors()) {
217       double RelativeExec = SuccEdgeWeights[std::make_pair(&BB, Succ)];
218       RelativeExec *= BB.getExecutionCount();
219       if (static_cast<uint64_t>(RelativeExec) > BI->Count)
220         BI->Count = static_cast<uint64_t>(RelativeExec);
221       ++BI;
222     }
223   }
224 }
225 
226 using ArcSet =
227     DenseSet<std::pair<const BinaryBasicBlock *, const BinaryBasicBlock *>>;
228 
229 /// Predecessor edges version of guessEdgeByIterativeApproach. GuessedArcs has
230 /// all edges we already established their count. Try to guess the count of
231 /// the remaining edge, if there is only one to guess, and return true if we
232 /// were able to guess.
guessPredEdgeCounts(BinaryBasicBlock * BB,ArcSet & GuessedArcs)233 bool guessPredEdgeCounts(BinaryBasicBlock *BB, ArcSet &GuessedArcs) {
234   if (BB->pred_size() == 0)
235     return false;
236 
237   uint64_t TotalPredCount = 0;
238   unsigned NumGuessedEdges = 0;
239   for (BinaryBasicBlock *Pred : BB->predecessors()) {
240     if (GuessedArcs.count(std::make_pair(Pred, BB)))
241       ++NumGuessedEdges;
242     TotalPredCount += Pred->getBranchInfo(*BB).Count;
243   }
244 
245   if (NumGuessedEdges != BB->pred_size() - 1)
246     return false;
247 
248   int64_t Guessed =
249       static_cast<int64_t>(BB->getExecutionCount()) - TotalPredCount;
250   if (Guessed < 0)
251     Guessed = 0;
252 
253   for (BinaryBasicBlock *Pred : BB->predecessors()) {
254     if (GuessedArcs.count(std::make_pair(Pred, BB)))
255       continue;
256 
257     Pred->getBranchInfo(*BB).Count = Guessed;
258     GuessedArcs.insert(std::make_pair(Pred, BB));
259     return true;
260   }
261   llvm_unreachable("Expected unguessed arc");
262 }
263 
264 /// Successor edges version of guessEdgeByIterativeApproach. GuessedArcs has
265 /// all edges we already established their count. Try to guess the count of
266 /// the remaining edge, if there is only one to guess, and return true if we
267 /// were able to guess.
guessSuccEdgeCounts(BinaryBasicBlock * BB,ArcSet & GuessedArcs)268 bool guessSuccEdgeCounts(BinaryBasicBlock *BB, ArcSet &GuessedArcs) {
269   if (BB->succ_size() == 0)
270     return false;
271 
272   uint64_t TotalSuccCount = 0;
273   unsigned NumGuessedEdges = 0;
274   auto BI = BB->branch_info_begin();
275   for (BinaryBasicBlock *Succ : BB->successors()) {
276     if (GuessedArcs.count(std::make_pair(BB, Succ)))
277       ++NumGuessedEdges;
278     TotalSuccCount += BI->Count;
279     ++BI;
280   }
281 
282   if (NumGuessedEdges != BB->succ_size() - 1)
283     return false;
284 
285   int64_t Guessed =
286       static_cast<int64_t>(BB->getExecutionCount()) - TotalSuccCount;
287   if (Guessed < 0)
288     Guessed = 0;
289 
290   BI = BB->branch_info_begin();
291   for (BinaryBasicBlock *Succ : BB->successors()) {
292     if (GuessedArcs.count(std::make_pair(BB, Succ))) {
293       ++BI;
294       continue;
295     }
296 
297     BI->Count = Guessed;
298     GuessedArcs.insert(std::make_pair(BB, Succ));
299     return true;
300   }
301   llvm_unreachable("Expected unguessed arc");
302 }
303 
304 /// Guess edge count whenever we have only one edge (pred or succ) left
305 /// to guess. Then make its count equal to BB count minus all other edge
306 /// counts we already know their count. Repeat this until there is no
307 /// change.
guessEdgeByIterativeApproach(BinaryFunction & BF)308 void guessEdgeByIterativeApproach(BinaryFunction &BF) {
309   ArcSet KnownArcs;
310   bool Changed = false;
311 
312   do {
313     Changed = false;
314     for (BinaryBasicBlock &BB : BF) {
315       if (guessPredEdgeCounts(&BB, KnownArcs))
316         Changed = true;
317       if (guessSuccEdgeCounts(&BB, KnownArcs))
318         Changed = true;
319     }
320   } while (Changed);
321 
322   // Guess count for non-inferred edges
323   for (BinaryBasicBlock &BB : BF) {
324     for (BinaryBasicBlock *Pred : BB.predecessors()) {
325       if (KnownArcs.count(std::make_pair(Pred, &BB)))
326         continue;
327       BinaryBasicBlock::BinaryBranchInfo &BI = Pred->getBranchInfo(BB);
328       BI.Count =
329           std::min(Pred->getExecutionCount(), BB.getExecutionCount()) / 2;
330       KnownArcs.insert(std::make_pair(Pred, &BB));
331     }
332     auto BI = BB.branch_info_begin();
333     for (BinaryBasicBlock *Succ : BB.successors()) {
334       if (KnownArcs.count(std::make_pair(&BB, Succ))) {
335         ++BI;
336         continue;
337       }
338       BI->Count =
339           std::min(BB.getExecutionCount(), Succ->getExecutionCount()) / 2;
340       KnownArcs.insert(std::make_pair(&BB, Succ));
341       break;
342     }
343   }
344 }
345 
346 /// Associate each basic block with the BinaryLoop object corresponding to the
347 /// innermost loop containing this block.
348 DenseMap<const BinaryBasicBlock *, const BinaryLoop *>
createLoopNestLevelMap(BinaryFunction & BF)349 createLoopNestLevelMap(BinaryFunction &BF) {
350   DenseMap<const BinaryBasicBlock *, const BinaryLoop *> LoopNestLevel;
351   const BinaryLoopInfo &BLI = BF.getLoopInfo();
352 
353   for (BinaryBasicBlock &BB : BF)
354     LoopNestLevel[&BB] = BLI[&BB];
355 
356   return LoopNestLevel;
357 }
358 
359 } // end anonymous namespace
360 
equalizeBBCounts(DataflowInfoManager & Info,BinaryFunction & BF)361 void equalizeBBCounts(DataflowInfoManager &Info, BinaryFunction &BF) {
362   if (BF.begin() == BF.end())
363     return;
364 
365   DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();
366   DominatorAnalysis<true> &PDA = Info.getPostDominatorAnalysis();
367   auto &InsnToBB = Info.getInsnToBBMap();
368   // These analyses work at the instruction granularity, but we really only need
369   // basic block granularity here. So we'll use a set of visited edges to avoid
370   // revisiting the same BBs again and again.
371   DenseMap<const BinaryBasicBlock *, std::set<const BinaryBasicBlock *>>
372       Visited;
373   // Equivalence classes mapping. Each equivalence class is defined by the set
374   // of BBs that obeys the aforementioned properties.
375   DenseMap<const BinaryBasicBlock *, signed> BBsToEC;
376   std::vector<std::vector<BinaryBasicBlock *>> Classes;
377 
378   BF.calculateLoopInfo();
379   DenseMap<const BinaryBasicBlock *, const BinaryLoop *> LoopNestLevel =
380       createLoopNestLevelMap(BF);
381 
382   for (BinaryBasicBlock &BB : BF)
383     BBsToEC[&BB] = -1;
384 
385   for (BinaryBasicBlock &BB : BF) {
386     auto I = BB.begin();
387     if (I == BB.end())
388       continue;
389 
390     DA.doForAllDominators(*I, [&](const MCInst &DomInst) {
391       BinaryBasicBlock *DomBB = InsnToBB[&DomInst];
392       if (Visited[DomBB].count(&BB))
393         return;
394       Visited[DomBB].insert(&BB);
395       if (!PDA.doesADominateB(*I, DomInst))
396         return;
397       if (LoopNestLevel[&BB] != LoopNestLevel[DomBB])
398         return;
399       if (BBsToEC[DomBB] == -1 && BBsToEC[&BB] == -1) {
400         BBsToEC[DomBB] = Classes.size();
401         BBsToEC[&BB] = Classes.size();
402         Classes.emplace_back();
403         Classes.back().push_back(DomBB);
404         Classes.back().push_back(&BB);
405         return;
406       }
407       if (BBsToEC[DomBB] == -1) {
408         BBsToEC[DomBB] = BBsToEC[&BB];
409         Classes[BBsToEC[&BB]].push_back(DomBB);
410         return;
411       }
412       if (BBsToEC[&BB] == -1) {
413         BBsToEC[&BB] = BBsToEC[DomBB];
414         Classes[BBsToEC[DomBB]].push_back(&BB);
415         return;
416       }
417       signed BBECNum = BBsToEC[&BB];
418       std::vector<BinaryBasicBlock *> DomEC = Classes[BBsToEC[DomBB]];
419       std::vector<BinaryBasicBlock *> BBEC = Classes[BBECNum];
420       for (BinaryBasicBlock *Block : DomEC) {
421         BBsToEC[Block] = BBECNum;
422         BBEC.push_back(Block);
423       }
424       DomEC.clear();
425     });
426   }
427 
428   for (std::vector<BinaryBasicBlock *> &Class : Classes) {
429     uint64_t Max = 0ULL;
430     for (BinaryBasicBlock *BB : Class)
431       Max = std::max(Max, BB->getExecutionCount());
432     for (BinaryBasicBlock *BB : Class)
433       BB->setExecutionCount(Max);
434   }
435 }
436 
runOnFunction(BinaryFunction & BF)437 void EstimateEdgeCounts::runOnFunction(BinaryFunction &BF) {
438   EdgeWeightMap PredEdgeWeights;
439   EdgeWeightMap SuccEdgeWeights;
440   if (!opts::IterativeGuess) {
441     computeEdgeWeights<Inverse<BinaryBasicBlock *>>(BF, PredEdgeWeights);
442     computeEdgeWeights<BinaryBasicBlock *>(BF, SuccEdgeWeights);
443   }
444   if (opts::EqualizeBBCounts) {
445     LLVM_DEBUG(BF.print(dbgs(), "before equalize BB counts"));
446     auto Info = DataflowInfoManager(BF, nullptr, nullptr);
447     equalizeBBCounts(Info, BF);
448     LLVM_DEBUG(BF.print(dbgs(), "after equalize BB counts"));
449   }
450   if (opts::IterativeGuess)
451     guessEdgeByIterativeApproach(BF);
452   else
453     guessEdgeByRelHotness(BF, /*UseSuccs=*/false, PredEdgeWeights,
454                           SuccEdgeWeights);
455   recalculateBBCounts(BF, /*AllEdges=*/false);
456 }
457 
runOnFunctions(BinaryContext & BC)458 Error EstimateEdgeCounts::runOnFunctions(BinaryContext &BC) {
459   if (llvm::none_of(llvm::make_second_range(BC.getBinaryFunctions()),
460                     [](const BinaryFunction &BF) {
461                       return BF.getProfileFlags() == BinaryFunction::PF_SAMPLE;
462                     }))
463     return Error::success();
464 
465   ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) {
466     runOnFunction(BF);
467   };
468   ParallelUtilities::PredicateTy SkipFunc = [&](const BinaryFunction &BF) {
469     return BF.getProfileFlags() != BinaryFunction::PF_SAMPLE;
470   };
471 
472   ParallelUtilities::runOnEachFunction(
473       BC, ParallelUtilities::SchedulingPolicy::SP_BB_QUADRATIC, WorkFun,
474       SkipFunc, "EstimateEdgeCounts");
475   return Error::success();
476 }
477 
478 } // namespace bolt
479 } // namespace llvm
480