xref: /llvm-project/bolt/include/bolt/Passes/ReorderAlgorithm.h (revision 1a2f83366b86433bb86f3b60fa19b3f096313a21)
1 //===- bolt/Passes/ReorderAlgorithm.h - Basic block reorderng ---*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Interface to different basic block reordering algorithms.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef BOLT_PASSES_REORDER_ALGORITHM_H
14 #define BOLT_PASSES_REORDER_ALGORITHM_H
15 
16 #include "bolt/Core/BinaryFunction.h"
17 #include <memory>
18 #include <unordered_map>
19 #include <vector>
20 
21 namespace llvm {
22 
23 class raw_ostream;
24 
25 namespace bolt {
26 
27 /// Objects of this class implement various basic block clustering algorithms.
28 /// Basic block clusters are chains of basic blocks that should be laid out
29 /// in this order to maximize performace. These algorithms group basic blocks
30 /// into clusters using execution profile data and various heuristics.
31 class ClusterAlgorithm {
32 public:
33   using ClusterTy = std::vector<BinaryBasicBlock *>;
34   std::vector<ClusterTy> Clusters;
35   std::vector<std::unordered_map<uint32_t, uint64_t>> ClusterEdges;
36   std::vector<double> AvgFreq;
37 
38   /// Group the basic blocks in the given function into clusters stored in the
39   /// Clusters vector. Also encode relative weights between two clusters in
40   /// the ClusterEdges vector if requested. This vector is indexed by
41   /// the clusters indices in the Clusters vector.
42   virtual void clusterBasicBlocks(BinaryFunction &BF,
43                                   bool ComputeEdges = false) = 0;
44 
45   /// Compute for each cluster its average execution frequency, that is
46   /// the sum of average frequencies of its blocks (execution count / # instrs).
47   /// The average frequencies are stored in the AvgFreq vector, index by the
48   /// cluster indices in the Clusters vector.
49   void computeClusterAverageFrequency(const BinaryContext &BC);
50 
51   /// Clear clusters and related info.
52   virtual void reset();
53 
54   void printClusters() const;
55 
~ClusterAlgorithm()56   virtual ~ClusterAlgorithm() {}
57 };
58 
59 /// Base class for a greedy clustering algorithm that selects edges in order
60 /// based on some heuristic and uses them to join basic blocks into clusters.
61 class GreedyClusterAlgorithm : public ClusterAlgorithm {
62 protected:
63   // Represents an edge between two basic blocks, with source, destination, and
64   // profile count.
65   struct EdgeTy {
66     const BinaryBasicBlock *Src;
67     const BinaryBasicBlock *Dst;
68     uint64_t Count;
69 
EdgeTyEdgeTy70     EdgeTy(const BinaryBasicBlock *Src, const BinaryBasicBlock *Dst,
71            uint64_t Count)
72         : Src(Src), Dst(Dst), Count(Count) {}
73 
74     void print(raw_ostream &OS) const;
75   };
76 
77   struct EdgeHash {
78     size_t operator()(const EdgeTy &E) const;
79   };
80 
81   struct EdgeEqual {
82     bool operator()(const EdgeTy &A, const EdgeTy &B) const;
83   };
84 
85   // Virtual methods that allow custom specialization of the heuristic used by
86   // the algorithm to select edges.
87   virtual void initQueue(std::vector<EdgeTy> &Queue,
88                          const BinaryFunction &BF) = 0;
89   virtual void adjustQueue(std::vector<EdgeTy> &Queue,
90                            const BinaryFunction &BF) = 0;
91   virtual bool areClustersCompatible(const ClusterTy &Front,
92                                      const ClusterTy &Back,
93                                      const EdgeTy &E) const = 0;
94 
95   // Map from basic block to owning cluster index.
96   using BBToClusterMapTy =
97       std::unordered_map<const BinaryBasicBlock *, unsigned>;
98   BBToClusterMapTy BBToClusterMap;
99 
100 public:
101   void clusterBasicBlocks(BinaryFunction &BF,
102                           bool ComputeEdges = false) override;
103   void reset() override;
104 };
105 
106 /// This clustering algorithm is based on a greedy heuristic suggested by
107 /// Pettis and Hansen (PLDI '90).
108 class PHGreedyClusterAlgorithm : public GreedyClusterAlgorithm {
109 protected:
110   void initQueue(std::vector<EdgeTy> &Queue, const BinaryFunction &BF) override;
111   void adjustQueue(std::vector<EdgeTy> &Queue,
112                    const BinaryFunction &BF) override;
113   bool areClustersCompatible(const ClusterTy &Front, const ClusterTy &Back,
114                              const EdgeTy &E) const override;
115 };
116 
117 /// This clustering algorithm is based on a greedy heuristic that is a
118 /// modification of the heuristic suggested by Pettis (PLDI '90). It is
119 /// geared towards minimizing branches.
120 class MinBranchGreedyClusterAlgorithm : public GreedyClusterAlgorithm {
121 private:
122   // Map from an edge to its weight which is used by the algorithm to sort the
123   // edges.
124   std::unordered_map<EdgeTy, int64_t, EdgeHash, EdgeEqual> Weight;
125 
126   // The weight of an edge is calculated as the win in branches if we choose
127   // to layout this edge as a fall-through. For example, consider the edges
128   //  A -> B with execution count 500,
129   //  A -> C with execution count 100, and
130   //  D -> B with execution count 150
131   // where B, C are the only successors of A and A, D are the only predecessors
132   // of B. Then if we choose to layout edge A -> B as a fallthrough, the win in
133   // branches would be 500 - 100 - 150 = 250. That is the weight of edge A->B.
134   int64_t calculateWeight(const EdgeTy &E, const BinaryFunction &BF) const;
135 
136 protected:
137   void initQueue(std::vector<EdgeTy> &Queue, const BinaryFunction &BF) override;
138   void adjustQueue(std::vector<EdgeTy> &Queue,
139                    const BinaryFunction &BF) override;
140   bool areClustersCompatible(const ClusterTy &Front, const ClusterTy &Back,
141                              const EdgeTy &E) const override;
142 
143 public:
144   void reset() override;
145 };
146 
147 /// Objects of this class implement various basic block reordering algorithms.
148 /// Most of these algorithms depend on a clustering algorithm.
149 /// Here we have 3 conflicting goals as to how to layout clusters. If we want
150 /// to minimize jump offsets, we should put clusters with heavy inter-cluster
151 /// dependence as close as possible. If we want to maximize the probability
152 /// that all inter-cluster edges are predicted as not-taken, we should enforce
153 /// a topological order to make targets appear after sources, creating forward
154 /// branches. If we want to separate hot from cold blocks to maximize the
155 /// probability that unfrequently executed code doesn't pollute the cache, we
156 /// should put clusters in descending order of hotness.
157 class ReorderAlgorithm {
158 protected:
159   std::unique_ptr<ClusterAlgorithm> CAlgo;
160 
161 public:
ReorderAlgorithm()162   ReorderAlgorithm() {}
ReorderAlgorithm(std::unique_ptr<ClusterAlgorithm> CAlgo)163   explicit ReorderAlgorithm(std::unique_ptr<ClusterAlgorithm> CAlgo)
164       : CAlgo(std::move(CAlgo)) {}
165 
166   using BasicBlockOrder = BinaryFunction::BasicBlockOrderType;
167 
168   /// Reorder the basic blocks of the given function and store the new order in
169   /// the new Clusters vector.
170   virtual void reorderBasicBlocks(BinaryFunction &BF,
171                                   BasicBlockOrder &Order) const = 0;
172 
setClusterAlgorithm(ClusterAlgorithm * CAlgo)173   void setClusterAlgorithm(ClusterAlgorithm *CAlgo) {
174     this->CAlgo.reset(CAlgo);
175   }
176 
~ReorderAlgorithm()177   virtual ~ReorderAlgorithm() {}
178 };
179 
180 /// Dynamic programming implementation for the TSP, applied to BB layout. Find
181 /// the optimal way to maximize weight during a path traversing all BBs. In
182 /// this way, we will convert the hottest branches into fall-throughs.
183 ///
184 /// Uses exponential amount of memory on the number of basic blocks and should
185 /// only be used for small functions.
186 class TSPReorderAlgorithm : public ReorderAlgorithm {
187 public:
188   void reorderBasicBlocks(BinaryFunction &BF,
189                           BasicBlockOrder &Order) const override;
190 };
191 
192 /// Simple algorithm that groups basic blocks into clusters and then
193 /// lays them out cluster after cluster.
194 class OptimizeReorderAlgorithm : public ReorderAlgorithm {
195 public:
OptimizeReorderAlgorithm(std::unique_ptr<ClusterAlgorithm> CAlgo)196   explicit OptimizeReorderAlgorithm(std::unique_ptr<ClusterAlgorithm> CAlgo)
197       : ReorderAlgorithm(std::move(CAlgo)) {}
198 
199   void reorderBasicBlocks(BinaryFunction &BF,
200                           BasicBlockOrder &Order) const override;
201 };
202 
203 /// This reorder algorithm tries to ensure that all inter-cluster edges are
204 /// predicted as not-taken, by enforcing a topological order to make
205 /// targets appear after sources, creating forward branches.
206 class OptimizeBranchReorderAlgorithm : public ReorderAlgorithm {
207 public:
OptimizeBranchReorderAlgorithm(std::unique_ptr<ClusterAlgorithm> CAlgo)208   explicit OptimizeBranchReorderAlgorithm(
209       std::unique_ptr<ClusterAlgorithm> CAlgo)
210       : ReorderAlgorithm(std::move(CAlgo)) {}
211 
212   void reorderBasicBlocks(BinaryFunction &BF,
213                           BasicBlockOrder &Order) const override;
214 };
215 
216 /// This reorder tries to separate hot from cold blocks to maximize the
217 /// probability that unfrequently executed code doesn't pollute the cache, by
218 /// putting clusters in descending order of hotness.
219 class OptimizeCacheReorderAlgorithm : public ReorderAlgorithm {
220 public:
OptimizeCacheReorderAlgorithm(std::unique_ptr<ClusterAlgorithm> CAlgo)221   explicit OptimizeCacheReorderAlgorithm(
222       std::unique_ptr<ClusterAlgorithm> CAlgo)
223       : ReorderAlgorithm(std::move(CAlgo)) {}
224 
225   void reorderBasicBlocks(BinaryFunction &BF,
226                           BasicBlockOrder &Order) const override;
227 };
228 
229 /// A new reordering algorithm for basic blocks, ext-tsp
230 class ExtTSPReorderAlgorithm : public ReorderAlgorithm {
231 public:
232   void reorderBasicBlocks(BinaryFunction &BF,
233                           BasicBlockOrder &Order) const override;
234 };
235 
236 /// Toy example that simply reverses the original basic block order.
237 class ReverseReorderAlgorithm : public ReorderAlgorithm {
238 public:
239   void reorderBasicBlocks(BinaryFunction &BF,
240                           BasicBlockOrder &Order) const override;
241 };
242 
243 /// Create clusters as usual and place them in random order.
244 class RandomClusterReorderAlgorithm : public ReorderAlgorithm {
245 public:
RandomClusterReorderAlgorithm(std::unique_ptr<ClusterAlgorithm> CAlgo)246   explicit RandomClusterReorderAlgorithm(
247       std::unique_ptr<ClusterAlgorithm> CAlgo)
248       : ReorderAlgorithm(std::move(CAlgo)) {}
249 
250   void reorderBasicBlocks(BinaryFunction &BF,
251                           BasicBlockOrder &Order) const override;
252 };
253 
254 } // namespace bolt
255 } // namespace llvm
256 
257 #endif
258