1 //===- bolt/Passes/ReorderAlgorithm.h - Basic block reorderng ---*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Interface to different basic block reordering algorithms. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #ifndef BOLT_PASSES_REORDER_ALGORITHM_H 14 #define BOLT_PASSES_REORDER_ALGORITHM_H 15 16 #include "bolt/Core/BinaryFunction.h" 17 #include <memory> 18 #include <unordered_map> 19 #include <vector> 20 21 namespace llvm { 22 23 class raw_ostream; 24 25 namespace bolt { 26 27 /// Objects of this class implement various basic block clustering algorithms. 28 /// Basic block clusters are chains of basic blocks that should be laid out 29 /// in this order to maximize performace. These algorithms group basic blocks 30 /// into clusters using execution profile data and various heuristics. 31 class ClusterAlgorithm { 32 public: 33 using ClusterTy = std::vector<BinaryBasicBlock *>; 34 std::vector<ClusterTy> Clusters; 35 std::vector<std::unordered_map<uint32_t, uint64_t>> ClusterEdges; 36 std::vector<double> AvgFreq; 37 38 /// Group the basic blocks in the given function into clusters stored in the 39 /// Clusters vector. Also encode relative weights between two clusters in 40 /// the ClusterEdges vector if requested. This vector is indexed by 41 /// the clusters indices in the Clusters vector. 42 virtual void clusterBasicBlocks(BinaryFunction &BF, 43 bool ComputeEdges = false) = 0; 44 45 /// Compute for each cluster its average execution frequency, that is 46 /// the sum of average frequencies of its blocks (execution count / # instrs). 47 /// The average frequencies are stored in the AvgFreq vector, index by the 48 /// cluster indices in the Clusters vector. 49 void computeClusterAverageFrequency(const BinaryContext &BC); 50 51 /// Clear clusters and related info. 52 virtual void reset(); 53 54 void printClusters() const; 55 ~ClusterAlgorithm()56 virtual ~ClusterAlgorithm() {} 57 }; 58 59 /// Base class for a greedy clustering algorithm that selects edges in order 60 /// based on some heuristic and uses them to join basic blocks into clusters. 61 class GreedyClusterAlgorithm : public ClusterAlgorithm { 62 protected: 63 // Represents an edge between two basic blocks, with source, destination, and 64 // profile count. 65 struct EdgeTy { 66 const BinaryBasicBlock *Src; 67 const BinaryBasicBlock *Dst; 68 uint64_t Count; 69 EdgeTyEdgeTy70 EdgeTy(const BinaryBasicBlock *Src, const BinaryBasicBlock *Dst, 71 uint64_t Count) 72 : Src(Src), Dst(Dst), Count(Count) {} 73 74 void print(raw_ostream &OS) const; 75 }; 76 77 struct EdgeHash { 78 size_t operator()(const EdgeTy &E) const; 79 }; 80 81 struct EdgeEqual { 82 bool operator()(const EdgeTy &A, const EdgeTy &B) const; 83 }; 84 85 // Virtual methods that allow custom specialization of the heuristic used by 86 // the algorithm to select edges. 87 virtual void initQueue(std::vector<EdgeTy> &Queue, 88 const BinaryFunction &BF) = 0; 89 virtual void adjustQueue(std::vector<EdgeTy> &Queue, 90 const BinaryFunction &BF) = 0; 91 virtual bool areClustersCompatible(const ClusterTy &Front, 92 const ClusterTy &Back, 93 const EdgeTy &E) const = 0; 94 95 // Map from basic block to owning cluster index. 96 using BBToClusterMapTy = 97 std::unordered_map<const BinaryBasicBlock *, unsigned>; 98 BBToClusterMapTy BBToClusterMap; 99 100 public: 101 void clusterBasicBlocks(BinaryFunction &BF, 102 bool ComputeEdges = false) override; 103 void reset() override; 104 }; 105 106 /// This clustering algorithm is based on a greedy heuristic suggested by 107 /// Pettis and Hansen (PLDI '90). 108 class PHGreedyClusterAlgorithm : public GreedyClusterAlgorithm { 109 protected: 110 void initQueue(std::vector<EdgeTy> &Queue, const BinaryFunction &BF) override; 111 void adjustQueue(std::vector<EdgeTy> &Queue, 112 const BinaryFunction &BF) override; 113 bool areClustersCompatible(const ClusterTy &Front, const ClusterTy &Back, 114 const EdgeTy &E) const override; 115 }; 116 117 /// This clustering algorithm is based on a greedy heuristic that is a 118 /// modification of the heuristic suggested by Pettis (PLDI '90). It is 119 /// geared towards minimizing branches. 120 class MinBranchGreedyClusterAlgorithm : public GreedyClusterAlgorithm { 121 private: 122 // Map from an edge to its weight which is used by the algorithm to sort the 123 // edges. 124 std::unordered_map<EdgeTy, int64_t, EdgeHash, EdgeEqual> Weight; 125 126 // The weight of an edge is calculated as the win in branches if we choose 127 // to layout this edge as a fall-through. For example, consider the edges 128 // A -> B with execution count 500, 129 // A -> C with execution count 100, and 130 // D -> B with execution count 150 131 // where B, C are the only successors of A and A, D are the only predecessors 132 // of B. Then if we choose to layout edge A -> B as a fallthrough, the win in 133 // branches would be 500 - 100 - 150 = 250. That is the weight of edge A->B. 134 int64_t calculateWeight(const EdgeTy &E, const BinaryFunction &BF) const; 135 136 protected: 137 void initQueue(std::vector<EdgeTy> &Queue, const BinaryFunction &BF) override; 138 void adjustQueue(std::vector<EdgeTy> &Queue, 139 const BinaryFunction &BF) override; 140 bool areClustersCompatible(const ClusterTy &Front, const ClusterTy &Back, 141 const EdgeTy &E) const override; 142 143 public: 144 void reset() override; 145 }; 146 147 /// Objects of this class implement various basic block reordering algorithms. 148 /// Most of these algorithms depend on a clustering algorithm. 149 /// Here we have 3 conflicting goals as to how to layout clusters. If we want 150 /// to minimize jump offsets, we should put clusters with heavy inter-cluster 151 /// dependence as close as possible. If we want to maximize the probability 152 /// that all inter-cluster edges are predicted as not-taken, we should enforce 153 /// a topological order to make targets appear after sources, creating forward 154 /// branches. If we want to separate hot from cold blocks to maximize the 155 /// probability that unfrequently executed code doesn't pollute the cache, we 156 /// should put clusters in descending order of hotness. 157 class ReorderAlgorithm { 158 protected: 159 std::unique_ptr<ClusterAlgorithm> CAlgo; 160 161 public: ReorderAlgorithm()162 ReorderAlgorithm() {} ReorderAlgorithm(std::unique_ptr<ClusterAlgorithm> CAlgo)163 explicit ReorderAlgorithm(std::unique_ptr<ClusterAlgorithm> CAlgo) 164 : CAlgo(std::move(CAlgo)) {} 165 166 using BasicBlockOrder = BinaryFunction::BasicBlockOrderType; 167 168 /// Reorder the basic blocks of the given function and store the new order in 169 /// the new Clusters vector. 170 virtual void reorderBasicBlocks(BinaryFunction &BF, 171 BasicBlockOrder &Order) const = 0; 172 setClusterAlgorithm(ClusterAlgorithm * CAlgo)173 void setClusterAlgorithm(ClusterAlgorithm *CAlgo) { 174 this->CAlgo.reset(CAlgo); 175 } 176 ~ReorderAlgorithm()177 virtual ~ReorderAlgorithm() {} 178 }; 179 180 /// Dynamic programming implementation for the TSP, applied to BB layout. Find 181 /// the optimal way to maximize weight during a path traversing all BBs. In 182 /// this way, we will convert the hottest branches into fall-throughs. 183 /// 184 /// Uses exponential amount of memory on the number of basic blocks and should 185 /// only be used for small functions. 186 class TSPReorderAlgorithm : public ReorderAlgorithm { 187 public: 188 void reorderBasicBlocks(BinaryFunction &BF, 189 BasicBlockOrder &Order) const override; 190 }; 191 192 /// Simple algorithm that groups basic blocks into clusters and then 193 /// lays them out cluster after cluster. 194 class OptimizeReorderAlgorithm : public ReorderAlgorithm { 195 public: OptimizeReorderAlgorithm(std::unique_ptr<ClusterAlgorithm> CAlgo)196 explicit OptimizeReorderAlgorithm(std::unique_ptr<ClusterAlgorithm> CAlgo) 197 : ReorderAlgorithm(std::move(CAlgo)) {} 198 199 void reorderBasicBlocks(BinaryFunction &BF, 200 BasicBlockOrder &Order) const override; 201 }; 202 203 /// This reorder algorithm tries to ensure that all inter-cluster edges are 204 /// predicted as not-taken, by enforcing a topological order to make 205 /// targets appear after sources, creating forward branches. 206 class OptimizeBranchReorderAlgorithm : public ReorderAlgorithm { 207 public: OptimizeBranchReorderAlgorithm(std::unique_ptr<ClusterAlgorithm> CAlgo)208 explicit OptimizeBranchReorderAlgorithm( 209 std::unique_ptr<ClusterAlgorithm> CAlgo) 210 : ReorderAlgorithm(std::move(CAlgo)) {} 211 212 void reorderBasicBlocks(BinaryFunction &BF, 213 BasicBlockOrder &Order) const override; 214 }; 215 216 /// This reorder tries to separate hot from cold blocks to maximize the 217 /// probability that unfrequently executed code doesn't pollute the cache, by 218 /// putting clusters in descending order of hotness. 219 class OptimizeCacheReorderAlgorithm : public ReorderAlgorithm { 220 public: OptimizeCacheReorderAlgorithm(std::unique_ptr<ClusterAlgorithm> CAlgo)221 explicit OptimizeCacheReorderAlgorithm( 222 std::unique_ptr<ClusterAlgorithm> CAlgo) 223 : ReorderAlgorithm(std::move(CAlgo)) {} 224 225 void reorderBasicBlocks(BinaryFunction &BF, 226 BasicBlockOrder &Order) const override; 227 }; 228 229 /// A new reordering algorithm for basic blocks, ext-tsp 230 class ExtTSPReorderAlgorithm : public ReorderAlgorithm { 231 public: 232 void reorderBasicBlocks(BinaryFunction &BF, 233 BasicBlockOrder &Order) const override; 234 }; 235 236 /// Toy example that simply reverses the original basic block order. 237 class ReverseReorderAlgorithm : public ReorderAlgorithm { 238 public: 239 void reorderBasicBlocks(BinaryFunction &BF, 240 BasicBlockOrder &Order) const override; 241 }; 242 243 /// Create clusters as usual and place them in random order. 244 class RandomClusterReorderAlgorithm : public ReorderAlgorithm { 245 public: RandomClusterReorderAlgorithm(std::unique_ptr<ClusterAlgorithm> CAlgo)246 explicit RandomClusterReorderAlgorithm( 247 std::unique_ptr<ClusterAlgorithm> CAlgo) 248 : ReorderAlgorithm(std::move(CAlgo)) {} 249 250 void reorderBasicBlocks(BinaryFunction &BF, 251 BasicBlockOrder &Order) const override; 252 }; 253 254 } // namespace bolt 255 } // namespace llvm 256 257 #endif 258