xref: /llvm-project/bolt/lib/Passes/ReorderAlgorithm.cpp (revision fa5486e487a6f98358eca572efb4b1fd7d27d1f5)
12f09f445SMaksim Panchenko //===- bolt/Passes/ReorderAlgorithm.cpp - Basic block reordering ----------===//
2a34c753fSRafael Auler //
3a34c753fSRafael Auler // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4a34c753fSRafael Auler // See https://llvm.org/LICENSE.txt for license information.
5a34c753fSRafael Auler // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6a34c753fSRafael Auler //
7a34c753fSRafael Auler //===----------------------------------------------------------------------===//
8a34c753fSRafael Auler //
92f09f445SMaksim Panchenko // This file implements classes used by several basic block reordering
102f09f445SMaksim Panchenko // algorithms.
11a34c753fSRafael Auler //
12a34c753fSRafael Auler //===----------------------------------------------------------------------===//
13a34c753fSRafael Auler 
14a34c753fSRafael Auler #include "bolt/Passes/ReorderAlgorithm.h"
15a34c753fSRafael Auler #include "bolt/Core/BinaryBasicBlock.h"
16a34c753fSRafael Auler #include "bolt/Core/BinaryFunction.h"
17a34c753fSRafael Auler #include "llvm/Support/CommandLine.h"
18539b6c68Sspupyrev #include "llvm/Transforms/Utils/CodeLayout.h"
19a34c753fSRafael Auler #include <queue>
20a34c753fSRafael Auler #include <random>
21a34c753fSRafael Auler #include <stack>
22a34c753fSRafael Auler 
23a34c753fSRafael Auler #undef DEBUG_TYPE
24a34c753fSRafael Auler #define DEBUG_TYPE "bolt"
25a34c753fSRafael Auler 
26a34c753fSRafael Auler using namespace llvm;
27a34c753fSRafael Auler using namespace bolt;
28a34c753fSRafael Auler 
namespace opts {

extern cl::OptionCategory BoltOptCategory;
extern cl::opt<bool> NoThreads;

// Frequency threshold, in tenths of a percent of the main entry frequency,
// below which a basic block is considered cold. With the default of 0, only
// blocks with zero samples are treated as cold.
static cl::opt<unsigned> ColdThreshold(
    "cold-threshold",
    cl::desc("tenths of percents of main entry frequency to use as a "
             "threshold when evaluating whether a basic block is cold "
             "(0 means it is only considered cold if the block has zero "
             "samples). Default: 0 "),
    cl::init(0), cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory));

// Dump the clusters computed by the clustering algorithms (see
// ClusterAlgorithm::printClusters() below).
static cl::opt<bool> PrintClusters("print-clusters", cl::desc("print clusters"),
                                   cl::Hidden, cl::cat(BoltOptCategory));

// Seed for randomized reordering; non-static so other translation units can
// reference it via an extern declaration.
cl::opt<uint32_t> RandomSeed("bolt-seed", cl::desc("seed for randomization"),
                             cl::init(42), cl::Hidden,
                             cl::cat(BoltOptCategory));

} // namespace opts
50a34c753fSRafael Auler 
namespace {

/// Mix \p Val into \p Seed using the Boost-style hash combiner.
template <class T> inline void hashCombine(size_t &Seed, const T &Val) {
  Seed ^= std::hash<T>()(Val) + 0x9e3779b9 + (Seed << 6) + (Seed >> 2);
}

/// Hash functor for std::pair: hash the first element, then fold in the
/// second with hashCombine().
template <typename A, typename B> struct HashPair {
  size_t operator()(const std::pair<A, B> &Val) const {
    size_t Seed = std::hash<A>()(Val.first);
    hashCombine(Seed, Val.second);
    return Seed;
  }
};

} // namespace
68a34c753fSRafael Auler 
computeClusterAverageFrequency(const BinaryContext & BC)69a34c753fSRafael Auler void ClusterAlgorithm::computeClusterAverageFrequency(const BinaryContext &BC) {
70a34c753fSRafael Auler   // Create a separate MCCodeEmitter to allow lock-free execution
71a34c753fSRafael Auler   BinaryContext::IndependentCodeEmitter Emitter;
72f92ab6afSAmir Ayupov   if (!opts::NoThreads)
73a34c753fSRafael Auler     Emitter = BC.createIndependentMCCodeEmitter();
74a34c753fSRafael Auler 
75a34c753fSRafael Auler   AvgFreq.resize(Clusters.size(), 0.0);
76a34c753fSRafael Auler   for (uint32_t I = 0, E = Clusters.size(); I < E; ++I) {
77a34c753fSRafael Auler     double Freq = 0.0;
78a34c753fSRafael Auler     uint64_t ClusterSize = 0;
79d5c03defSFabian Parzefall     for (const BinaryBasicBlock *BB : Clusters[I]) {
80a34c753fSRafael Auler       if (BB->getNumNonPseudos() > 0) {
81a34c753fSRafael Auler         Freq += BB->getExecutionCount();
82a34c753fSRafael Auler         // Estimate the size of a block in bytes at run time
83a34c753fSRafael Auler         // NOTE: This might be inaccurate
84a34c753fSRafael Auler         ClusterSize += BB->estimateSize(Emitter.MCE.get());
85a34c753fSRafael Auler       }
86a34c753fSRafael Auler     }
87a34c753fSRafael Auler     AvgFreq[I] = ClusterSize == 0 ? 0 : Freq / ClusterSize;
88a34c753fSRafael Auler   }
89a34c753fSRafael Auler }
90a34c753fSRafael Auler 
printClusters() const91a34c753fSRafael Auler void ClusterAlgorithm::printClusters() const {
92a34c753fSRafael Auler   for (uint32_t I = 0, E = Clusters.size(); I < E; ++I) {
93a34c753fSRafael Auler     errs() << "Cluster number " << I;
94a34c753fSRafael Auler     if (AvgFreq.size() == Clusters.size())
95a34c753fSRafael Auler       errs() << " (frequency: " << AvgFreq[I] << ")";
96a34c753fSRafael Auler     errs() << " : ";
97a34c753fSRafael Auler     const char *Sep = "";
98d5c03defSFabian Parzefall     for (const BinaryBasicBlock *BB : Clusters[I]) {
99a34c753fSRafael Auler       errs() << Sep << BB->getName();
100a34c753fSRafael Auler       Sep = ", ";
101a34c753fSRafael Auler     }
102a34c753fSRafael Auler     errs() << "\n";
103a34c753fSRafael Auler   }
104a34c753fSRafael Auler }
105a34c753fSRafael Auler 
// Clear all clustering state so the algorithm object can be reused for
// another function.
void ClusterAlgorithm::reset() {
  Clusters.clear();
  ClusterEdges.clear();
  AvgFreq.clear();
}
111a34c753fSRafael Auler 
// Print a human-readable description of the edge for debug output.
void GreedyClusterAlgorithm::EdgeTy::print(raw_ostream &OS) const {
  OS << Src->getName() << " -> " << Dst->getName() << ", count: " << Count;
}
115a34c753fSRafael Auler 
operator ()(const EdgeTy & E) const116a34c753fSRafael Auler size_t GreedyClusterAlgorithm::EdgeHash::operator()(const EdgeTy &E) const {
117a34c753fSRafael Auler   HashPair<const BinaryBasicBlock *, const BinaryBasicBlock *> Hasher;
118a34c753fSRafael Auler   return Hasher(std::make_pair(E.Src, E.Dst));
119a34c753fSRafael Auler }
120a34c753fSRafael Auler 
// Two edges are considered equal iff they connect the same pair of blocks;
// the execution count is deliberately ignored, mirroring EdgeHash.
bool GreedyClusterAlgorithm::EdgeEqual::operator()(const EdgeTy &A,
                                                   const EdgeTy &B) const {
  return A.Src == B.Src && A.Dst == B.Dst;
}
125a34c753fSRafael Auler 
clusterBasicBlocks(BinaryFunction & BF,bool ComputeEdges)126d5c03defSFabian Parzefall void GreedyClusterAlgorithm::clusterBasicBlocks(BinaryFunction &BF,
127a34c753fSRafael Auler                                                 bool ComputeEdges) {
128a34c753fSRafael Auler   reset();
129a34c753fSRafael Auler 
130a34c753fSRafael Auler   // Greedy heuristic implementation for the TSP, applied to BB layout. Try to
131a34c753fSRafael Auler   // maximize weight during a path traversing all BBs. In this way, we will
132a34c753fSRafael Auler   // convert the hottest branches into fall-throughs.
133a34c753fSRafael Auler 
134a34c753fSRafael Auler   // This is the queue of edges from which we will pop edges and use them to
135a34c753fSRafael Auler   // cluster basic blocks in a greedy fashion.
136a34c753fSRafael Auler   std::vector<EdgeTy> Queue;
137a34c753fSRafael Auler 
138a34c753fSRafael Auler   // Initialize inter-cluster weights.
139a34c753fSRafael Auler   if (ComputeEdges)
1408477bc67SFabian Parzefall     ClusterEdges.resize(BF.getLayout().block_size());
141a34c753fSRafael Auler 
142a34c753fSRafael Auler   // Initialize clusters and edge queue.
1438477bc67SFabian Parzefall   for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
144a34c753fSRafael Auler     // Create a cluster for this BB.
145a34c753fSRafael Auler     uint32_t I = Clusters.size();
146a34c753fSRafael Auler     Clusters.emplace_back();
147a34c753fSRafael Auler     std::vector<BinaryBasicBlock *> &Cluster = Clusters.back();
148a34c753fSRafael Auler     Cluster.push_back(BB);
149a34c753fSRafael Auler     BBToClusterMap[BB] = I;
150a34c753fSRafael Auler     // Populate priority queue with edges.
151a34c753fSRafael Auler     auto BI = BB->branch_info_begin();
152d5c03defSFabian Parzefall     for (const BinaryBasicBlock *I : BB->successors()) {
153a34c753fSRafael Auler       assert(BI->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
154a34c753fSRafael Auler              "attempted reordering blocks of function with no profile data");
155a34c753fSRafael Auler       Queue.emplace_back(EdgeTy(BB, I, BI->Count));
156a34c753fSRafael Auler       ++BI;
157a34c753fSRafael Auler     }
158a34c753fSRafael Auler   }
159a34c753fSRafael Auler   // Sort and adjust the edge queue.
160a34c753fSRafael Auler   initQueue(Queue, BF);
161a34c753fSRafael Auler 
162a34c753fSRafael Auler   // Grow clusters in a greedy fashion.
163a34c753fSRafael Auler   while (!Queue.empty()) {
164a34c753fSRafael Auler     EdgeTy E = Queue.back();
165a34c753fSRafael Auler     Queue.pop_back();
166a34c753fSRafael Auler 
167a34c753fSRafael Auler     const BinaryBasicBlock *SrcBB = E.Src;
168a34c753fSRafael Auler     const BinaryBasicBlock *DstBB = E.Dst;
169a34c753fSRafael Auler 
170a34c753fSRafael Auler     LLVM_DEBUG(dbgs() << "Popped edge "; E.print(dbgs()); dbgs() << "\n");
171a34c753fSRafael Auler 
172a34c753fSRafael Auler     // Case 1: BBSrc and BBDst are the same. Ignore this edge
1738477bc67SFabian Parzefall     if (SrcBB == DstBB || DstBB == *BF.getLayout().block_begin()) {
174a34c753fSRafael Auler       LLVM_DEBUG(dbgs() << "\tIgnored (same src, dst)\n");
175a34c753fSRafael Auler       continue;
176a34c753fSRafael Auler     }
177a34c753fSRafael Auler 
178a34c753fSRafael Auler     int I = BBToClusterMap[SrcBB];
179a34c753fSRafael Auler     int J = BBToClusterMap[DstBB];
180a34c753fSRafael Auler 
181a34c753fSRafael Auler     // Case 2: If they are already allocated at the same cluster, just increase
182a34c753fSRafael Auler     // the weight of this cluster
183a34c753fSRafael Auler     if (I == J) {
184a34c753fSRafael Auler       if (ComputeEdges)
185a34c753fSRafael Auler         ClusterEdges[I][I] += E.Count;
186a34c753fSRafael Auler       LLVM_DEBUG(dbgs() << "\tIgnored (src, dst belong to the same cluster)\n");
187a34c753fSRafael Auler       continue;
188a34c753fSRafael Auler     }
189a34c753fSRafael Auler 
190a34c753fSRafael Auler     std::vector<BinaryBasicBlock *> &ClusterA = Clusters[I];
191a34c753fSRafael Auler     std::vector<BinaryBasicBlock *> &ClusterB = Clusters[J];
192a34c753fSRafael Auler     if (areClustersCompatible(ClusterA, ClusterB, E)) {
193a34c753fSRafael Auler       // Case 3: SrcBB is at the end of a cluster and DstBB is at the start,
194a34c753fSRafael Auler       // allowing us to merge two clusters.
195d5c03defSFabian Parzefall       for (const BinaryBasicBlock *BB : ClusterB)
196a34c753fSRafael Auler         BBToClusterMap[BB] = I;
197a34c753fSRafael Auler       ClusterA.insert(ClusterA.end(), ClusterB.begin(), ClusterB.end());
198a34c753fSRafael Auler       ClusterB.clear();
199a34c753fSRafael Auler       if (ComputeEdges) {
200a34c753fSRafael Auler         // Increase the intra-cluster edge count of cluster A with the count of
201a34c753fSRafael Auler         // this edge as well as with the total count of previously visited edges
202a34c753fSRafael Auler         // from cluster B cluster A.
203a34c753fSRafael Auler         ClusterEdges[I][I] += E.Count;
204a34c753fSRafael Auler         ClusterEdges[I][I] += ClusterEdges[J][I];
205a34c753fSRafael Auler         // Iterate through all inter-cluster edges and transfer edges targeting
206a34c753fSRafael Auler         // cluster B to cluster A.
207a34c753fSRafael Auler         for (uint32_t K = 0, E = ClusterEdges.size(); K != E; ++K)
208a34c753fSRafael Auler           ClusterEdges[K][I] += ClusterEdges[K][J];
209a34c753fSRafael Auler       }
210a34c753fSRafael Auler       // Adjust the weights of the remaining edges and re-sort the queue.
211a34c753fSRafael Auler       adjustQueue(Queue, BF);
212a34c753fSRafael Auler       LLVM_DEBUG(dbgs() << "\tMerged clusters of src, dst\n");
213a34c753fSRafael Auler     } else {
214a34c753fSRafael Auler       // Case 4: Both SrcBB and DstBB are allocated in positions we cannot
215a34c753fSRafael Auler       // merge them. Add the count of this edge to the inter-cluster edge count
216a34c753fSRafael Auler       // between clusters A and B to help us decide ordering between these
217a34c753fSRafael Auler       // clusters.
218a34c753fSRafael Auler       if (ComputeEdges)
219a34c753fSRafael Auler         ClusterEdges[I][J] += E.Count;
220a34c753fSRafael Auler       LLVM_DEBUG(
221a34c753fSRafael Auler           dbgs() << "\tIgnored (src, dst belong to incompatible clusters)\n");
222a34c753fSRafael Auler     }
223a34c753fSRafael Auler   }
224a34c753fSRafael Auler }
225a34c753fSRafael Auler 
// Reset base-class state plus the block-to-cluster map built during
// clusterBasicBlocks().
void GreedyClusterAlgorithm::reset() {
  ClusterAlgorithm::reset();
  BBToClusterMap.clear();
}
230a34c753fSRafael Auler 
initQueue(std::vector<EdgeTy> & Queue,const BinaryFunction & BF)23140c2e0faSMaksim Panchenko void PHGreedyClusterAlgorithm::initQueue(std::vector<EdgeTy> &Queue,
23240c2e0faSMaksim Panchenko                                          const BinaryFunction &BF) {
233a34c753fSRafael Auler   // Define a comparison function to establish SWO between edges.
234a34c753fSRafael Auler   auto Comp = [&BF](const EdgeTy &A, const EdgeTy &B) {
235a34c753fSRafael Auler     // With equal weights, prioritize branches with lower index
236a34c753fSRafael Auler     // source/destination. This helps to keep original block order for blocks
237a34c753fSRafael Auler     // when optimal order cannot be deducted from a profile.
238a34c753fSRafael Auler     if (A.Count == B.Count) {
239a34c753fSRafael Auler       const signed SrcOrder = BF.getOriginalLayoutRelativeOrder(A.Src, B.Src);
240a34c753fSRafael Auler       return (SrcOrder != 0)
241a34c753fSRafael Auler                  ? SrcOrder > 0
242a34c753fSRafael Auler                  : BF.getOriginalLayoutRelativeOrder(A.Dst, B.Dst) > 0;
243a34c753fSRafael Auler     }
244a34c753fSRafael Auler     return A.Count < B.Count;
245a34c753fSRafael Auler   };
246a34c753fSRafael Auler 
247a34c753fSRafael Auler   // Sort edges in increasing profile count order.
248d2c87699SAmir Ayupov   llvm::sort(Queue, Comp);
249a34c753fSRafael Auler }
250a34c753fSRafael Auler 
// This flavor keeps the static profile counts assigned in initQueue() as edge
// weights, so merging clusters never changes the edge ordering and the queue
// needs no re-sorting.
void PHGreedyClusterAlgorithm::adjustQueue(std::vector<EdgeTy> &Queue,
                                           const BinaryFunction &BF) {
  // Nothing to do.
}
255a34c753fSRafael Auler 
// Clusters can be merged only when edge E would become a fall-through after
// concatenation: E.Src must end the front cluster and E.Dst must start the
// back cluster.
bool PHGreedyClusterAlgorithm::areClustersCompatible(const ClusterTy &Front,
                                                     const ClusterTy &Back,
                                                     const EdgeTy &E) const {
  return Front.back() == E.Src && Back.front() == E.Dst;
}
261a34c753fSRafael Auler 
calculateWeight(const EdgeTy & E,const BinaryFunction & BF) const262a34c753fSRafael Auler int64_t MinBranchGreedyClusterAlgorithm::calculateWeight(
263a34c753fSRafael Auler     const EdgeTy &E, const BinaryFunction &BF) const {
264a34c753fSRafael Auler   const BinaryBasicBlock *SrcBB = E.Src;
265a34c753fSRafael Auler   const BinaryBasicBlock *DstBB = E.Dst;
266a34c753fSRafael Auler 
267a34c753fSRafael Auler   // Initial weight value.
268a34c753fSRafael Auler   int64_t W = (int64_t)E.Count;
269a34c753fSRafael Auler 
270a34c753fSRafael Auler   // Adjust the weight by taking into account other edges with the same source.
271a34c753fSRafael Auler   auto BI = SrcBB->branch_info_begin();
272a34c753fSRafael Auler   for (const BinaryBasicBlock *SuccBB : SrcBB->successors()) {
273a34c753fSRafael Auler     assert(BI->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
274a34c753fSRafael Auler            "attempted reordering blocks of function with no profile data");
275a34c753fSRafael Auler     assert(BI->Count <= std::numeric_limits<int64_t>::max() &&
276a34c753fSRafael Auler            "overflow detected");
277a34c753fSRafael Auler     // Ignore edges with same source and destination, edges that target the
278a34c753fSRafael Auler     // entry block as well as the edge E itself.
2798477bc67SFabian Parzefall     if (SuccBB != SrcBB && SuccBB != *BF.getLayout().block_begin() &&
2808477bc67SFabian Parzefall         SuccBB != DstBB)
281a34c753fSRafael Auler       W -= (int64_t)BI->Count;
282a34c753fSRafael Auler     ++BI;
283a34c753fSRafael Auler   }
284a34c753fSRafael Auler 
285a34c753fSRafael Auler   // Adjust the weight by taking into account other edges with the same
286a34c753fSRafael Auler   // destination.
287a34c753fSRafael Auler   for (const BinaryBasicBlock *PredBB : DstBB->predecessors()) {
288a34c753fSRafael Auler     // Ignore edges with same source and destination as well as the edge E
289a34c753fSRafael Auler     // itself.
290a34c753fSRafael Auler     if (PredBB == DstBB || PredBB == SrcBB)
291a34c753fSRafael Auler       continue;
292a34c753fSRafael Auler     auto BI = PredBB->branch_info_begin();
293a34c753fSRafael Auler     for (const BinaryBasicBlock *SuccBB : PredBB->successors()) {
294a34c753fSRafael Auler       if (SuccBB == DstBB)
295a34c753fSRafael Auler         break;
296a34c753fSRafael Auler       ++BI;
297a34c753fSRafael Auler     }
298a34c753fSRafael Auler     assert(BI != PredBB->branch_info_end() && "invalid control flow graph");
299a34c753fSRafael Auler     assert(BI->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
300a34c753fSRafael Auler            "attempted reordering blocks of function with no profile data");
301a34c753fSRafael Auler     assert(BI->Count <= std::numeric_limits<int64_t>::max() &&
302a34c753fSRafael Auler            "overflow detected");
303a34c753fSRafael Auler     W -= (int64_t)BI->Count;
304a34c753fSRafael Auler   }
305a34c753fSRafael Auler 
306a34c753fSRafael Auler   return W;
307a34c753fSRafael Auler }
308a34c753fSRafael Auler 
initQueue(std::vector<EdgeTy> & Queue,const BinaryFunction & BF)30940c2e0faSMaksim Panchenko void MinBranchGreedyClusterAlgorithm::initQueue(std::vector<EdgeTy> &Queue,
31040c2e0faSMaksim Panchenko                                                 const BinaryFunction &BF) {
311a34c753fSRafael Auler   // Initialize edge weights.
312a34c753fSRafael Auler   for (const EdgeTy &E : Queue)
313a34c753fSRafael Auler     Weight.emplace(std::make_pair(E, calculateWeight(E, BF)));
314a34c753fSRafael Auler 
315a34c753fSRafael Auler   // Sort edges in increasing weight order.
316a34c753fSRafael Auler   adjustQueue(Queue, BF);
317a34c753fSRafael Auler }
318a34c753fSRafael Auler 
adjustQueue(std::vector<EdgeTy> & Queue,const BinaryFunction & BF)31940c2e0faSMaksim Panchenko void MinBranchGreedyClusterAlgorithm::adjustQueue(std::vector<EdgeTy> &Queue,
32040c2e0faSMaksim Panchenko                                                   const BinaryFunction &BF) {
321a34c753fSRafael Auler   // Define a comparison function to establish SWO between edges.
322a34c753fSRafael Auler   auto Comp = [&](const EdgeTy &A, const EdgeTy &B) {
323a34c753fSRafael Auler     // With equal weights, prioritize branches with lower index
324a34c753fSRafael Auler     // source/destination. This helps to keep original block order for blocks
325a34c753fSRafael Auler     // when optimal order cannot be deduced from a profile.
326a34c753fSRafael Auler     if (Weight[A] == Weight[B]) {
327a34c753fSRafael Auler       const signed SrcOrder = BF.getOriginalLayoutRelativeOrder(A.Src, B.Src);
328a34c753fSRafael Auler       return (SrcOrder != 0)
329a34c753fSRafael Auler                  ? SrcOrder > 0
330a34c753fSRafael Auler                  : BF.getOriginalLayoutRelativeOrder(A.Dst, B.Dst) > 0;
331a34c753fSRafael Auler     }
332a34c753fSRafael Auler     return Weight[A] < Weight[B];
333a34c753fSRafael Auler   };
334a34c753fSRafael Auler 
335a34c753fSRafael Auler   // Iterate through all remaining edges to find edges that have their
336a34c753fSRafael Auler   // source and destination in the same cluster.
337a34c753fSRafael Auler   std::vector<EdgeTy> NewQueue;
338a34c753fSRafael Auler   for (const EdgeTy &E : Queue) {
339a34c753fSRafael Auler     const BinaryBasicBlock *SrcBB = E.Src;
340a34c753fSRafael Auler     const BinaryBasicBlock *DstBB = E.Dst;
341a34c753fSRafael Auler 
342a34c753fSRafael Auler     // Case 1: SrcBB and DstBB are the same or DstBB is the entry block. Ignore
343a34c753fSRafael Auler     // this edge.
3448477bc67SFabian Parzefall     if (SrcBB == DstBB || DstBB == *BF.getLayout().block_begin()) {
345a34c753fSRafael Auler       LLVM_DEBUG(dbgs() << "\tAdjustment: Ignored edge "; E.print(dbgs());
346a34c753fSRafael Auler                  dbgs() << " (same src, dst)\n");
347a34c753fSRafael Auler       continue;
348a34c753fSRafael Auler     }
349a34c753fSRafael Auler 
350a34c753fSRafael Auler     int I = BBToClusterMap[SrcBB];
351a34c753fSRafael Auler     int J = BBToClusterMap[DstBB];
352a34c753fSRafael Auler     std::vector<BinaryBasicBlock *> &ClusterA = Clusters[I];
353a34c753fSRafael Auler     std::vector<BinaryBasicBlock *> &ClusterB = Clusters[J];
354a34c753fSRafael Auler 
355a34c753fSRafael Auler     // Case 2: They are already allocated at the same cluster or incompatible
356a34c753fSRafael Auler     // clusters. Adjust the weights of edges with the same source or
357a34c753fSRafael Auler     // destination, so that this edge has no effect on them any more, and ignore
358a34c753fSRafael Auler     // this edge. Also increase the intra- (or inter-) cluster edge count.
359a34c753fSRafael Auler     if (I == J || !areClustersCompatible(ClusterA, ClusterB, E)) {
360a34c753fSRafael Auler       if (!ClusterEdges.empty())
361a34c753fSRafael Auler         ClusterEdges[I][J] += E.Count;
362a34c753fSRafael Auler       LLVM_DEBUG(dbgs() << "\tAdjustment: Ignored edge "; E.print(dbgs());
363a34c753fSRafael Auler                  dbgs() << " (src, dst belong to same cluster or incompatible "
364a34c753fSRafael Auler                            "clusters)\n");
365a34c753fSRafael Auler       for (const BinaryBasicBlock *SuccBB : SrcBB->successors()) {
366a34c753fSRafael Auler         if (SuccBB == DstBB)
367a34c753fSRafael Auler           continue;
368a34c753fSRafael Auler         auto WI = Weight.find(EdgeTy(SrcBB, SuccBB, 0));
369a34c753fSRafael Auler         assert(WI != Weight.end() && "CFG edge not found in Weight map");
370a34c753fSRafael Auler         WI->second += (int64_t)E.Count;
371a34c753fSRafael Auler       }
372a34c753fSRafael Auler       for (const BinaryBasicBlock *PredBB : DstBB->predecessors()) {
373a34c753fSRafael Auler         if (PredBB == SrcBB)
374a34c753fSRafael Auler           continue;
375a34c753fSRafael Auler         auto WI = Weight.find(EdgeTy(PredBB, DstBB, 0));
376a34c753fSRafael Auler         assert(WI != Weight.end() && "CFG edge not found in Weight map");
377a34c753fSRafael Auler         WI->second += (int64_t)E.Count;
378a34c753fSRafael Auler       }
379a34c753fSRafael Auler       continue;
380a34c753fSRafael Auler     }
381a34c753fSRafael Auler 
382a34c753fSRafael Auler     // Case 3: None of the previous cases is true, so just keep this edge in
383a34c753fSRafael Auler     // the queue.
384a34c753fSRafael Auler     NewQueue.emplace_back(E);
385a34c753fSRafael Auler   }
386a34c753fSRafael Auler 
387a34c753fSRafael Auler   // Sort remaining edges in increasing weight order.
388a34c753fSRafael Auler   Queue.swap(NewQueue);
389d2c87699SAmir Ayupov   llvm::sort(Queue, Comp);
390a34c753fSRafael Auler }
391a34c753fSRafael Auler 
// Same merge criterion as the Pettis-Hansen variant: edge E must become a
// fall-through, i.e. E.Src ends the front cluster and E.Dst starts the back
// cluster.
bool MinBranchGreedyClusterAlgorithm::areClustersCompatible(
    const ClusterTy &Front, const ClusterTy &Back, const EdgeTy &E) const {
  return Front.back() == E.Src && Back.front() == E.Dst;
}
396a34c753fSRafael Auler 
// Reset the greedy base-class state as well as the cached edge weights.
void MinBranchGreedyClusterAlgorithm::reset() {
  GreedyClusterAlgorithm::reset();
  Weight.clear();
}
401a34c753fSRafael Auler 
reorderBasicBlocks(BinaryFunction & BF,BasicBlockOrder & Order) const402d5c03defSFabian Parzefall void TSPReorderAlgorithm::reorderBasicBlocks(BinaryFunction &BF,
40340c2e0faSMaksim Panchenko                                              BasicBlockOrder &Order) const {
404a34c753fSRafael Auler   std::vector<std::vector<uint64_t>> Weight;
405a34c753fSRafael Auler   std::vector<BinaryBasicBlock *> IndexToBB;
406a34c753fSRafael Auler 
4078477bc67SFabian Parzefall   const size_t N = BF.getLayout().block_size();
408a34c753fSRafael Auler   assert(N <= std::numeric_limits<uint64_t>::digits &&
409a34c753fSRafael Auler          "cannot use TSP solution for sizes larger than bits in uint64_t");
410a34c753fSRafael Auler 
411a34c753fSRafael Auler   // Populating weight map and index map
4128477bc67SFabian Parzefall   for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
413a34c753fSRafael Auler     BB->setLayoutIndex(IndexToBB.size());
414a34c753fSRafael Auler     IndexToBB.push_back(BB);
415a34c753fSRafael Auler   }
416a34c753fSRafael Auler   Weight.resize(N);
417d5c03defSFabian Parzefall   for (const BinaryBasicBlock *BB : BF.getLayout().blocks()) {
418a34c753fSRafael Auler     auto BI = BB->branch_info_begin();
419a34c753fSRafael Auler     Weight[BB->getLayoutIndex()].resize(N);
420a34c753fSRafael Auler     for (BinaryBasicBlock *SuccBB : BB->successors()) {
421a34c753fSRafael Auler       if (BI->Count != BinaryBasicBlock::COUNT_NO_PROFILE)
422a34c753fSRafael Auler         Weight[BB->getLayoutIndex()][SuccBB->getLayoutIndex()] = BI->Count;
423a34c753fSRafael Auler       ++BI;
424a34c753fSRafael Auler     }
425a34c753fSRafael Auler   }
426a34c753fSRafael Auler 
427a34c753fSRafael Auler   std::vector<std::vector<int64_t>> DP;
428*fa5486e4SHo Cheung   DP.resize(static_cast<size_t>(1) << N);
429f92ab6afSAmir Ayupov   for (std::vector<int64_t> &Elmt : DP)
430a34c753fSRafael Auler     Elmt.resize(N, -1);
431f92ab6afSAmir Ayupov 
432a34c753fSRafael Auler   // Start with the entry basic block being allocated with cost zero
433a34c753fSRafael Auler   DP[1][0] = 0;
434a34c753fSRafael Auler   // Walk through TSP solutions using a bitmask to represent state (current set
435a34c753fSRafael Auler   // of BBs in the layout)
436a34c753fSRafael Auler   uint64_t BestSet = 1;
437a34c753fSRafael Auler   uint64_t BestLast = 0;
438a34c753fSRafael Auler   int64_t BestWeight = 0;
439a34c753fSRafael Auler   for (uint64_t Set = 1; Set < (1ULL << N); ++Set) {
440a34c753fSRafael Auler     // Traverse each possibility of Last BB visited in this layout
441a34c753fSRafael Auler     for (uint64_t Last = 0; Last < N; ++Last) {
442a34c753fSRafael Auler       // Case 1: There is no possible layout with this BB as Last
443a34c753fSRafael Auler       if (DP[Set][Last] == -1)
444a34c753fSRafael Auler         continue;
445a34c753fSRafael Auler 
446a34c753fSRafael Auler       // Case 2: There is a layout with this Set and this Last, and we try
447a34c753fSRafael Auler       // to expand this set with New
448a34c753fSRafael Auler       for (uint64_t New = 1; New < N; ++New) {
449a34c753fSRafael Auler         // Case 2a: BB "New" is already in this Set
450a34c753fSRafael Auler         if ((Set & (1ULL << New)) != 0)
451a34c753fSRafael Auler           continue;
452a34c753fSRafael Auler 
453a34c753fSRafael Auler         // Case 2b: BB "New" is not in this set and we add it to this Set and
454a34c753fSRafael Auler         // record total weight of this layout with "New" as the last BB.
455a34c753fSRafael Auler         uint64_t NewSet = (Set | (1ULL << New));
456a34c753fSRafael Auler         if (DP[NewSet][New] == -1)
457a34c753fSRafael Auler           DP[NewSet][New] = DP[Set][Last] + (int64_t)Weight[Last][New];
458a34c753fSRafael Auler         DP[NewSet][New] = std::max(DP[NewSet][New],
459a34c753fSRafael Auler                                    DP[Set][Last] + (int64_t)Weight[Last][New]);
460a34c753fSRafael Auler 
461a34c753fSRafael Auler         if (DP[NewSet][New] > BestWeight) {
462a34c753fSRafael Auler           BestWeight = DP[NewSet][New];
463a34c753fSRafael Auler           BestSet = NewSet;
464a34c753fSRafael Auler           BestLast = New;
465a34c753fSRafael Auler         }
466a34c753fSRafael Auler       }
467a34c753fSRafael Auler     }
468a34c753fSRafael Auler   }
469a34c753fSRafael Auler 
470a34c753fSRafael Auler   // Define final function layout based on layout that maximizes weight
471a34c753fSRafael Auler   uint64_t Last = BestLast;
472a34c753fSRafael Auler   uint64_t Set = BestSet;
47318bc405aSAmir Ayupov   BitVector Visited;
474a34c753fSRafael Auler   Visited.resize(N);
475a34c753fSRafael Auler   Visited[Last] = true;
476a34c753fSRafael Auler   Order.push_back(IndexToBB[Last]);
477a34c753fSRafael Auler   Set = Set & ~(1ULL << Last);
478a34c753fSRafael Auler   while (Set != 0) {
479a34c753fSRafael Auler     int64_t Best = -1;
480a34c753fSRafael Auler     uint64_t NewLast;
481a34c753fSRafael Auler     for (uint64_t I = 0; I < N; ++I) {
482a34c753fSRafael Auler       if (DP[Set][I] == -1)
483a34c753fSRafael Auler         continue;
484a34c753fSRafael Auler       int64_t AdjWeight = Weight[I][Last] > 0 ? Weight[I][Last] : 0;
485a34c753fSRafael Auler       if (DP[Set][I] + AdjWeight > Best) {
486a34c753fSRafael Auler         NewLast = I;
487a34c753fSRafael Auler         Best = DP[Set][I] + AdjWeight;
488a34c753fSRafael Auler       }
489a34c753fSRafael Auler     }
490a34c753fSRafael Auler     Last = NewLast;
491a34c753fSRafael Auler     Visited[Last] = true;
492a34c753fSRafael Auler     Order.push_back(IndexToBB[Last]);
493a34c753fSRafael Auler     Set = Set & ~(1ULL << Last);
494a34c753fSRafael Auler   }
495a34c753fSRafael Auler   std::reverse(Order.begin(), Order.end());
496a34c753fSRafael Auler 
497a34c753fSRafael Auler   // Finalize layout with BBs that weren't assigned to the layout using the
498a34c753fSRafael Auler   // input layout.
4998477bc67SFabian Parzefall   for (BinaryBasicBlock *BB : BF.getLayout().blocks())
500a34c753fSRafael Auler     if (Visited[BB->getLayoutIndex()] == false)
501a34c753fSRafael Auler       Order.push_back(BB);
502a34c753fSRafael Auler }
503a34c753fSRafael Auler 
reorderBasicBlocks(BinaryFunction & BF,BasicBlockOrder & Order) const504539b6c68Sspupyrev void ExtTSPReorderAlgorithm::reorderBasicBlocks(BinaryFunction &BF,
505539b6c68Sspupyrev                                                 BasicBlockOrder &Order) const {
506539b6c68Sspupyrev   if (BF.getLayout().block_empty())
507539b6c68Sspupyrev     return;
508539b6c68Sspupyrev 
509539b6c68Sspupyrev   // Do not change layout of functions w/o profile information
510539b6c68Sspupyrev   if (!BF.hasValidProfile() || BF.getLayout().block_size() <= 2) {
511539b6c68Sspupyrev     for (BinaryBasicBlock *BB : BF.getLayout().blocks())
512539b6c68Sspupyrev       Order.push_back(BB);
513539b6c68Sspupyrev     return;
514539b6c68Sspupyrev   }
515539b6c68Sspupyrev 
516539b6c68Sspupyrev   // Create a separate MCCodeEmitter to allow lock-free execution
517539b6c68Sspupyrev   BinaryContext::IndependentCodeEmitter Emitter;
518539b6c68Sspupyrev   if (!opts::NoThreads)
519539b6c68Sspupyrev     Emitter = BF.getBinaryContext().createIndependentMCCodeEmitter();
520539b6c68Sspupyrev 
521539b6c68Sspupyrev   // Initialize CFG nodes and their data
522539b6c68Sspupyrev   std::vector<uint64_t> BlockSizes;
523539b6c68Sspupyrev   std::vector<uint64_t> BlockCounts;
524539b6c68Sspupyrev   BasicBlockOrder OrigOrder;
525539b6c68Sspupyrev   BF.getLayout().updateLayoutIndices();
526539b6c68Sspupyrev   for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
527539b6c68Sspupyrev     uint64_t Size = std::max<uint64_t>(BB->estimateSize(Emitter.MCE.get()), 1);
528539b6c68Sspupyrev     BlockSizes.push_back(Size);
529539b6c68Sspupyrev     BlockCounts.push_back(BB->getKnownExecutionCount());
530539b6c68Sspupyrev     OrigOrder.push_back(BB);
531539b6c68Sspupyrev   }
532539b6c68Sspupyrev 
533539b6c68Sspupyrev   // Initialize CFG edges
5346b8d04c2SFangrui Song   std::vector<codelayout::EdgeCount> JumpCounts;
535539b6c68Sspupyrev   for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
536539b6c68Sspupyrev     auto BI = BB->branch_info_begin();
537539b6c68Sspupyrev     for (BinaryBasicBlock *SuccBB : BB->successors()) {
538539b6c68Sspupyrev       assert(BI->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
539539b6c68Sspupyrev              "missing profile for a jump");
5406b8d04c2SFangrui Song       JumpCounts.push_back(
5416b8d04c2SFangrui Song           {BB->getLayoutIndex(), SuccBB->getLayoutIndex(), BI->Count});
542539b6c68Sspupyrev       ++BI;
543539b6c68Sspupyrev     }
544539b6c68Sspupyrev   }
545539b6c68Sspupyrev 
546539b6c68Sspupyrev   // Run the layout algorithm
5476b8d04c2SFangrui Song   auto Result =
5486b8d04c2SFangrui Song       codelayout::computeExtTspLayout(BlockSizes, BlockCounts, JumpCounts);
549539b6c68Sspupyrev   Order.reserve(BF.getLayout().block_size());
550539b6c68Sspupyrev   for (uint64_t R : Result)
551539b6c68Sspupyrev     Order.push_back(OrigOrder[R]);
552539b6c68Sspupyrev }
553539b6c68Sspupyrev 
reorderBasicBlocks(BinaryFunction & BF,BasicBlockOrder & Order) const554a34c753fSRafael Auler void OptimizeReorderAlgorithm::reorderBasicBlocks(
555d5c03defSFabian Parzefall     BinaryFunction &BF, BasicBlockOrder &Order) const {
5568477bc67SFabian Parzefall   if (BF.getLayout().block_empty())
557a34c753fSRafael Auler     return;
558a34c753fSRafael Auler 
559a34c753fSRafael Auler   // Cluster basic blocks.
560a34c753fSRafael Auler   CAlgo->clusterBasicBlocks(BF);
561a34c753fSRafael Auler 
562a34c753fSRafael Auler   if (opts::PrintClusters)
563a34c753fSRafael Auler     CAlgo->printClusters();
564a34c753fSRafael Auler 
565a34c753fSRafael Auler   // Arrange basic blocks according to clusters.
566a34c753fSRafael Auler   for (ClusterAlgorithm::ClusterTy &Cluster : CAlgo->Clusters)
567a34c753fSRafael Auler     Order.insert(Order.end(), Cluster.begin(), Cluster.end());
568a34c753fSRafael Auler }
569a34c753fSRafael Auler 
/// Lay out clusters using a frequency-aware topological sort of the cluster
/// graph: a DFS explores heavier edges first, unreachable clusters are
/// appended at the end, and clusters without an ancestor relationship are
/// then stably reordered by decreasing average frequency.
void OptimizeBranchReorderAlgorithm::reorderBasicBlocks(
    BinaryFunction &BF, BasicBlockOrder &Order) const {
  if (BF.getLayout().block_empty())
    return;

  // Cluster basic blocks.
  CAlgo->clusterBasicBlocks(BF, /* ComputeEdges = */ true);
  std::vector<ClusterAlgorithm::ClusterTy> &Clusters = CAlgo->Clusters;
  // ClusterEdges[I][J] holds the aggregate branch count from cluster I to
  // cluster J (computed because ComputeEdges was requested above).
  std::vector<std::unordered_map<uint32_t, uint64_t>> &ClusterEdges =
      CAlgo->ClusterEdges;

  // Compute clusters' average frequencies.
  CAlgo->computeClusterAverageFrequency(BF.getBinaryContext());
  std::vector<double> &AvgFreq = CAlgo->AvgFreq;

  if (opts::PrintClusters)
    CAlgo->printClusters();

  // Cluster layout order
  std::vector<uint32_t> ClusterOrder;

  // Do a topological sort for clusters, prioritizing frequently-executed BBs
  // during the traversal.
  std::stack<uint32_t> Stack;
  std::vector<uint32_t> Status;
  std::vector<uint32_t> Parent;
  Status.resize(Clusters.size(), 0);
  Parent.resize(Clusters.size(), 0);
  constexpr uint32_t STACKED = 1; // cluster has been pushed onto the stack
  constexpr uint32_t VISITED = 2; // cluster's successors have been expanded
  // Start the DFS from cluster 0, which contains the function entry point.
  Status[0] = STACKED;
  Stack.push(0);
  while (!Stack.empty()) {
    uint32_t I = Stack.top();
    if (!(Status[I] & VISITED)) {
      // First encounter: mark visited and expand successors, but keep I on
      // the stack so it is emitted only after its descendants (post-order).
      Status[I] |= VISITED;
      // Order successors by weight
      auto ClusterComp = [&ClusterEdges, I](uint32_t A, uint32_t B) {
        return ClusterEdges[I][A] > ClusterEdges[I][B];
      };
      std::priority_queue<uint32_t, std::vector<uint32_t>,
                          decltype(ClusterComp)>
          SuccQueue(ClusterComp);
      for (std::pair<const uint32_t, uint64_t> &Target : ClusterEdges[I]) {
        // Only follow edges with positive weight into non-empty clusters
        // that have not been queued already.
        if (Target.second > 0 && !(Status[Target.first] & STACKED) &&
            !Clusters[Target.first].empty()) {
          Parent[Target.first] = I;
          Status[Target.first] = STACKED;
          SuccQueue.push(Target.first);
        }
      }
      // The queue pops lighter successors first, so the heaviest-edge
      // successor is pushed onto the stack last and hence explored first.
      while (!SuccQueue.empty()) {
        Stack.push(SuccQueue.top());
        SuccQueue.pop();
      }
      continue;
    }
    // Already visited this node
    Stack.pop();
    ClusterOrder.push_back(I);
  }
  // The loop accumulated a post-order; reverse it to get a topological order.
  std::reverse(ClusterOrder.begin(), ClusterOrder.end());
  // Put unreachable clusters at the end
  for (uint32_t I = 0, E = Clusters.size(); I < E; ++I)
    if (!(Status[I] & VISITED) && !Clusters[I].empty())
      ClusterOrder.push_back(I);

  // Sort nodes with equal precedence
  auto Beg = ClusterOrder.begin();
  // Don't reorder the first cluster, which contains the function entry point
  ++Beg;
  // Stable sort: a cluster stays after clusters found on its parent chain in
  // the traversal tree; otherwise the hotter (higher average frequency)
  // cluster comes first.
  std::stable_sort(Beg, ClusterOrder.end(),
                   [&AvgFreq, &Parent](uint32_t A, uint32_t B) {
                     // If B appears on A's ancestor chain, keep A after B.
                     uint32_t P = Parent[A];
                     while (Parent[P] != 0) {
                       if (Parent[P] == B)
                         return false;
                       P = Parent[P];
                     }
                     // Symmetrically, keep A before B when A appears on B's
                     // ancestor chain.
                     P = Parent[B];
                     while (Parent[P] != 0) {
                       if (Parent[P] == A)
                         return true;
                       P = Parent[P];
                     }
                     // Unrelated clusters: order by decreasing frequency.
                     return AvgFreq[A] > AvgFreq[B];
                   });

  if (opts::PrintClusters) {
    errs() << "New cluster order: ";
    const char *Sep = "";
    for (uint32_t O : ClusterOrder) {
      errs() << Sep << O;
      Sep = ", ";
    }
    errs() << '\n';
  }

  // Arrange basic blocks according to cluster order.
  for (uint32_t ClusterIndex : ClusterOrder) {
    ClusterAlgorithm::ClusterTy &Cluster = Clusters[ClusterIndex];
    Order.insert(Order.end(), Cluster.begin(), Cluster.end());
  }
}
674a34c753fSRafael Auler 
reorderBasicBlocks(BinaryFunction & BF,BasicBlockOrder & Order) const675a34c753fSRafael Auler void OptimizeCacheReorderAlgorithm::reorderBasicBlocks(
676d5c03defSFabian Parzefall     BinaryFunction &BF, BasicBlockOrder &Order) const {
6778477bc67SFabian Parzefall   if (BF.getLayout().block_empty())
678a34c753fSRafael Auler     return;
679a34c753fSRafael Auler 
680a34c753fSRafael Auler   const uint64_t ColdThreshold =
6818477bc67SFabian Parzefall       opts::ColdThreshold *
6828477bc67SFabian Parzefall       (*BF.getLayout().block_begin())->getExecutionCount() / 1000;
683a34c753fSRafael Auler 
684a34c753fSRafael Auler   // Cluster basic blocks.
685a34c753fSRafael Auler   CAlgo->clusterBasicBlocks(BF);
686a34c753fSRafael Auler   std::vector<ClusterAlgorithm::ClusterTy> &Clusters = CAlgo->Clusters;
687a34c753fSRafael Auler 
688a34c753fSRafael Auler   // Compute clusters' average frequencies.
689a34c753fSRafael Auler   CAlgo->computeClusterAverageFrequency(BF.getBinaryContext());
690a34c753fSRafael Auler   std::vector<double> &AvgFreq = CAlgo->AvgFreq;
691a34c753fSRafael Auler 
692a34c753fSRafael Auler   if (opts::PrintClusters)
693a34c753fSRafael Auler     CAlgo->printClusters();
694a34c753fSRafael Auler 
695a34c753fSRafael Auler   // Cluster layout order
696a34c753fSRafael Auler   std::vector<uint32_t> ClusterOrder;
697a34c753fSRafael Auler 
698a34c753fSRafael Auler   // Order clusters based on average instruction execution frequency
699a34c753fSRafael Auler   for (uint32_t I = 0, E = Clusters.size(); I < E; ++I)
700a34c753fSRafael Auler     if (!Clusters[I].empty())
701a34c753fSRafael Auler       ClusterOrder.push_back(I);
702a34c753fSRafael Auler   // Don't reorder the first cluster, which contains the function entry point
70340c2e0faSMaksim Panchenko   std::stable_sort(
70440c2e0faSMaksim Panchenko       std::next(ClusterOrder.begin()), ClusterOrder.end(),
70540c2e0faSMaksim Panchenko       [&AvgFreq](uint32_t A, uint32_t B) { return AvgFreq[A] > AvgFreq[B]; });
706a34c753fSRafael Auler 
707a34c753fSRafael Auler   if (opts::PrintClusters) {
708a34c753fSRafael Auler     errs() << "New cluster order: ";
709a34c753fSRafael Auler     const char *Sep = "";
710a34c753fSRafael Auler     for (uint32_t O : ClusterOrder) {
711a34c753fSRafael Auler       errs() << Sep << O;
712a34c753fSRafael Auler       Sep = ", ";
713a34c753fSRafael Auler     }
714a34c753fSRafael Auler     errs() << '\n';
715a34c753fSRafael Auler   }
716a34c753fSRafael Auler 
717a34c753fSRafael Auler   // Arrange basic blocks according to cluster order.
718a34c753fSRafael Auler   for (uint32_t ClusterIndex : ClusterOrder) {
719a34c753fSRafael Auler     ClusterAlgorithm::ClusterTy &Cluster = Clusters[ClusterIndex];
720a34c753fSRafael Auler     Order.insert(Order.end(), Cluster.begin(), Cluster.end());
721a34c753fSRafael Auler     // Force zero execution count on clusters that do not meet the cut off
722a34c753fSRafael Auler     // specified by --cold-threshold.
723f92ab6afSAmir Ayupov     if (AvgFreq[ClusterIndex] < static_cast<double>(ColdThreshold))
724f92ab6afSAmir Ayupov       for (BinaryBasicBlock *BBPtr : Cluster)
725a34c753fSRafael Auler         BBPtr->setExecutionCount(0);
726a34c753fSRafael Auler   }
727a34c753fSRafael Auler }
728a34c753fSRafael Auler 
reorderBasicBlocks(BinaryFunction & BF,BasicBlockOrder & Order) const729d5c03defSFabian Parzefall void ReverseReorderAlgorithm::reorderBasicBlocks(BinaryFunction &BF,
73040c2e0faSMaksim Panchenko                                                  BasicBlockOrder &Order) const {
7318477bc67SFabian Parzefall   if (BF.getLayout().block_empty())
732a34c753fSRafael Auler     return;
733a34c753fSRafael Auler 
7348477bc67SFabian Parzefall   BinaryBasicBlock *FirstBB = *BF.getLayout().block_begin();
735a34c753fSRafael Auler   Order.push_back(FirstBB);
7368477bc67SFabian Parzefall   for (auto RLI = BF.getLayout().block_rbegin(); *RLI != FirstBB; ++RLI)
737a34c753fSRafael Auler     Order.push_back(*RLI);
738a34c753fSRafael Auler }
739a34c753fSRafael Auler 
reorderBasicBlocks(BinaryFunction & BF,BasicBlockOrder & Order) const740a34c753fSRafael Auler void RandomClusterReorderAlgorithm::reorderBasicBlocks(
741d5c03defSFabian Parzefall     BinaryFunction &BF, BasicBlockOrder &Order) const {
7428477bc67SFabian Parzefall   if (BF.getLayout().block_empty())
743a34c753fSRafael Auler     return;
744a34c753fSRafael Auler 
745a34c753fSRafael Auler   // Cluster basic blocks.
746a34c753fSRafael Auler   CAlgo->clusterBasicBlocks(BF);
747a34c753fSRafael Auler   std::vector<ClusterAlgorithm::ClusterTy> &Clusters = CAlgo->Clusters;
748a34c753fSRafael Auler 
749a34c753fSRafael Auler   if (opts::PrintClusters)
750a34c753fSRafael Auler     CAlgo->printClusters();
751a34c753fSRafael Auler 
752a34c753fSRafael Auler   // Cluster layout order
753a34c753fSRafael Auler   std::vector<uint32_t> ClusterOrder;
754a34c753fSRafael Auler 
755a34c753fSRafael Auler   // Order clusters based on average instruction execution frequency
756a34c753fSRafael Auler   for (uint32_t I = 0, E = Clusters.size(); I < E; ++I)
757a34c753fSRafael Auler     if (!Clusters[I].empty())
758a34c753fSRafael Auler       ClusterOrder.push_back(I);
759a34c753fSRafael Auler 
760a34c753fSRafael Auler   std::shuffle(std::next(ClusterOrder.begin()), ClusterOrder.end(),
761a34c753fSRafael Auler                std::default_random_engine(opts::RandomSeed.getValue()));
762a34c753fSRafael Auler 
763a34c753fSRafael Auler   if (opts::PrintClusters) {
764a34c753fSRafael Auler     errs() << "New cluster order: ";
765a34c753fSRafael Auler     const char *Sep = "";
766a34c753fSRafael Auler     for (uint32_t O : ClusterOrder) {
767a34c753fSRafael Auler       errs() << Sep << O;
768a34c753fSRafael Auler       Sep = ", ";
769a34c753fSRafael Auler     }
770a34c753fSRafael Auler     errs() << '\n';
771a34c753fSRafael Auler   }
772a34c753fSRafael Auler 
773a34c753fSRafael Auler   // Arrange basic blocks according to cluster order.
774a34c753fSRafael Auler   for (uint32_t ClusterIndex : ClusterOrder) {
775a34c753fSRafael Auler     ClusterAlgorithm::ClusterTy &Cluster = Clusters[ClusterIndex];
776a34c753fSRafael Auler     Order.insert(Order.end(), Cluster.begin(), Cluster.end());
777a34c753fSRafael Auler   }
778a34c753fSRafael Auler }
779