//===- bolt/Passes/ReorderAlgorithm.cpp - Basic block reordering ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements classes used by several basic block reordering
// algorithms.
//
//===----------------------------------------------------------------------===//

#include "bolt/Passes/ReorderAlgorithm.h"
#include "bolt/Core/BinaryBasicBlock.h"
#include "bolt/Core/BinaryFunction.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Utils/CodeLayout.h"
#include <queue>
#include <random>
#include <stack>

#undef DEBUG_TYPE
#define DEBUG_TYPE "bolt"

using namespace llvm;
using namespace bolt;

namespace opts {

extern cl::OptionCategory BoltOptCategory;
extern cl::opt<bool> NoThreads;
static cl::opt<unsigned> ColdThreshold(
    "cold-threshold",
    cl::desc("tenths of a percent of the main entry frequency to use as a "
             "threshold when evaluating whether a basic block is cold "
             "(0 means it is only considered cold if the block has zero "
             "samples). Default: 0 "),
    cl::init(0), cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory));

static cl::opt<bool> PrintClusters("print-clusters", cl::desc("print clusters"),
                                   cl::Hidden, cl::cat(BoltOptCategory));

cl::opt<uint32_t> RandomSeed("bolt-seed", cl::desc("seed for randomization"),
                             cl::init(42), cl::Hidden,
                             cl::cat(BoltOptCategory));

} // namespace opts

namespace {

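// Combine the hash of Val into Seed, in the style of boost::hash_combine.
// The constant 0x9e3779b9 is the golden ratio scaled to 32 bits; together
// with the shifts it spreads the incoming entropy across the seed bits.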
template <class T> inline void hashCombine(size_t &Seed, const T &Val) {
  std::hash<T> Hasher;
  Seed ^= Hasher(Val) + 0x9e3779b9 + (Seed << 6) + (Seed >> 2);
}

template <typename A, typename B> struct HashPair {
  size_t operator()(const std::pair<A, B> &Val) const {
    std::hash<A> Hasher;
    size_t Seed = Hasher(Val.first);
    hashCombine(Seed, Val.second);
    return Seed;
  }
};

} // namespace

void ClusterAlgorithm::computeClusterAverageFrequency(const BinaryContext &BC) {
  // Create a separate MCCodeEmitter to allow lock-free execution
  BinaryContext::IndependentCodeEmitter Emitter;
  if (!opts::NoThreads)
    Emitter = BC.createIndependentMCCodeEmitter();

  AvgFreq.resize(Clusters.size(), 0.0);
  for (uint32_t I = 0, E = Clusters.size(); I < E; ++I) {
    double Freq = 0.0;
    uint64_t ClusterSize = 0;
    for (const BinaryBasicBlock *BB : Clusters[I]) {
      if (BB->getNumNonPseudos() > 0) {
        Freq += BB->getExecutionCount();
        // Estimate the size of a block in bytes at run time
        // NOTE: This might be inaccurate
        ClusterSize += BB->estimateSize(Emitter.MCE.get());
      }
    }
    AvgFreq[I] = ClusterSize == 0 ? 0 : Freq / ClusterSize;
  }
}

void ClusterAlgorithm::printClusters() const {
  for (uint32_t I = 0, E = Clusters.size(); I < E; ++I) {
    errs() << "Cluster number " << I;
    if (AvgFreq.size() == Clusters.size())
      errs() << " (frequency: " << AvgFreq[I] << ")";
    errs() << " : ";
    const char *Sep = "";
    for (const BinaryBasicBlock *BB : Clusters[I]) {
      errs() << Sep << BB->getName();
      Sep = ", ";
    }
    errs() << "\n";
  }
}

void ClusterAlgorithm::reset() {
  Clusters.clear();
  ClusterEdges.clear();
  AvgFreq.clear();
}

void GreedyClusterAlgorithm::EdgeTy::print(raw_ostream &OS) const {
  OS << Src->getName() << " -> " << Dst->getName() << ", count: " << Count;
}

size_t GreedyClusterAlgorithm::EdgeHash::operator()(const EdgeTy &E) const {
  HashPair<const BinaryBasicBlock *, const BinaryBasicBlock *> Hasher;
  return Hasher(std::make_pair(E.Src, E.Dst));
}

bool GreedyClusterAlgorithm::EdgeEqual::operator()(const EdgeTy &A,
                                                   const EdgeTy &B) const {
  return A.Src == B.Src && A.Dst == B.Dst;
}

void GreedyClusterAlgorithm::clusterBasicBlocks(BinaryFunction &BF,
                                                bool ComputeEdges) {
  reset();

  // Greedy heuristic implementation for the TSP, applied to BB layout. Try to
  // maximize weight during a path traversing all BBs. In this way, we will
  // convert the hottest branches into fall-throughs.
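  // For example, in a hypothetical CFG with branch counts A->B: 10, A->C: 1
  // and B->C: 5, the hottest edge A->B is processed first and merges A and B
  // into one cluster; the B->C edge then appends C, yielding the layout
  // A, B, C with both hot branches turned into fall-throughs.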

  // This is the queue of edges from which we will pop edges and use them to
  // cluster basic blocks in a greedy fashion.
  std::vector<EdgeTy> Queue;

  // Initialize inter-cluster weights.
  if (ComputeEdges)
    ClusterEdges.resize(BF.getLayout().block_size());

  // Initialize clusters and edge queue.
  for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
    // Create a cluster for this BB.
    uint32_t I = Clusters.size();
    Clusters.emplace_back();
    std::vector<BinaryBasicBlock *> &Cluster = Clusters.back();
    Cluster.push_back(BB);
    BBToClusterMap[BB] = I;
    // Populate priority queue with edges.
    auto BI = BB->branch_info_begin();
    for (const BinaryBasicBlock *I : BB->successors()) {
      assert(BI->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
             "attempted reordering blocks of function with no profile data");
      Queue.emplace_back(EdgeTy(BB, I, BI->Count));
      ++BI;
    }
  }
  // Sort and adjust the edge queue.
  initQueue(Queue, BF);

  // Grow clusters in a greedy fashion.
  while (!Queue.empty()) {
    EdgeTy E = Queue.back();
    Queue.pop_back();

    const BinaryBasicBlock *SrcBB = E.Src;
    const BinaryBasicBlock *DstBB = E.Dst;

    LLVM_DEBUG(dbgs() << "Popped edge "; E.print(dbgs()); dbgs() << "\n");

    // Case 1: SrcBB and DstBB are the same, or DstBB is the entry block.
    // Ignore this edge.
    if (SrcBB == DstBB || DstBB == *BF.getLayout().block_begin()) {
      LLVM_DEBUG(dbgs() << "\tIgnored (same src, dst)\n");
      continue;
    }

    int I = BBToClusterMap[SrcBB];
    int J = BBToClusterMap[DstBB];

    // Case 2: If they are already allocated in the same cluster, just increase
    // the weight of this cluster
    if (I == J) {
      if (ComputeEdges)
        ClusterEdges[I][I] += E.Count;
      LLVM_DEBUG(dbgs() << "\tIgnored (src, dst belong to the same cluster)\n");
      continue;
    }

    std::vector<BinaryBasicBlock *> &ClusterA = Clusters[I];
    std::vector<BinaryBasicBlock *> &ClusterB = Clusters[J];
    if (areClustersCompatible(ClusterA, ClusterB, E)) {
      // Case 3: SrcBB is at the end of a cluster and DstBB is at the start,
      // allowing us to merge two clusters.
      for (const BinaryBasicBlock *BB : ClusterB)
        BBToClusterMap[BB] = I;
      ClusterA.insert(ClusterA.end(), ClusterB.begin(), ClusterB.end());
      ClusterB.clear();
      if (ComputeEdges) {
        // Increase the intra-cluster edge count of cluster A with the count of
        // this edge as well as with the total count of previously visited
        // edges from cluster B to cluster A.
        ClusterEdges[I][I] += E.Count;
        ClusterEdges[I][I] += ClusterEdges[J][I];
        // Iterate through all inter-cluster edges and transfer edges targeting
        // cluster B to cluster A.
        for (uint32_t K = 0, E = ClusterEdges.size(); K != E; ++K)
          ClusterEdges[K][I] += ClusterEdges[K][J];
      }
      // Adjust the weights of the remaining edges and re-sort the queue.
      adjustQueue(Queue, BF);
      LLVM_DEBUG(dbgs() << "\tMerged clusters of src, dst\n");
    } else {
      // Case 4: Both SrcBB and DstBB are allocated in positions where we
      // cannot merge them. Add the count of this edge to the inter-cluster
      // edge count between clusters A and B to help us decide the ordering
      // between these clusters.
      if (ComputeEdges)
        ClusterEdges[I][J] += E.Count;
      LLVM_DEBUG(
          dbgs() << "\tIgnored (src, dst belong to incompatible clusters)\n");
    }
  }
}

void GreedyClusterAlgorithm::reset() {
  ClusterAlgorithm::reset();
  BBToClusterMap.clear();
}

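// PHGreedyClusterAlgorithm implements the Pettis-Hansen style greedy: edges
// are sorted once by profile count and never re-weighted.
// MinBranchGreedyClusterAlgorithm further below instead re-weights edges as
// clusters merge, aiming to minimize the number of taken branches.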
void PHGreedyClusterAlgorithm::initQueue(std::vector<EdgeTy> &Queue,
                                         const BinaryFunction &BF) {
  // Define a comparison function to establish a strict weak ordering (SWO)
  // between edges.
  auto Comp = [&BF](const EdgeTy &A, const EdgeTy &B) {
    // With equal weights, prioritize branches with lower index
    // source/destination. This helps to keep original block order for blocks
    // when optimal order cannot be deduced from a profile.
    if (A.Count == B.Count) {
      const signed SrcOrder = BF.getOriginalLayoutRelativeOrder(A.Src, B.Src);
      return (SrcOrder != 0)
                 ? SrcOrder > 0
                 : BF.getOriginalLayoutRelativeOrder(A.Dst, B.Dst) > 0;
    }
    return A.Count < B.Count;
  };

  // Sort edges in increasing profile count order.
  llvm::sort(Queue, Comp);
}

void PHGreedyClusterAlgorithm::adjustQueue(std::vector<EdgeTy> &Queue,
                                           const BinaryFunction &BF) {
  // Nothing to do.
}

bool PHGreedyClusterAlgorithm::areClustersCompatible(const ClusterTy &Front,
                                                     const ClusterTy &Back,
                                                     const EdgeTy &E) const {
  return Front.back() == E.Src && Back.front() == E.Dst;
}

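// The weight of an edge E = (Src, Dst) is its profile count minus the counts
// of all competing edges: the other edges leaving Src and the other edges
// entering Dst. Turning E into a fall-through forgoes the opportunity to make
// any competing edge a fall-through, so E is only worth taking when its count
// outweighs the alternatives. E.g., if Src->Dst has count 100 and a competing
// Src->Other has count 90, the weight of Src->Dst is 10 (minus any counts of
// edges entering Dst from elsewhere).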
int64_t MinBranchGreedyClusterAlgorithm::calculateWeight(
    const EdgeTy &E, const BinaryFunction &BF) const {
  const BinaryBasicBlock *SrcBB = E.Src;
  const BinaryBasicBlock *DstBB = E.Dst;

  // Initial weight value.
  int64_t W = (int64_t)E.Count;

  // Adjust the weight by taking into account other edges with the same source.
  auto BI = SrcBB->branch_info_begin();
  for (const BinaryBasicBlock *SuccBB : SrcBB->successors()) {
    assert(BI->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
           "attempted reordering blocks of function with no profile data");
    assert(BI->Count <= std::numeric_limits<int64_t>::max() &&
           "overflow detected");
    // Ignore edges with same source and destination, edges that target the
    // entry block as well as the edge E itself.
    if (SuccBB != SrcBB && SuccBB != *BF.getLayout().block_begin() &&
        SuccBB != DstBB)
      W -= (int64_t)BI->Count;
    ++BI;
  }

  // Adjust the weight by taking into account other edges with the same
  // destination.
  for (const BinaryBasicBlock *PredBB : DstBB->predecessors()) {
    // Ignore edges with same source and destination as well as the edge E
    // itself.
    if (PredBB == DstBB || PredBB == SrcBB)
      continue;
    auto BI = PredBB->branch_info_begin();
    for (const BinaryBasicBlock *SuccBB : PredBB->successors()) {
      if (SuccBB == DstBB)
        break;
      ++BI;
    }
    assert(BI != PredBB->branch_info_end() && "invalid control flow graph");
    assert(BI->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
           "attempted reordering blocks of function with no profile data");
    assert(BI->Count <= std::numeric_limits<int64_t>::max() &&
           "overflow detected");
    W -= (int64_t)BI->Count;
  }

  return W;
}

void MinBranchGreedyClusterAlgorithm::initQueue(std::vector<EdgeTy> &Queue,
                                                const BinaryFunction &BF) {
  // Initialize edge weights.
  for (const EdgeTy &E : Queue)
    Weight.emplace(std::make_pair(E, calculateWeight(E, BF)));

  // Sort edges in increasing weight order.
  adjustQueue(Queue, BF);
}

void MinBranchGreedyClusterAlgorithm::adjustQueue(std::vector<EdgeTy> &Queue,
                                                  const BinaryFunction &BF) {
  // Define a comparison function to establish a strict weak ordering (SWO)
  // between edges.
  auto Comp = [&](const EdgeTy &A, const EdgeTy &B) {
    // With equal weights, prioritize branches with lower index
    // source/destination. This helps to keep original block order for blocks
    // when optimal order cannot be deduced from a profile.
    if (Weight[A] == Weight[B]) {
      const signed SrcOrder = BF.getOriginalLayoutRelativeOrder(A.Src, B.Src);
      return (SrcOrder != 0)
                 ? SrcOrder > 0
                 : BF.getOriginalLayoutRelativeOrder(A.Dst, B.Dst) > 0;
    }
    return Weight[A] < Weight[B];
  };

  // Iterate through all remaining edges to find edges that have their
  // source and destination in the same cluster.
  std::vector<EdgeTy> NewQueue;
  for (const EdgeTy &E : Queue) {
    const BinaryBasicBlock *SrcBB = E.Src;
    const BinaryBasicBlock *DstBB = E.Dst;

    // Case 1: SrcBB and DstBB are the same or DstBB is the entry block. Ignore
    // this edge.
    if (SrcBB == DstBB || DstBB == *BF.getLayout().block_begin()) {
      LLVM_DEBUG(dbgs() << "\tAdjustment: Ignored edge "; E.print(dbgs());
                 dbgs() << " (same src, dst)\n");
      continue;
    }

    int I = BBToClusterMap[SrcBB];
    int J = BBToClusterMap[DstBB];
    std::vector<BinaryBasicBlock *> &ClusterA = Clusters[I];
    std::vector<BinaryBasicBlock *> &ClusterB = Clusters[J];

    // Case 2: They are already allocated in the same cluster or in
    // incompatible clusters. Adjust the weights of edges with the same source
    // or destination, so that this edge no longer affects them, and ignore
    // this edge. Also increase the intra- (or inter-) cluster edge count.
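    // (Discarding E here restores the counts that calculateWeight subtracted
    // from the competing edges, since E can no longer become a fall-through
    // at their expense.)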
    if (I == J || !areClustersCompatible(ClusterA, ClusterB, E)) {
      if (!ClusterEdges.empty())
        ClusterEdges[I][J] += E.Count;
      LLVM_DEBUG(dbgs() << "\tAdjustment: Ignored edge "; E.print(dbgs());
                 dbgs() << " (src, dst belong to same cluster or incompatible "
                           "clusters)\n");
      for (const BinaryBasicBlock *SuccBB : SrcBB->successors()) {
        if (SuccBB == DstBB)
          continue;
        auto WI = Weight.find(EdgeTy(SrcBB, SuccBB, 0));
        assert(WI != Weight.end() && "CFG edge not found in Weight map");
        WI->second += (int64_t)E.Count;
      }
      for (const BinaryBasicBlock *PredBB : DstBB->predecessors()) {
        if (PredBB == SrcBB)
          continue;
        auto WI = Weight.find(EdgeTy(PredBB, DstBB, 0));
        assert(WI != Weight.end() && "CFG edge not found in Weight map");
        WI->second += (int64_t)E.Count;
      }
      continue;
    }

    // Case 3: None of the previous cases is true, so just keep this edge in
    // the queue.
    NewQueue.emplace_back(E);
  }

  // Sort remaining edges in increasing weight order.
  Queue.swap(NewQueue);
  llvm::sort(Queue, Comp);
}

bool MinBranchGreedyClusterAlgorithm::areClustersCompatible(
    const ClusterTy &Front, const ClusterTy &Back, const EdgeTy &E) const {
  return Front.back() == E.Src && Back.front() == E.Dst;
}

void MinBranchGreedyClusterAlgorithm::reset() {
  GreedyClusterAlgorithm::reset();
  Weight.clear();
}

void TSPReorderAlgorithm::reorderBasicBlocks(BinaryFunction &BF,
                                             BasicBlockOrder &Order) const {
  std::vector<std::vector<uint64_t>> Weight;
  std::vector<BinaryBasicBlock *> IndexToBB;

  const size_t N = BF.getLayout().block_size();
  assert(N <= std::numeric_limits<uint64_t>::digits &&
         "cannot use TSP solution for sizes larger than bits in uint64_t");

  // Populating weight map and index map
  for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
    BB->setLayoutIndex(IndexToBB.size());
    IndexToBB.push_back(BB);
  }
  Weight.resize(N);
  for (const BinaryBasicBlock *BB : BF.getLayout().blocks()) {
    auto BI = BB->branch_info_begin();
    Weight[BB->getLayoutIndex()].resize(N);
    for (BinaryBasicBlock *SuccBB : BB->successors()) {
      if (BI->Count != BinaryBasicBlock::COUNT_NO_PROFILE)
        Weight[BB->getLayoutIndex()][SuccBB->getLayoutIndex()] = BI->Count;
      ++BI;
    }
  }

  std::vector<std::vector<int64_t>> DP;
  DP.resize(static_cast<size_t>(1) << N);
  for (std::vector<int64_t> &Elmt : DP)
    Elmt.resize(N, -1);

  // Start with the entry basic block being allocated with cost zero
  DP[1][0] = 0;
  // Walk through TSP solutions using a bitmask to represent state (current set
  // of BBs in the layout)
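  // This is the Held-Karp dynamic program: DP[Set][Last] holds the best weight
  // of a path that visits exactly the blocks in Set and ends at Last. It takes
  // O(2^N * N^2) time and O(2^N * N) memory, so while the assert above only
  // guards the bitmask width, the algorithm is practical solely for small N.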
  uint64_t BestSet = 1;
  uint64_t BestLast = 0;
  int64_t BestWeight = 0;
  for (uint64_t Set = 1; Set < (1ULL << N); ++Set) {
    // Traverse each possibility of Last BB visited in this layout
    for (uint64_t Last = 0; Last < N; ++Last) {
      // Case 1: There is no possible layout with this BB as Last
      if (DP[Set][Last] == -1)
        continue;

      // Case 2: There is a layout with this Set and this Last, and we try
      // to expand this set with New
      for (uint64_t New = 1; New < N; ++New) {
        // Case 2a: BB "New" is already in this Set
        if ((Set & (1ULL << New)) != 0)
          continue;

        // Case 2b: BB "New" is not in this set and we add it to this Set and
        // record total weight of this layout with "New" as the last BB.
        uint64_t NewSet = (Set | (1ULL << New));
        if (DP[NewSet][New] == -1)
          DP[NewSet][New] = DP[Set][Last] + (int64_t)Weight[Last][New];
        DP[NewSet][New] = std::max(DP[NewSet][New],
                                   DP[Set][Last] + (int64_t)Weight[Last][New]);

        if (DP[NewSet][New] > BestWeight) {
          BestWeight = DP[NewSet][New];
          BestSet = NewSet;
          BestLast = New;
        }
      }
    }
  }

  // Define final function layout based on layout that maximizes weight
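  // Reconstruct the path backwards: starting from the best final block, keep
  // picking the block whose DP value plus the weight of the edge into the
  // current block is largest, removing each chosen block from the set.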
  uint64_t Last = BestLast;
  uint64_t Set = BestSet;
  BitVector Visited;
  Visited.resize(N);
  Visited[Last] = true;
  Order.push_back(IndexToBB[Last]);
  Set = Set & ~(1ULL << Last);
  while (Set != 0) {
    int64_t Best = -1;
    uint64_t NewLast = 0;
    for (uint64_t I = 0; I < N; ++I) {
      if (DP[Set][I] == -1)
        continue;
      int64_t AdjWeight = Weight[I][Last] > 0 ? Weight[I][Last] : 0;
      if (DP[Set][I] + AdjWeight > Best) {
        NewLast = I;
        Best = DP[Set][I] + AdjWeight;
      }
    }
    Last = NewLast;
    Visited[Last] = true;
    Order.push_back(IndexToBB[Last]);
    Set = Set & ~(1ULL << Last);
  }
  std::reverse(Order.begin(), Order.end());

  // Finalize layout with BBs that weren't assigned to the layout using the
  // input layout.
  for (BinaryBasicBlock *BB : BF.getLayout().blocks())
    if (!Visited[BB->getLayoutIndex()])
      Order.push_back(BB);
}

void ExtTSPReorderAlgorithm::reorderBasicBlocks(BinaryFunction &BF,
                                                BasicBlockOrder &Order) const {
  if (BF.getLayout().block_empty())
    return;

  // Do not change the layout of functions without profile information or with
  // very few basic blocks.
  if (!BF.hasValidProfile() || BF.getLayout().block_size() <= 2) {
    for (BinaryBasicBlock *BB : BF.getLayout().blocks())
      Order.push_back(BB);
    return;
  }

  // Create a separate MCCodeEmitter to allow lock-free execution
  BinaryContext::IndependentCodeEmitter Emitter;
  if (!opts::NoThreads)
    Emitter = BF.getBinaryContext().createIndependentMCCodeEmitter();

  // Initialize CFG nodes and their data
  std::vector<uint64_t> BlockSizes;
  std::vector<uint64_t> BlockCounts;
  BasicBlockOrder OrigOrder;
  BF.getLayout().updateLayoutIndices();
  for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
    uint64_t Size = std::max<uint64_t>(BB->estimateSize(Emitter.MCE.get()), 1);
    BlockSizes.push_back(Size);
    BlockCounts.push_back(BB->getKnownExecutionCount());
    OrigOrder.push_back(BB);
  }

  // Initialize CFG edges
  std::vector<codelayout::EdgeCount> JumpCounts;
  for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
    auto BI = BB->branch_info_begin();
    for (BinaryBasicBlock *SuccBB : BB->successors()) {
      assert(BI->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
             "missing profile for a jump");
      JumpCounts.push_back(
          {BB->getLayoutIndex(), SuccBB->getLayoutIndex(), BI->Count});
      ++BI;
    }
  }

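  // computeExtTspLayout maximizes the extended TSP score, which rewards direct
  // fall-through jumps and, to a lesser degree, short forward and backward
  // jumps between blocks placed close to each other in the final layout.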
  // Run the layout algorithm
  auto Result =
      codelayout::computeExtTspLayout(BlockSizes, BlockCounts, JumpCounts);
  Order.reserve(BF.getLayout().block_size());
  for (uint64_t R : Result)
    Order.push_back(OrigOrder[R]);
}

void OptimizeReorderAlgorithm::reorderBasicBlocks(
    BinaryFunction &BF, BasicBlockOrder &Order) const {
  if (BF.getLayout().block_empty())
    return;

  // Cluster basic blocks.
  CAlgo->clusterBasicBlocks(BF);

  if (opts::PrintClusters)
    CAlgo->printClusters();

  // Arrange basic blocks according to clusters.
  for (ClusterAlgorithm::ClusterTy &Cluster : CAlgo->Clusters)
    Order.insert(Order.end(), Cluster.begin(), Cluster.end());
}

void OptimizeBranchReorderAlgorithm::reorderBasicBlocks(
    BinaryFunction &BF, BasicBlockOrder &Order) const {
  if (BF.getLayout().block_empty())
    return;

  // Cluster basic blocks.
  CAlgo->clusterBasicBlocks(BF, /* ComputeEdges = */ true);
  std::vector<ClusterAlgorithm::ClusterTy> &Clusters = CAlgo->Clusters;
  std::vector<std::unordered_map<uint32_t, uint64_t>> &ClusterEdges =
      CAlgo->ClusterEdges;

  // Compute clusters' average frequencies.
  CAlgo->computeClusterAverageFrequency(BF.getBinaryContext());
  std::vector<double> &AvgFreq = CAlgo->AvgFreq;

  if (opts::PrintClusters)
    CAlgo->printClusters();

  // Cluster layout order
  std::vector<uint32_t> ClusterOrder;

  // Do a topological sort for clusters, prioritizing frequently-executed BBs
  // during the traversal.
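  // The sort is an iterative DFS: a cluster is pushed with the STACKED flag,
  // expanded once (marked VISITED), and appended to ClusterOrder only when it
  // is popped after all of its successors have been processed. Reversing the
  // resulting sequence yields a topological order of the cluster graph.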
  std::stack<uint32_t> Stack;
  std::vector<uint32_t> Status;
  std::vector<uint32_t> Parent;
  Status.resize(Clusters.size(), 0);
  Parent.resize(Clusters.size(), 0);
  constexpr uint32_t STACKED = 1;
  constexpr uint32_t VISITED = 2;
  Status[0] = STACKED;
  Stack.push(0);
  while (!Stack.empty()) {
    uint32_t I = Stack.top();
    if (!(Status[I] & VISITED)) {
      Status[I] |= VISITED;
      // Order successors by weight
      auto ClusterComp = [&ClusterEdges, I](uint32_t A, uint32_t B) {
        return ClusterEdges[I][A] > ClusterEdges[I][B];
      };
      std::priority_queue<uint32_t, std::vector<uint32_t>,
                          decltype(ClusterComp)>
          SuccQueue(ClusterComp);
      for (std::pair<const uint32_t, uint64_t> &Target : ClusterEdges[I]) {
        if (Target.second > 0 && !(Status[Target.first] & STACKED) &&
            !Clusters[Target.first].empty()) {
          Parent[Target.first] = I;
          Status[Target.first] = STACKED;
          SuccQueue.push(Target.first);
        }
      }
      while (!SuccQueue.empty()) {
        Stack.push(SuccQueue.top());
        SuccQueue.pop();
      }
      continue;
    }
    // Already visited this node
    Stack.pop();
    ClusterOrder.push_back(I);
  }
  std::reverse(ClusterOrder.begin(), ClusterOrder.end());
  // Put unreachable clusters at the end
  for (uint32_t I = 0, E = Clusters.size(); I < E; ++I)
    if (!(Status[I] & VISITED) && !Clusters[I].empty())
      ClusterOrder.push_back(I);

  // Sort nodes with equal precedence
  auto Beg = ClusterOrder.begin();
  // Don't reorder the first cluster, which contains the function entry point
  ++Beg;
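  // The comparator walks the Parent chains so that an ancestor cluster in the
  // DFS tree stays before its descendants; clusters with no such ancestor
  // relationship are ordered by decreasing average frequency.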
  std::stable_sort(Beg, ClusterOrder.end(),
                   [&AvgFreq, &Parent](uint32_t A, uint32_t B) {
                     uint32_t P = Parent[A];
                     while (Parent[P] != 0) {
                       if (Parent[P] == B)
                         return false;
                       P = Parent[P];
                     }
                     P = Parent[B];
                     while (Parent[P] != 0) {
                       if (Parent[P] == A)
                         return true;
                       P = Parent[P];
                     }
                     return AvgFreq[A] > AvgFreq[B];
                   });

  if (opts::PrintClusters) {
    errs() << "New cluster order: ";
    const char *Sep = "";
    for (uint32_t O : ClusterOrder) {
      errs() << Sep << O;
      Sep = ", ";
    }
    errs() << '\n';
  }

  // Arrange basic blocks according to cluster order.
  for (uint32_t ClusterIndex : ClusterOrder) {
    ClusterAlgorithm::ClusterTy &Cluster = Clusters[ClusterIndex];
    Order.insert(Order.end(), Cluster.begin(), Cluster.end());
  }
}

void OptimizeCacheReorderAlgorithm::reorderBasicBlocks(
    BinaryFunction &BF, BasicBlockOrder &Order) const {
  if (BF.getLayout().block_empty())
    return;

  const uint64_t ColdThreshold =
      opts::ColdThreshold *
      (*BF.getLayout().block_begin())->getExecutionCount() / 1000;

  // Cluster basic blocks.
  CAlgo->clusterBasicBlocks(BF);
  std::vector<ClusterAlgorithm::ClusterTy> &Clusters = CAlgo->Clusters;

  // Compute clusters' average frequencies.
  CAlgo->computeClusterAverageFrequency(BF.getBinaryContext());
  std::vector<double> &AvgFreq = CAlgo->AvgFreq;

  if (opts::PrintClusters)
    CAlgo->printClusters();

  // Cluster layout order
  std::vector<uint32_t> ClusterOrder;

  // Order clusters based on average instruction execution frequency
  for (uint32_t I = 0, E = Clusters.size(); I < E; ++I)
    if (!Clusters[I].empty())
      ClusterOrder.push_back(I);
  // Don't reorder the first cluster, which contains the function entry point
  std::stable_sort(
      std::next(ClusterOrder.begin()), ClusterOrder.end(),
      [&AvgFreq](uint32_t A, uint32_t B) { return AvgFreq[A] > AvgFreq[B]; });

  if (opts::PrintClusters) {
    errs() << "New cluster order: ";
    const char *Sep = "";
    for (uint32_t O : ClusterOrder) {
      errs() << Sep << O;
      Sep = ", ";
    }
    errs() << '\n';
  }

  // Arrange basic blocks according to cluster order.
  for (uint32_t ClusterIndex : ClusterOrder) {
    ClusterAlgorithm::ClusterTy &Cluster = Clusters[ClusterIndex];
    Order.insert(Order.end(), Cluster.begin(), Cluster.end());
    // Force zero execution count on clusters that do not meet the cutoff
    // specified by --cold-threshold.
    if (AvgFreq[ClusterIndex] < static_cast<double>(ColdThreshold))
      for (BinaryBasicBlock *BBPtr : Cluster)
        BBPtr->setExecutionCount(0);
  }
}

void ReverseReorderAlgorithm::reorderBasicBlocks(BinaryFunction &BF,
                                                 BasicBlockOrder &Order) const {
  if (BF.getLayout().block_empty())
    return;

  BinaryBasicBlock *FirstBB = *BF.getLayout().block_begin();
  Order.push_back(FirstBB);
  for (auto RLI = BF.getLayout().block_rbegin(); *RLI != FirstBB; ++RLI)
    Order.push_back(*RLI);
}

void RandomClusterReorderAlgorithm::reorderBasicBlocks(
    BinaryFunction &BF, BasicBlockOrder &Order) const {
  if (BF.getLayout().block_empty())
    return;

  // Cluster basic blocks.
  CAlgo->clusterBasicBlocks(BF);
  std::vector<ClusterAlgorithm::ClusterTy> &Clusters = CAlgo->Clusters;

  if (opts::PrintClusters)
    CAlgo->printClusters();

  // Cluster layout order
  std::vector<uint32_t> ClusterOrder;

  // Collect non-empty clusters; all but the first are shuffled randomly below.
  for (uint32_t I = 0, E = Clusters.size(); I < E; ++I)
    if (!Clusters[I].empty())
      ClusterOrder.push_back(I);

  std::shuffle(std::next(ClusterOrder.begin()), ClusterOrder.end(),
               std::default_random_engine(opts::RandomSeed.getValue()));

  if (opts::PrintClusters) {
    errs() << "New cluster order: ";
    const char *Sep = "";
    for (uint32_t O : ClusterOrder) {
      errs() << Sep << O;
      Sep = ", ";
    }
    errs() << '\n';
  }

  // Arrange basic blocks according to cluster order.
  for (uint32_t ClusterIndex : ClusterOrder) {
    ClusterAlgorithm::ClusterTy &Cluster = Clusters[ClusterIndex];
    Order.insert(Order.end(), Cluster.begin(), Cluster.end());
  }
}