//===- bolt/Passes/ReorderAlgorithm.cpp - Basic block reordering ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements classes used by several basic block reordering
// algorithms.
//
//===----------------------------------------------------------------------===//

#include "bolt/Passes/ReorderAlgorithm.h"
#include "bolt/Core/BinaryBasicBlock.h"
#include "bolt/Core/BinaryFunction.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Utils/CodeLayout.h"
#include <queue>
#include <random>
#include <stack>

#undef DEBUG_TYPE
#define DEBUG_TYPE "bolt"

using namespace llvm;
using namespace bolt;

namespace opts {

extern cl::OptionCategory BoltOptCategory;
extern cl::opt<bool> NoThreads;

static cl::opt<unsigned> ColdThreshold(
    "cold-threshold",
    cl::desc("tenths of percents of main entry frequency to use as a "
             "threshold when evaluating whether a basic block is cold "
             "(0 means it is only considered cold if the block has zero "
             "samples). Default: 0"),
    cl::init(0), cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory));

static cl::opt<bool> PrintClusters("print-clusters",
                                   cl::desc("print clusters"), cl::Hidden,
                                   cl::cat(BoltOptCategory));

cl::opt<uint32_t> RandomSeed("bolt-seed", cl::desc("seed for randomization"),
                             cl::init(42), cl::Hidden,
                             cl::cat(BoltOptCategory));

} // namespace opts

namespace {

template <class T> inline void hashCombine(size_t &Seed, const T &Val) {
  std::hash<T> Hasher;
  Seed ^= Hasher(Val) + 0x9e3779b9 + (Seed << 6) + (Seed >> 2);
}

template <typename A, typename B> struct HashPair {
  size_t operator()(const std::pair<A, B> &Val) const {
    std::hash<A> Hasher;
    size_t Seed = Hasher(Val.first);
    hashCombine(Seed, Val.second);
    return Seed;
  }
};

} // namespace

void ClusterAlgorithm::computeClusterAverageFrequency(const BinaryContext &BC) {
  // Create a separate MCCodeEmitter to allow lock-free execution.
  BinaryContext::IndependentCodeEmitter Emitter;
  if (!opts::NoThreads)
    Emitter = BC.createIndependentMCCodeEmitter();

  AvgFreq.resize(Clusters.size(), 0.0);
  for (uint32_t I = 0, E = Clusters.size(); I < E; ++I) {
    double Freq = 0.0;
    uint64_t ClusterSize = 0;
    for (const BinaryBasicBlock *BB : Clusters[I]) {
      if (BB->getNumNonPseudos() > 0) {
        Freq += BB->getExecutionCount();
        // Estimate the size of a block in bytes at run time.
        // NOTE: This might be inaccurate.
        ClusterSize += BB->estimateSize(Emitter.MCE.get());
      }
    }
    AvgFreq[I] = ClusterSize == 0 ? 0 : Freq / ClusterSize;
  }
}
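// For example (illustrative numbers, not from a real profile): a cluster
// holding two blocks with execution counts {100, 50} and estimated sizes
// {16, 8} bytes gets AvgFreq = (100 + 50) / (16 + 8) = 6.25, i.e. the
// expected number of executions per byte of cluster code.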
void ClusterAlgorithm::printClusters() const {
  for (uint32_t I = 0, E = Clusters.size(); I < E; ++I) {
    errs() << "Cluster number " << I;
    if (AvgFreq.size() == Clusters.size())
      errs() << " (frequency: " << AvgFreq[I] << ")";
    errs() << " : ";
    const char *Sep = "";
    for (const BinaryBasicBlock *BB : Clusters[I]) {
      errs() << Sep << BB->getName();
      Sep = ", ";
    }
    errs() << "\n";
  }
}

void ClusterAlgorithm::reset() {
  Clusters.clear();
  ClusterEdges.clear();
  AvgFreq.clear();
}

void GreedyClusterAlgorithm::EdgeTy::print(raw_ostream &OS) const {
  OS << Src->getName() << " -> " << Dst->getName() << ", count: " << Count;
}

size_t GreedyClusterAlgorithm::EdgeHash::operator()(const EdgeTy &E) const {
  HashPair<const BinaryBasicBlock *, const BinaryBasicBlock *> Hasher;
  return Hasher(std::make_pair(E.Src, E.Dst));
}

bool GreedyClusterAlgorithm::EdgeEqual::operator()(const EdgeTy &A,
                                                   const EdgeTy &B) const {
  return A.Src == B.Src && A.Dst == B.Dst;
}

void GreedyClusterAlgorithm::clusterBasicBlocks(BinaryFunction &BF,
                                                bool ComputeEdges) {
  reset();

  // Greedy heuristic implementation for the TSP, applied to BB layout. Try to
  // maximize weight during a path traversing all BBs. In this way, we will
  // convert the hottest branches into fall-throughs.

  // This is the queue of edges from which we will pop edges and use them to
  // cluster basic blocks in a greedy fashion.
  std::vector<EdgeTy> Queue;

  // Initialize inter-cluster weights.
  if (ComputeEdges)
    ClusterEdges.resize(BF.getLayout().block_size());

  // Initialize clusters and edge queue.
  for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
    // Create a cluster for this BB.
    uint32_t I = Clusters.size();
    Clusters.emplace_back();
    std::vector<BinaryBasicBlock *> &Cluster = Clusters.back();
    Cluster.push_back(BB);
    BBToClusterMap[BB] = I;
    // Populate priority queue with edges.
    auto BI = BB->branch_info_begin();
    for (const BinaryBasicBlock *I : BB->successors()) {
      assert(BI->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
             "attempted reordering blocks of function with no profile data");
      Queue.emplace_back(EdgeTy(BB, I, BI->Count));
      ++BI;
    }
  }

  // Sort and adjust the edge queue.
  initQueue(Queue, BF);

  // Grow clusters in a greedy fashion.
  while (!Queue.empty()) {
    EdgeTy E = Queue.back();
    Queue.pop_back();

    const BinaryBasicBlock *SrcBB = E.Src;
    const BinaryBasicBlock *DstBB = E.Dst;

    LLVM_DEBUG(dbgs() << "Popped edge "; E.print(dbgs()); dbgs() << "\n");

    // Case 1: SrcBB and DstBB are the same, or DstBB is the entry block.
    // Ignore this edge.
    if (SrcBB == DstBB || DstBB == *BF.getLayout().block_begin()) {
      LLVM_DEBUG(dbgs() << "\tIgnored (same src, dst)\n");
      continue;
    }

    int I = BBToClusterMap[SrcBB];
    int J = BBToClusterMap[DstBB];

    // Case 2: If they are already allocated at the same cluster, just
    // increase the weight of this cluster.
    if (I == J) {
      if (ComputeEdges)
        ClusterEdges[I][I] += E.Count;
      LLVM_DEBUG(dbgs() << "\tIgnored (src, dst belong to the same cluster)\n");
      continue;
    }

    std::vector<BinaryBasicBlock *> &ClusterA = Clusters[I];
    std::vector<BinaryBasicBlock *> &ClusterB = Clusters[J];
    if (areClustersCompatible(ClusterA, ClusterB, E)) {
      // Case 3: SrcBB is at the end of a cluster and DstBB is at the start,
      // allowing us to merge two clusters.
      for (const BinaryBasicBlock *BB : ClusterB)
        BBToClusterMap[BB] = I;
      ClusterA.insert(ClusterA.end(), ClusterB.begin(), ClusterB.end());
      ClusterB.clear();
      if (ComputeEdges) {
        // Increase the intra-cluster edge count of cluster A with the count
        // of this edge as well as with the total count of previously visited
        // edges from cluster B to cluster A.
        ClusterEdges[I][I] += E.Count;
        ClusterEdges[I][I] += ClusterEdges[J][I];

        // Iterate through all inter-cluster edges and transfer edges
        // targeting cluster B to cluster A.
        for (uint32_t K = 0, E = ClusterEdges.size(); K != E; ++K)
          ClusterEdges[K][I] += ClusterEdges[K][J];
      }
      // Adjust the weights of the remaining edges and re-sort the queue.
      adjustQueue(Queue, BF);
      LLVM_DEBUG(dbgs() << "\tMerged clusters of src, dst\n");
    } else {
      // Case 4: SrcBB and DstBB are allocated in positions where we cannot
      // merge their clusters. Add the count of this edge to the inter-cluster
      // edge count between clusters A and B to help us decide ordering
      // between these clusters.
      if (ComputeEdges)
        ClusterEdges[I][J] += E.Count;
      LLVM_DEBUG(
          dbgs() << "\tIgnored (src, dst belong to incompatible clusters)\n");
    }
  }
}
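// A worked example of the greedy pass above (illustrative counts): given
// edges A->B (100), B->C (80), and A->C (10), the two hottest edges are
// popped first and merge the singleton clusters into [A, B, C] via Case 3;
// the cold edge A->C then falls into Case 2 and only bumps the intra-cluster
// edge count. Both hot branches thus become fall-throughs.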
void GreedyClusterAlgorithm::reset() {
  ClusterAlgorithm::reset();
  BBToClusterMap.clear();
}

void PHGreedyClusterAlgorithm::initQueue(std::vector<EdgeTy> &Queue,
                                         const BinaryFunction &BF) {
  // Define a comparison function to establish SWO between edges.
  auto Comp = [&BF](const EdgeTy &A, const EdgeTy &B) {
    // With equal weights, prioritize branches with lower index
    // source/destination. This helps to keep original block order for blocks
    // when optimal order cannot be deduced from a profile.
    if (A.Count == B.Count) {
      const signed SrcOrder = BF.getOriginalLayoutRelativeOrder(A.Src, B.Src);
      return (SrcOrder != 0)
                 ? SrcOrder > 0
                 : BF.getOriginalLayoutRelativeOrder(A.Dst, B.Dst) > 0;
    }
    return A.Count < B.Count;
  };

  // Sort edges in increasing profile count order.
  llvm::sort(Queue, Comp);
}

void PHGreedyClusterAlgorithm::adjustQueue(std::vector<EdgeTy> &Queue,
                                           const BinaryFunction &BF) {
  // Nothing to do.
}

bool PHGreedyClusterAlgorithm::areClustersCompatible(const ClusterTy &Front,
                                                     const ClusterTy &Back,
                                                     const EdgeTy &E) const {
  return Front.back() == E.Src && Back.front() == E.Dst;
}
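// A note on the weight function below: for an edge E = (Src, Dst), the
// weight starts at E's own count and is reduced by the count of every
// competing edge leaving Src or entering Dst. Since the queue is processed
// in decreasing weight order, an edge only wins a fall-through slot when its
// count dominates the branches it would force to be taken.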
int64_t MinBranchGreedyClusterAlgorithm::calculateWeight(
    const EdgeTy &E, const BinaryFunction &BF) const {
  const BinaryBasicBlock *SrcBB = E.Src;
  const BinaryBasicBlock *DstBB = E.Dst;

  // Initial weight value.
  int64_t W = (int64_t)E.Count;

  // Adjust the weight by taking into account other edges with the same source.
  auto BI = SrcBB->branch_info_begin();
  for (const BinaryBasicBlock *SuccBB : SrcBB->successors()) {
    assert(BI->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
           "attempted reordering blocks of function with no profile data");
    assert(BI->Count <= std::numeric_limits<int64_t>::max() &&
           "overflow detected");
    // Ignore edges with same source and destination, edges that target the
    // entry block as well as the edge E itself.
    if (SuccBB != SrcBB && SuccBB != *BF.getLayout().block_begin() &&
        SuccBB != DstBB)
      W -= (int64_t)BI->Count;
    ++BI;
  }

  // Adjust the weight by taking into account other edges with the same
  // destination.
  for (const BinaryBasicBlock *PredBB : DstBB->predecessors()) {
    // Ignore edges with same source and destination as well as the edge E
    // itself.
    if (PredBB == DstBB || PredBB == SrcBB)
      continue;
    auto BI = PredBB->branch_info_begin();
    for (const BinaryBasicBlock *SuccBB : PredBB->successors()) {
      if (SuccBB == DstBB)
        break;
      ++BI;
    }
    assert(BI != PredBB->branch_info_end() && "invalid control flow graph");
    assert(BI->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
           "attempted reordering blocks of function with no profile data");
    assert(BI->Count <= std::numeric_limits<int64_t>::max() &&
           "overflow detected");
    W -= (int64_t)BI->Count;
  }

  return W;
}

void MinBranchGreedyClusterAlgorithm::initQueue(std::vector<EdgeTy> &Queue,
                                                const BinaryFunction &BF) {
  // Initialize edge weights.
  for (const EdgeTy &E : Queue)
    Weight.emplace(std::make_pair(E, calculateWeight(E, BF)));

  // Sort edges in increasing weight order.
  adjustQueue(Queue, BF);
}

void MinBranchGreedyClusterAlgorithm::adjustQueue(std::vector<EdgeTy> &Queue,
                                                  const BinaryFunction &BF) {
  // Define a comparison function to establish SWO between edges.
  auto Comp = [&](const EdgeTy &A, const EdgeTy &B) {
    // With equal weights, prioritize branches with lower index
    // source/destination. This helps to keep original block order for blocks
    // when optimal order cannot be deduced from a profile.
    if (Weight[A] == Weight[B]) {
      const signed SrcOrder = BF.getOriginalLayoutRelativeOrder(A.Src, B.Src);
      return (SrcOrder != 0)
                 ? SrcOrder > 0
                 : BF.getOriginalLayoutRelativeOrder(A.Dst, B.Dst) > 0;
    }
    return Weight[A] < Weight[B];
  };

  // Iterate through all remaining edges to find edges that have their
  // source and destination in the same cluster.
  std::vector<EdgeTy> NewQueue;
  for (const EdgeTy &E : Queue) {
    const BinaryBasicBlock *SrcBB = E.Src;
    const BinaryBasicBlock *DstBB = E.Dst;

    // Case 1: SrcBB and DstBB are the same or DstBB is the entry block.
    // Ignore this edge.
    if (SrcBB == DstBB || DstBB == *BF.getLayout().block_begin()) {
      LLVM_DEBUG(dbgs() << "\tAdjustment: Ignored edge "; E.print(dbgs());
                 dbgs() << " (same src, dst)\n");
      continue;
    }

    int I = BBToClusterMap[SrcBB];
    int J = BBToClusterMap[DstBB];
    std::vector<BinaryBasicBlock *> &ClusterA = Clusters[I];
    std::vector<BinaryBasicBlock *> &ClusterB = Clusters[J];

    // Case 2: They are already allocated at the same cluster or incompatible
    // clusters. Adjust the weights of edges with the same source or
    // destination, so that this edge has no effect on them anymore, and
    // ignore this edge. Also increase the intra- (or inter-) cluster edge
    // count.
    if (I == J || !areClustersCompatible(ClusterA, ClusterB, E)) {
      if (!ClusterEdges.empty())
        ClusterEdges[I][J] += E.Count;
      LLVM_DEBUG(dbgs() << "\tAdjustment: Ignored edge "; E.print(dbgs());
                 dbgs() << " (src, dst belong to same cluster or incompatible "
                           "clusters)\n");
      for (const BinaryBasicBlock *SuccBB : SrcBB->successors()) {
        if (SuccBB == DstBB)
          continue;
        auto WI = Weight.find(EdgeTy(SrcBB, SuccBB, 0));
        assert(WI != Weight.end() && "CFG edge not found in Weight map");
        WI->second += (int64_t)E.Count;
      }
      for (const BinaryBasicBlock *PredBB : DstBB->predecessors()) {
        if (PredBB == SrcBB)
          continue;
        auto WI = Weight.find(EdgeTy(PredBB, DstBB, 0));
        assert(WI != Weight.end() && "CFG edge not found in Weight map");
        WI->second += (int64_t)E.Count;
      }
      continue;
    }

    // Case 3: None of the previous cases is true, so just keep this edge in
    // the queue.
    NewQueue.emplace_back(E);
  }

  // Sort remaining edges in increasing weight order.
  Queue.swap(NewQueue);
  llvm::sort(Queue, Comp);
}
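// Note on the Weight.find(EdgeTy(..., 0)) probes above: EdgeHash and
// EdgeEqual deliberately ignore the Count field, so a temporary edge
// constructed with a zero count is sufficient to look up the real CFG edge
// in the map.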
bool MinBranchGreedyClusterAlgorithm::areClustersCompatible(
    const ClusterTy &Front, const ClusterTy &Back, const EdgeTy &E) const {
  return Front.back() == E.Src && Back.front() == E.Dst;
}

void MinBranchGreedyClusterAlgorithm::reset() {
  GreedyClusterAlgorithm::reset();
  Weight.clear();
}

void TSPReorderAlgorithm::reorderBasicBlocks(BinaryFunction &BF,
                                             BasicBlockOrder &Order) const {
  std::vector<std::vector<int64_t>> Weight;
  std::vector<BinaryBasicBlock *> IndexToBB;

  const size_t N = BF.getLayout().block_size();
  assert(N <= std::numeric_limits<uint64_t>::digits &&
         "cannot use TSP solution for sizes larger than bits in uint64_t");

  // Populating weight map and index map.
  for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
    BB->setLayoutIndex(IndexToBB.size());
    IndexToBB.push_back(BB);
  }
  Weight.resize(N);
  for (const BinaryBasicBlock *BB : BF.getLayout().blocks()) {
    auto BI = BB->branch_info_begin();
    Weight[BB->getLayoutIndex()].resize(N);
    for (BinaryBasicBlock *SuccBB : BB->successors()) {
      if (BI->Count != BinaryBasicBlock::COUNT_NO_PROFILE)
        Weight[BB->getLayoutIndex()][SuccBB->getLayoutIndex()] = BI->Count;
      ++BI;
    }
  }

  std::vector<std::vector<int64_t>> DP;
  DP.resize(static_cast<uint64_t>(1) << N);
  for (std::vector<int64_t> &Elmt : DP)
    Elmt.resize(N, -1);

  // Start with the entry basic block being allocated with cost zero.
  DP[1][0] = 0;
  // Walk through TSP solutions using a bitmask to represent state (current
  // set of BBs in the layout).
  uint64_t BestSet = 1;
  uint64_t BestLast = 0;
  int64_t BestWeight = 0;
  for (uint64_t Set = 1; Set < (1ULL << N); ++Set) {
    // Traverse each possibility of Last BB visited in this layout.
    for (uint64_t Last = 0; Last < N; ++Last) {
      // Case 1: There is no possible layout with this BB as Last.
      if (DP[Set][Last] == -1)
        continue;

      // Case 2: There is a layout with this Set and this Last, and we try
      // to expand this set with New.
      for (uint64_t New = 1; New < N; ++New) {
        // Case 2a: BB "New" is already in this Set.
        if ((Set & (1ULL << New)) != 0)
          continue;

        // Case 2b: BB "New" is not in this set and we add it to this Set and
        // record total weight of this layout with "New" as the last BB.
        uint64_t NewSet = (Set | (1ULL << New));
        if (DP[NewSet][New] == -1)
          DP[NewSet][New] = DP[Set][Last] + (int64_t)Weight[Last][New];
        DP[NewSet][New] = std::max(
            DP[NewSet][New], DP[Set][Last] + (int64_t)Weight[Last][New]);

        if (DP[NewSet][New] > BestWeight) {
          BestWeight = DP[NewSet][New];
          BestSet = NewSet;
          BestLast = New;
        }
      }
    }
  }

  // Define final function layout based on layout that maximizes weight.
  uint64_t Last = BestLast;
  uint64_t Set = BestSet;
  BitVector Visited;
  Visited.resize(N);
  Visited[Last] = true;
  Order.push_back(IndexToBB[Last]);
  Set = Set & ~(1ULL << Last);
  while (Set != 0) {
    int64_t Best = -1;
    uint64_t NewLast = 0;
    for (uint64_t I = 0; I < N; ++I) {
      if (DP[Set][I] == -1)
        continue;
      int64_t AdjWeight = Weight[I][Last] > 0 ? Weight[I][Last] : 0;
      if (DP[Set][I] + AdjWeight > Best) {
        NewLast = I;
        Best = DP[Set][I] + AdjWeight;
      }
    }
    Last = NewLast;
    Visited[Last] = true;
    Order.push_back(IndexToBB[Last]);
    Set = Set & ~(1ULL << Last);
  }
  std::reverse(Order.begin(), Order.end());

  // Finalize layout with BBs that weren't assigned to the layout using the
  // input layout.
  for (BinaryBasicBlock *BB : BF.getLayout().blocks())
    if (!Visited[BB->getLayoutIndex()])
      Order.push_back(BB);
}
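// A note on TSPReorderAlgorithm above: its core loop nest is the classic
// bitmask-TSP dynamic program, expanded forward as
//   DP[Set | {New}][New] =
//       max(DP[Set | {New}][New], DP[Set][Last] + Weight[Last][New])
// and seeded with DP[{entry}][entry] = 0. The table holds 2^N * N entries,
// so although the assert only bounds N by the bit width of uint64_t, the
// approach is only practical for functions with a handful of basic blocks.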
void ExtTSPReorderAlgorithm::reorderBasicBlocks(BinaryFunction &BF,
                                                BasicBlockOrder &Order) const {
  if (BF.getLayout().block_empty())
    return;

  // Do not change layout of functions w/o profile information.
  if (!BF.hasValidProfile() || BF.getLayout().block_size() <= 2) {
    for (BinaryBasicBlock *BB : BF.getLayout().blocks())
      Order.push_back(BB);
    return;
  }

  // Create a separate MCCodeEmitter to allow lock-free execution.
  BinaryContext::IndependentCodeEmitter Emitter;
  if (!opts::NoThreads)
    Emitter = BF.getBinaryContext().createIndependentMCCodeEmitter();

  // Initialize CFG nodes and their data.
  std::vector<uint64_t> BlockSizes;
  std::vector<uint64_t> BlockCounts;
  BasicBlockOrder OrigOrder;
  BF.getLayout().updateLayoutIndices();
  for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
    uint64_t Size = std::max<uint64_t>(BB->estimateSize(Emitter.MCE.get()), 1);
    BlockSizes.push_back(Size);
    BlockCounts.push_back(BB->getKnownExecutionCount());
    OrigOrder.push_back(BB);
  }

  // Initialize CFG edges.
  std::vector<codelayout::EdgeCount> JumpCounts;
  for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
    auto BI = BB->branch_info_begin();
    for (BinaryBasicBlock *SuccBB : BB->successors()) {
      assert(BI->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
             "missing profile for a jump");
      JumpCounts.push_back(
          {BB->getLayoutIndex(), SuccBB->getLayoutIndex(), BI->Count});
      ++BI;
    }
  }

  // Run the layout algorithm.
  auto Result =
      codelayout::computeExtTspLayout(BlockSizes, BlockCounts, JumpCounts);
  Order.reserve(BF.getLayout().block_size());
  for (uint64_t R : Result)
    Order.push_back(OrigOrder[R]);
}

void OptimizeReorderAlgorithm::reorderBasicBlocks(
    BinaryFunction &BF, BasicBlockOrder &Order) const {
  if (BF.getLayout().block_empty())
    return;

  // Cluster basic blocks.
  CAlgo->clusterBasicBlocks(BF);

  if (opts::PrintClusters)
    CAlgo->printClusters();

  // Arrange basic blocks according to clusters.
  for (ClusterAlgorithm::ClusterTy &Cluster : CAlgo->Clusters)
    Order.insert(Order.end(), Cluster.begin(), Cluster.end());
}

void OptimizeBranchReorderAlgorithm::reorderBasicBlocks(
    BinaryFunction &BF, BasicBlockOrder &Order) const {
  if (BF.getLayout().block_empty())
    return;

  // Cluster basic blocks.
  CAlgo->clusterBasicBlocks(BF, /* ComputeEdges = */ true);
  std::vector<ClusterAlgorithm::ClusterTy> &Clusters = CAlgo->Clusters;
  std::vector<std::unordered_map<uint32_t, uint64_t>> &ClusterEdges =
      CAlgo->ClusterEdges;

  // Compute clusters' average frequencies.
  CAlgo->computeClusterAverageFrequency(BF.getBinaryContext());
  std::vector<double> &AvgFreq = CAlgo->AvgFreq;

  if (opts::PrintClusters)
    CAlgo->printClusters();

  // Cluster layout order.
  std::vector<uint32_t> ClusterOrder;

  // Do a topological sort for clusters, prioritizing frequently-executed BBs
  // during the traversal.
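  // The traversal below is an iterative DFS: STACKED marks clusters that
  // have been pushed (so they are not pushed twice) and VISITED marks
  // clusters whose successors have already been expanded. A cluster is
  // appended to ClusterOrder only when popped after its successors, so
  // reversing the result yields a (reverse post-order) topological order of
  // the cluster graph.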
  std::stack<uint32_t> Stack;
  std::vector<uint32_t> Status;
  std::vector<uint32_t> Parent;
  Status.resize(Clusters.size(), 0);
  Parent.resize(Clusters.size(), 0);
  constexpr uint32_t STACKED = 1;
  constexpr uint32_t VISITED = 2;
  Status[0] = STACKED;
  Stack.push(0);
  while (!Stack.empty()) {
    uint32_t I = Stack.top();
    if (!(Status[I] & VISITED)) {
      Status[I] |= VISITED;
      // Order successors by weight.
      auto ClusterComp = [&ClusterEdges, I](uint32_t A, uint32_t B) {
        return ClusterEdges[I][A] > ClusterEdges[I][B];
      };
      std::priority_queue<uint32_t, std::vector<uint32_t>,
                          decltype(ClusterComp)>
          SuccQueue(ClusterComp);
      for (std::pair<const uint32_t, uint64_t> &Target : ClusterEdges[I]) {
        if (Target.second > 0 && !(Status[Target.first] & STACKED) &&
            !Clusters[Target.first].empty()) {
          Parent[Target.first] = I;
          Status[Target.first] = STACKED;
          SuccQueue.push(Target.first);
        }
      }
      while (!SuccQueue.empty()) {
        Stack.push(SuccQueue.top());
        SuccQueue.pop();
      }
      continue;
    }
    // Already visited this node.
    Stack.pop();
    ClusterOrder.push_back(I);
  }
  std::reverse(ClusterOrder.begin(), ClusterOrder.end());
  // Put unreachable clusters at the end.
  for (uint32_t I = 0, E = Clusters.size(); I < E; ++I)
    if (!(Status[I] & VISITED) && !Clusters[I].empty())
      ClusterOrder.push_back(I);

  // Sort nodes with equal precedence.
  auto Beg = ClusterOrder.begin();
  // Don't reorder the first cluster, which contains the function entry point.
  ++Beg;
  std::stable_sort(Beg, ClusterOrder.end(),
                   [&AvgFreq, &Parent](uint32_t A, uint32_t B) {
                     uint32_t P = Parent[A];
                     while (Parent[P] != 0) {
                       if (Parent[P] == B)
                         return false;
                       P = Parent[P];
                     }
                     P = Parent[B];
                     while (Parent[P] != 0) {
                       if (Parent[P] == A)
                         return true;
                       P = Parent[P];
                     }
                     return AvgFreq[A] > AvgFreq[B];
                   });

  if (opts::PrintClusters) {
    errs() << "New cluster order: ";
    const char *Sep = "";
    for (uint32_t O : ClusterOrder) {
      errs() << Sep << O;
      Sep = ", ";
    }
    errs() << '\n';
  }

  // Arrange basic blocks according to cluster order.
  for (uint32_t ClusterIndex : ClusterOrder) {
    ClusterAlgorithm::ClusterTy &Cluster = Clusters[ClusterIndex];
    Order.insert(Order.end(), Cluster.begin(), Cluster.end());
  }
}

void OptimizeCacheReorderAlgorithm::reorderBasicBlocks(
    BinaryFunction &BF, BasicBlockOrder &Order) const {
  if (BF.getLayout().block_empty())
    return;

  const uint64_t ColdThreshold =
      opts::ColdThreshold *
      (*BF.getLayout().block_begin())->getExecutionCount() / 1000;

  // Cluster basic blocks.
  CAlgo->clusterBasicBlocks(BF);
  std::vector<ClusterAlgorithm::ClusterTy> &Clusters = CAlgo->Clusters;

  // Compute clusters' average frequencies.
  CAlgo->computeClusterAverageFrequency(BF.getBinaryContext());
  std::vector<double> &AvgFreq = CAlgo->AvgFreq;

  if (opts::PrintClusters)
    CAlgo->printClusters();

  // Cluster layout order.
  std::vector<uint32_t> ClusterOrder;

  // Order clusters based on average instruction execution frequency.
  for (uint32_t I = 0, E = Clusters.size(); I < E; ++I)
    if (!Clusters[I].empty())
      ClusterOrder.push_back(I);
  // Don't reorder the first cluster, which contains the function entry point.
  std::stable_sort(
      std::next(ClusterOrder.begin()), ClusterOrder.end(),
      [&AvgFreq](uint32_t A, uint32_t B) { return AvgFreq[A] > AvgFreq[B]; });

  if (opts::PrintClusters) {
    errs() << "New cluster order: ";
    const char *Sep = "";
    for (uint32_t O : ClusterOrder) {
      errs() << Sep << O;
      Sep = ", ";
    }
    errs() << '\n';
  }

  // Arrange basic blocks according to cluster order.
  for (uint32_t ClusterIndex : ClusterOrder) {
    ClusterAlgorithm::ClusterTy &Cluster = Clusters[ClusterIndex];
    Order.insert(Order.end(), Cluster.begin(), Cluster.end());
    // Force zero execution count on clusters that do not meet the cut off
    // specified by --cold-threshold.
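    // For example (illustrative numbers): with --cold-threshold=5 and an
    // entry block executed 10000 times, ColdThreshold = 5 * 10000 / 1000 =
    // 50, so any cluster whose average frequency is below 50 is treated as
    // cold.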
    if (AvgFreq[ClusterIndex] < static_cast<double>(ColdThreshold))
      for (BinaryBasicBlock *BBPtr : Cluster)
        BBPtr->setExecutionCount(0);
  }
}

void ReverseReorderAlgorithm::reorderBasicBlocks(BinaryFunction &BF,
                                                 BasicBlockOrder &Order) const {
  if (BF.getLayout().block_empty())
    return;

  BinaryBasicBlock *FirstBB = *BF.getLayout().block_begin();
  Order.push_back(FirstBB);
  for (auto RLI = BF.getLayout().block_rbegin(); *RLI != FirstBB; ++RLI)
    Order.push_back(*RLI);
}

void RandomClusterReorderAlgorithm::reorderBasicBlocks(
    BinaryFunction &BF, BasicBlockOrder &Order) const {
  if (BF.getLayout().block_empty())
    return;

  // Cluster basic blocks.
  CAlgo->clusterBasicBlocks(BF);
  std::vector<ClusterAlgorithm::ClusterTy> &Clusters = CAlgo->Clusters;

  if (opts::PrintClusters)
    CAlgo->printClusters();

  // Cluster layout order.
  std::vector<uint32_t> ClusterOrder;

  // Collect non-empty clusters, then shuffle them, keeping the entry cluster
  // in the first position.
  for (uint32_t I = 0, E = Clusters.size(); I < E; ++I)
    if (!Clusters[I].empty())
      ClusterOrder.push_back(I);

  std::shuffle(std::next(ClusterOrder.begin()), ClusterOrder.end(),
               std::default_random_engine(opts::RandomSeed.getValue()));

  if (opts::PrintClusters) {
    errs() << "New cluster order: ";
    const char *Sep = "";
    for (uint32_t O : ClusterOrder) {
      errs() << Sep << O;
      Sep = ", ";
    }
    errs() << '\n';
  }

  // Arrange basic blocks according to cluster order.
  for (uint32_t ClusterIndex : ClusterOrder) {
    ClusterAlgorithm::ClusterTy &Cluster = Clusters[ClusterIndex];
    Order.insert(Order.end(), Cluster.begin(), Cluster.end());
  }
}