10eae32dcSDimitry Andric //===- CodeLayout.cpp - Implementation of code layout algorithms ----------===// 20eae32dcSDimitry Andric // 30eae32dcSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40eae32dcSDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50eae32dcSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60eae32dcSDimitry Andric // 70eae32dcSDimitry Andric //===----------------------------------------------------------------------===// 80eae32dcSDimitry Andric // 9*06c3fb27SDimitry Andric // The file implements "cache-aware" layout algorithms of basic blocks and 10*06c3fb27SDimitry Andric // functions in a binary. 110eae32dcSDimitry Andric // 120eae32dcSDimitry Andric // The algorithm tries to find a layout of nodes (basic blocks) of a given CFG 130eae32dcSDimitry Andric // optimizing jump locality and thus processor I-cache utilization. This is 140eae32dcSDimitry Andric // achieved via increasing the number of fall-through jumps and co-locating 150eae32dcSDimitry Andric // frequently executed nodes together. The name follows the underlying 160eae32dcSDimitry Andric // optimization problem, Extended-TSP, which is a generalization of classical 170eae32dcSDimitry Andric // (maximum) Traveling Salesmen Problem. 180eae32dcSDimitry Andric // 190eae32dcSDimitry Andric // The algorithm is a greedy heuristic that works with chains (ordered lists) 200eae32dcSDimitry Andric // of basic blocks. Initially all chains are isolated basic blocks. On every 210eae32dcSDimitry Andric // iteration, we pick a pair of chains whose merging yields the biggest increase 220eae32dcSDimitry Andric // in the ExtTSP score, which models how i-cache "friendly" a specific chain is. 230eae32dcSDimitry Andric // A pair of chains giving the maximum gain is merged into a new chain. The 240eae32dcSDimitry Andric // procedure stops when there is only one chain left, or when merging does not 250eae32dcSDimitry Andric // increase ExtTSP. In the latter case, the remaining chains are sorted by 260eae32dcSDimitry Andric // density in the decreasing order. 270eae32dcSDimitry Andric // 280eae32dcSDimitry Andric // An important aspect is the way two chains are merged. Unlike earlier 290eae32dcSDimitry Andric // algorithms (e.g., based on the approach of Pettis-Hansen), two 300eae32dcSDimitry Andric // chains, X and Y, are first split into three, X1, X2, and Y. Then we 310eae32dcSDimitry Andric // consider all possible ways of gluing the three chains (e.g., X1YX2, X1X2Y, 320eae32dcSDimitry Andric // X2X1Y, X2YX1, YX1X2, YX2X1) and choose the one producing the largest score. 330eae32dcSDimitry Andric // This improves the quality of the final result (the search space is larger) 340eae32dcSDimitry Andric // while keeping the implementation sufficiently fast. 350eae32dcSDimitry Andric // 360eae32dcSDimitry Andric // Reference: 370eae32dcSDimitry Andric // * A. Newell and S. Pupyrev, Improved Basic Block Reordering, 380eae32dcSDimitry Andric // IEEE Transactions on Computers, 2020 39bdd1243dSDimitry Andric // https://arxiv.org/abs/1809.04676 400eae32dcSDimitry Andric // 410eae32dcSDimitry Andric //===----------------------------------------------------------------------===// 420eae32dcSDimitry Andric 430eae32dcSDimitry Andric #include "llvm/Transforms/Utils/CodeLayout.h" 440eae32dcSDimitry Andric #include "llvm/Support/CommandLine.h" 45*06c3fb27SDimitry Andric #include "llvm/Support/Debug.h" 460eae32dcSDimitry Andric 47bdd1243dSDimitry Andric #include <cmath> 48bdd1243dSDimitry Andric 490eae32dcSDimitry Andric using namespace llvm; 500eae32dcSDimitry Andric #define DEBUG_TYPE "code-layout" 510eae32dcSDimitry Andric 52*06c3fb27SDimitry Andric namespace llvm { 5381ad6265SDimitry Andric cl::opt<bool> EnableExtTspBlockPlacement( 5481ad6265SDimitry Andric "enable-ext-tsp-block-placement", cl::Hidden, cl::init(false), 5581ad6265SDimitry Andric cl::desc("Enable machine block placement based on the ext-tsp model, " 5681ad6265SDimitry Andric "optimizing I-cache utilization.")); 5781ad6265SDimitry Andric 5881ad6265SDimitry Andric cl::opt<bool> ApplyExtTspWithoutProfile( 5981ad6265SDimitry Andric "ext-tsp-apply-without-profile", 6081ad6265SDimitry Andric cl::desc("Whether to apply ext-tsp placement for instances w/o profile"), 6181ad6265SDimitry Andric cl::init(true), cl::Hidden); 62*06c3fb27SDimitry Andric } // namespace llvm 6381ad6265SDimitry Andric 64bdd1243dSDimitry Andric // Algorithm-specific params. The values are tuned for the best performance 650eae32dcSDimitry Andric // of large-scale front-end bound binaries. 66bdd1243dSDimitry Andric static cl::opt<double> ForwardWeightCond( 67bdd1243dSDimitry Andric "ext-tsp-forward-weight-cond", cl::ReallyHidden, cl::init(0.1), 68bdd1243dSDimitry Andric cl::desc("The weight of conditional forward jumps for ExtTSP value")); 690eae32dcSDimitry Andric 70bdd1243dSDimitry Andric static cl::opt<double> ForwardWeightUncond( 71bdd1243dSDimitry Andric "ext-tsp-forward-weight-uncond", cl::ReallyHidden, cl::init(0.1), 72bdd1243dSDimitry Andric cl::desc("The weight of unconditional forward jumps for ExtTSP value")); 73bdd1243dSDimitry Andric 74bdd1243dSDimitry Andric static cl::opt<double> BackwardWeightCond( 75bdd1243dSDimitry Andric "ext-tsp-backward-weight-cond", cl::ReallyHidden, cl::init(0.1), 76*06c3fb27SDimitry Andric cl::desc("The weight of conditional backward jumps for ExtTSP value")); 77bdd1243dSDimitry Andric 78bdd1243dSDimitry Andric static cl::opt<double> BackwardWeightUncond( 79bdd1243dSDimitry Andric "ext-tsp-backward-weight-uncond", cl::ReallyHidden, cl::init(0.1), 80*06c3fb27SDimitry Andric cl::desc("The weight of unconditional backward jumps for ExtTSP value")); 81bdd1243dSDimitry Andric 82bdd1243dSDimitry Andric static cl::opt<double> FallthroughWeightCond( 83bdd1243dSDimitry Andric "ext-tsp-fallthrough-weight-cond", cl::ReallyHidden, cl::init(1.0), 84bdd1243dSDimitry Andric cl::desc("The weight of conditional fallthrough jumps for ExtTSP value")); 85bdd1243dSDimitry Andric 86bdd1243dSDimitry Andric static cl::opt<double> FallthroughWeightUncond( 87bdd1243dSDimitry Andric "ext-tsp-fallthrough-weight-uncond", cl::ReallyHidden, cl::init(1.05), 88bdd1243dSDimitry Andric cl::desc("The weight of unconditional fallthrough jumps for ExtTSP value")); 890eae32dcSDimitry Andric 900eae32dcSDimitry Andric static cl::opt<unsigned> ForwardDistance( 91bdd1243dSDimitry Andric "ext-tsp-forward-distance", cl::ReallyHidden, cl::init(1024), 920eae32dcSDimitry Andric cl::desc("The maximum distance (in bytes) of a forward jump for ExtTSP")); 930eae32dcSDimitry Andric 940eae32dcSDimitry Andric static cl::opt<unsigned> BackwardDistance( 95bdd1243dSDimitry Andric "ext-tsp-backward-distance", cl::ReallyHidden, cl::init(640), 960eae32dcSDimitry Andric cl::desc("The maximum distance (in bytes) of a backward jump for ExtTSP")); 970eae32dcSDimitry Andric 9881ad6265SDimitry Andric // The maximum size of a chain created by the algorithm. The size is bounded 9981ad6265SDimitry Andric // so that the algorithm can efficiently process extremely large instance. 10081ad6265SDimitry Andric static cl::opt<unsigned> 101bdd1243dSDimitry Andric MaxChainSize("ext-tsp-max-chain-size", cl::ReallyHidden, cl::init(4096), 10281ad6265SDimitry Andric cl::desc("The maximum size of a chain to create.")); 10381ad6265SDimitry Andric 1040eae32dcSDimitry Andric // The maximum size of a chain for splitting. Larger values of the threshold 1050eae32dcSDimitry Andric // may yield better quality at the cost of worsen run-time. 1060eae32dcSDimitry Andric static cl::opt<unsigned> ChainSplitThreshold( 107bdd1243dSDimitry Andric "ext-tsp-chain-split-threshold", cl::ReallyHidden, cl::init(128), 1080eae32dcSDimitry Andric cl::desc("The maximum size of a chain to apply splitting")); 1090eae32dcSDimitry Andric 1100eae32dcSDimitry Andric // The option enables splitting (large) chains along in-coming and out-going 1110eae32dcSDimitry Andric // jumps. This typically results in a better quality. 1120eae32dcSDimitry Andric static cl::opt<bool> EnableChainSplitAlongJumps( 113bdd1243dSDimitry Andric "ext-tsp-enable-chain-split-along-jumps", cl::ReallyHidden, cl::init(true), 1140eae32dcSDimitry Andric cl::desc("The maximum size of a chain to apply splitting")); 1150eae32dcSDimitry Andric 1160eae32dcSDimitry Andric namespace { 1170eae32dcSDimitry Andric 1180eae32dcSDimitry Andric // Epsilon for comparison of doubles. 1190eae32dcSDimitry Andric constexpr double EPS = 1e-8; 1200eae32dcSDimitry Andric 121bdd1243dSDimitry Andric // Compute the Ext-TSP score for a given jump. 122bdd1243dSDimitry Andric double jumpExtTSPScore(uint64_t JumpDist, uint64_t JumpMaxDist, uint64_t Count, 123bdd1243dSDimitry Andric double Weight) { 124bdd1243dSDimitry Andric if (JumpDist > JumpMaxDist) 125bdd1243dSDimitry Andric return 0; 126bdd1243dSDimitry Andric double Prob = 1.0 - static_cast<double>(JumpDist) / JumpMaxDist; 127bdd1243dSDimitry Andric return Weight * Prob * Count; 128bdd1243dSDimitry Andric } 129bdd1243dSDimitry Andric 1300eae32dcSDimitry Andric // Compute the Ext-TSP score for a jump between a given pair of blocks, 1310eae32dcSDimitry Andric // using their sizes, (estimated) addresses and the jump execution count. 1320eae32dcSDimitry Andric double extTSPScore(uint64_t SrcAddr, uint64_t SrcSize, uint64_t DstAddr, 133bdd1243dSDimitry Andric uint64_t Count, bool IsConditional) { 1340eae32dcSDimitry Andric // Fallthrough 1350eae32dcSDimitry Andric if (SrcAddr + SrcSize == DstAddr) { 136bdd1243dSDimitry Andric return jumpExtTSPScore(0, 1, Count, 137bdd1243dSDimitry Andric IsConditional ? FallthroughWeightCond 138bdd1243dSDimitry Andric : FallthroughWeightUncond); 1390eae32dcSDimitry Andric } 1400eae32dcSDimitry Andric // Forward 1410eae32dcSDimitry Andric if (SrcAddr + SrcSize < DstAddr) { 142bdd1243dSDimitry Andric const uint64_t Dist = DstAddr - (SrcAddr + SrcSize); 143bdd1243dSDimitry Andric return jumpExtTSPScore(Dist, ForwardDistance, Count, 144bdd1243dSDimitry Andric IsConditional ? ForwardWeightCond 145bdd1243dSDimitry Andric : ForwardWeightUncond); 1460eae32dcSDimitry Andric } 1470eae32dcSDimitry Andric // Backward 148bdd1243dSDimitry Andric const uint64_t Dist = SrcAddr + SrcSize - DstAddr; 149bdd1243dSDimitry Andric return jumpExtTSPScore(Dist, BackwardDistance, Count, 150bdd1243dSDimitry Andric IsConditional ? BackwardWeightCond 151bdd1243dSDimitry Andric : BackwardWeightUncond); 1520eae32dcSDimitry Andric } 1530eae32dcSDimitry Andric 1540eae32dcSDimitry Andric /// A type of merging two chains, X and Y. The former chain is split into 1550eae32dcSDimitry Andric /// X1 and X2 and then concatenated with Y in the order specified by the type. 156*06c3fb27SDimitry Andric enum class MergeTypeT : int { X_Y, Y_X, X1_Y_X2, Y_X2_X1, X2_X1_Y }; 1570eae32dcSDimitry Andric 1580eae32dcSDimitry Andric /// The gain of merging two chains, that is, the Ext-TSP score of the merge 159*06c3fb27SDimitry Andric /// together with the corresponding merge 'type' and 'offset'. 160*06c3fb27SDimitry Andric struct MergeGainT { 161*06c3fb27SDimitry Andric explicit MergeGainT() = default; 162*06c3fb27SDimitry Andric explicit MergeGainT(double Score, size_t MergeOffset, MergeTypeT MergeType) 1630eae32dcSDimitry Andric : Score(Score), MergeOffset(MergeOffset), MergeType(MergeType) {} 1640eae32dcSDimitry Andric 1650eae32dcSDimitry Andric double score() const { return Score; } 1660eae32dcSDimitry Andric 1670eae32dcSDimitry Andric size_t mergeOffset() const { return MergeOffset; } 1680eae32dcSDimitry Andric 169*06c3fb27SDimitry Andric MergeTypeT mergeType() const { return MergeType; } 170*06c3fb27SDimitry Andric 171*06c3fb27SDimitry Andric void setMergeType(MergeTypeT Ty) { MergeType = Ty; } 1720eae32dcSDimitry Andric 1730eae32dcSDimitry Andric // Returns 'true' iff Other is preferred over this. 174*06c3fb27SDimitry Andric bool operator<(const MergeGainT &Other) const { 1750eae32dcSDimitry Andric return (Other.Score > EPS && Other.Score > Score + EPS); 1760eae32dcSDimitry Andric } 1770eae32dcSDimitry Andric 1780eae32dcSDimitry Andric // Update the current gain if Other is preferred over this. 179*06c3fb27SDimitry Andric void updateIfLessThan(const MergeGainT &Other) { 1800eae32dcSDimitry Andric if (*this < Other) 1810eae32dcSDimitry Andric *this = Other; 1820eae32dcSDimitry Andric } 1830eae32dcSDimitry Andric 1840eae32dcSDimitry Andric private: 1850eae32dcSDimitry Andric double Score{-1.0}; 1860eae32dcSDimitry Andric size_t MergeOffset{0}; 187*06c3fb27SDimitry Andric MergeTypeT MergeType{MergeTypeT::X_Y}; 1880eae32dcSDimitry Andric }; 1890eae32dcSDimitry Andric 190*06c3fb27SDimitry Andric struct JumpT; 191*06c3fb27SDimitry Andric struct ChainT; 192*06c3fb27SDimitry Andric struct ChainEdge; 1930eae32dcSDimitry Andric 194*06c3fb27SDimitry Andric /// A node in the graph, typically corresponding to a basic block in the CFG or 195*06c3fb27SDimitry Andric /// a function in the call graph. 196*06c3fb27SDimitry Andric struct NodeT { 197*06c3fb27SDimitry Andric NodeT(const NodeT &) = delete; 198*06c3fb27SDimitry Andric NodeT(NodeT &&) = default; 199*06c3fb27SDimitry Andric NodeT &operator=(const NodeT &) = delete; 200*06c3fb27SDimitry Andric NodeT &operator=(NodeT &&) = default; 2010eae32dcSDimitry Andric 202*06c3fb27SDimitry Andric explicit NodeT(size_t Index, uint64_t Size, uint64_t EC) 203bdd1243dSDimitry Andric : Index(Index), Size(Size), ExecutionCount(EC) {} 204*06c3fb27SDimitry Andric 2050eae32dcSDimitry Andric bool isEntry() const { return Index == 0; } 206*06c3fb27SDimitry Andric 207*06c3fb27SDimitry Andric // The total execution count of outgoing jumps. 208*06c3fb27SDimitry Andric uint64_t outCount() const; 209*06c3fb27SDimitry Andric 210*06c3fb27SDimitry Andric // The total execution count of incoming jumps. 211*06c3fb27SDimitry Andric uint64_t inCount() const; 212*06c3fb27SDimitry Andric 213*06c3fb27SDimitry Andric // The original index of the node in graph. 214*06c3fb27SDimitry Andric size_t Index{0}; 215*06c3fb27SDimitry Andric // The index of the node in the current chain. 216*06c3fb27SDimitry Andric size_t CurIndex{0}; 217*06c3fb27SDimitry Andric // The size of the node in the binary. 218*06c3fb27SDimitry Andric uint64_t Size{0}; 219*06c3fb27SDimitry Andric // The execution count of the node in the profile data. 220*06c3fb27SDimitry Andric uint64_t ExecutionCount{0}; 221*06c3fb27SDimitry Andric // The current chain of the node. 222*06c3fb27SDimitry Andric ChainT *CurChain{nullptr}; 223*06c3fb27SDimitry Andric // The offset of the node in the current chain. 224*06c3fb27SDimitry Andric mutable uint64_t EstimatedAddr{0}; 225*06c3fb27SDimitry Andric // Forced successor of the node in the graph. 226*06c3fb27SDimitry Andric NodeT *ForcedSucc{nullptr}; 227*06c3fb27SDimitry Andric // Forced predecessor of the node in the graph. 228*06c3fb27SDimitry Andric NodeT *ForcedPred{nullptr}; 229*06c3fb27SDimitry Andric // Outgoing jumps from the node. 230*06c3fb27SDimitry Andric std::vector<JumpT *> OutJumps; 231*06c3fb27SDimitry Andric // Incoming jumps to the node. 232*06c3fb27SDimitry Andric std::vector<JumpT *> InJumps; 2330eae32dcSDimitry Andric }; 2340eae32dcSDimitry Andric 235*06c3fb27SDimitry Andric /// An arc in the graph, typically corresponding to a jump between two nodes. 236*06c3fb27SDimitry Andric struct JumpT { 237*06c3fb27SDimitry Andric JumpT(const JumpT &) = delete; 238*06c3fb27SDimitry Andric JumpT(JumpT &&) = default; 239*06c3fb27SDimitry Andric JumpT &operator=(const JumpT &) = delete; 240*06c3fb27SDimitry Andric JumpT &operator=(JumpT &&) = default; 2410eae32dcSDimitry Andric 242*06c3fb27SDimitry Andric explicit JumpT(NodeT *Source, NodeT *Target, uint64_t ExecutionCount) 243*06c3fb27SDimitry Andric : Source(Source), Target(Target), ExecutionCount(ExecutionCount) {} 244*06c3fb27SDimitry Andric 245*06c3fb27SDimitry Andric // Source node of the jump. 246*06c3fb27SDimitry Andric NodeT *Source; 247*06c3fb27SDimitry Andric // Target node of the jump. 248*06c3fb27SDimitry Andric NodeT *Target; 2490eae32dcSDimitry Andric // Execution count of the arc in the profile data. 2500eae32dcSDimitry Andric uint64_t ExecutionCount{0}; 251bdd1243dSDimitry Andric // Whether the jump corresponds to a conditional branch. 252bdd1243dSDimitry Andric bool IsConditional{false}; 253*06c3fb27SDimitry Andric // The offset of the jump from the source node. 254*06c3fb27SDimitry Andric uint64_t Offset{0}; 2550eae32dcSDimitry Andric }; 2560eae32dcSDimitry Andric 257*06c3fb27SDimitry Andric /// A chain (ordered sequence) of nodes in the graph. 258*06c3fb27SDimitry Andric struct ChainT { 259*06c3fb27SDimitry Andric ChainT(const ChainT &) = delete; 260*06c3fb27SDimitry Andric ChainT(ChainT &&) = default; 261*06c3fb27SDimitry Andric ChainT &operator=(const ChainT &) = delete; 262*06c3fb27SDimitry Andric ChainT &operator=(ChainT &&) = default; 2630eae32dcSDimitry Andric 264*06c3fb27SDimitry Andric explicit ChainT(uint64_t Id, NodeT *Node) 265*06c3fb27SDimitry Andric : Id(Id), ExecutionCount(Node->ExecutionCount), Size(Node->Size), 266*06c3fb27SDimitry Andric Nodes(1, Node) {} 2670eae32dcSDimitry Andric 268*06c3fb27SDimitry Andric size_t numBlocks() const { return Nodes.size(); } 2690eae32dcSDimitry Andric 270*06c3fb27SDimitry Andric double density() const { return static_cast<double>(ExecutionCount) / Size; } 271*06c3fb27SDimitry Andric 272*06c3fb27SDimitry Andric bool isEntry() const { return Nodes[0]->Index == 0; } 2730eae32dcSDimitry Andric 274bdd1243dSDimitry Andric bool isCold() const { 275*06c3fb27SDimitry Andric for (NodeT *Node : Nodes) { 276*06c3fb27SDimitry Andric if (Node->ExecutionCount > 0) 277bdd1243dSDimitry Andric return false; 278bdd1243dSDimitry Andric } 279bdd1243dSDimitry Andric return true; 280bdd1243dSDimitry Andric } 281bdd1243dSDimitry Andric 282*06c3fb27SDimitry Andric ChainEdge *getEdge(ChainT *Other) const { 2830eae32dcSDimitry Andric for (auto It : Edges) { 2840eae32dcSDimitry Andric if (It.first == Other) 2850eae32dcSDimitry Andric return It.second; 2860eae32dcSDimitry Andric } 2870eae32dcSDimitry Andric return nullptr; 2880eae32dcSDimitry Andric } 2890eae32dcSDimitry Andric 290*06c3fb27SDimitry Andric void removeEdge(ChainT *Other) { 2910eae32dcSDimitry Andric auto It = Edges.begin(); 2920eae32dcSDimitry Andric while (It != Edges.end()) { 2930eae32dcSDimitry Andric if (It->first == Other) { 2940eae32dcSDimitry Andric Edges.erase(It); 2950eae32dcSDimitry Andric return; 2960eae32dcSDimitry Andric } 2970eae32dcSDimitry Andric It++; 2980eae32dcSDimitry Andric } 2990eae32dcSDimitry Andric } 3000eae32dcSDimitry Andric 301*06c3fb27SDimitry Andric void addEdge(ChainT *Other, ChainEdge *Edge) { 3020eae32dcSDimitry Andric Edges.push_back(std::make_pair(Other, Edge)); 3030eae32dcSDimitry Andric } 3040eae32dcSDimitry Andric 305*06c3fb27SDimitry Andric void merge(ChainT *Other, const std::vector<NodeT *> &MergedBlocks) { 306*06c3fb27SDimitry Andric Nodes = MergedBlocks; 307*06c3fb27SDimitry Andric // Update the chain's data 308*06c3fb27SDimitry Andric ExecutionCount += Other->ExecutionCount; 309*06c3fb27SDimitry Andric Size += Other->Size; 310*06c3fb27SDimitry Andric Id = Nodes[0]->Index; 311*06c3fb27SDimitry Andric // Update the node's data 312*06c3fb27SDimitry Andric for (size_t Idx = 0; Idx < Nodes.size(); Idx++) { 313*06c3fb27SDimitry Andric Nodes[Idx]->CurChain = this; 314*06c3fb27SDimitry Andric Nodes[Idx]->CurIndex = Idx; 3150eae32dcSDimitry Andric } 3160eae32dcSDimitry Andric } 3170eae32dcSDimitry Andric 318*06c3fb27SDimitry Andric void mergeEdges(ChainT *Other); 3190eae32dcSDimitry Andric 3200eae32dcSDimitry Andric void clear() { 321*06c3fb27SDimitry Andric Nodes.clear(); 322*06c3fb27SDimitry Andric Nodes.shrink_to_fit(); 3230eae32dcSDimitry Andric Edges.clear(); 3240eae32dcSDimitry Andric Edges.shrink_to_fit(); 3250eae32dcSDimitry Andric } 3260eae32dcSDimitry Andric 3270eae32dcSDimitry Andric // Unique chain identifier. 3280eae32dcSDimitry Andric uint64_t Id; 3290eae32dcSDimitry Andric // Cached ext-tsp score for the chain. 330*06c3fb27SDimitry Andric double Score{0}; 331*06c3fb27SDimitry Andric // The total execution count of the chain. 332*06c3fb27SDimitry Andric uint64_t ExecutionCount{0}; 333*06c3fb27SDimitry Andric // The total size of the chain. 334*06c3fb27SDimitry Andric uint64_t Size{0}; 335*06c3fb27SDimitry Andric // Nodes of the chain. 336*06c3fb27SDimitry Andric std::vector<NodeT *> Nodes; 3370eae32dcSDimitry Andric // Adjacent chains and corresponding edges (lists of jumps). 338*06c3fb27SDimitry Andric std::vector<std::pair<ChainT *, ChainEdge *>> Edges; 3390eae32dcSDimitry Andric }; 3400eae32dcSDimitry Andric 341*06c3fb27SDimitry Andric /// An edge in the graph representing jumps between two chains. 342*06c3fb27SDimitry Andric /// When nodes are merged into chains, the edges are combined too so that 3430eae32dcSDimitry Andric /// there is always at most one edge between a pair of chains 344*06c3fb27SDimitry Andric struct ChainEdge { 3450eae32dcSDimitry Andric ChainEdge(const ChainEdge &) = delete; 3460eae32dcSDimitry Andric ChainEdge(ChainEdge &&) = default; 3470eae32dcSDimitry Andric ChainEdge &operator=(const ChainEdge &) = delete; 348*06c3fb27SDimitry Andric ChainEdge &operator=(ChainEdge &&) = delete; 3490eae32dcSDimitry Andric 350*06c3fb27SDimitry Andric explicit ChainEdge(JumpT *Jump) 3510eae32dcSDimitry Andric : SrcChain(Jump->Source->CurChain), DstChain(Jump->Target->CurChain), 3520eae32dcSDimitry Andric Jumps(1, Jump) {} 3530eae32dcSDimitry Andric 354*06c3fb27SDimitry Andric ChainT *srcChain() const { return SrcChain; } 3550eae32dcSDimitry Andric 356*06c3fb27SDimitry Andric ChainT *dstChain() const { return DstChain; } 3570eae32dcSDimitry Andric 358*06c3fb27SDimitry Andric bool isSelfEdge() const { return SrcChain == DstChain; } 359*06c3fb27SDimitry Andric 360*06c3fb27SDimitry Andric const std::vector<JumpT *> &jumps() const { return Jumps; } 361*06c3fb27SDimitry Andric 362*06c3fb27SDimitry Andric void appendJump(JumpT *Jump) { Jumps.push_back(Jump); } 3630eae32dcSDimitry Andric 3640eae32dcSDimitry Andric void moveJumps(ChainEdge *Other) { 3650eae32dcSDimitry Andric Jumps.insert(Jumps.end(), Other->Jumps.begin(), Other->Jumps.end()); 3660eae32dcSDimitry Andric Other->Jumps.clear(); 3670eae32dcSDimitry Andric Other->Jumps.shrink_to_fit(); 3680eae32dcSDimitry Andric } 3690eae32dcSDimitry Andric 370*06c3fb27SDimitry Andric void changeEndpoint(ChainT *From, ChainT *To) { 371*06c3fb27SDimitry Andric if (From == SrcChain) 372*06c3fb27SDimitry Andric SrcChain = To; 373*06c3fb27SDimitry Andric if (From == DstChain) 374*06c3fb27SDimitry Andric DstChain = To; 375*06c3fb27SDimitry Andric } 376*06c3fb27SDimitry Andric 377*06c3fb27SDimitry Andric bool hasCachedMergeGain(ChainT *Src, ChainT *Dst) const { 3780eae32dcSDimitry Andric return Src == SrcChain ? CacheValidForward : CacheValidBackward; 3790eae32dcSDimitry Andric } 3800eae32dcSDimitry Andric 381*06c3fb27SDimitry Andric MergeGainT getCachedMergeGain(ChainT *Src, ChainT *Dst) const { 3820eae32dcSDimitry Andric return Src == SrcChain ? CachedGainForward : CachedGainBackward; 3830eae32dcSDimitry Andric } 3840eae32dcSDimitry Andric 385*06c3fb27SDimitry Andric void setCachedMergeGain(ChainT *Src, ChainT *Dst, MergeGainT MergeGain) { 3860eae32dcSDimitry Andric if (Src == SrcChain) { 3870eae32dcSDimitry Andric CachedGainForward = MergeGain; 3880eae32dcSDimitry Andric CacheValidForward = true; 3890eae32dcSDimitry Andric } else { 3900eae32dcSDimitry Andric CachedGainBackward = MergeGain; 3910eae32dcSDimitry Andric CacheValidBackward = true; 3920eae32dcSDimitry Andric } 3930eae32dcSDimitry Andric } 3940eae32dcSDimitry Andric 3950eae32dcSDimitry Andric void invalidateCache() { 3960eae32dcSDimitry Andric CacheValidForward = false; 3970eae32dcSDimitry Andric CacheValidBackward = false; 3980eae32dcSDimitry Andric } 3990eae32dcSDimitry Andric 400*06c3fb27SDimitry Andric void setMergeGain(MergeGainT Gain) { CachedGain = Gain; } 401*06c3fb27SDimitry Andric 402*06c3fb27SDimitry Andric MergeGainT getMergeGain() const { return CachedGain; } 403*06c3fb27SDimitry Andric 404*06c3fb27SDimitry Andric double gain() const { return CachedGain.score(); } 405*06c3fb27SDimitry Andric 4060eae32dcSDimitry Andric private: 4070eae32dcSDimitry Andric // Source chain. 408*06c3fb27SDimitry Andric ChainT *SrcChain{nullptr}; 4090eae32dcSDimitry Andric // Destination chain. 410*06c3fb27SDimitry Andric ChainT *DstChain{nullptr}; 411*06c3fb27SDimitry Andric // Original jumps in the binary with corresponding execution counts. 412*06c3fb27SDimitry Andric std::vector<JumpT *> Jumps; 413*06c3fb27SDimitry Andric // Cached gain value for merging the pair of chains. 414*06c3fb27SDimitry Andric MergeGainT CachedGain; 415*06c3fb27SDimitry Andric 416*06c3fb27SDimitry Andric // Cached gain values for merging the pair of chains. Since the gain of 417*06c3fb27SDimitry Andric // merging (Src, Dst) and (Dst, Src) might be different, we store both values 418*06c3fb27SDimitry Andric // here and a flag indicating which of the options results in a higher gain. 419*06c3fb27SDimitry Andric // Cached gain values. 420*06c3fb27SDimitry Andric MergeGainT CachedGainForward; 421*06c3fb27SDimitry Andric MergeGainT CachedGainBackward; 4220eae32dcSDimitry Andric // Whether the cached value must be recomputed. 4230eae32dcSDimitry Andric bool CacheValidForward{false}; 4240eae32dcSDimitry Andric bool CacheValidBackward{false}; 4250eae32dcSDimitry Andric }; 4260eae32dcSDimitry Andric 427*06c3fb27SDimitry Andric uint64_t NodeT::outCount() const { 428*06c3fb27SDimitry Andric uint64_t Count = 0; 429*06c3fb27SDimitry Andric for (JumpT *Jump : OutJumps) { 430*06c3fb27SDimitry Andric Count += Jump->ExecutionCount; 431*06c3fb27SDimitry Andric } 432*06c3fb27SDimitry Andric return Count; 433*06c3fb27SDimitry Andric } 4340eae32dcSDimitry Andric 435*06c3fb27SDimitry Andric uint64_t NodeT::inCount() const { 436*06c3fb27SDimitry Andric uint64_t Count = 0; 437*06c3fb27SDimitry Andric for (JumpT *Jump : InJumps) { 438*06c3fb27SDimitry Andric Count += Jump->ExecutionCount; 439*06c3fb27SDimitry Andric } 440*06c3fb27SDimitry Andric return Count; 441*06c3fb27SDimitry Andric } 442*06c3fb27SDimitry Andric 443*06c3fb27SDimitry Andric void ChainT::mergeEdges(ChainT *Other) { 4440eae32dcSDimitry Andric // Update edges adjacent to chain Other 4450eae32dcSDimitry Andric for (auto EdgeIt : Other->Edges) { 446*06c3fb27SDimitry Andric ChainT *DstChain = EdgeIt.first; 447bdd1243dSDimitry Andric ChainEdge *DstEdge = EdgeIt.second; 448*06c3fb27SDimitry Andric ChainT *TargetChain = DstChain == Other ? this : DstChain; 449bdd1243dSDimitry Andric ChainEdge *CurEdge = getEdge(TargetChain); 4500eae32dcSDimitry Andric if (CurEdge == nullptr) { 4510eae32dcSDimitry Andric DstEdge->changeEndpoint(Other, this); 4520eae32dcSDimitry Andric this->addEdge(TargetChain, DstEdge); 4530eae32dcSDimitry Andric if (DstChain != this && DstChain != Other) { 4540eae32dcSDimitry Andric DstChain->addEdge(this, DstEdge); 4550eae32dcSDimitry Andric } 4560eae32dcSDimitry Andric } else { 4570eae32dcSDimitry Andric CurEdge->moveJumps(DstEdge); 4580eae32dcSDimitry Andric } 4590eae32dcSDimitry Andric // Cleanup leftover edge 4600eae32dcSDimitry Andric if (DstChain != Other) { 4610eae32dcSDimitry Andric DstChain->removeEdge(Other); 4620eae32dcSDimitry Andric } 4630eae32dcSDimitry Andric } 4640eae32dcSDimitry Andric } 4650eae32dcSDimitry Andric 466*06c3fb27SDimitry Andric using NodeIter = std::vector<NodeT *>::const_iterator; 4670eae32dcSDimitry Andric 468*06c3fb27SDimitry Andric /// A wrapper around three chains of nodes; it is used to avoid extra 4690eae32dcSDimitry Andric /// instantiation of the vectors. 470*06c3fb27SDimitry Andric struct MergedChain { 471*06c3fb27SDimitry Andric MergedChain(NodeIter Begin1, NodeIter End1, NodeIter Begin2 = NodeIter(), 472*06c3fb27SDimitry Andric NodeIter End2 = NodeIter(), NodeIter Begin3 = NodeIter(), 473*06c3fb27SDimitry Andric NodeIter End3 = NodeIter()) 4740eae32dcSDimitry Andric : Begin1(Begin1), End1(End1), Begin2(Begin2), End2(End2), Begin3(Begin3), 4750eae32dcSDimitry Andric End3(End3) {} 4760eae32dcSDimitry Andric 4770eae32dcSDimitry Andric template <typename F> void forEach(const F &Func) const { 4780eae32dcSDimitry Andric for (auto It = Begin1; It != End1; It++) 4790eae32dcSDimitry Andric Func(*It); 4800eae32dcSDimitry Andric for (auto It = Begin2; It != End2; It++) 4810eae32dcSDimitry Andric Func(*It); 4820eae32dcSDimitry Andric for (auto It = Begin3; It != End3; It++) 4830eae32dcSDimitry Andric Func(*It); 4840eae32dcSDimitry Andric } 4850eae32dcSDimitry Andric 486*06c3fb27SDimitry Andric std::vector<NodeT *> getNodes() const { 487*06c3fb27SDimitry Andric std::vector<NodeT *> Result; 4880eae32dcSDimitry Andric Result.reserve(std::distance(Begin1, End1) + std::distance(Begin2, End2) + 4890eae32dcSDimitry Andric std::distance(Begin3, End3)); 4900eae32dcSDimitry Andric Result.insert(Result.end(), Begin1, End1); 4910eae32dcSDimitry Andric Result.insert(Result.end(), Begin2, End2); 4920eae32dcSDimitry Andric Result.insert(Result.end(), Begin3, End3); 4930eae32dcSDimitry Andric return Result; 4940eae32dcSDimitry Andric } 4950eae32dcSDimitry Andric 496*06c3fb27SDimitry Andric const NodeT *getFirstNode() const { return *Begin1; } 4970eae32dcSDimitry Andric 4980eae32dcSDimitry Andric private: 499*06c3fb27SDimitry Andric NodeIter Begin1; 500*06c3fb27SDimitry Andric NodeIter End1; 501*06c3fb27SDimitry Andric NodeIter Begin2; 502*06c3fb27SDimitry Andric NodeIter End2; 503*06c3fb27SDimitry Andric NodeIter Begin3; 504*06c3fb27SDimitry Andric NodeIter End3; 5050eae32dcSDimitry Andric }; 5060eae32dcSDimitry Andric 507*06c3fb27SDimitry Andric /// Merge two chains of nodes respecting a given 'type' and 'offset'. 508*06c3fb27SDimitry Andric /// 509*06c3fb27SDimitry Andric /// If MergeType == 0, then the result is a concatenation of two chains. 510*06c3fb27SDimitry Andric /// Otherwise, the first chain is cut into two sub-chains at the offset, 511*06c3fb27SDimitry Andric /// and merged using all possible ways of concatenating three chains. 512*06c3fb27SDimitry Andric MergedChain mergeNodes(const std::vector<NodeT *> &X, 513*06c3fb27SDimitry Andric const std::vector<NodeT *> &Y, size_t MergeOffset, 514*06c3fb27SDimitry Andric MergeTypeT MergeType) { 515*06c3fb27SDimitry Andric // Split the first chain, X, into X1 and X2 516*06c3fb27SDimitry Andric NodeIter BeginX1 = X.begin(); 517*06c3fb27SDimitry Andric NodeIter EndX1 = X.begin() + MergeOffset; 518*06c3fb27SDimitry Andric NodeIter BeginX2 = X.begin() + MergeOffset; 519*06c3fb27SDimitry Andric NodeIter EndX2 = X.end(); 520*06c3fb27SDimitry Andric NodeIter BeginY = Y.begin(); 521*06c3fb27SDimitry Andric NodeIter EndY = Y.end(); 522*06c3fb27SDimitry Andric 523*06c3fb27SDimitry Andric // Construct a new chain from the three existing ones 524*06c3fb27SDimitry Andric switch (MergeType) { 525*06c3fb27SDimitry Andric case MergeTypeT::X_Y: 526*06c3fb27SDimitry Andric return MergedChain(BeginX1, EndX2, BeginY, EndY); 527*06c3fb27SDimitry Andric case MergeTypeT::Y_X: 528*06c3fb27SDimitry Andric return MergedChain(BeginY, EndY, BeginX1, EndX2); 529*06c3fb27SDimitry Andric case MergeTypeT::X1_Y_X2: 530*06c3fb27SDimitry Andric return MergedChain(BeginX1, EndX1, BeginY, EndY, BeginX2, EndX2); 531*06c3fb27SDimitry Andric case MergeTypeT::Y_X2_X1: 532*06c3fb27SDimitry Andric return MergedChain(BeginY, EndY, BeginX2, EndX2, BeginX1, EndX1); 533*06c3fb27SDimitry Andric case MergeTypeT::X2_X1_Y: 534*06c3fb27SDimitry Andric return MergedChain(BeginX2, EndX2, BeginX1, EndX1, BeginY, EndY); 535*06c3fb27SDimitry Andric } 536*06c3fb27SDimitry Andric llvm_unreachable("unexpected chain merge type"); 537*06c3fb27SDimitry Andric } 538*06c3fb27SDimitry Andric 5390eae32dcSDimitry Andric /// The implementation of the ExtTSP algorithm. 5400eae32dcSDimitry Andric class ExtTSPImpl { 5410eae32dcSDimitry Andric public: 542*06c3fb27SDimitry Andric ExtTSPImpl(const std::vector<uint64_t> &NodeSizes, 5430eae32dcSDimitry Andric const std::vector<uint64_t> &NodeCounts, 544*06c3fb27SDimitry Andric const std::vector<EdgeCountT> &EdgeCounts) 545*06c3fb27SDimitry Andric : NumNodes(NodeSizes.size()) { 5460eae32dcSDimitry Andric initialize(NodeSizes, NodeCounts, EdgeCounts); 5470eae32dcSDimitry Andric } 5480eae32dcSDimitry Andric 549*06c3fb27SDimitry Andric /// Run the algorithm and return an optimized ordering of nodes. 5500eae32dcSDimitry Andric void run(std::vector<uint64_t> &Result) { 551*06c3fb27SDimitry Andric // Pass 1: Merge nodes with their mutually forced successors 5520eae32dcSDimitry Andric mergeForcedPairs(); 5530eae32dcSDimitry Andric 5540eae32dcSDimitry Andric // Pass 2: Merge pairs of chains while improving the ExtTSP objective 5550eae32dcSDimitry Andric mergeChainPairs(); 5560eae32dcSDimitry Andric 557*06c3fb27SDimitry Andric // Pass 3: Merge cold nodes to reduce code size 5580eae32dcSDimitry Andric mergeColdChains(); 5590eae32dcSDimitry Andric 560*06c3fb27SDimitry Andric // Collect nodes from all chains 5610eae32dcSDimitry Andric concatChains(Result); 5620eae32dcSDimitry Andric } 5630eae32dcSDimitry Andric 5640eae32dcSDimitry Andric private: 5650eae32dcSDimitry Andric /// Initialize the algorithm's data structures. 5660eae32dcSDimitry Andric void initialize(const std::vector<uint64_t> &NodeSizes, 5670eae32dcSDimitry Andric const std::vector<uint64_t> &NodeCounts, 568*06c3fb27SDimitry Andric const std::vector<EdgeCountT> &EdgeCounts) { 569*06c3fb27SDimitry Andric // Initialize nodes 570*06c3fb27SDimitry Andric AllNodes.reserve(NumNodes); 571*06c3fb27SDimitry Andric for (uint64_t Idx = 0; Idx < NumNodes; Idx++) { 572*06c3fb27SDimitry Andric uint64_t Size = std::max<uint64_t>(NodeSizes[Idx], 1ULL); 573*06c3fb27SDimitry Andric uint64_t ExecutionCount = NodeCounts[Idx]; 574*06c3fb27SDimitry Andric // The execution count of the entry node is set to at least one 575*06c3fb27SDimitry Andric if (Idx == 0 && ExecutionCount == 0) 5760eae32dcSDimitry Andric ExecutionCount = 1; 577*06c3fb27SDimitry Andric AllNodes.emplace_back(Idx, Size, ExecutionCount); 5780eae32dcSDimitry Andric } 5790eae32dcSDimitry Andric 580*06c3fb27SDimitry Andric // Initialize jumps between nodes 581bdd1243dSDimitry Andric SuccNodes.resize(NumNodes); 582bdd1243dSDimitry Andric PredNodes.resize(NumNodes); 583bdd1243dSDimitry Andric std::vector<uint64_t> OutDegree(NumNodes, 0); 5840eae32dcSDimitry Andric AllJumps.reserve(EdgeCounts.size()); 5850eae32dcSDimitry Andric for (auto It : EdgeCounts) { 586*06c3fb27SDimitry Andric uint64_t Pred = It.first.first; 587*06c3fb27SDimitry Andric uint64_t Succ = It.first.second; 588bdd1243dSDimitry Andric OutDegree[Pred]++; 5890eae32dcSDimitry Andric // Ignore self-edges 5900eae32dcSDimitry Andric if (Pred == Succ) 5910eae32dcSDimitry Andric continue; 5920eae32dcSDimitry Andric 5930eae32dcSDimitry Andric SuccNodes[Pred].push_back(Succ); 5940eae32dcSDimitry Andric PredNodes[Succ].push_back(Pred); 595*06c3fb27SDimitry Andric uint64_t ExecutionCount = It.second; 5960eae32dcSDimitry Andric if (ExecutionCount > 0) { 597*06c3fb27SDimitry Andric NodeT &PredNode = AllNodes[Pred]; 598*06c3fb27SDimitry Andric NodeT &SuccNode = AllNodes[Succ]; 599*06c3fb27SDimitry Andric AllJumps.emplace_back(&PredNode, &SuccNode, ExecutionCount); 600*06c3fb27SDimitry Andric SuccNode.InJumps.push_back(&AllJumps.back()); 601*06c3fb27SDimitry Andric PredNode.OutJumps.push_back(&AllJumps.back()); 6020eae32dcSDimitry Andric } 6030eae32dcSDimitry Andric } 604*06c3fb27SDimitry Andric for (JumpT &Jump : AllJumps) { 605bdd1243dSDimitry Andric assert(OutDegree[Jump.Source->Index] > 0); 606bdd1243dSDimitry Andric Jump.IsConditional = OutDegree[Jump.Source->Index] > 1; 607bdd1243dSDimitry Andric } 6080eae32dcSDimitry Andric 6090eae32dcSDimitry Andric // Initialize chains 6100eae32dcSDimitry Andric AllChains.reserve(NumNodes); 6110eae32dcSDimitry Andric HotChains.reserve(NumNodes); 612*06c3fb27SDimitry Andric for (NodeT &Node : AllNodes) { 613*06c3fb27SDimitry Andric AllChains.emplace_back(Node.Index, &Node); 614*06c3fb27SDimitry Andric Node.CurChain = &AllChains.back(); 615*06c3fb27SDimitry Andric if (Node.ExecutionCount > 0) { 6160eae32dcSDimitry Andric HotChains.push_back(&AllChains.back()); 6170eae32dcSDimitry Andric } 6180eae32dcSDimitry Andric } 6190eae32dcSDimitry Andric 6200eae32dcSDimitry Andric // Initialize chain edges 6210eae32dcSDimitry Andric AllEdges.reserve(AllJumps.size()); 622*06c3fb27SDimitry Andric for (NodeT &PredNode : AllNodes) { 623*06c3fb27SDimitry Andric for (JumpT *Jump : PredNode.OutJumps) { 624*06c3fb27SDimitry Andric NodeT *SuccNode = Jump->Target; 625*06c3fb27SDimitry Andric ChainEdge *CurEdge = PredNode.CurChain->getEdge(SuccNode->CurChain); 6260eae32dcSDimitry Andric // this edge is already present in the graph 6270eae32dcSDimitry Andric if (CurEdge != nullptr) { 628*06c3fb27SDimitry Andric assert(SuccNode->CurChain->getEdge(PredNode.CurChain) != nullptr); 6290eae32dcSDimitry Andric CurEdge->appendJump(Jump); 6300eae32dcSDimitry Andric continue; 6310eae32dcSDimitry Andric } 6320eae32dcSDimitry Andric // this is a new edge 6330eae32dcSDimitry Andric AllEdges.emplace_back(Jump); 634*06c3fb27SDimitry Andric PredNode.CurChain->addEdge(SuccNode->CurChain, &AllEdges.back()); 635*06c3fb27SDimitry Andric SuccNode->CurChain->addEdge(PredNode.CurChain, &AllEdges.back()); 6360eae32dcSDimitry Andric } 6370eae32dcSDimitry Andric } 6380eae32dcSDimitry Andric } 6390eae32dcSDimitry Andric 640*06c3fb27SDimitry Andric /// For a pair of nodes, A and B, node B is the forced successor of A, 6410eae32dcSDimitry Andric /// if (i) all jumps (based on profile) from A goes to B and (ii) all jumps 642*06c3fb27SDimitry Andric /// to B are from A. Such nodes should be adjacent in the optimal ordering; 643*06c3fb27SDimitry Andric /// the method finds and merges such pairs of nodes. 6440eae32dcSDimitry Andric void mergeForcedPairs() { 6450eae32dcSDimitry Andric // Find fallthroughs based on edge weights 646*06c3fb27SDimitry Andric for (NodeT &Node : AllNodes) { 647*06c3fb27SDimitry Andric if (SuccNodes[Node.Index].size() == 1 && 648*06c3fb27SDimitry Andric PredNodes[SuccNodes[Node.Index][0]].size() == 1 && 649*06c3fb27SDimitry Andric SuccNodes[Node.Index][0] != 0) { 650*06c3fb27SDimitry Andric size_t SuccIndex = SuccNodes[Node.Index][0]; 651*06c3fb27SDimitry Andric Node.ForcedSucc = &AllNodes[SuccIndex]; 652*06c3fb27SDimitry Andric AllNodes[SuccIndex].ForcedPred = &Node; 6530eae32dcSDimitry Andric } 6540eae32dcSDimitry Andric } 6550eae32dcSDimitry Andric 6560eae32dcSDimitry Andric // There might be 'cycles' in the forced dependencies, since profile 6570eae32dcSDimitry Andric // data isn't 100% accurate. Typically this is observed in loops, when the 6580eae32dcSDimitry Andric // loop edges are the hottest successors for the basic blocks of the loop. 659*06c3fb27SDimitry Andric // Break the cycles by choosing the node with the smallest index as the 6600eae32dcSDimitry Andric // head. This helps to keep the original order of the loops, which likely 6610eae32dcSDimitry Andric // have already been rotated in the optimized manner. 662*06c3fb27SDimitry Andric for (NodeT &Node : AllNodes) { 663*06c3fb27SDimitry Andric if (Node.ForcedSucc == nullptr || Node.ForcedPred == nullptr) 6640eae32dcSDimitry Andric continue; 6650eae32dcSDimitry Andric 666*06c3fb27SDimitry Andric NodeT *SuccNode = Node.ForcedSucc; 667*06c3fb27SDimitry Andric while (SuccNode != nullptr && SuccNode != &Node) { 668*06c3fb27SDimitry Andric SuccNode = SuccNode->ForcedSucc; 6690eae32dcSDimitry Andric } 670*06c3fb27SDimitry Andric if (SuccNode == nullptr) 6710eae32dcSDimitry Andric continue; 6720eae32dcSDimitry Andric // Break the cycle 673*06c3fb27SDimitry Andric AllNodes[Node.ForcedPred->Index].ForcedSucc = nullptr; 674*06c3fb27SDimitry Andric Node.ForcedPred = nullptr; 6750eae32dcSDimitry Andric } 6760eae32dcSDimitry Andric 677*06c3fb27SDimitry Andric // Merge nodes with their fallthrough successors 678*06c3fb27SDimitry Andric for (NodeT &Node : AllNodes) { 679*06c3fb27SDimitry Andric if (Node.ForcedPred == nullptr && Node.ForcedSucc != nullptr) { 680*06c3fb27SDimitry Andric const NodeT *CurBlock = &Node; 6810eae32dcSDimitry Andric while (CurBlock->ForcedSucc != nullptr) { 682*06c3fb27SDimitry Andric const NodeT *NextBlock = CurBlock->ForcedSucc; 683*06c3fb27SDimitry Andric mergeChains(Node.CurChain, NextBlock->CurChain, 0, MergeTypeT::X_Y); 6840eae32dcSDimitry Andric CurBlock = NextBlock; 6850eae32dcSDimitry Andric } 6860eae32dcSDimitry Andric } 6870eae32dcSDimitry Andric } 6880eae32dcSDimitry Andric } 6890eae32dcSDimitry Andric 6900eae32dcSDimitry Andric /// Merge pairs of chains while improving the ExtTSP objective. 6910eae32dcSDimitry Andric void mergeChainPairs() { 6920eae32dcSDimitry Andric /// Deterministically compare pairs of chains 693*06c3fb27SDimitry Andric auto compareChainPairs = [](const ChainT *A1, const ChainT *B1, 694*06c3fb27SDimitry Andric const ChainT *A2, const ChainT *B2) { 6950eae32dcSDimitry Andric if (A1 != A2) 696*06c3fb27SDimitry Andric return A1->Id < A2->Id; 697*06c3fb27SDimitry Andric return B1->Id < B2->Id; 6980eae32dcSDimitry Andric }; 6990eae32dcSDimitry Andric 7000eae32dcSDimitry Andric while (HotChains.size() > 1) { 701*06c3fb27SDimitry Andric ChainT *BestChainPred = nullptr; 702*06c3fb27SDimitry Andric ChainT *BestChainSucc = nullptr; 703*06c3fb27SDimitry Andric MergeGainT BestGain; 7040eae32dcSDimitry Andric // Iterate over all pairs of chains 705*06c3fb27SDimitry Andric for (ChainT *ChainPred : HotChains) { 7060eae32dcSDimitry Andric // Get candidates for merging with the current chain 707*06c3fb27SDimitry Andric for (auto EdgeIt : ChainPred->Edges) { 708*06c3fb27SDimitry Andric ChainT *ChainSucc = EdgeIt.first; 709*06c3fb27SDimitry Andric ChainEdge *Edge = EdgeIt.second; 7100eae32dcSDimitry Andric // Ignore loop edges 7110eae32dcSDimitry Andric if (ChainPred == ChainSucc) 7120eae32dcSDimitry Andric continue; 7130eae32dcSDimitry Andric 71481ad6265SDimitry Andric // Stop early if the combined chain violates the maximum allowed size 71581ad6265SDimitry Andric if (ChainPred->numBlocks() + ChainSucc->numBlocks() >= MaxChainSize) 71681ad6265SDimitry Andric continue; 71781ad6265SDimitry Andric 7180eae32dcSDimitry Andric // Compute the gain of merging the two chains 719*06c3fb27SDimitry Andric MergeGainT CurGain = getBestMergeGain(ChainPred, ChainSucc, Edge); 7200eae32dcSDimitry Andric if (CurGain.score() <= EPS) 7210eae32dcSDimitry Andric continue; 7220eae32dcSDimitry Andric 7230eae32dcSDimitry Andric if (BestGain < CurGain || 7240eae32dcSDimitry Andric (std::abs(CurGain.score() - BestGain.score()) < EPS && 7250eae32dcSDimitry Andric compareChainPairs(ChainPred, ChainSucc, BestChainPred, 7260eae32dcSDimitry Andric BestChainSucc))) { 7270eae32dcSDimitry Andric BestGain = CurGain; 7280eae32dcSDimitry Andric BestChainPred = ChainPred; 7290eae32dcSDimitry Andric BestChainSucc = ChainSucc; 7300eae32dcSDimitry Andric } 7310eae32dcSDimitry Andric } 7320eae32dcSDimitry Andric } 7330eae32dcSDimitry Andric 7340eae32dcSDimitry Andric // Stop merging when there is no improvement 7350eae32dcSDimitry Andric if (BestGain.score() <= EPS) 7360eae32dcSDimitry Andric break; 7370eae32dcSDimitry Andric 7380eae32dcSDimitry Andric // Merge the best pair of chains 7390eae32dcSDimitry Andric mergeChains(BestChainPred, BestChainSucc, BestGain.mergeOffset(), 7400eae32dcSDimitry Andric BestGain.mergeType()); 7410eae32dcSDimitry Andric } 7420eae32dcSDimitry Andric } 7430eae32dcSDimitry Andric 744*06c3fb27SDimitry Andric /// Merge remaining nodes into chains w/o taking jump counts into 745*06c3fb27SDimitry Andric /// consideration. This allows to maintain the original node order in the 746*06c3fb27SDimitry Andric /// absence of profile data 7470eae32dcSDimitry Andric void mergeColdChains() { 7480eae32dcSDimitry Andric for (size_t SrcBB = 0; SrcBB < NumNodes; SrcBB++) { 749bdd1243dSDimitry Andric // Iterating in reverse order to make sure original fallthrough jumps are 750bdd1243dSDimitry Andric // merged first; this might be beneficial for code size. 7510eae32dcSDimitry Andric size_t NumSuccs = SuccNodes[SrcBB].size(); 7520eae32dcSDimitry Andric for (size_t Idx = 0; Idx < NumSuccs; Idx++) { 753*06c3fb27SDimitry Andric size_t DstBB = SuccNodes[SrcBB][NumSuccs - Idx - 1]; 754*06c3fb27SDimitry Andric ChainT *SrcChain = AllNodes[SrcBB].CurChain; 755*06c3fb27SDimitry Andric ChainT *DstChain = AllNodes[DstBB].CurChain; 7560eae32dcSDimitry Andric if (SrcChain != DstChain && !DstChain->isEntry() && 757*06c3fb27SDimitry Andric SrcChain->Nodes.back()->Index == SrcBB && 758*06c3fb27SDimitry Andric DstChain->Nodes.front()->Index == DstBB && 759bdd1243dSDimitry Andric SrcChain->isCold() == DstChain->isCold()) { 760*06c3fb27SDimitry Andric mergeChains(SrcChain, DstChain, 0, MergeTypeT::X_Y); 7610eae32dcSDimitry Andric } 7620eae32dcSDimitry Andric } 7630eae32dcSDimitry Andric } 7640eae32dcSDimitry Andric } 7650eae32dcSDimitry Andric 766*06c3fb27SDimitry Andric /// Compute the Ext-TSP score for a given node order and a list of jumps. 7670eae32dcSDimitry Andric double extTSPScore(const MergedChain &MergedBlocks, 768*06c3fb27SDimitry Andric const std::vector<JumpT *> &Jumps) const { 7690eae32dcSDimitry Andric if (Jumps.empty()) 7700eae32dcSDimitry Andric return 0.0; 7710eae32dcSDimitry Andric uint64_t CurAddr = 0; 772*06c3fb27SDimitry Andric MergedBlocks.forEach([&](const NodeT *Node) { 773*06c3fb27SDimitry Andric Node->EstimatedAddr = CurAddr; 774*06c3fb27SDimitry Andric CurAddr += Node->Size; 7750eae32dcSDimitry Andric }); 7760eae32dcSDimitry Andric 7770eae32dcSDimitry Andric double Score = 0; 778*06c3fb27SDimitry Andric for (JumpT *Jump : Jumps) { 779*06c3fb27SDimitry Andric const NodeT *SrcBlock = Jump->Source; 780*06c3fb27SDimitry Andric const NodeT *DstBlock = Jump->Target; 7810eae32dcSDimitry Andric Score += ::extTSPScore(SrcBlock->EstimatedAddr, SrcBlock->Size, 782bdd1243dSDimitry Andric DstBlock->EstimatedAddr, Jump->ExecutionCount, 783bdd1243dSDimitry Andric Jump->IsConditional); 7840eae32dcSDimitry Andric } 7850eae32dcSDimitry Andric return Score; 7860eae32dcSDimitry Andric } 7870eae32dcSDimitry Andric 7880eae32dcSDimitry Andric /// Compute the gain of merging two chains. 7890eae32dcSDimitry Andric /// 7900eae32dcSDimitry Andric /// The function considers all possible ways of merging two chains and 7910eae32dcSDimitry Andric /// computes the one having the largest increase in ExtTSP objective. The 7920eae32dcSDimitry Andric /// result is a pair with the first element being the gain and the second 7930eae32dcSDimitry Andric /// element being the corresponding merging type. 794*06c3fb27SDimitry Andric MergeGainT getBestMergeGain(ChainT *ChainPred, ChainT *ChainSucc, 7950eae32dcSDimitry Andric ChainEdge *Edge) const { 7960eae32dcSDimitry Andric if (Edge->hasCachedMergeGain(ChainPred, ChainSucc)) { 7970eae32dcSDimitry Andric return Edge->getCachedMergeGain(ChainPred, ChainSucc); 7980eae32dcSDimitry Andric } 7990eae32dcSDimitry Andric 8000eae32dcSDimitry Andric // Precompute jumps between ChainPred and ChainSucc 8010eae32dcSDimitry Andric auto Jumps = Edge->jumps(); 802bdd1243dSDimitry Andric ChainEdge *EdgePP = ChainPred->getEdge(ChainPred); 8030eae32dcSDimitry Andric if (EdgePP != nullptr) { 8040eae32dcSDimitry Andric Jumps.insert(Jumps.end(), EdgePP->jumps().begin(), EdgePP->jumps().end()); 8050eae32dcSDimitry Andric } 8060eae32dcSDimitry Andric assert(!Jumps.empty() && "trying to merge chains w/o jumps"); 8070eae32dcSDimitry Andric 8080eae32dcSDimitry Andric // The object holds the best currently chosen gain of merging the two chains 809*06c3fb27SDimitry Andric MergeGainT Gain = MergeGainT(); 8100eae32dcSDimitry Andric 8110eae32dcSDimitry Andric /// Given a merge offset and a list of merge types, try to merge two chains 8120eae32dcSDimitry Andric /// and update Gain with a better alternative 8130eae32dcSDimitry Andric auto tryChainMerging = [&](size_t Offset, 814*06c3fb27SDimitry Andric const std::vector<MergeTypeT> &MergeTypes) { 8150eae32dcSDimitry Andric // Skip merging corresponding to concatenation w/o splitting 816*06c3fb27SDimitry Andric if (Offset == 0 || Offset == ChainPred->Nodes.size()) 8170eae32dcSDimitry Andric return; 8180eae32dcSDimitry Andric // Skip merging if it breaks Forced successors 819*06c3fb27SDimitry Andric NodeT *Node = ChainPred->Nodes[Offset - 1]; 820*06c3fb27SDimitry Andric if (Node->ForcedSucc != nullptr) 8210eae32dcSDimitry Andric return; 8220eae32dcSDimitry Andric // Apply the merge, compute the corresponding gain, and update the best 8230eae32dcSDimitry Andric // value, if the merge is beneficial 824*06c3fb27SDimitry Andric for (const MergeTypeT &MergeType : MergeTypes) { 8250eae32dcSDimitry Andric Gain.updateIfLessThan( 8260eae32dcSDimitry Andric computeMergeGain(ChainPred, ChainSucc, Jumps, Offset, MergeType)); 8270eae32dcSDimitry Andric } 8280eae32dcSDimitry Andric }; 8290eae32dcSDimitry Andric 8300eae32dcSDimitry Andric // Try to concatenate two chains w/o splitting 8310eae32dcSDimitry Andric Gain.updateIfLessThan( 832*06c3fb27SDimitry Andric computeMergeGain(ChainPred, ChainSucc, Jumps, 0, MergeTypeT::X_Y)); 8330eae32dcSDimitry Andric 8340eae32dcSDimitry Andric if (EnableChainSplitAlongJumps) { 835*06c3fb27SDimitry Andric // Attach (a part of) ChainPred before the first node of ChainSucc 836*06c3fb27SDimitry Andric for (JumpT *Jump : ChainSucc->Nodes.front()->InJumps) { 837*06c3fb27SDimitry Andric const NodeT *SrcBlock = Jump->Source; 8380eae32dcSDimitry Andric if (SrcBlock->CurChain != ChainPred) 8390eae32dcSDimitry Andric continue; 8400eae32dcSDimitry Andric size_t Offset = SrcBlock->CurIndex + 1; 841*06c3fb27SDimitry Andric tryChainMerging(Offset, {MergeTypeT::X1_Y_X2, MergeTypeT::X2_X1_Y}); 8420eae32dcSDimitry Andric } 8430eae32dcSDimitry Andric 844*06c3fb27SDimitry Andric // Attach (a part of) ChainPred after the last node of ChainSucc 845*06c3fb27SDimitry Andric for (JumpT *Jump : ChainSucc->Nodes.back()->OutJumps) { 846*06c3fb27SDimitry Andric const NodeT *DstBlock = Jump->Source; 8470eae32dcSDimitry Andric if (DstBlock->CurChain != ChainPred) 8480eae32dcSDimitry Andric continue; 8490eae32dcSDimitry Andric size_t Offset = DstBlock->CurIndex; 850*06c3fb27SDimitry Andric tryChainMerging(Offset, {MergeTypeT::X1_Y_X2, MergeTypeT::Y_X2_X1}); 8510eae32dcSDimitry Andric } 8520eae32dcSDimitry Andric } 8530eae32dcSDimitry Andric 8540eae32dcSDimitry Andric // Try to break ChainPred in various ways and concatenate with ChainSucc 855*06c3fb27SDimitry Andric if (ChainPred->Nodes.size() <= ChainSplitThreshold) { 856*06c3fb27SDimitry Andric for (size_t Offset = 1; Offset < ChainPred->Nodes.size(); Offset++) { 8570eae32dcSDimitry Andric // Try to split the chain in different ways. In practice, applying 8580eae32dcSDimitry Andric // X2_Y_X1 merging is almost never provides benefits; thus, we exclude 8590eae32dcSDimitry Andric // it from consideration to reduce the search space 860*06c3fb27SDimitry Andric tryChainMerging(Offset, {MergeTypeT::X1_Y_X2, MergeTypeT::Y_X2_X1, 861*06c3fb27SDimitry Andric MergeTypeT::X2_X1_Y}); 8620eae32dcSDimitry Andric } 8630eae32dcSDimitry Andric } 8640eae32dcSDimitry Andric Edge->setCachedMergeGain(ChainPred, ChainSucc, Gain); 8650eae32dcSDimitry Andric return Gain; 8660eae32dcSDimitry Andric } 8670eae32dcSDimitry Andric 8680eae32dcSDimitry Andric /// Compute the score gain of merging two chains, respecting a given 8690eae32dcSDimitry Andric /// merge 'type' and 'offset'. 8700eae32dcSDimitry Andric /// 8710eae32dcSDimitry Andric /// The two chains are not modified in the method. 872*06c3fb27SDimitry Andric MergeGainT computeMergeGain(const ChainT *ChainPred, const ChainT *ChainSucc, 873*06c3fb27SDimitry Andric const std::vector<JumpT *> &Jumps, 874*06c3fb27SDimitry Andric size_t MergeOffset, MergeTypeT MergeType) const { 875*06c3fb27SDimitry Andric auto MergedBlocks = 876*06c3fb27SDimitry Andric mergeNodes(ChainPred->Nodes, ChainSucc->Nodes, MergeOffset, MergeType); 8770eae32dcSDimitry Andric 878*06c3fb27SDimitry Andric // Do not allow a merge that does not preserve the original entry point 8790eae32dcSDimitry Andric if ((ChainPred->isEntry() || ChainSucc->isEntry()) && 880*06c3fb27SDimitry Andric !MergedBlocks.getFirstNode()->isEntry()) 881*06c3fb27SDimitry Andric return MergeGainT(); 8820eae32dcSDimitry Andric 8830eae32dcSDimitry Andric // The gain for the new chain 884*06c3fb27SDimitry Andric auto NewGainScore = extTSPScore(MergedBlocks, Jumps) - ChainPred->Score; 885*06c3fb27SDimitry Andric return MergeGainT(NewGainScore, MergeOffset, MergeType); 8860eae32dcSDimitry Andric } 8870eae32dcSDimitry Andric 8880eae32dcSDimitry Andric /// Merge chain From into chain Into, update the list of active chains, 8890eae32dcSDimitry Andric /// adjacency information, and the corresponding cached values. 890*06c3fb27SDimitry Andric void mergeChains(ChainT *Into, ChainT *From, size_t MergeOffset, 891*06c3fb27SDimitry Andric MergeTypeT MergeType) { 8920eae32dcSDimitry Andric assert(Into != From && "a chain cannot be merged with itself"); 8930eae32dcSDimitry Andric 894*06c3fb27SDimitry Andric // Merge the nodes 895*06c3fb27SDimitry Andric MergedChain MergedNodes = 896*06c3fb27SDimitry Andric mergeNodes(Into->Nodes, From->Nodes, MergeOffset, MergeType); 897*06c3fb27SDimitry Andric Into->merge(From, MergedNodes.getNodes()); 898*06c3fb27SDimitry Andric 899*06c3fb27SDimitry Andric // Merge the edges 9000eae32dcSDimitry Andric Into->mergeEdges(From); 9010eae32dcSDimitry Andric From->clear(); 9020eae32dcSDimitry Andric 9030eae32dcSDimitry Andric // Update cached ext-tsp score for the new chain 904bdd1243dSDimitry Andric ChainEdge *SelfEdge = Into->getEdge(Into); 9050eae32dcSDimitry Andric if (SelfEdge != nullptr) { 906*06c3fb27SDimitry Andric MergedNodes = MergedChain(Into->Nodes.begin(), Into->Nodes.end()); 907*06c3fb27SDimitry Andric Into->Score = extTSPScore(MergedNodes, SelfEdge->jumps()); 9080eae32dcSDimitry Andric } 9090eae32dcSDimitry Andric 910*06c3fb27SDimitry Andric // Remove the chain from the list of active chains 911bdd1243dSDimitry Andric llvm::erase_value(HotChains, From); 9120eae32dcSDimitry Andric 9130eae32dcSDimitry Andric // Invalidate caches 914*06c3fb27SDimitry Andric for (auto EdgeIt : Into->Edges) 915*06c3fb27SDimitry Andric EdgeIt.second->invalidateCache(); 9160eae32dcSDimitry Andric } 9170eae32dcSDimitry Andric 918*06c3fb27SDimitry Andric /// Concatenate all chains into the final order. 9190eae32dcSDimitry Andric void concatChains(std::vector<uint64_t> &Order) { 920*06c3fb27SDimitry Andric // Collect chains and calculate density stats for their sorting 921*06c3fb27SDimitry Andric std::vector<const ChainT *> SortedChains; 922*06c3fb27SDimitry Andric DenseMap<const ChainT *, double> ChainDensity; 923*06c3fb27SDimitry Andric for (ChainT &Chain : AllChains) { 924*06c3fb27SDimitry Andric if (!Chain.Nodes.empty()) { 9250eae32dcSDimitry Andric SortedChains.push_back(&Chain); 926*06c3fb27SDimitry Andric // Using doubles to avoid overflow of ExecutionCounts 9270eae32dcSDimitry Andric double Size = 0; 9280eae32dcSDimitry Andric double ExecutionCount = 0; 929*06c3fb27SDimitry Andric for (NodeT *Node : Chain.Nodes) { 930*06c3fb27SDimitry Andric Size += static_cast<double>(Node->Size); 931*06c3fb27SDimitry Andric ExecutionCount += static_cast<double>(Node->ExecutionCount); 9320eae32dcSDimitry Andric } 9330eae32dcSDimitry Andric assert(Size > 0 && "a chain of zero size"); 9340eae32dcSDimitry Andric ChainDensity[&Chain] = ExecutionCount / Size; 9350eae32dcSDimitry Andric } 9360eae32dcSDimitry Andric } 9370eae32dcSDimitry Andric 9380eae32dcSDimitry Andric // Sorting chains by density in the decreasing order 9390eae32dcSDimitry Andric std::stable_sort(SortedChains.begin(), SortedChains.end(), 940*06c3fb27SDimitry Andric [&](const ChainT *L, const ChainT *R) { 941*06c3fb27SDimitry Andric // Make sure the original entry point is at the 9420eae32dcSDimitry Andric // beginning of the order 943*06c3fb27SDimitry Andric if (L->isEntry() != R->isEntry()) 944*06c3fb27SDimitry Andric return L->isEntry(); 9450eae32dcSDimitry Andric 946*06c3fb27SDimitry Andric const double DL = ChainDensity[L]; 947*06c3fb27SDimitry Andric const double DR = ChainDensity[R]; 9480eae32dcSDimitry Andric // Compare by density and break ties by chain identifiers 949*06c3fb27SDimitry Andric return (DL != DR) ? (DL > DR) : (L->Id < R->Id); 9500eae32dcSDimitry Andric }); 9510eae32dcSDimitry Andric 952*06c3fb27SDimitry Andric // Collect the nodes in the order specified by their chains 9530eae32dcSDimitry Andric Order.reserve(NumNodes); 954*06c3fb27SDimitry Andric for (const ChainT *Chain : SortedChains) { 955*06c3fb27SDimitry Andric for (NodeT *Node : Chain->Nodes) { 956*06c3fb27SDimitry Andric Order.push_back(Node->Index); 9570eae32dcSDimitry Andric } 9580eae32dcSDimitry Andric } 9590eae32dcSDimitry Andric } 9600eae32dcSDimitry Andric 9610eae32dcSDimitry Andric private: 9620eae32dcSDimitry Andric /// The number of nodes in the graph. 9630eae32dcSDimitry Andric const size_t NumNodes; 9640eae32dcSDimitry Andric 9650eae32dcSDimitry Andric /// Successors of each node. 9660eae32dcSDimitry Andric std::vector<std::vector<uint64_t>> SuccNodes; 9670eae32dcSDimitry Andric 9680eae32dcSDimitry Andric /// Predecessors of each node. 9690eae32dcSDimitry Andric std::vector<std::vector<uint64_t>> PredNodes; 9700eae32dcSDimitry Andric 971*06c3fb27SDimitry Andric /// All nodes (basic blocks) in the graph. 972*06c3fb27SDimitry Andric std::vector<NodeT> AllNodes; 9730eae32dcSDimitry Andric 974*06c3fb27SDimitry Andric /// All jumps between the nodes. 975*06c3fb27SDimitry Andric std::vector<JumpT> AllJumps; 9760eae32dcSDimitry Andric 977*06c3fb27SDimitry Andric /// All chains of nodes. 978*06c3fb27SDimitry Andric std::vector<ChainT> AllChains; 9790eae32dcSDimitry Andric 980*06c3fb27SDimitry Andric /// All edges between the chains. 9810eae32dcSDimitry Andric std::vector<ChainEdge> AllEdges; 9820eae32dcSDimitry Andric 9830eae32dcSDimitry Andric /// Active chains. The vector gets updated at runtime when chains are merged. 984*06c3fb27SDimitry Andric std::vector<ChainT *> HotChains; 9850eae32dcSDimitry Andric }; 9860eae32dcSDimitry Andric 9870eae32dcSDimitry Andric } // end of anonymous namespace 9880eae32dcSDimitry Andric 989*06c3fb27SDimitry Andric std::vector<uint64_t> 990*06c3fb27SDimitry Andric llvm::applyExtTspLayout(const std::vector<uint64_t> &NodeSizes, 9910eae32dcSDimitry Andric const std::vector<uint64_t> &NodeCounts, 992*06c3fb27SDimitry Andric const std::vector<EdgeCountT> &EdgeCounts) { 993*06c3fb27SDimitry Andric // Verify correctness of the input data 9940eae32dcSDimitry Andric assert(NodeCounts.size() == NodeSizes.size() && "Incorrect input"); 995*06c3fb27SDimitry Andric assert(NodeSizes.size() > 2 && "Incorrect input"); 9960eae32dcSDimitry Andric 997*06c3fb27SDimitry Andric // Apply the reordering algorithm 998*06c3fb27SDimitry Andric ExtTSPImpl Alg(NodeSizes, NodeCounts, EdgeCounts); 9990eae32dcSDimitry Andric std::vector<uint64_t> Result; 10000eae32dcSDimitry Andric Alg.run(Result); 10010eae32dcSDimitry Andric 1002*06c3fb27SDimitry Andric // Verify correctness of the output 10030eae32dcSDimitry Andric assert(Result.front() == 0 && "Original entry point is not preserved"); 1004*06c3fb27SDimitry Andric assert(Result.size() == NodeSizes.size() && "Incorrect size of layout"); 10050eae32dcSDimitry Andric return Result; 10060eae32dcSDimitry Andric } 10070eae32dcSDimitry Andric 1008*06c3fb27SDimitry Andric double llvm::calcExtTspScore(const std::vector<uint64_t> &Order, 1009*06c3fb27SDimitry Andric const std::vector<uint64_t> &NodeSizes, 10100eae32dcSDimitry Andric const std::vector<uint64_t> &NodeCounts, 1011*06c3fb27SDimitry Andric const std::vector<EdgeCountT> &EdgeCounts) { 10120eae32dcSDimitry Andric // Estimate addresses of the blocks in memory 1013bdd1243dSDimitry Andric std::vector<uint64_t> Addr(NodeSizes.size(), 0); 10140eae32dcSDimitry Andric for (size_t Idx = 1; Idx < Order.size(); Idx++) { 10150eae32dcSDimitry Andric Addr[Order[Idx]] = Addr[Order[Idx - 1]] + NodeSizes[Order[Idx - 1]]; 10160eae32dcSDimitry Andric } 1017bdd1243dSDimitry Andric std::vector<uint64_t> OutDegree(NodeSizes.size(), 0); 1018bdd1243dSDimitry Andric for (auto It : EdgeCounts) { 1019*06c3fb27SDimitry Andric uint64_t Pred = It.first.first; 1020bdd1243dSDimitry Andric OutDegree[Pred]++; 1021bdd1243dSDimitry Andric } 10220eae32dcSDimitry Andric 10230eae32dcSDimitry Andric // Increase the score for each jump 10240eae32dcSDimitry Andric double Score = 0; 10250eae32dcSDimitry Andric for (auto It : EdgeCounts) { 1026*06c3fb27SDimitry Andric uint64_t Pred = It.first.first; 1027*06c3fb27SDimitry Andric uint64_t Succ = It.first.second; 10280eae32dcSDimitry Andric uint64_t Count = It.second; 1029bdd1243dSDimitry Andric bool IsConditional = OutDegree[Pred] > 1; 1030bdd1243dSDimitry Andric Score += ::extTSPScore(Addr[Pred], NodeSizes[Pred], Addr[Succ], Count, 1031bdd1243dSDimitry Andric IsConditional); 10320eae32dcSDimitry Andric } 10330eae32dcSDimitry Andric return Score; 10340eae32dcSDimitry Andric } 10350eae32dcSDimitry Andric 1036*06c3fb27SDimitry Andric double llvm::calcExtTspScore(const std::vector<uint64_t> &NodeSizes, 10370eae32dcSDimitry Andric const std::vector<uint64_t> &NodeCounts, 1038*06c3fb27SDimitry Andric const std::vector<EdgeCountT> &EdgeCounts) { 1039bdd1243dSDimitry Andric std::vector<uint64_t> Order(NodeSizes.size()); 10400eae32dcSDimitry Andric for (size_t Idx = 0; Idx < NodeSizes.size(); Idx++) { 10410eae32dcSDimitry Andric Order[Idx] = Idx; 10420eae32dcSDimitry Andric } 10430eae32dcSDimitry Andric return calcExtTspScore(Order, NodeSizes, NodeCounts, EdgeCounts); 10440eae32dcSDimitry Andric } 1045